Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_sseu.c')
 drivers/gpu/drm/i915/gt/intel_sseu.c | 541
 1 file changed, 343 insertions(+), 198 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index bdf09051b8a0..656a499b2706 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -3,8 +3,14 @@
  * Copyright © 2019 Intel Corporation
  */
 
+#include <linux/string_helpers.h>
+
+#include <drm/drm_print.h>
+
 #include "i915_drv.h"
-#include "intel_lrc_reg.h"
+#include "i915_perf_types.h"
+#include "intel_engine_regs.h"
+#include "intel_gt_regs.h"
 #include "intel_sseu.h"
 
 void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
@@ -13,11 +19,6 @@ void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
 	sseu->max_slices = max_slices;
 	sseu->max_subslices = max_subslices;
 	sseu->max_eus_per_subslice = max_eus_per_subslice;
-
-	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
-	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
-	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
-	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
 }
 
 unsigned int
@@ -25,136 +26,236 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
 {
 	unsigned int i, total = 0;
 
-	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
-		total += hweight8(sseu->subslice_mask[i]);
+	if (sseu->has_xehp_dss)
+		return bitmap_weight(sseu->subslice_mask.xehp,
+				     XEHP_BITMAP_BITS(sseu->subslice_mask));
+
+	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask.hsw); i++)
+		total += hweight8(sseu->subslice_mask.hsw[i]);
 
 	return total;
 }
 
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
+unsigned int
+intel_sseu_get_hsw_subslices(const struct sseu_dev_info *sseu, u8 slice)
 {
-	int i, offset = slice * sseu->ss_stride;
-	u32 mask = 0;
-
-	GEM_BUG_ON(slice >= sseu->max_slices);
-
-	for (i = 0; i < sseu->ss_stride; i++)
-		mask |= (u32)sseu->subslice_mask[offset + i] <<
-			i * BITS_PER_BYTE;
+	WARN_ON(sseu->has_xehp_dss);
+	if (WARN_ON(slice >= sseu->max_slices))
+		return 0;
 
-	return mask;
+	return sseu->subslice_mask.hsw[slice];
 }
 
-void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
-			      u8 *subslice_mask, u32 ss_mask)
+static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
+			int subslice)
 {
-	int offset = slice * sseu->ss_stride;
-
-	memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
+	if (sseu->has_xehp_dss) {
+		WARN_ON(slice > 0);
+		return sseu->eu_mask.xehp[subslice];
+	} else {
+		return sseu->eu_mask.hsw[slice][subslice];
+	}
 }
 
-unsigned int
-intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
+static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
+			 u16 eu_mask)
 {
-	return hweight32(intel_sseu_get_subslices(sseu, slice));
+	GEM_WARN_ON(eu_mask && __fls(eu_mask) >= sseu->max_eus_per_subslice);
+	if (sseu->has_xehp_dss) {
+		GEM_WARN_ON(slice > 0);
+		sseu->eu_mask.xehp[subslice] = eu_mask;
+	} else {
+		sseu->eu_mask.hsw[slice][subslice] = eu_mask;
+	}
 }
 
-static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
-		       int subslice)
+static u16 compute_eu_total(const struct sseu_dev_info *sseu)
 {
-	int slice_stride = sseu->max_subslices * sseu->eu_stride;
+	int s, ss, total = 0;
+
+	for (s = 0; s < sseu->max_slices; s++)
+		for (ss = 0; ss < sseu->max_subslices; ss++)
+			if (sseu->has_xehp_dss)
+				total += hweight16(sseu->eu_mask.xehp[ss]);
+			else
+				total += hweight16(sseu->eu_mask.hsw[s][ss]);
 
-	return slice * slice_stride + subslice * sseu->eu_stride;
+	return total;
 }
 
-static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
-			int subslice)
+/**
+ * intel_sseu_copy_eumask_to_user - Copy EU mask into a userspace buffer
+ * @to: Pointer to userspace buffer to copy to
+ * @sseu: SSEU structure containing EU mask to copy
+ *
+ * Copies the EU mask to a userspace buffer in the format expected by
+ * the query ioctl's topology queries.
+ *
+ * Returns the result of the copy_to_user() operation.
+ */
+int intel_sseu_copy_eumask_to_user(void __user *to,
+				   const struct sseu_dev_info *sseu)
 {
-	int i, offset = sseu_eu_idx(sseu, slice, subslice);
-	u16 eu_mask = 0;
+	u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE] = {};
+	int eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
+	int len = sseu->max_slices * sseu->max_subslices * eu_stride;
+	int s, ss, i;
 
-	for (i = 0; i < sseu->eu_stride; i++)
-		eu_mask |=
-			((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);
+	for (s = 0; s < sseu->max_slices; s++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			int uapi_offset =
+				s * sseu->max_subslices * eu_stride +
+				ss * eu_stride;
+			u16 mask = sseu_get_eus(sseu, s, ss);
+
+			for (i = 0; i < eu_stride; i++)
+				eu_mask[uapi_offset + i] =
+					(mask >> (BITS_PER_BYTE * i)) & 0xff;
+		}
+	}
 
-	return eu_mask;
+	return copy_to_user(to, eu_mask, len);
 }
 
-static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
-			 u16 eu_mask)
+/**
+ * intel_sseu_copy_ssmask_to_user - Copy subslice mask into a userspace buffer
+ * @to: Pointer to userspace buffer to copy to
+ * @sseu: SSEU structure containing subslice mask to copy
+ *
+ * Copies the subslice mask to a userspace buffer in the format expected by
+ * the query ioctl's topology queries.
+ *
+ * Returns the result of the copy_to_user() operation.
+ */
+int intel_sseu_copy_ssmask_to_user(void __user *to,
+				   const struct sseu_dev_info *sseu)
 {
-	int i, offset = sseu_eu_idx(sseu, slice, subslice);
+	u8 ss_mask[GEN_SS_MASK_SIZE] = {};
+	int ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
+	int len = sseu->max_slices * ss_stride;
+	int s, ss, i;
 
-	for (i = 0; i < sseu->eu_stride; i++)
-		sseu->eu_mask[offset + i] =
-			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
-}
+	for (s = 0; s < sseu->max_slices; s++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			i = s * ss_stride * BITS_PER_BYTE + ss;
 
-static u16 compute_eu_total(const struct sseu_dev_info *sseu)
-{
-	u16 i, total = 0;
+			if (!intel_sseu_has_subslice(sseu, s, ss))
+				continue;
 
-	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
-		total += hweight8(sseu->eu_mask[i]);
+			ss_mask[i / BITS_PER_BYTE] |= BIT(i % BITS_PER_BYTE);
+		}
+	}
 
-	return total;
+	return copy_to_user(to, ss_mask, len);
 }
 
-static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
+static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
+				    u32 ss_en, u16 eu_en)
 {
-	u32 ss_mask;
+	u32 valid_ss_mask = GENMASK(sseu->max_subslices - 1, 0);
+	int ss;
+
+	sseu->slice_mask |= BIT(0);
+	sseu->subslice_mask.hsw[0] = ss_en & valid_ss_mask;
 
-	ss_mask = ss_en >> (s * sseu->max_subslices);
-	ss_mask &= GENMASK(sseu->max_subslices - 1, 0);
+	for (ss = 0; ss < sseu->max_subslices; ss++)
+		if (intel_sseu_has_subslice(sseu, 0, ss))
+			sseu_set_eus(sseu, 0, ss, eu_en);
 
-	return ss_mask;
+	sseu->eu_per_subslice = hweight16(eu_en);
+	sseu->eu_total = compute_eu_total(sseu);
 }
 
-static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
-				    u32 g_ss_en, u32 c_ss_en, u16 eu_en)
+static void xehp_compute_sseu_info(struct sseu_dev_info *sseu,
+				   u16 eu_en)
 {
-	int s, ss;
+	int ss;
 
-	/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
-	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
-		   sizeof(g_ss_en) * BITS_PER_BYTE);
+	sseu->slice_mask |= BIT(0);
 
-	for (s = 0; s < sseu->max_slices; s++) {
-		if ((s_en & BIT(s)) == 0)
-			continue;
+	bitmap_or(sseu->subslice_mask.xehp,
+		  sseu->compute_subslice_mask.xehp,
+		  sseu->geometry_subslice_mask.xehp,
+		  XEHP_BITMAP_BITS(sseu->subslice_mask));
 
-		sseu->slice_mask |= BIT(s);
-
-		/*
-		 * XeHP introduces the concept of compute vs geometry DSS. To
-		 * reduce variation between GENs around subslice usage, store a
-		 * mask for both the geometry and compute enabled masks since
-		 * userspace will need to be able to query these masks
-		 * independently.  Also compute a total enabled subslice count
-		 * for the purposes of selecting subslices to use in a
-		 * particular GEM context.
-		 */
-		intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
-					 get_ss_stride_mask(sseu, s, c_ss_en));
-		intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
-					 get_ss_stride_mask(sseu, s, g_ss_en));
-		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-					 get_ss_stride_mask(sseu, s,
-							    g_ss_en | c_ss_en));
+	for (ss = 0; ss < sseu->max_subslices; ss++)
+		if (intel_sseu_has_subslice(sseu, 0, ss))
+			sseu_set_eus(sseu, 0, ss, eu_en);
 
-		for (ss = 0; ss < sseu->max_subslices; ss++)
-			if (intel_sseu_has_subslice(sseu, s, ss))
-				sseu_set_eus(sseu, s, ss, eu_en);
-	}
 	sseu->eu_per_subslice = hweight16(eu_en);
 	sseu->eu_total = compute_eu_total(sseu);
 }
 
+static void
+xehp_load_dss_mask(struct intel_uncore *uncore,
+		   intel_sseu_ss_mask_t *ssmask,
+		   int numregs,
+		   ...)
+{
+	va_list argp;
+	u32 fuse_val[I915_MAX_SS_FUSE_REGS] = {};
+	int i;
+
+	if (WARN_ON(numregs > I915_MAX_SS_FUSE_REGS))
+		numregs = I915_MAX_SS_FUSE_REGS;
+
+	va_start(argp, numregs);
+	for (i = 0; i < numregs; i++)
+		fuse_val[i] = intel_uncore_read(uncore, va_arg(argp, i915_reg_t));
+	va_end(argp);
+
+	bitmap_from_arr32(ssmask->xehp, fuse_val, numregs * 32);
+}
+
+static void xehp_sseu_info_init(struct intel_gt *gt)
+{
+	struct sseu_dev_info *sseu = &gt->info.sseu;
+	struct intel_uncore *uncore = gt->uncore;
+	u16 eu_en = 0;
+	u8 eu_en_fuse;
+	int num_compute_regs, num_geometry_regs;
+	int eu;
+
+	num_geometry_regs = 1;
+	num_compute_regs = 1;
+
+	/*
+	 * The concept of slice has been removed in Xe_HP.  To be compatible
+	 * with prior generations, assume a single slice across the entire
+	 * device. Then calculate out the DSS for each workload type within
+	 * that software slice.
+	 */
+	intel_sseu_set_info(sseu, 1,
+			    32 * max(num_geometry_regs, num_compute_regs),
+			    HAS_ONE_EU_PER_FUSE_BIT(gt->i915) ? 8 : 16);
+	sseu->has_xehp_dss = 1;
+
+	xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask,
+			   num_geometry_regs,
+			   GEN12_GT_GEOMETRY_DSS_ENABLE);
+	xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask,
+			   num_compute_regs,
+			   GEN12_GT_COMPUTE_DSS_ENABLE,
+			   XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);
+
+	eu_en_fuse = REG_FIELD_GET(XEHP_EU_ENA_MASK,
+				   intel_uncore_read(uncore, XEHP_EU_ENABLE));
+
+	if (HAS_ONE_EU_PER_FUSE_BIT(gt->i915))
+		eu_en = eu_en_fuse;
+	else
+		for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
+			if (eu_en_fuse & BIT(eu))
+				eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
+
+	xehp_compute_sseu_info(sseu, eu_en);
+}
+
 static void gen12_sseu_info_init(struct intel_gt *gt)
 {
 	struct sseu_dev_info *sseu = &gt->info.sseu;
 	struct intel_uncore *uncore = gt->uncore;
-	u32 g_dss_en, c_dss_en = 0;
+	u32 g_dss_en;
 	u16 eu_en = 0;
 	u8 eu_en_fuse;
 	u8 s_en;
@@ -164,43 +265,28 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
 	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
 	 * Instead of splitting these, provide userspace with an array
 	 * of DSS to more closely represent the hardware resource.
-	 *
-	 * In addition, the concept of slice has been removed in Xe_HP.
-	 * To be compatible with prior generations, assume a single slice
-	 * across the entire device. Then calculate out the DSS for each
-	 * workload type within that software slice.
 	 */
-	if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
-		intel_sseu_set_info(sseu, 1, 32, 16);
-	else
-		intel_sseu_set_info(sseu, 1, 6, 16);
+	intel_sseu_set_info(sseu, 1, 6, 16);
 
 	/*
-	 * As mentioned above, Xe_HP does not have the concept of a slice.
-	 * Enable one for software backwards compatibility.
+	 * Although gen12 architecture supported multiple slices, TGL, RKL,
+	 * DG1, and ADL only had a single slice.
 	 */
-	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-		s_en = 0x1;
-	else
-		s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
-		       GEN11_GT_S_ENA_MASK;
+	s_en = REG_FIELD_GET(GEN11_GT_S_ENA_MASK,
			     intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE));
+	drm_WARN_ON(&gt->i915->drm, s_en != 0x1);
 
 	g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
-	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-		c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);
 
 	/* one bit per pair of EUs */
-	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-		eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
-	else
-		eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
-			       GEN11_EU_DIS_MASK);
+	eu_en_fuse = ~REG_FIELD_GET(GEN11_EU_DIS_MASK,
+				    intel_uncore_read(uncore, GEN11_EU_DISABLE));
 
 	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
 		if (eu_en_fuse & BIT(eu))
 			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
 
-	gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);
+	gen11_compute_sseu_info(sseu, g_dss_en, eu_en);
 
 	/* TGL only supports slice-level power gating */
 	sseu->has_slice_pg = 1;
@@ -214,19 +300,25 @@ static void gen11_sseu_info_init(struct intel_gt *gt)
 	u8 eu_en;
 	u8 s_en;
 
-	if (IS_JSL_EHL(gt->i915))
+	if (IS_JASPERLAKE(gt->i915) || IS_ELKHARTLAKE(gt->i915))
 		intel_sseu_set_info(sseu, 1, 4, 8);
 	else
 		intel_sseu_set_info(sseu, 1, 8, 8);
 
-	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
-	       GEN11_GT_S_ENA_MASK;
+	/*
+	 * Although gen11 architecture supported multiple slices, ICL and
+	 * EHL/JSL only had a single slice in practice.
+	 */
+	s_en = REG_FIELD_GET(GEN11_GT_S_ENA_MASK,
+			     intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE));
+	drm_WARN_ON(&gt->i915->drm, s_en != 0x1);
+
 	ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);
 
-	eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
-		  GEN11_EU_DIS_MASK);
+	eu_en = ~REG_FIELD_GET(GEN11_EU_DIS_MASK,
			       intel_uncore_read(uncore, GEN11_EU_DISABLE));
 
-	gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en);
+	gen11_compute_sseu_info(sseu, ss_en, eu_en);
 
 	/* ICL has no power gating restrictions. */
 	sseu->has_slice_pg = 1;
@@ -238,7 +330,6 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
 {
 	struct sseu_dev_info *sseu = &gt->info.sseu;
 	u32 fuse;
-	u8 subslice_mask = 0;
 
 	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);
 
@@ -247,28 +338,22 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
 
 	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
 		u8 disabled_mask =
-			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
-			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
-			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
-			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
+			REG_FIELD_GET(CHV_FGT_EU_DIS_SS0_R0_MASK, fuse) |
+			REG_FIELD_GET(CHV_FGT_EU_DIS_SS0_R1_MASK, fuse) <<
+			hweight32(CHV_FGT_EU_DIS_SS0_R0_MASK);
 
-		subslice_mask |= BIT(0);
-		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
+		sseu->subslice_mask.hsw[0] |= BIT(0);
+		sseu_set_eus(sseu, 0, 0, ~disabled_mask & 0xFF);
 	}
 
 	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
 		u8 disabled_mask =
-			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
-			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
-			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
-			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
+			REG_FIELD_GET(CHV_FGT_EU_DIS_SS1_R0_MASK, fuse) |
+			REG_FIELD_GET(CHV_FGT_EU_DIS_SS1_R1_MASK, fuse) <<
+			hweight32(CHV_FGT_EU_DIS_SS1_R0_MASK);
 
-		subslice_mask |= BIT(1);
-		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
+		sseu->subslice_mask.hsw[0] |= BIT(1);
+		sseu_set_eus(sseu, 0, 1, ~disabled_mask & 0xFF);
 	}
 
-	intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);
-
 	sseu->eu_total = compute_eu_total(sseu);
 
 	/*
@@ -292,7 +377,6 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
 static void gen9_sseu_info_init(struct intel_gt *gt)
 {
 	struct drm_i915_private *i915 = gt->i915;
-	struct intel_device_info *info = mkwrite_device_info(i915);
 	struct sseu_dev_info *sseu = &gt->info.sseu;
 	struct intel_uncore *uncore = gt->uncore;
 	u32 fuse2, eu_disable, subslice_mask;
@@ -300,7 +384,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
 	int s, ss;
 
 	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
-	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
+	sseu->slice_mask = REG_FIELD_GET(GEN8_F2_S_ENA_MASK, fuse2);
 
 	/* BXT has a single slice and at most 3 subslices. */
 	intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
@@ -311,8 +395,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
 	 * to each of the enabled slices.
 	 */
 	subslice_mask = (1 << sseu->max_subslices) - 1;
-	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
-			   GEN9_F2_SS_DIS_SHIFT);
+	subslice_mask &= ~REG_FIELD_GET(GEN9_F2_SS_DIS_MASK, fuse2);
 
 	/*
 	 * Iterate through enabled slices and subslices to
@@ -323,8 +406,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
 			/* skip disabled slice */
 			continue;
 
-		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-					 subslice_mask);
+		sseu->subslice_mask.hsw[s] = subslice_mask;
 
 		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
@@ -337,7 +419,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
 
 			eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;
 
-			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
+			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask & eu_mask);
 
 			eu_per_ss = sseu->max_eus_per_subslice -
 				hweight8(eu_disabled_mask);
@@ -381,11 +463,11 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
 	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
 
 	if (IS_GEN9_LP(i915)) {
-#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
-		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
+#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask.hsw[0] & BIT(ss)))
+		RUNTIME_INFO(i915)->has_pooled_eu = hweight8(sseu->subslice_mask.hsw[0]) == 3;
 
 		sseu->min_eu_in_pool = 0;
-		if (info->has_pooled_eu) {
+		if (HAS_POOLED_EU(i915)) {
 			if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
 				sseu->min_eu_in_pool = 3;
 			else if (IS_SS_DISABLED(1))
@@ -406,7 +488,7 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
 	u32 eu_disable0, eu_disable1, eu_disable2;
 
 	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
-	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
+	sseu->slice_mask = REG_FIELD_GET(GEN8_F2_S_ENA_MASK, fuse2);
 	intel_sseu_set_info(sseu, 3, 3, 8);
 
 	/*
@@ -414,18 +496,18 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
 	 * to each of the enabled slices.
 	 */
 	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
-	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
-			   GEN8_F2_SS_DIS_SHIFT);
+	subslice_mask &= ~REG_FIELD_GET(GEN8_F2_SS_DIS_MASK, fuse2);
 
 	eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
 	eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
 	eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
-	eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
-	eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
-		((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
-		 (32 - GEN8_EU_DIS0_S1_SHIFT));
-	eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
-		((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
-		 (32 - GEN8_EU_DIS1_S2_SHIFT));
+	eu_disable[0] =
+		REG_FIELD_GET(GEN8_EU_DIS0_S0_MASK, eu_disable0);
+	eu_disable[1] =
+		REG_FIELD_GET(GEN8_EU_DIS0_S1_MASK, eu_disable0) |
+		REG_FIELD_GET(GEN8_EU_DIS1_S1_MASK, eu_disable1) <<
+		hweight32(GEN8_EU_DIS0_S1_MASK);
+	eu_disable[2] =
+		REG_FIELD_GET(GEN8_EU_DIS1_S2_MASK, eu_disable1) |
+		REG_FIELD_GET(GEN8_EU_DIS2_S2_MASK, eu_disable2) <<
+		hweight32(GEN8_EU_DIS1_S2_MASK);
 
 	/*
 	 * Iterate through enabled slices and subslices to
@@ -436,8 +518,7 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
 			/* skip disabled slice */
 			continue;
 
-		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-					 subslice_mask);
+		sseu->subslice_mask.hsw[s] = subslice_mask;
 
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			u8 eu_disabled_mask;
@@ -450,7 +531,7 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
 			eu_disabled_mask =
 				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
 
-			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
+			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask & 0xFF);
 
 			n_disabled = hweight8(eu_disabled_mask);
 
@@ -534,8 +615,7 @@ static void hsw_sseu_info_init(struct intel_gt *gt)
 			sseu->eu_per_subslice);
 
 	for (s = 0; s < sseu->max_slices; s++) {
-		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-					 subslice_mask);
+		sseu->subslice_mask.hsw[s] = subslice_mask;
 
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			sseu_set_eus(sseu, s, ss,
@@ -555,18 +635,20 @@ void intel_sseu_info_init(struct intel_gt *gt)
 {
 	struct drm_i915_private *i915 = gt->i915;
 
-	if (IS_HASWELL(i915))
-		hsw_sseu_info_init(gt);
-	else if (IS_CHERRYVIEW(i915))
-		cherryview_sseu_info_init(gt);
-	else if (IS_BROADWELL(i915))
-		bdw_sseu_info_init(gt);
-	else if (GRAPHICS_VER(i915) == 9)
-		gen9_sseu_info_init(gt);
-	else if (GRAPHICS_VER(i915) == 11)
-		gen11_sseu_info_init(gt);
+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
+		xehp_sseu_info_init(gt);
 	else if (GRAPHICS_VER(i915) >= 12)
 		gen12_sseu_info_init(gt);
+	else if (GRAPHICS_VER(i915) >= 11)
+		gen11_sseu_info_init(gt);
+	else if (GRAPHICS_VER(i915) >= 9)
+		gen9_sseu_info_init(gt);
+	else if (IS_BROADWELL(i915))
+		bdw_sseu_info_init(gt);
+	else if (IS_CHERRYVIEW(i915))
+		cherryview_sseu_info_init(gt);
+	else if (IS_HASWELL(i915))
+		hsw_sseu_info_init(gt);
 }
 
 u32 intel_sseu_make_rpcs(struct intel_gt *gt,
@@ -589,8 +671,8 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
 	 * If i915/perf is active, we want a stable powergating configuration
 	 * on the system. Use the configuration pinned by i915/perf.
 	 */
-	if (i915->perf.exclusive_stream)
-		req_sseu = &i915->perf.sseu;
+	if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)
+		req_sseu = &gt->perf.sseu;
 
 	slices = hweight8(req_sseu->slice_mask);
 	subslices = hweight8(req_sseu->subslice_mask);
@@ -603,7 +685,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
 	 * According to documentation software must consider the configuration
 	 * as 2x4x8 and hardware will translate this to 1x8x8.
 	 *
-	 * Furthemore, even though SScount is three bits, maximum documented
+	 * Furthermore, even though SScount is three bits, maximum documented
	 * value for it is four. From this some rules/restrictions follow:
 	 *
 	 * 1.
@@ -622,7 +704,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
 	 */
 	if (GRAPHICS_VER(i915) == 11 &&
 	    slices == 1 &&
-	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
+	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask.hsw[0]) / 2)) {
 		GEM_BUG_ON(subslices & 1);
 
 		subslice_pg = false;
@@ -688,37 +770,49 @@ void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
 {
 	int s;
 
-	drm_printf(p, "slice total: %u, mask=%04x\n",
-		   hweight8(sseu->slice_mask), sseu->slice_mask);
-	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
-	for (s = 0; s < sseu->max_slices; s++) {
-		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
-			   s, intel_sseu_subslices_per_slice(sseu, s),
-			   intel_sseu_get_subslices(sseu, s));
+	if (sseu->has_xehp_dss) {
+		drm_printf(p, "subslice total: %u\n",
+			   intel_sseu_subslice_total(sseu));
+		drm_printf(p, "geometry dss mask=%*pb\n",
+			   XEHP_BITMAP_BITS(sseu->geometry_subslice_mask),
+			   sseu->geometry_subslice_mask.xehp);
+		drm_printf(p, "compute dss mask=%*pb\n",
+			   XEHP_BITMAP_BITS(sseu->compute_subslice_mask),
+			   sseu->compute_subslice_mask.xehp);
+	} else {
+		drm_printf(p, "slice total: %u, mask=%04x\n",
+			   hweight8(sseu->slice_mask), sseu->slice_mask);
+		drm_printf(p, "subslice total: %u\n",
+			   intel_sseu_subslice_total(sseu));
+
+		for (s = 0; s < sseu->max_slices; s++) {
+			u8 ss_mask = sseu->subslice_mask.hsw[s];
+
+			drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
+				   s, hweight8(ss_mask), ss_mask);
+		}
 	}
+
 	drm_printf(p, "EU total: %u\n", sseu->eu_total);
 	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
 	drm_printf(p, "has slice power gating: %s\n",
-		   yesno(sseu->has_slice_pg));
+		   str_yes_no(sseu->has_slice_pg));
 	drm_printf(p, "has subslice power gating: %s\n",
-		   yesno(sseu->has_subslice_pg));
-	drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg));
+		   str_yes_no(sseu->has_subslice_pg));
+	drm_printf(p, "has EU power gating: %s\n",
+		   str_yes_no(sseu->has_eu_pg));
 }
 
-void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
-			       struct drm_printer *p)
+static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu,
+				    struct drm_printer *p)
 {
 	int s, ss;
 
-	if (sseu->max_slices == 0) {
-		drm_printf(p, "Unavailable\n");
-		return;
-	}
-
 	for (s = 0; s < sseu->max_slices; s++) {
+		u8 ss_mask = sseu->subslice_mask.hsw[s];
+
 		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
-			   s, intel_sseu_subslices_per_slice(sseu, s),
-			   intel_sseu_get_subslices(sseu, s));
+			   s, hweight8(ss_mask), ss_mask);
 
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			u16 enabled_eus = sseu_get_eus(sseu, s, ss);
@@ -729,20 +823,71 @@ void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
 	}
 }
 
-u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
+static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu,
+				     struct drm_printer *p)
+{
+	int dss;
+
+	for (dss = 0; dss < sseu->max_subslices; dss++) {
+		u16 enabled_eus = sseu_get_eus(sseu, 0, dss);
+
+		drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss,
+			   str_yes_no(test_bit(dss, sseu->geometry_subslice_mask.xehp)),
+			   str_yes_no(test_bit(dss, sseu->compute_subslice_mask.xehp)),
+			   hweight16(enabled_eus), enabled_eus);
+	}
+}
+
+void intel_sseu_print_topology(struct drm_i915_private *i915,
+			       const struct sseu_dev_info *sseu,
+			       struct drm_printer *p)
+{
+	if (sseu->max_slices == 0)
+		drm_printf(p, "Unavailable\n");
+	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
+		sseu_print_xehp_topology(sseu, p);
+	else
+		sseu_print_hsw_topology(sseu, p);
+}
+
+void intel_sseu_print_ss_info(const char *type,
+			      const struct sseu_dev_info *sseu,
+			      struct seq_file *m)
 {
-	u16 slice_mask = 0;
+	int s;
+
+	if (sseu->has_xehp_dss) {
+		seq_printf(m, "  %s Geometry DSS: %u\n", type,
+			   bitmap_weight(sseu->geometry_subslice_mask.xehp,
+					 XEHP_BITMAP_BITS(sseu->geometry_subslice_mask)));
+		seq_printf(m, "  %s Compute DSS: %u\n", type,
+			   bitmap_weight(sseu->compute_subslice_mask.xehp,
+					 XEHP_BITMAP_BITS(sseu->compute_subslice_mask)));
+	} else {
+		for (s = 0; s < fls(sseu->slice_mask); s++)
+			seq_printf(m, "  %s Slice%i subslices: %u\n", type,
+				   s, hweight8(sseu->subslice_mask.hsw[s]));
+	}
+}
+
+u16 intel_slicemask_from_xehp_dssmask(intel_sseu_ss_mask_t dss_mask,
+				      int dss_per_slice)
+{
+	intel_sseu_ss_mask_t per_slice_mask = {};
+	unsigned long slice_mask = 0;
 	int i;
 
-	WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask));
+	WARN_ON(DIV_ROUND_UP(XEHP_BITMAP_BITS(dss_mask), dss_per_slice) >
+		8 * sizeof(slice_mask));
 
-	for (i = 0; dss_mask; i++) {
-		if (dss_mask & GENMASK(dss_per_slice - 1, 0))
+	bitmap_fill(per_slice_mask.xehp, dss_per_slice);
+	for (i = 0; !bitmap_empty(dss_mask.xehp, XEHP_BITMAP_BITS(dss_mask)); i++) {
+		if (bitmap_intersects(dss_mask.xehp, per_slice_mask.xehp,
				      dss_per_slice))
 			slice_mask |= BIT(i);
 
-		dss_mask >>= dss_per_slice;
+		bitmap_shift_right(dss_mask.xehp, dss_mask.xehp, dss_per_slice,
+				   XEHP_BITMAP_BITS(dss_mask));
 	}
 
 	return slice_mask;
 }
-
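For reference, the kernel-doc added above for intel_sseu_copy_eumask_to_user() fixes the uapi layout used by the query ioctl's topology queries: the EU bits for a given (slice, subslice) pair start at byte s * max_subslices * eu_stride + ss * eu_stride, where eu_stride = GEN_SSEU_STRIDE(max_eus_per_subslice), i.e. DIV_ROUND_UP(max_eus_per_subslice, 8) bytes per subslice, least-significant byte first. A minimal sketch of a userspace-side decoder, assuming that layout; the helper name topology_has_eu is hypothetical and not part of the patch:

#include <stdbool.h>

/*
 * Hypothetical userspace decoder for the EU mask written by
 * intel_sseu_copy_eumask_to_user(): each subslice owns eu_stride
 * little-endian bytes starting at the offset computed below.
 */
static bool topology_has_eu(const unsigned char *eu_mask,
			    int max_subslices, int eu_stride,
			    int s, int ss, int eu)
{
	/* byte offset of this subslice's EU bitmask within the buffer */
	int offset = s * max_subslices * eu_stride + ss * eu_stride;

	return eu_mask[offset + eu / 8] & (1u << (eu % 8));
}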

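The new intel_slicemask_from_xehp_dssmask() at the end of the diff performs the same grouping as the intel_slicemask_from_dssmask() it replaces, but on an arbitrary-width bitmap so that DSS masks wider than 64 bits (read from more than one 32-bit fuse register) still work: slice i is marked present if any of its dss_per_slice DSS bits are set. For reference, on a mask that fits in a u64 the derivation reduces to the removed integer version, sketched below with GENMASK_ULL swapped in for 64-bit correctness:

#include <linux/bits.h>
#include <linux/types.h>

/*
 * Reference sketch of the slice-mask derivation on a plain u64,
 * mirroring the removed intel_slicemask_from_dssmask(); the bitmap
 * version in the patch replaces the mask test and shift with
 * bitmap_intersects() and bitmap_shift_right().
 */
static u16 slicemask_from_u64_dssmask(u64 dss_mask, int dss_per_slice)
{
	u16 slice_mask = 0;
	int i;

	for (i = 0; dss_mask; i++) {
		/* any DSS enabled within this slice's group of bits? */
		if (dss_mask & GENMASK_ULL(dss_per_slice - 1, 0))
			slice_mask |= BIT(i);

		dss_mask >>= dss_per_slice;
	}

	return slice_mask;
}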