From 79398d24da4c9294285bdedf67018ff09fe97bdc Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Mon, 27 Jun 2022 16:03:46 -0700 Subject: drm/i915/guc/slpc: Add a new SLPC selftest This test will validate we can achieve actual frequency of RP0. Pcode grants frequencies based on what GuC is requesting. However, thermal throttling can limit what is being granted. Add a test to request for max, but don't fail the test if RP0 is not granted due to throttle reasons. Also optimize the selftest by using a common run_test function to avoid code duplication. Rename the "clamp" tests to vary_max_freq and vary_min_freq. v2: Fix compile warning v3: Review comments (Ashutosh). Added a FIXME for the media RP0 case. v4: Checkpatch (strict) fixes, remove FIXME and other comments (Ashutosh) Fixes commit 8ee2c227822e ("drm/i915/guc/slpc: Add SLPC selftest") Cc: Ashutosh Dixit Signed-off-by: Vinay Belgaumkar Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220627230346.27720-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/selftest_slpc.c | 323 +++++++++++++++----------------- 1 file changed, 155 insertions(+), 168 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index b768cea5943d..ac29691e0b1a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -8,6 +8,11 @@ #define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 10000) #define FREQUENCY_REQ_UNIT DIV_ROUND_CLOSEST(GT_FREQUENCY_MULTIPLIER, \ GEN9_FREQ_SCALER) +enum test_type { + VARY_MIN, + VARY_MAX, + MAX_GRANTED +}; static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq) { @@ -36,147 +41,114 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq) return ret; } -static int live_slpc_clamp_min(void *arg) +static int vary_max_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, + u32 *max_act_freq) { - struct drm_i915_private *i915 = arg; - struct intel_gt *gt = to_gt(i915); - struct intel_guc_slpc *slpc = >->uc.guc.slpc; - struct intel_rps *rps = >->rps; - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct igt_spinner spin; - u32 slpc_min_freq, slpc_max_freq; + u32 step, max_freq, req_freq; + u32 act_freq; int err = 0; - if (!intel_uc_uses_guc_slpc(>->uc)) - return 0; + /* Go from max to min in 5 steps */ + step = (slpc->rp0_freq - slpc->min_freq) / NUM_STEPS; + *max_act_freq = slpc->min_freq; + for (max_freq = slpc->rp0_freq; max_freq > slpc->min_freq; + max_freq -= step) { + err = slpc_set_max_freq(slpc, max_freq); + if (err) + break; - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; + req_freq = intel_rps_read_punit_req_frequency(rps); - if (intel_guc_slpc_get_max_freq(slpc, &slpc_max_freq)) { - pr_err("Could not get SLPC max freq\n"); - return -EIO; - } + /* GuC requests freq in multiples of 50/3 MHz */ + if (req_freq > (max_freq + FREQUENCY_REQ_UNIT)) { + pr_err("SWReq is %d, should be at most %d\n", req_freq, + max_freq + FREQUENCY_REQ_UNIT); + err = -EINVAL; + } - if (intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq)) { - pr_err("Could not get SLPC min freq\n"); - return -EIO; - } + act_freq = intel_rps_read_actual_frequency(rps); + if (act_freq > *max_act_freq) + *max_act_freq = act_freq; - if (slpc_min_freq == slpc_max_freq) { - pr_err("Min/Max are fused to the same value\n"); - return -EINVAL; + if (err) + break; } - intel_gt_pm_wait_for_idle(gt); - intel_gt_pm_get(gt); - 
for_each_engine(engine, gt, id) { - struct i915_request *rq; - u32 step, min_freq, req_freq; - u32 act_freq, max_act_freq; + return err; +} - if (!intel_engine_can_store_dword(engine)) - continue; +static int vary_min_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, + u32 *max_act_freq) +{ + u32 step, min_freq, req_freq; + u32 act_freq; + int err = 0; - /* Go from min to max in 5 steps */ - step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; - max_act_freq = slpc_min_freq; - for (min_freq = slpc_min_freq; min_freq < slpc_max_freq; - min_freq += step) { - err = slpc_set_min_freq(slpc, min_freq); - if (err) - break; - - st_engine_heartbeat_disable(engine); - - rq = igt_spinner_create_request(&spin, - engine->kernel_context, - MI_NOOP); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - st_engine_heartbeat_enable(engine); - break; - } + /* Go from min to max in 5 steps */ + step = (slpc->rp0_freq - slpc->min_freq) / NUM_STEPS; + *max_act_freq = slpc->min_freq; + for (min_freq = slpc->min_freq; min_freq < slpc->rp0_freq; + min_freq += step) { + err = slpc_set_min_freq(slpc, min_freq); + if (err) + break; - i915_request_add(rq); + req_freq = intel_rps_read_punit_req_frequency(rps); - if (!igt_wait_for_spinner(&spin, rq)) { - pr_err("%s: Spinner did not start\n", - engine->name); - igt_spinner_end(&spin); - st_engine_heartbeat_enable(engine); - intel_gt_set_wedged(engine->gt); - err = -EIO; - break; - } + /* GuC requests freq in multiples of 50/3 MHz */ + if (req_freq < (min_freq - FREQUENCY_REQ_UNIT)) { + pr_err("SWReq is %d, should be at least %d\n", req_freq, + min_freq - FREQUENCY_REQ_UNIT); + err = -EINVAL; + } - /* Wait for GuC to detect business and raise - * requested frequency if necessary. - */ - delay_for_h2g(); + act_freq = intel_rps_read_actual_frequency(rps); + if (act_freq > *max_act_freq) + *max_act_freq = act_freq; - req_freq = intel_rps_read_punit_req_frequency(rps); + if (err) + break; + } - /* GuC requests freq in multiples of 50/3 MHz */ - if (req_freq < (min_freq - FREQUENCY_REQ_UNIT)) { - pr_err("SWReq is %d, should be at least %d\n", req_freq, - min_freq - FREQUENCY_REQ_UNIT); - igt_spinner_end(&spin); - st_engine_heartbeat_enable(engine); - err = -EINVAL; - break; - } + return err; +} - act_freq = intel_rps_read_actual_frequency(rps); - if (act_freq > max_act_freq) - max_act_freq = act_freq; +static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, u32 *max_act_freq) +{ + struct intel_gt *gt = rps_to_gt(rps); + u32 perf_limit_reasons; + int err = 0; - igt_spinner_end(&spin); - st_engine_heartbeat_enable(engine); - } + err = slpc_set_min_freq(slpc, slpc->rp0_freq); + if (err) + return err; - pr_info("Max actual frequency for %s was %d\n", - engine->name, max_act_freq); + *max_act_freq = intel_rps_read_actual_frequency(rps); + if (*max_act_freq != slpc->rp0_freq) { + /* Check if there was some throttling by pcode */ + perf_limit_reasons = intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS); - /* Actual frequency should rise above min */ - if (max_act_freq == slpc_min_freq) { - pr_err("Actual freq did not rise above min\n"); + /* If not, this is an error */ + if (!(perf_limit_reasons & GT0_PERF_LIMIT_REASONS_MASK)) { + pr_err("Pcode did not grant max freq\n"); err = -EINVAL; + } else { + pr_info("Pcode throttled frequency 0x%x\n", perf_limit_reasons); } - - if (err) - break; } - /* Restore min/max frequencies */ - slpc_set_max_freq(slpc, slpc_max_freq); - slpc_set_min_freq(slpc, slpc_min_freq); - - if (igt_flush_test(gt->i915)) - err = -EIO; - - 
intel_gt_pm_put(gt); - igt_spinner_fini(&spin); - intel_gt_pm_wait_for_idle(gt); - return err; } -static int live_slpc_clamp_max(void *arg) +static int run_test(struct intel_gt *gt, int test_type) { - struct drm_i915_private *i915 = arg; - struct intel_gt *gt = to_gt(i915); - struct intel_guc_slpc *slpc; - struct intel_rps *rps; + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + struct intel_rps *rps = >->rps; struct intel_engine_cs *engine; enum intel_engine_id id; struct igt_spinner spin; - int err = 0; u32 slpc_min_freq, slpc_max_freq; - - slpc = >->uc.guc.slpc; - rps = >->rps; + int err = 0; if (!intel_uc_uses_guc_slpc(>->uc)) return 0; @@ -194,7 +166,7 @@ static int live_slpc_clamp_max(void *arg) return -EIO; } - if (slpc_min_freq == slpc_max_freq) { + if (slpc->min_freq == slpc->rp0_freq) { pr_err("Min/Max are fused to the same value\n"); return -EINVAL; } @@ -203,93 +175,82 @@ static int live_slpc_clamp_max(void *arg) intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { struct i915_request *rq; - u32 max_freq, req_freq; - u32 act_freq, max_act_freq; - u32 step; + u32 max_act_freq; if (!intel_engine_can_store_dword(engine)) continue; - /* Go from max to min in 5 steps */ - step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; - max_act_freq = slpc_min_freq; - for (max_freq = slpc_max_freq; max_freq > slpc_min_freq; - max_freq -= step) { - err = slpc_set_max_freq(slpc, max_freq); - if (err) - break; - - st_engine_heartbeat_disable(engine); - - rq = igt_spinner_create_request(&spin, - engine->kernel_context, - MI_NOOP); - if (IS_ERR(rq)) { - st_engine_heartbeat_enable(engine); - err = PTR_ERR(rq); - break; - } + st_engine_heartbeat_disable(engine); - i915_request_add(rq); + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + st_engine_heartbeat_enable(engine); + break; + } - if (!igt_wait_for_spinner(&spin, rq)) { - pr_err("%s: SLPC spinner did not start\n", - engine->name); - igt_spinner_end(&spin); - st_engine_heartbeat_enable(engine); - intel_gt_set_wedged(engine->gt); - err = -EIO; - break; - } + i915_request_add(rq); - delay_for_h2g(); + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("%s: Spinner did not start\n", + engine->name); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + intel_gt_set_wedged(engine->gt); + err = -EIO; + break; + } - /* Verify that SWREQ indeed was set to specific value */ - req_freq = intel_rps_read_punit_req_frequency(rps); + switch (test_type) { + case VARY_MIN: + err = vary_min_freq(slpc, rps, &max_act_freq); + break; - /* GuC requests freq in multiples of 50/3 MHz */ - if (req_freq > (max_freq + FREQUENCY_REQ_UNIT)) { - pr_err("SWReq is %d, should be at most %d\n", req_freq, - max_freq + FREQUENCY_REQ_UNIT); + case VARY_MAX: + err = vary_max_freq(slpc, rps, &max_act_freq); + break; + + case MAX_GRANTED: + /* Media engines have a different RP0 */ + if (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS) { igt_spinner_end(&spin); st_engine_heartbeat_enable(engine); - err = -EINVAL; - break; + err = 0; + continue; } - act_freq = intel_rps_read_actual_frequency(rps); - if (act_freq > max_act_freq) - max_act_freq = act_freq; - - st_engine_heartbeat_enable(engine); - igt_spinner_end(&spin); - - if (err) - break; + err = max_granted_freq(slpc, rps, &max_act_freq); + break; } pr_info("Max actual frequency for %s was %d\n", engine->name, max_act_freq); /* Actual frequency should rise above min */ - if (max_act_freq == slpc_min_freq) { + if 
(max_act_freq <= slpc_min_freq) { pr_err("Actual freq did not rise above min\n"); + pr_err("Perf Limit Reasons: 0x%x\n", + intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS)); err = -EINVAL; } - if (igt_flush_test(gt->i915)) { - err = -EIO; - break; - } + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); if (err) break; } - /* Restore min/max freq */ + /* Restore min/max frequencies */ slpc_set_max_freq(slpc, slpc_max_freq); slpc_set_min_freq(slpc, slpc_min_freq); + if (igt_flush_test(gt->i915)) + err = -EIO; + intel_gt_pm_put(gt); igt_spinner_fini(&spin); intel_gt_pm_wait_for_idle(gt); @@ -297,11 +258,37 @@ static int live_slpc_clamp_max(void *arg) return err; } +static int live_slpc_vary_min(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = to_gt(i915); + + return run_test(gt, VARY_MIN); +} + +static int live_slpc_vary_max(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = to_gt(i915); + + return run_test(gt, VARY_MAX); +} + +/* check if pcode can grant RP0 */ +static int live_slpc_max_granted(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = to_gt(i915); + + return run_test(gt, MAX_GRANTED); +} + int intel_slpc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { - SUBTEST(live_slpc_clamp_max), - SUBTEST(live_slpc_clamp_min), + SUBTEST(live_slpc_vary_max), + SUBTEST(live_slpc_vary_min), + SUBTEST(live_slpc_max_granted), }; if (intel_gt_is_wedged(to_gt(i915))) -- cgit From 971e4a9781742aaad1587e25fd5582b2dd595ef8 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 21 Jun 2022 16:30:05 -0700 Subject: drm/i915/guc: ADL-N should use the same GuC FW as ADL-S The only difference between the ADL S and P GuC FWs is the HWConfig support. ADL-N does not support HWConfig, so we should use the same binary as ADL-S, otherwise the GuC might attempt to fetch a config table that does not exist. ADL-N is internally identified as an ADL-P, so we need to special-case it in the FW selection code. Fixes: 7e28d0b26759 ("drm/i915/adl-n: Enable ADL-N platform") Cc: John Harrison Cc: Tejas Upadhyay Cc: Anusha Srivatsa Cc: Jani Nikula Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220621233005.3952293-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index c06e83872c34..27363091e1af 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -162,6 +162,15 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) u8 rev = INTEL_REVID(i915); int i; + /* + * The only difference between the ADL GuC FWs is the HWConfig support. + * ADL-N does not support HWConfig, so we should use the same binary as + * ADL-S, otherwise the GuC might attempt to fetch a config table that + * does not exist. 
+ */ + if (IS_ADLP_N(i915)) + p = INTEL_ALDERLAKE_S; + GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); fw_blobs = blobs_all[uc_fw->type].blobs; fw_count = blobs_all[uc_fw->type].count; -- cgit From eb1c535f0d69e3ec7679d4d714bb2a9765ceda69 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 29 Jun 2022 18:43:50 +0100 Subject: drm/i915: turn on small BAR support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the uAPI in place we should now have enough in place to ensure a working system on small BAR configurations. v2: (Nirmoy & Thomas): - s/full BAR/Resizable BAR/ which is hopefully more easily understood by users. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Lionel Landwerlin Cc: Tvrtko Ursulin Cc: Jon Bloomfield Cc: Daniel Vetter Cc: Jordan Justen Cc: Kenneth Graunke Cc: Akeem G Abodunrin Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220629174350.384910-13-matthew.auld@intel.com --- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index d09b996a9759..fa7b86f83e7b 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -112,12 +112,6 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) flat_ccs_base = intel_gt_mcr_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); flat_ccs_base = (flat_ccs_base >> XEHP_CCS_BASE_SHIFT) * SZ_64K; - /* FIXME: Remove this when we have small-bar enabled */ - if (pci_resource_len(pdev, 2) < lmem_size) { - drm_err(&i915->drm, "System requires small-BAR support, which is currently unsupported on this kernel\n"); - return ERR_PTR(-EINVAL); - } - if (GEM_WARN_ON(lmem_size < flat_ccs_base)) return ERR_PTR(-EIO); @@ -170,6 +164,10 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) drm_info(&i915->drm, "Local memory available: %pa\n", &lmem_size); + if (io_size < lmem_size) + drm_info(&i915->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' or similar, if available in the BIOS.\n", + (u64)io_size >> 20); + return mem; err_region_put: -- cgit From 3b05c960788439dbb47d0e62335f23869696b079 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 30 Jun 2022 17:14:07 -0300 Subject: drm/i915/pvc: Implement w/a 16016694945 A new PVC-specific workaround has just been added to the BSpec. 
BSpec: 64027 Signed-off-by: Gustavo Sousa Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220630201407.16770-1-gustavo.sousa@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 ++++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++ 2 files changed, 7 insertions(+) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 37c1095d8603..e6bb24dc7b99 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -918,6 +918,10 @@ #define GEN7_L3CNTLREG1 _MMIO(0xb01c) #define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C47FF8C #define GEN7_L3AGDIS (1 << 19) + +#define XEHPC_LNCFMISCCFGREG0 _MMIO(0xb01c) +#define XEHPC_OVRLSCCC REG_BIT(0) + #define GEN7_L3CNTLREG2 _MMIO(0xb020) /* MOCS (Memory Object Control State) registers */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 3213c593a55f..dcc1ee392c0d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2687,6 +2687,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li * performance guide section. */ wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + + /* Wa_16016694945 */ + wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC); } if (IS_XEHPSDV(i915)) { -- cgit From b94a1a207de5e06a55b5a8259073fd8d1637f093 Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Mon, 6 Jun 2022 17:23:14 -0700 Subject: drm/i915/guc: Asynchronous flush of GuC log regions Both error-capture and relay-logging mechanism use the GuC log infrastructure. That means the KMD must send a log flush complete notification back to GuC after reading the data out. This call is currently being sent synchronously. However, synchronous H2Gs cause problems when the system is backed up. There is no need for this to be synchronous. The KMD wasn't even looking at the return status from it. So make it asynchronous and then there is no issue about time outs. 
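As a rough sketch of the conversion being applied here (illustrative only: the action opcode below is a placeholder, not a real GuC action, while the intel_guc_send()/intel_guc_send_nb() calls are used exactly as in the diff that follows):

	/*
	 * A fire-and-forget notification has no interesting reply, so the
	 * blocking helper can be swapped for the non-blocking one.  The
	 * trailing 0 passed to intel_guc_send_nb() means no G2H response
	 * payload is expected.
	 */
	static int send_flush_notification(struct intel_guc *guc)
	{
		u32 action[] = {
			SOME_NOTIFICATION_ACTION,	/* placeholder opcode */
		};

		/* Blocking form: waits for the GuC ack and can hit the H2G timeout */
		/* return intel_guc_send(guc, action, ARRAY_SIZE(action)); */

		/* Non-blocking form: queue the message and return immediately */
		return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0);
	}
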
Signed-off-by: Alan Previn Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220607002314.1451656-2-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 3 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 97a32e610c30..ffcd7a35f8df 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -1261,7 +1261,8 @@ static int __guc_capture_flushlog_complete(struct intel_guc *guc) GUC_CAPTURE_LOG_BUFFER }; - return intel_guc_send(guc, action, ARRAY_SIZE(action)); + return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0); + } static void __guc_capture_process_output(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 78d2989fe917..7f1bc8af6b82 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -31,7 +31,7 @@ static int guc_action_flush_log_complete(struct intel_guc *guc) GUC_DEBUG_LOG_BUFFER }; - return intel_guc_send(guc, action, ARRAY_SIZE(action)); + return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0); } static int guc_action_flush_log(struct intel_guc *guc) -- cgit From 9a92732f040ae3aeac017d0e80501cad1127a13d Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 1 Jul 2022 16:20:06 -0700 Subject: drm/i915/gt: Add general DSS steering iterator to intel_gt_mcr Although all DSS belong to a single pool on Xe_HP platforms (i.e., they're not organized into slices from a topology point of view), we do still need to pass 'group' and 'instance' targets when steering register accesses to a specific instance of a per-DSS multicast register. The rules for how to determine group and instance IDs (which previously used legacy terms "slice" and "subslice") varies by platform. Some platforms determine steering by gslice membership, some platforms by cslice membership, and future platforms may have other rules. Since looping over each DSS and performing steered unicast register accesses is a relatively common pattern, let's add a dedicated iteration macro to handle this (and replace the platform-specific "instdone" loop we were using previously. This will avoid the calling code needing to figure out the details about how to obtain steering IDs for a specific DSS. Most of the places where we use this new loop are in the GPU errorstate code at the moment, but we do have some additional features coming in the future that will also need to loop over each DSS and steer some register accesses accordingly. 
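A rough usage sketch of the new iterator (illustrative only: the register argument and the debug print are assumptions, while for_each_ss_steering(), intel_gt_mcr_read() and the group/instance semantics are as introduced in the diff below):

	/*
	 * Walk every enabled DSS and do a steered read of a per-DSS multicast
	 * register.  'dss' is the DSS index; 'group'/'instance' are the
	 * steering IDs the iterator derives for that DSS on this platform.
	 */
	static void dump_per_dss_reg(struct intel_gt *gt, i915_reg_t reg)
	{
		unsigned int dss, group, instance;

		for_each_ss_steering(dss, gt, group, instance) {
			u32 val = intel_gt_mcr_read(gt, reg, group, instance);

			drm_dbg(&gt->i915->drm, "DSS %u (steer %u/%u): 0x%08x\n",
				dss, group, instance, val);
		}
	}
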
Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220701232006.1016135-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 34 ++++++++------------------ drivers/gpu/drm/i915/gt/intel_engine_types.h | 22 ----------------- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 25 +++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 24 ++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 13 +++++----- 5 files changed, 65 insertions(+), 53 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 283870c65991..37fa813af766 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1517,7 +1517,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone) { struct drm_i915_private *i915 = engine->i915; - const struct sseu_dev_info *sseu = &engine->gt->info.sseu; struct intel_uncore *uncore = engine->uncore; u32 mmio_base = engine->mmio_base; int slice; @@ -1542,32 +1541,19 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2); } - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { - instdone->sampler[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_SAMPLER_INSTDONE, - slice, subslice); - instdone->row[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_ROW_INSTDONE, - slice, subslice); - } - } else { - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { - instdone->sampler[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_SAMPLER_INSTDONE, - slice, subslice); - instdone->row[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_ROW_INSTDONE, - slice, subslice); - } + for_each_ss_steering(iter, engine->gt, slice, subslice) { + instdone->sampler[slice][subslice] = + intel_gt_mcr_read(engine->gt, + GEN7_SAMPLER_INSTDONE, + slice, subslice); + instdone->row[slice][subslice] = + intel_gt_mcr_read(engine->gt, + GEN7_ROW_INSTDONE, + slice, subslice); } if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) + for_each_ss_steering(iter, engine->gt, slice, subslice) instdone->geom_svg[slice][subslice] = intel_gt_mcr_read(engine->gt, XEHPG_INSTDONE_GEOM_SVG, diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 2286f96f5f87..633a7e5dba3b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -647,26 +647,4 @@ intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; } -#define instdone_has_slice(dev_priv___, sseu___, slice___) \ - ((GRAPHICS_VER(dev_priv___) == 7 ? 1 : ((sseu___)->slice_mask)) & BIT(slice___)) - -#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \ - (GRAPHICS_VER(dev_priv__) == 7 ? 
(1 & BIT(subslice__)) : \ - intel_sseu_has_subslice(sseu__, 0, subslice__)) - -#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \ - for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \ - (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \ - (slice_) += ((subslice_) == 0)) \ - for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ - (instdone_has_subslice(dev_priv_, sseu_, slice_, \ - subslice_))) - -#define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \ - for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \ - (iter_) < GEN_SS_MASK_SIZE; \ - (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \ - (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \ - for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_))) - #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 777025d5bd66..f8c64ab9d3ca 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -495,3 +495,28 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, } } +/** + * intel_gt_mcr_get_ss_steering - returns the group/instance steering for a SS + * @gt: GT structure + * @dss: DSS ID to obtain steering for + * @group: pointer to storage for steering group ID + * @instance: pointer to storage for steering instance ID + * + * Returns the steering IDs (via the @group and @instance parameters) that + * correspond to a specific subslice/DSS ID. + */ +void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, + unsigned int *group, unsigned int *instance) +{ + if (IS_PONTEVECCHIO(gt->i915)) { + *group = dss / GEN_DSS_PER_CSLICE; + *instance = dss % GEN_DSS_PER_CSLICE; + } else if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) { + *group = dss / GEN_DSS_PER_GSLICE; + *instance = dss % GEN_DSS_PER_GSLICE; + } else { + *group = dss / GEN_MAX_HSW_SLICES; + *instance = dss % GEN_MAX_SS_PER_HSW_SLICE; + return; + } +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h index 506b0cbc8db3..77a8b11c287d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h @@ -31,4 +31,28 @@ void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt, void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, bool dump_table); +void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, + unsigned int *group, unsigned int *instance); + +/* + * Helper for for_each_ss_steering loop. On pre-Xe_HP platforms, subslice + * presence is determined by using the group/instance as direct lookups in the + * slice/subslice topology. On Xe_HP and beyond, the steering is unrelated to + * the topology, so we lookup the DSS ID directly in "slice 0." + */ +#define _HAS_SS(ss_, gt_, group_, instance_) ( \ + GRAPHICS_VER_FULL(gt_->i915) >= IP_VER(12, 50) ? \ + intel_sseu_has_subslice(&(gt_)->info.sseu, 0, ss_) : \ + intel_sseu_has_subslice(&(gt_)->info.sseu, group_, instance_)) + +/* + * Loop over each subslice/DSS and determine the group and instance IDs that + * should be used to steer MCR accesses toward this DSS. 
+ */ +#define for_each_ss_steering(ss_, gt_, group_, instance_) \ + for (ss_ = 0, intel_gt_mcr_get_ss_steering(gt_, 0, &group_, &instance_); \ + ss_ < I915_MAX_SS_FUSE_BITS; \ + ss_++, intel_gt_mcr_get_ss_steering(gt_, ss_, &group_, &instance_)) \ + for_each_if(_HAS_SS(ss_, gt_, group_, instance_)) + #endif /* __INTEL_GT_MCR__ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index ffcd7a35f8df..75257bd20ff0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -9,6 +9,7 @@ #include "gt/intel_engine_regs.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" #include "gt/intel_lrc.h" #include "guc_capture_fwif.h" @@ -281,8 +282,7 @@ guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc, const struct __guc_mmio_reg_descr_group *lists) { struct intel_gt *gt = guc_to_gt(guc); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; - int slice, subslice, i, num_steer_regs, num_tot_regs = 0; + int slice, subslice, iter, i, num_steer_regs, num_tot_regs = 0; const struct __guc_mmio_reg_descr_group *list; struct __guc_mmio_reg_descr_group *extlists; struct __guc_mmio_reg_descr *extarray; @@ -298,7 +298,7 @@ guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc, num_steer_regs = ARRAY_SIZE(xe_extregs); sseu = >->info.sseu; - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) + for_each_ss_steering(iter, gt, slice, subslice) num_tot_regs += num_steer_regs; if (!num_tot_regs) @@ -315,7 +315,7 @@ guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc, } extarray = extlists[0].extlist; - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { + for_each_ss_steering(iter, gt, slice, subslice) { for (i = 0; i < num_steer_regs; ++i) { __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); ++extarray; @@ -359,9 +359,8 @@ guc_capture_alloc_steered_lists_xe_hpg(struct intel_guc *guc, num_steer_regs += ARRAY_SIZE(xehpg_extregs); sseu = >->info.sseu; - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { + for_each_ss_steering(iter, gt, slice, subslice) num_tot_regs += num_steer_regs; - } if (!num_tot_regs) return; @@ -377,7 +376,7 @@ guc_capture_alloc_steered_lists_xe_hpg(struct intel_guc *guc, } extarray = extlists[0].extlist; - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { + for_each_ss_steering(iter, gt, slice, subslice) { for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) { __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); ++extarray; -- cgit From d50f5a109cf4ed50c5b575c1bb5fc3bd17b23308 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 Jul 2022 12:41:04 +0300 Subject: drm/i915/selftests: fix a couple IS_ERR() vs NULL tests The shmem_pin_map() function doesn't return error pointers, it returns NULL. 
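For illustration, the corrected error-handling pattern looks roughly like this (a sketch only; the function name is hypothetical and the shmem_pin_map()/shmem_unpin_map() pairing is assumed from the selftest being fixed below):

	static int check_default_state(struct intel_engine_cs *engine)
	{
		u32 *hw;

		hw = shmem_pin_map(engine->default_state);
		if (!hw)	/* NULL on failure, never an ERR_PTR() */
			return -ENOMEM;

		/* ... inspect the pinned default context image ... */

		shmem_unpin_map(engine->default_state, hw);
		return 0;
	}
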
Fixes: be1cb55a07bf ("drm/i915/gt: Keep a no-frills swappable copy of the default context state") Signed-off-by: Dan Carpenter Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220708094104.GL2316@kadam --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 8b2c11dbe354..1109088fe8f6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -176,8 +176,8 @@ static int live_lrc_layout(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); @@ -365,8 +365,8 @@ static int live_lrc_fixed(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); -- cgit From b7580e669ca0d624b122455058aa6fe62c0fef44 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 8 Jul 2022 14:58:03 -0700 Subject: drm/i915/dg2: Add Wa_15010599737 This workaround may need to be extended to other platforms soon, but for now it's marked as DG2-specific. Signed-off-by: Matt Roper Reviewed-by: Arun R Murthy Link: https://patchwork.freedesktop.org/patch/msgid/20220708215804.2889246-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index e6bb24dc7b99..60d6eb5f245b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -371,6 +371,9 @@ #define GEN9_WM_CHICKEN3 _MMIO(0x5588) #define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) +#define CHICKEN_RASTER_1 _MMIO(0x6204) +#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) + #define VFLSKPD _MMIO(0x62a8) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index dcc1ee392c0d..e8111fce56d0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -689,6 +689,9 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); + + /* Wa_15010599737:dg2 */ + wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); } static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, -- cgit From 336561a914fc0c6f1218228718f633b31b7af1c3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Jul 2022 16:21:32 +0100 Subject: drm/i915/gt: Serialize GRDOM access between multiple engine resets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't allow two engines to be reset in parallel, as they would both try to select a reset bit (and send requests to common registers) and wait on that register, at the same time. 
Serialize control of the reset requests/acks using the uncore->lock, which will also ensure that no other GT state changes at the same time as the actual reset. Cc: stable@vger.kernel.org # v4.4 and upper Reported-by: Mika Kuoppala Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Acked-by: Thomas Hellström Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/e0a2d894e77aed7c2e36b0d1abdc7dbac3011729.1657639152.git.mchehab@kernel.org --- drivers/gpu/drm/i915/gt/intel_reset.c | 37 ++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index a5338c3fde7a..c68d36fb5bbd 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) return err; } -static int gen6_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_engine_cs *engine; u32 hw_mask; @@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt, return gen6_hw_domain_reset(gt, hw_mask); } +static int gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(>->uncore->lock, flags); + ret = __gen6_reset_engines(gt, engine_mask, retry); + spin_unlock_irqrestore(>->uncore->lock, flags); + + return ret; +} + static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine) { int vecs_id; @@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit); } -static int gen11_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen11_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_engine_cs *engine; intel_engine_mask_t tmp; @@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt, struct intel_engine_cs *engine; const bool reset_non_ready = retry >= 1; intel_engine_mask_t tmp; + unsigned long flags; int ret; + spin_lock_irqsave(>->uncore->lock, flags); + for_each_engine_masked(engine, gt, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) @@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt, * This is best effort, so ignore any error from the initial reset. 
*/ if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES) - gen11_reset_engines(gt, gt->info.engine_mask, 0); + __gen11_reset_engines(gt, gt->info.engine_mask, 0); if (GRAPHICS_VER(gt->i915) >= 11) - ret = gen11_reset_engines(gt, engine_mask, retry); + ret = __gen11_reset_engines(gt, engine_mask, retry); else - ret = gen6_reset_engines(gt, engine_mask, retry); + ret = __gen6_reset_engines(gt, engine_mask, retry); skip_reset: for_each_engine_masked(engine, gt, engine_mask, tmp) gen8_engine_reset_cancel(engine); + spin_unlock_irqrestore(>->uncore->lock, flags); + return ret; } -- cgit From 33da97894758737895e90c909f16786052680ef4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Jul 2022 16:21:33 +0100 Subject: drm/i915/gt: Serialize TLB invalidates with GT resets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid trying to invalidate the TLB in the middle of performing an engine reset, as this may result in the reset timing out. Currently, the TLB invalidate is only serialised by its own mutex, forgoing the uncore lock, but we can take the uncore->lock as well to serialise the mmio access, thereby serialising with the GDRST. Tested on a NUC5i7RYB, BIOS RYBDWi35.86A.0380.2019.0517.1530 with i915 selftest/hangcheck. Cc: stable@vger.kernel.org # v4.4 and upper Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store") Reported-by: Mauro Carvalho Chehab Tested-by: Mauro Carvalho Chehab Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Andi Shyti Acked-by: Thomas Hellström Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/1e59a7c45dd919a530256b9ac721ac6ea86c0677.1657639152.git.mchehab@kernel.org --- drivers/gpu/drm/i915/gt/intel_gt.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 8da3314bb6bf..68c2b0d8f187 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -952,6 +952,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) mutex_lock(>->tlb_invalidate_lock); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + + for_each_engine(engine, gt, id) { + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + } + + spin_unlock_irq(&uncore->lock); + for_each_engine(engine, gt, id) { /* * HW architecture suggest typical invalidation time at 40us, @@ -966,7 +980,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; - intel_uncore_write_fw(uncore, rb.reg, rb.bit); if (__intel_wait_for_register_fw(uncore, rb.reg, rb.bit, 0, timeout_us, timeout_ms, -- cgit From c877bed82e1017c102c137d432933ccbba92c119 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Jul 2022 16:20:13 +0200 Subject: drm/i915/gt: Only kick the signal worker if there's been an update One impact of commit 047a1b877ed4 ("dma-buf & drm/amdgpu: remove dma_resv workaround") is that it stores many, many more fences. Whereas adding an exclusive fence used to remove the shared fence list, that list is now preserved and the write fences included into the list. Not just a single write fence, but now a write/read fence per context. 
That causes us to have to track more fences than before (albeit half of those are redundant), and we trigger more interrupts for multi-engine workloads. As part of reducing the impact from handling more signaling, we observe we only need to kick the signal worker after adding a fence iff we have good cause to believe that there is work to be done in processing the fence i.e. we either need to enable the interrupt or the request is already complete but we don't know if we saw the interrupt and so need to check signaling. References: 047a1b877ed4 ("dma-buf & drm/amdgpu: remove dma_resv workaround") Signed-off-by: Chris Wilson Signed-off-by: Karolina Drobnik Reviewed-by: Andi Shyti Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/d7b953c7a4ba747c8196a164e2f8c5aef468d048.1657289332.git.karolina.drobnik@intel.com --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 9dc9dccf7b09..ecc990ec1b95 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -399,7 +399,8 @@ static void insert_breadcrumb(struct i915_request *rq) * the request as it may have completed and raised the interrupt as * we were attaching it into the lists. */ - irq_work_queue(&b->irq_work); + if (!b->irq_armed || __i915_request_is_complete(rq)) + irq_work_queue(&b->irq_work); } bool i915_request_enable_breadcrumb(struct i915_request *rq) -- cgit From a5e4a53818ad585416a214b894fdf568443d5293 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 12 Jul 2022 15:05:13 -0700 Subject: drm/i915: Correct ss -> steering calculation for pre-Xe_HP platforms Accidental use of a "SLICE" macro where a "SUBSLICE" macro was intended causes the group ID for steering to be calculated incorrectly on pre-Xe_HP platforms. Fixes: 9a92732f040a ("drm/i915/gt: Add general DSS steering iterator to intel_gt_mcr") Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220712220513.3451794-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index f8c64ab9d3ca..e79405a45312 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -515,7 +515,7 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, *group = dss / GEN_DSS_PER_GSLICE; *instance = dss % GEN_DSS_PER_GSLICE; } else { - *group = dss / GEN_MAX_HSW_SLICES; + *group = dss / GEN_MAX_SS_PER_HSW_SLICE; *instance = dss % GEN_MAX_SS_PER_HSW_SLICE; return; } -- cgit From a91d1a17cd341548fd9535e33c331a2756acdfae Mon Sep 17 00:00:00 2001 From: Akeem G Abodunrin Date: Wed, 13 Jul 2022 18:32:08 +0530 Subject: drm/i915: Add support for LMEM PCIe resizable bar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the local memory PICe resizable bar, so that local memory can be resized to the maximum size supported by the device, and mapped correctly to the PCIe memory bar. It is usual that GPU devices expose only 256MB BARs primarily to be compatible with 32-bit systems. So, those devices cannot claim larger memory BAR windows size due to the system BIOS limitation. 
With this change, it would be possible to reprogram the windows of the bridge directly above the requesting device on the same BAR type. v2:Moved code to gt/intel_region_lmem.c and used only single underscore for function names.(Jani) v3: Optimised code. Signed-off-by: Akeem G Abodunrin Signed-off-by: Michał Winiarski Cc: Stuart Summers Cc: Michael J Ruhl Cc: Prathap Kumar Valsan Cc: Jani Nikula Signed-off-by: Priyanka Dandamudi Reviewed-by: Matthew Auld Reviewed-by: Nirmoy Das Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220713130209.2573233-2-priyanka.dandamudi@intel.com --- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 75 +++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index fa7b86f83e7b..0cce4b2fcf60 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -15,6 +15,79 @@ #include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" +static void _release_bars(struct pci_dev *pdev) +{ + int resno; + + for (resno = PCI_STD_RESOURCES; resno < PCI_STD_RESOURCE_END; resno++) { + if (pci_resource_len(pdev, resno)) + pci_release_resource(pdev, resno); + } +} + +static void +_resize_bar(struct drm_i915_private *i915, int resno, resource_size_t size) +{ + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + int bar_size = pci_rebar_bytes_to_size(size); + int ret; + + _release_bars(pdev); + + ret = pci_resize_resource(pdev, resno, bar_size); + if (ret) { + drm_info(&i915->drm, "Failed to resize BAR%d to %dM (%pe)\n", + resno, 1 << bar_size, ERR_PTR(ret)); + return; + } + + drm_info(&i915->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size); +} + +#define LMEM_BAR_NUM 2 +static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t lmem_size) +{ + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct pci_bus *root = pdev->bus; + struct resource *root_res; + resource_size_t rebar_size; + u32 pci_cmd; + int i; + + rebar_size = roundup_pow_of_two(pci_resource_len(pdev, LMEM_BAR_NUM)); + + if (rebar_size != roundup_pow_of_two(lmem_size)) + rebar_size = lmem_size; + else + return; + + /* Find out if root bus contains 64bit memory addressing */ + while (root->parent) + root = root->parent; + + pci_bus_for_each_resource(root, root_res, i) { + if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && + root_res->start > 0x100000000ull) + break; + } + + /* pci_resize_resource will fail anyways */ + if (!root_res) { + drm_info(&i915->drm, "Can't resize LMEM BAR - platform support is missing\n"); + return; + } + + /* First disable PCI memory decoding references */ + pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); + pci_write_config_dword(pdev, PCI_COMMAND, + pci_cmd & ~PCI_COMMAND_MEMORY); + + _resize_bar(i915, LMEM_BAR_NUM, rebar_size); + + pci_assign_unassigned_bus_resources(pdev->bus); + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); +} + static int region_lmem_release(struct intel_memory_region *mem) { @@ -128,6 +201,8 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) lmem_size = intel_uncore_read64(&i915->uncore, GEN12_GSMBASE); } + i915_resize_lmem_bar(i915, lmem_size); + if (i915->params.lmem_size > 0) { lmem_size = min_t(resource_size_t, lmem_size, mul_u32_u32(i915->params.lmem_size, SZ_1M)); -- cgit From 17cd10a44a8962860ff4ba351b2a290e752dbbde Mon Sep 17 00:00:00 2001 From: Priyanka 
Dandamudi Date: Wed, 13 Jul 2022 18:32:09 +0530 Subject: drm/i915: Add lmem_bar_size modparam For testing purposes, support forcing the lmem_bar_size through a new modparam. In CI we only have a limited number of configurations for DG2, but we still need to be reasonably sure we get a usable device (also verifying we report the correct values for things like probed_cpu_visible_size etc) with all the potential lmem_bar sizes that we might expect see in the wild. v2: Update commit message and a minor modification.(Matt) v3: Optimised lmem bar size code and modified code to resize bar maximum upto lmem_size instead of maximum supported size.(Nirmoy) v4: Optimised lmem bar size code.(Nirmoy) Signed-off-by: Priyanka Dandamudi Cc: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220713130209.2573233-3-priyanka.dandamudi@intel.com --- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 34 ++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 0cce4b2fcf60..6e90032e12e9 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -51,15 +51,39 @@ static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t struct pci_bus *root = pdev->bus; struct resource *root_res; resource_size_t rebar_size; + resource_size_t current_size; u32 pci_cmd; int i; - rebar_size = roundup_pow_of_two(pci_resource_len(pdev, LMEM_BAR_NUM)); + current_size = roundup_pow_of_two(pci_resource_len(pdev, LMEM_BAR_NUM)); - if (rebar_size != roundup_pow_of_two(lmem_size)) - rebar_size = lmem_size; - else - return; + if (i915->params.lmem_bar_size) { + u32 bar_sizes; + + rebar_size = i915->params.lmem_bar_size * + (resource_size_t)SZ_1M; + bar_sizes = pci_rebar_get_possible_sizes(pdev, + LMEM_BAR_NUM); + + if (rebar_size == current_size) + return; + + if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) || + rebar_size >= roundup_pow_of_two(lmem_size)) { + rebar_size = lmem_size; + + drm_info(&i915->drm, + "Given bar size is not within supported size, setting it to default: %llu\n", + (u64)lmem_size >> 20); + } + } else { + rebar_size = current_size; + + if (rebar_size != roundup_pow_of_two(lmem_size)) + rebar_size = lmem_size; + else + return; + } /* Find out if root bus contains 64bit memory addressing */ while (root->parent) -- cgit
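
As a usage note for the new parameter (values here are illustrative, not from the patch): the parameter is given in MiB, which follows from the "* SZ_1M" conversion in the diff above, so booting with i915.lmem_bar_size=256 (or "modprobe i915 lmem_bar_size=256") asks for a 256 MiB LMEM BAR.

	/*
	 * Worked example, values assumed: lmem_bar_size=256
	 *
	 *   rebar_size = 256 * SZ_1M;                  // 0x10000000 bytes
	 *   pci_rebar_bytes_to_size(rebar_size) == 8;  // ReBAR encoding, 1 MiB == 0
	 *
	 * The request is honoured only if pci_rebar_get_possible_sizes() has
	 * BIT(8) set for the LMEM BAR and 256 MiB is below the rounded-up
	 * lmem_size; otherwise the code above falls back to resizing the BAR
	 * to the full lmem_size, as reported in the "setting it to default"
	 * message.
	 */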