From e50c951ea6ff17e7b1f2a5c118506b58b474ded3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Mar 2020 10:36:40 +0000 Subject: drm/i915/gt: Restrict gen7 w/a batch to Haswell The residual w/a batch is causing system instablity on Ivybridge and Baytrail under some workloads, so disable until resolved. Closes: https://gitlab.freedesktop.org/drm/intel/issues/1405 Fixes: 47f8253d2b89 ("drm/i915/gen7: Clear all EU/L3 residual contexts") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Prathap Kumar Valsan Cc: Akeem G Abodunrin Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Acked-by: Mika Kuoppala Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20200311103640.26572-1-chris@chris-wilson.co.uk (cherry picked from commit a62774782b994026ac3198bf115717d55d536166) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 1424582e4a9b..fdc3f10e12aa 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -2088,7 +2088,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); - if (IS_GEN(engine->i915, 7) && engine->class == RENDER_CLASS) { + if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) { err = gen7_ctx_switch_bb_init(engine); if (err) goto err_ring_unpin; -- cgit From 2e46a2a0b0149f951b63be1b5df6514676fed213 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Mar 2020 17:07:06 +0000 Subject: drm/i915: Use explicit flag to mark unreachable intel_context I need to keep the GEM context around a bit longer so adding an explicit flag for syncing execbuf with closed/abandonded contexts. v2: * Use already available context flags. (Chris) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200319170707.8262-1-chris@chris-wilson.co.uk (cherry picked from commit 207e4a71fb53e761be72daaeb78a49225bc31c69) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_context.c | 2 ++ drivers/gpu/drm/i915/gt/intel_context.h | 5 +++++ drivers/gpu/drm/i915/gt/intel_context_types.h | 9 +++++---- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 01474d3a558b..aea992e46c42 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -97,6 +97,8 @@ int __intel_context_do_pin(struct intel_context *ce) { int err; + GEM_BUG_ON(intel_context_is_closed(ce)); + if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { err = intel_context_alloc_state(ce); if (err) diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 18efad255124..07be021882cc 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -173,6 +173,11 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce) return test_bit(CONTEXT_BARRIER_BIT, &ce->flags); } +static inline bool intel_context_is_closed(const struct intel_context *ce) +{ + return test_bit(CONTEXT_CLOSED_BIT, &ce->flags); +} + static inline bool intel_context_use_semaphores(const struct intel_context *ce) { return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 0f3b68b95c56..07cb83a0d017 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -62,10 +62,11 @@ struct intel_context { #define CONTEXT_BARRIER_BIT 0 #define CONTEXT_ALLOC_BIT 1 #define CONTEXT_VALID_BIT 2 -#define CONTEXT_USE_SEMAPHORES 3 -#define CONTEXT_BANNED 4 -#define CONTEXT_FORCE_SINGLE_SUBMISSION 5 -#define CONTEXT_NOPREEMPT 6 +#define CONTEXT_CLOSED_BIT 3 +#define CONTEXT_USE_SEMAPHORES 4 +#define CONTEXT_BANNED 5 +#define CONTEXT_FORCE_SINGLE_SUBMISSION 6 +#define CONTEXT_NOPREEMPT 7 u32 *lrc_reg_state; u64 lrc_desc; -- cgit From a24c57d0b3ed9be01f2a4e1a3e807645b71ad648 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Mar 2020 17:07:07 +0000 Subject: drm/i915/gt: Cancel a hung context if already closed Use the restored ability to check if a context is closed to decide whether or not to immediately ban the context from further execution after a hang. Fixes: be90e344836a ("drm/i915/gt: Cancel banned contexts after GT reset") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200319170707.8262-2-chris@chris-wilson.co.uk (cherry picked from commit 8e37d699139128139c0468e005c2f0d6215b0c55) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_reset.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 8b170c1876b3..80db3c9d785e 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -88,6 +88,11 @@ static bool mark_guilty(struct i915_request *rq) bool banned; int i; + if (intel_context_is_closed(rq->context)) { + intel_context_set_banned(rq->context); + return true; + } + rcu_read_lock(); ctx = rcu_dereference(rq->context->gem_context); if (ctx && !kref_get_unless_zero(&ctx->ref)) -- cgit From 98479ada421a8fd2123b98efd398a6f1379307ab Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 22 Mar 2020 16:32:24 +0000 Subject: drm/i915/gt: Treat idling as a RPS downclock event If we park/unpark faster than we can respond to RPS events, we never will process a downclock event after expiring a waitboost, and thus we will forever restart the GPU at max clocks even if the workload switches and doesn't justify full power. Closes: https://gitlab.freedesktop.org/drm/intel/issues/1500 Fixes: 3e7abf814193 ("drm/i915: Extract GT render power state management") Signed-off-by: Chris Wilson Cc: Andi Shyti Cc: Lyude Paul Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20200322163225.28791-1-chris@chris-wilson.co.uk Cc: # v5.5+ (cherry picked from commit 21abf0bf168dffff1192e0f072af1dc74ae1ff0e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_rps.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 87f9638d2cbf..cfaf141bac4d 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -770,6 +770,19 @@ void intel_rps_park(struct intel_rps *rps) intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); rps_set(rps, rps->idle_freq, false); intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); + + /* + * Since we will try and restart from the previously requested + * frequency on unparking, treat this idle point as a downclock + * interrupt and reduce the frequency for resume. If we park/unpark + * more frequently than the rps worker can run, we will not respond + * to any EI and never see a change in frequency. + * + * (Note we accommodate Cherryview's limitation of only using an + * even bin by applying it to all.) + */ + rps->cur_freq = + max_t(int, round_down(rps->cur_freq - 1, 2), rps->min_freq); } void intel_rps_boost(struct i915_request *rq) -- cgit From c1ed2fb9d9c0e6a2245f013df76587ee3e78be46 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Mar 2020 13:42:32 +0000 Subject: drm/i915/gt: Select the deepest available parking mode for rc6 On Ivybridge, we can go lower than rc6 to rc6p. And this is required for Ivybridge to hit the same minimum power consumption as rc6 on other platforms, so make it so. v2: Update selftest to include all rc6 residency counters Note that Andi did mention that we should be converting the magic numbers into opaque magic macros, so if they ever get reused (unlikely given only Ivybridge used the extra modes) we'll need to pay back the technical debt. Closes: https://gitlab.freedesktop.org/drm/intel/issues/1518 Fixes: 730eaeb52426 ("drm/i915/gt: Manual rc6 entry upon parking") Testcase: igt/i915_pm_rc6_residency/rc6-idle Signed-off-by: Chris Wilson Cc: Andi Shyti Cc: Mika Kuoppala Cc: Imre Deak Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20200324134232.8773-1-chris@chris-wilson.co.uk (cherry picked from commit 13c5a577b342d80ea06b7300ce69420a2d0928ca) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_rc6.c | 10 +++++++++- drivers/gpu/drm/i915/gt/selftest_rc6.c | 23 +++++++++++++++++++---- 2 files changed, 28 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 66c07c32745c..3847ee44b181 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -603,6 +603,7 @@ void intel_rc6_unpark(struct intel_rc6 *rc6) void intel_rc6_park(struct intel_rc6 *rc6) { struct intel_uncore *uncore = rc6_to_uncore(rc6); + unsigned int target; if (!rc6->enabled) return; @@ -617,7 +618,14 @@ void intel_rc6_park(struct intel_rc6 *rc6) /* Turn off the HW timers and go directly to rc6 */ set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE); - set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT); + + if (HAS_RC6pp(rc6_to_i915(rc6))) + target = 0x6; /* deepest rc6 */ + else if (HAS_RC6p(rc6_to_i915(rc6))) + target = 0x5; /* deep rc6 */ + else + target = 0x4; /* normal rc6 */ + set(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT); } void intel_rc6_disable(struct intel_rc6 *rc6) diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 5f7e2dcf5686..95b165faeba7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -12,6 +12,21 @@ #include "selftests/i915_random.h" +static u64 rc6_residency(struct intel_rc6 *rc6) +{ + u64 result; + + /* XXX VLV_GT_MEDIA_RC6? */ + + result = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + if (HAS_RC6p(rc6_to_i915(rc6))) + result += intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6p); + if (HAS_RC6pp(rc6_to_i915(rc6))) + result += intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6pp); + + return result; +} + int live_rc6_manual(void *arg) { struct intel_gt *gt = arg; @@ -38,9 +53,9 @@ int live_rc6_manual(void *arg) __intel_rc6_disable(rc6); msleep(1); /* wakeup is not immediate, takes about 100us on icl */ - res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + res[0] = rc6_residency(rc6); msleep(250); - res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + res[1] = rc6_residency(rc6); if ((res[1] - res[0]) >> 10) { pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n", (res[1] - res[0]) >> 10); @@ -51,9 +66,9 @@ int live_rc6_manual(void *arg) /* Manually enter RC6 */ intel_rc6_park(rc6); - res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + res[0] = rc6_residency(rc6); msleep(100); - res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + res[1] = rc6_residency(rc6); if (res[1] == res[0]) { pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x, residency=%lld\n", -- cgit From a97b786bfac6929223cc9a01683effe84146e394 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Mar 2020 13:00:59 +0000 Subject: drm/i915/gt: Stage the transfer of the virtual breadcrumb We move the virtual breadcrumb from one physical engine to the next, if the next virtual request is scheduled on a new physical engine. Since the virtual context can only be in one signal queue, we need it to track the current physical engine for the new breadcrumbs. However, to move the list we need both breadcrumb locks -- and since we cannot take both at the same time (unless we are careful and always ensure consistent ordering) stage the movement of the signaler via the current virtual request. Closes: https://gitlab.freedesktop.org/drm/intel/issues/1510 Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200325130059.30600-1-chris@chris-wilson.co.uk (cherry picked from commit 6c81e21a4742385c00713137c6fdcade0412e93c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 112531b29f59..683014e7bc51 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1663,7 +1663,7 @@ static bool virtual_matches(const struct virtual_engine *ve, } static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, - struct intel_engine_cs *engine) + struct i915_request *rq) { struct intel_engine_cs *old = ve->siblings[0]; @@ -1671,9 +1671,19 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, spin_lock(&old->breadcrumbs.irq_lock); if (!list_empty(&ve->context.signal_link)) { - list_move_tail(&ve->context.signal_link, - &engine->breadcrumbs.signalers); - intel_engine_signal_breadcrumbs(engine); + list_del_init(&ve->context.signal_link); + + /* + * We cannot acquire the new engine->breadcrumbs.irq_lock + * (as we are holding a breadcrumbs.irq_lock already), + * so attach this request to the signaler on submission. + * The queued irq_work will occur when we finally drop + * the engine->active.lock after dequeue. + */ + set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags); + + /* Also transfer the pending irq_work for the old breadcrumb. */ + intel_engine_signal_breadcrumbs(rq->engine); } spin_unlock(&old->breadcrumbs.irq_lock); } @@ -2045,7 +2055,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) engine); if (!list_empty(&ve->context.signals)) - virtual_xfer_breadcrumbs(ve, engine); + virtual_xfer_breadcrumbs(ve, rq); /* * Move the bound engine to the top of the list -- cgit From 0b72a251bf92ca2378530fa1f9b35a71830ab51c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 31 Mar 2020 16:23:48 +0100 Subject: drm/i915/gt: Fill all the unused space in the GGTT When we allocate space in the GGTT we may have to allocate a larger region than will be populated by the object to accommodate fencing. Make sure that this space beyond the end of the buffer points safely into scratch space, in case the HW tries to access it anyway (e.g. fenced access to the last tile row). v2: Preemptively / conservatively guard gen6 ggtt as well. Reported-by: Imre Deak References: https://gitlab.freedesktop.org/drm/intel/-/issues/1554 Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Imre Deak Cc: stable@vger.kernel.org Reviewed-by: Matthew Auld Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20200331152348.26946-1-chris@chris-wilson.co.uk (cherry picked from commit 4d6c18590870fbac1e65dde5e01e621c8e0ca096) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 37 ++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index aed498a0d032..4c5a209cb669 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -191,10 +191,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, enum i915_cache_level level, u32 flags) { - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - struct sgt_iter sgt_iter; - gen8_pte_t __iomem *gtt_entries; const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0); + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t __iomem *gte; + gen8_pte_t __iomem *end; + struct sgt_iter iter; dma_addr_t addr; /* @@ -202,10 +203,17 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, * not to allow the user to override access to a read only page. */ - gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; - gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE; - for_each_sgt_daddr(addr, sgt_iter, vma->pages) - gen8_set_pte(gtt_entries++, pte_encode | addr); + gte = (gen8_pte_t __iomem *)ggtt->gsm; + gte += vma->node.start / I915_GTT_PAGE_SIZE; + end = gte + vma->node.size / I915_GTT_PAGE_SIZE; + + for_each_sgt_daddr(addr, iter, vma->pages) + gen8_set_pte(gte++, pte_encode | addr); + GEM_BUG_ON(gte > end); + + /* Fill the allocated but "unused" space beyond the end of the buffer */ + while (gte < end) + gen8_set_pte(gte++, vm->scratch[0].encode); /* * We want to flush the TLBs only after we're certain all the PTE @@ -241,13 +249,22 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; - unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE; + gen6_pte_t __iomem *gte; + gen6_pte_t __iomem *end; struct sgt_iter iter; dma_addr_t addr; + gte = (gen6_pte_t __iomem *)ggtt->gsm; + gte += vma->node.start / I915_GTT_PAGE_SIZE; + end = gte + vma->node.size / I915_GTT_PAGE_SIZE; + for_each_sgt_daddr(addr, iter, vma->pages) - iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); + iowrite32(vm->pte_encode(addr, level, flags), gte++); + GEM_BUG_ON(gte > end); + + /* Fill the allocated but "unused" space beyond the end of the buffer */ + while (gte < end) + iowrite32(vm->scratch[0].encode, gte++); /* * We want to flush the TLBs only after we're certain all the PTE -- cgit