From 4f1cb5875ca0e9386ff2f6545cd386d47ab8441b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Apr 2019 08:56:27 +0100 Subject: drm/i915: Verify workarounds immediately after application Immediately after writing the workaround, verify that it stuck in the register. References: https://bugs.freedesktop.org/show_bug.cgi?id=108954 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190417075657.19456-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_workarounds.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index ccaf63679435..ea9292ee755a 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -913,6 +913,20 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) return fw; } +static bool +wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) +{ + if ((cur ^ wa->val) & wa->mask) { + DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", + name, from, i915_mmio_reg_offset(wa->reg), cur, + cur & wa->mask, wa->val, wa->mask); + + return false; + } + + return true; +} + static void wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) { @@ -931,6 +945,10 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + wa_verify(wa, + intel_uncore_read_fw(uncore, wa->reg), + wal->name, "application"); } intel_uncore_forcewake_put__locked(uncore, fw); @@ -942,20 +960,6 @@ void intel_gt_apply_workarounds(struct drm_i915_private *i915) wa_list_apply(&i915->uncore, &i915->gt_wa_list); } -static bool -wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) -{ - if ((cur ^ wa->val) & wa->mask) { - DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", - name, from, i915_mmio_reg_offset(wa->reg), cur, - cur & wa->mask, wa->val, wa->mask); - - return false; - } - - return true; -} - static bool wa_list_verify(struct intel_uncore *uncore, const struct i915_wa_list *wal, const char *from) -- cgit From 254e11864a36a1d3b362bf5727e89382f1540015 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Apr 2019 08:56:28 +0100 Subject: drm/i915: Verify the engine workarounds stick on application Read the engine workarounds back using the GPU after loading the initial context state to verify that we are setting them correctly, and bail if it fails. 
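(Illustrative aside, plain C, not the i915 code: the masked comparison that wa_verify() and the new engine readback both rely on. A workaround counts as "lost" if any bit covered by the mask differs from the expected value; the register values below are made up.)

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool wa_still_applied(uint32_t cur, uint32_t expected, uint32_t mask)
{
	/* Only bits selected by the mask are significant. */
	return ((cur ^ expected) & mask) == 0;
}

int main(void)
{
	const uint32_t mask = 0x00000030, expected = 0x00000030;

	printf("%d\n", wa_still_applied(0x00000131, expected, mask)); /* 1: masked bits held */
	printf("%d\n", wa_still_applied(0x00000101, expected, mask)); /* 0: workaround bits lost */
	return 0;
}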
v2: Break out the verification into its own loop Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190417075657.19456-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 21 ++++ drivers/gpu/drm/i915/intel_workarounds.c | 120 +++++++++++++++++++++ drivers/gpu/drm/i915/intel_workarounds.h | 2 + drivers/gpu/drm/i915/selftests/intel_workarounds.c | 53 ++------- 4 files changed, 149 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0a818a60ad31..a5412323fee1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4842,6 +4842,23 @@ static void i915_gem_fini_scratch(struct drm_i915_private *i915) i915_vma_unpin_and_release(&i915->gt.scratch, 0); } +static int intel_engines_verify_workarounds(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return 0; + + for_each_engine(engine, i915, id) { + if (intel_engine_verify_workarounds(engine, "load")) + err = -EIO; + } + + return err; +} + int i915_gem_init(struct drm_i915_private *dev_priv) { int ret; @@ -4927,6 +4944,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv) */ intel_init_clock_gating(dev_priv); + ret = intel_engines_verify_workarounds(dev_priv); + if (ret) + goto err_init_hw; + ret = __intel_engines_record_defaults(dev_priv); if (ret) goto err_init_hw; diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index ea9292ee755a..89e2c603e34b 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -1259,6 +1259,126 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine) wa_list_apply(engine->uncore, &engine->wa_list); } +static struct i915_vma * +create_scratch(struct i915_address_space *vm, int count) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int size; + int err; + + size = round_up(count * sizeof(u32), PAGE_SIZE); + obj = i915_gem_object_create_internal(vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, + i915_vma_is_ggtt(vma) ? 
PIN_GLOBAL : PIN_USER); + if (err) + goto err_obj; + + return vma; + +err_obj: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int +wa_list_srm(struct i915_request *rq, + const struct i915_wa_list *wal, + struct i915_vma *vma) +{ + const struct i915_wa *wa; + unsigned int i; + u32 srm, *cs; + + srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + if (INTEL_GEN(rq->i915) >= 8) + srm++; + + cs = intel_ring_begin(rq, 4 * wal->count); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + *cs++ = srm; + *cs++ = i915_mmio_reg_offset(wa->reg); + *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; + *cs++ = 0; + } + intel_ring_advance(rq, cs); + + return 0; +} + +static int engine_wa_list_verify(struct intel_engine_cs *engine, + const struct i915_wa_list * const wal, + const char *from) +{ + const struct i915_wa *wa; + struct i915_request *rq; + struct i915_vma *vma; + unsigned int i; + u32 *results; + int err; + + if (!wal->count) + return 0; + + vma = create_scratch(&engine->i915->ggtt.vm, wal->count); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + rq = i915_request_alloc(engine, engine->kernel_context->gem_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_vma; + } + + err = wa_list_srm(rq, wal, vma); + if (err) + goto err_vma; + + i915_request_add(rq); + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { + err = -ETIME; + goto err_vma; + } + + results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(results)) { + err = PTR_ERR(results); + goto err_vma; + } + + err = 0; + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + if (!wa_verify(wa, results[i], wal->name, from)) + err = -ENXIO; + + i915_gem_object_unpin_map(vma->obj); + +err_vma: + i915_vma_unpin(vma); + i915_vma_put(vma); + return err; +} + +int intel_engine_verify_workarounds(struct intel_engine_cs *engine, + const char *from) +{ + return engine_wa_list_verify(engine, &engine->wa_list, from); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/intel_workarounds.c" #endif diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h index 34eee5ec511e..fdf7ebb90f28 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.h +++ b/drivers/gpu/drm/i915/intel_workarounds.h @@ -30,5 +30,7 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine); void intel_engine_init_workarounds(struct intel_engine_cs *engine); void intel_engine_apply_workarounds(struct intel_engine_cs *engine); +int intel_engine_verify_workarounds(struct intel_engine_cs *engine, + const char *from); #endif diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index 567b6f8dae86..a363748a7a4f 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -340,49 +340,6 @@ out: return err; } -static struct i915_vma *create_scratch(struct i915_gem_context *ctx) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - void *ptr; - int err; - - obj = i915_gem_object_create_internal(ctx->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); - - ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(ptr)) { - err = PTR_ERR(ptr); - goto err_obj; - } - memset(ptr, 0xc5, PAGE_SIZE); - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); - if (IS_ERR(vma)) 
{ - err = PTR_ERR(vma); - goto err_obj; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err_obj; - - err = i915_gem_object_set_to_cpu_domain(obj, false); - if (err) - goto err_obj; - - return vma; - -err_obj: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - static struct i915_vma *create_batch(struct i915_gem_context *ctx) { struct drm_i915_gem_object *obj; @@ -475,7 +432,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, int err = 0, i, v; u32 *cs, *results; - scratch = create_scratch(ctx); + scratch = create_scratch(&ctx->ppgtt->vm, 2 * ARRAY_SIZE(values) + 1); if (IS_ERR(scratch)) return PTR_ERR(scratch); @@ -752,9 +709,11 @@ static bool verify_gt_engine_wa(struct drm_i915_private *i915, ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str); - for_each_engine(engine, i915, id) - ok &= wa_list_verify(engine->uncore, - &lists->engine[id].wa_list, str); + for_each_engine(engine, i915, id) { + ok &= engine_wa_list_verify(engine, + &lists->engine[id].wa_list, + str) == 0; + } return ok; } -- cgit From 769f0dab622c58e3158fc55d761b62a61e7fa2e5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Apr 2019 08:56:29 +0100 Subject: drm/i915: Make workaround verification *optional* Sometimes the HW doesn't even play fair, and completely forgets about register writes. Skip verifying known troublemakers. References: https://bugs.freedesktop.org/show_bug.cgi?id=108954 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190417075657.19456-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_workarounds.c | 40 +++++++++++++++++++------- drivers/gpu/drm/i915/intel_workarounds_types.h | 7 +++-- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 89e2c603e34b..b3cbed1ee1c9 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -122,6 +122,7 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) wal->wa_count++; wa_->val |= wa->val; wa_->mask |= wa->mask; + wa_->read |= wa->read; return; } } @@ -146,9 +147,10 @@ wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) { struct i915_wa wa = { - .reg = reg, + .reg = reg, .mask = mask, - .val = val + .val = val, + .read = mask, }; _wa_add(wal, &wa); @@ -172,6 +174,19 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) wa_write_masked_or(wal, reg, val, val); } +static void +ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) +{ + struct i915_wa wa = { + .reg = reg, + .mask = mask, + .val = val, + /* Bonkers HW, skip verifying */ + }; + + _wa_add(wal, &wa); +} + #define WA_SET_BIT_MASKED(addr, mask) \ wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) @@ -916,10 +931,11 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) static bool wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) { - if ((cur ^ wa->val) & wa->mask) { + if ((cur ^ wa->val) & wa->read) { DRM_ERROR("%s workaround lost on %s! 
(%x=%x/%x, expected %x, mask=%x)\n", - name, from, i915_mmio_reg_offset(wa->reg), cur, - cur & wa->mask, wa->val, wa->mask); + name, from, i915_mmio_reg_offset(wa->reg), + cur, cur & wa->read, + wa->val, wa->mask); return false; } @@ -1122,9 +1138,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); /* WaPipelineFlushCoherentLines:icl */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN8_LQSC_FLUSH_COHERENT_LINES); + ignore_wa_write_or(wal, + GEN8_L3SQCREG4, + GEN8_LQSC_FLUSH_COHERENT_LINES, + GEN8_LQSC_FLUSH_COHERENT_LINES); /* * Wa_1405543622:icl @@ -1151,9 +1168,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1405733216:icl * Formerly known as WaDisableCleanEvicts */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN11_LQSC_CLEAN_EVICT_DISABLE); + ignore_wa_write_or(wal, + GEN8_L3SQCREG4, + GEN11_LQSC_CLEAN_EVICT_DISABLE, + GEN11_LQSC_CLEAN_EVICT_DISABLE); /* WaForwardProgressSoftReset:icl */ wa_write_or(wal, diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/intel_workarounds_types.h index 30918da180ff..42ac1fb99572 100644 --- a/drivers/gpu/drm/i915/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/intel_workarounds_types.h @@ -12,9 +12,10 @@ #include "i915_reg.h" struct i915_wa { - i915_reg_t reg; - u32 mask; - u32 val; + i915_reg_t reg; + u32 mask; + u32 val; + u32 read; }; struct i915_wa_list { -- cgit From 99534023490686ce4453c45e5cb813535b9bff95 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Apr 2019 14:25:07 +0100 Subject: drm/i915: Avoid use-after-free in reporting create.size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have to avoid chasing after a userspace race! <3>[ 473.114328] BUG: KASAN: use-after-free in i915_gem_create+0x1d2/0x1f0 [i915] <3>[ 473.114389] Read of size 8 at addr ffff88815bf1d840 by task gem_flink_race/1541 <4>[ 473.114464] CPU: 1 PID: 1541 Comm: gem_flink_race Tainted: G U 5.1.0-rc4-g7d07e025e786-kasan_88+ #1 <4>[ 473.114469] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.10 09/29/2016 <4>[ 473.114474] Call Trace: <4>[ 473.114488] dump_stack+0x7c/0xbb <4>[ 473.114612] ? i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.114621] print_address_description+0x65/0x270 <4>[ 473.114728] ? i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.114839] ? i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.114848] kasan_report+0x149/0x18d <4>[ 473.114962] ? i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.115069] i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.115176] ? i915_gem_object_create.part.28+0x4b0/0x4b0 [i915] <4>[ 473.115289] ? i915_gem_dumb_create+0x1a0/0x1a0 [i915] <4>[ 473.115297] drm_ioctl_kernel+0x192/0x260 <4>[ 473.115306] ? drm_ioctl_permit+0x280/0x280 <4>[ 473.115326] drm_ioctl+0x67c/0x960 <4>[ 473.115438] ? i915_gem_dumb_create+0x1a0/0x1a0 [i915] <4>[ 473.115448] ? drm_getstats+0x20/0x20 <4>[ 473.115459] ? __lock_acquire+0xa66/0x3fe0 <4>[ 473.115474] ? _raw_spin_unlock_irqrestore+0x39/0x60 <4>[ 473.115485] ? debug_object_active_state+0x2ea/0x4e0 <4>[ 473.115496] ? debug_show_all_locks+0x2d0/0x2d0 <4>[ 473.115513] do_vfs_ioctl+0x18d/0xfa0 <4>[ 473.115522] ? check_flags.part.27+0x440/0x440 <4>[ 473.115532] ? ioctl_preallocate+0x1a0/0x1a0 <4>[ 473.115547] ? __fget+0x2ac/0x410 <4>[ 473.115561] ? __ia32_sys_dup3+0xb0/0xb0 <4>[ 473.115569] ? rwlock_bug.part.0+0x90/0x90 <4>[ 473.115590] ksys_ioctl+0x35/0x70 <4>[ 473.115597] ? 
lockdep_hardirqs_off+0x1cb/0x2b0 <4>[ 473.115608] __x64_sys_ioctl+0x6a/0xb0 <4>[ 473.115614] ? lockdep_hardirqs_on+0x342/0x590 <4>[ 473.115623] do_syscall_64+0x97/0x400 <4>[ 473.115633] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 473.115641] RIP: 0033:0x7fce590d55d7 <4>[ 473.115649] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48 <4>[ 473.115655] RSP: 002b:00007fce4d525ba8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 <4>[ 473.115662] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fce590d55d7 <4>[ 473.115667] RDX: 00007fce4d525c10 RSI: 00000000c010645b RDI: 0000000000000007 <4>[ 473.115672] RBP: 00007fce4d525c10 R08: 00007fce4d526700 R09: 00007fce4d526700 <4>[ 473.115677] R10: 0000000000000054 R11: 0000000000000246 R12: 00000000c010645b <4>[ 473.115682] R13: 0000000000000007 R14: 0000000000000000 R15: 00007ffe0e4a7450 <3>[ 473.115731] Allocated by task 1541: <4>[ 473.115766] kmem_cache_alloc+0xce/0x290 <4>[ 473.115895] i915_gem_object_create.part.28+0x1c/0x4b0 [i915] <4>[ 473.116000] i915_gem_create+0xe3/0x1f0 [i915] <4>[ 473.116008] drm_ioctl_kernel+0x192/0x260 <4>[ 473.116013] drm_ioctl+0x67c/0x960 <4>[ 473.116020] do_vfs_ioctl+0x18d/0xfa0 <4>[ 473.116026] ksys_ioctl+0x35/0x70 <4>[ 473.116032] __x64_sys_ioctl+0x6a/0xb0 <4>[ 473.116038] do_syscall_64+0x97/0x400 <4>[ 473.116044] entry_SYSCALL_64_after_hwframe+0x49/0xbe <3>[ 473.116071] Freed by task 1542: <4>[ 473.116101] kmem_cache_free+0xb7/0x2f0 <4>[ 473.116205] __i915_gem_free_objects+0x7d4/0xe10 [i915] <4>[ 473.116311] i915_gem_create_ioctl+0xaa/0xd0 [i915] <4>[ 473.116318] drm_ioctl_kernel+0x192/0x260 <4>[ 473.116323] drm_ioctl+0x67c/0x960 <4>[ 473.116330] do_vfs_ioctl+0x18d/0xfa0 <4>[ 473.116335] ksys_ioctl+0x35/0x70 <4>[ 473.116341] __x64_sys_ioctl+0x6a/0xb0 <4>[ 473.116347] do_syscall_64+0x97/0x400 <4>[ 473.116354] entry_SYSCALL_64_after_hwframe+0x49/0xbe Testcase: igt/gem_flink_race/flink_close Fixes: e163484afa8d ("drm/i915: Update size upon return from GEM_CREATE") Signed-off-by: Chris Wilson Cc: Michał Winiarski Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190417132507.27133-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a5412323fee1..e5462639de0b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -647,7 +647,7 @@ i915_gem_create(struct drm_file *file, return ret; *handle_p = handle; - *size_p = obj->base.size; + *size_p = size; return 0; } -- cgit From dfe2c8ed23d7524dd363e1941039da63e3982e98 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 Apr 2019 14:27:19 +0100 Subject: drm/i915: Stop overwriting RING_IMR in rcs resume We store the engine->imr mask and set up the RING_IMR register on restarting the engine. We do not then want to overwrite it with an incomplete mask later as we may then lose interrupts! 
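(Illustrative aside, plain C with placeholder bit names, not the i915 code: why rewriting an interrupt mask register with an incomplete value loses interrupts. IMR convention here: a set bit masks that interrupt off.)

#include <stdint.h>
#include <stdio.h>

#define IRQ_USER     (1u << 0)
#define IRQ_PAGEFLIP (1u << 1)

int main(void)
{
	const uint32_t irq_keep_mask = IRQ_USER | IRQ_PAGEFLIP;
	uint32_t ring_imr = ~irq_keep_mask; /* programmed when the engine restarts */

	printf("pageflip masked after restart: %u\n", !!(ring_imr & IRQ_PAGEFLIP)); /* 0 */

	ring_imr = ~IRQ_USER; /* a later write that only considers one interrupt source... */

	printf("pageflip masked after rewrite: %u\n", !!(ring_imr & IRQ_PAGEFLIP)); /* 1: interrupt lost */
	return 0;
}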
Reported-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190418132720.3716-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 029fd8ec1857..00bd9eeb053d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -873,9 +873,6 @@ static int init_render_ring(struct intel_engine_cs *engine) if (IS_GEN_RANGE(dev_priv, 6, 7)) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - if (INTEL_GEN(dev_priv) >= 6) - ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); - return 0; } -- cgit From 26ddc068de47e2a7cbbd06c915dca7a0dc22c499 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 Apr 2019 14:27:20 +0100 Subject: drm/i915: Setup the RCS ring prior to execution We need to set the various ring registers prior to restarting the engine, or else we may restart it after reset/resume in an ill-defined state. Reported-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190418132720.3716-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 00bd9eeb053d..3844581f622c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -831,9 +831,6 @@ static int intel_rcs_ctx_init(struct i915_request *rq) static int init_render_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - int ret = init_ring_common(engine); - if (ret) - return ret; /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ if (IS_GEN_RANGE(dev_priv, 4, 6)) @@ -873,7 +870,7 @@ static int init_render_ring(struct intel_engine_cs *engine) if (IS_GEN_RANGE(dev_priv, 6, 7)) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - return 0; + return init_ring_common(engine); } static void cancel_requests(struct intel_engine_cs *engine) -- cgit From 844e33135d3a17686e167af2b6e653a4721c26e5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 Apr 2019 21:53:58 +0100 Subject: drm/i915: Remove unwarranted clamping for hsw/bdw We always start off at an "efficient frequency" and can let the system autotune from there, eliminating the need to clamp the available range. 
Signed-off-by: Chris Wilson Reviewed-by: Sagar Arun Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20190418205358.11450-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 44be676fabd6..87f6fc6d5502 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8528,18 +8528,9 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) gen6_init_rps_frequencies(dev_priv); /* Derive initial user preferences/limits from the hardware limits */ - rps->idle_freq = rps->min_freq; - rps->cur_freq = rps->idle_freq; - rps->max_freq_softlimit = rps->max_freq; rps->min_freq_softlimit = rps->min_freq; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - rps->min_freq_softlimit = - max_t(int, - rps->efficient_freq, - intel_freq_opcode(dev_priv, 450)); - /* After setting max-softlimit, find the overclock max freq */ if (IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { @@ -8556,6 +8547,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) /* Finally allow us to boost to max by default */ rps->boost_freq = rps->max_freq; + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; mutex_unlock(&dev_priv->pcu_lock); } -- cgit From d69990e0c399e4f7f9b50505d3285e5de991148a Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Fri, 5 Apr 2019 15:02:34 +0200 Subject: drm/i915: Use drm_dev_unplug() The driver does not currently support unbinding from a device which is in use. Since open file descriptors may still be pointing into kernel memory where the device structures used to be, entirely correct kernel panics protect the driver from being unbound as we should not be unbinding it before those dangling pointers have been made safe. According to the documentation found inside drivers/gpu/drm/drm_drv.c, drm_dev_unplug() should be used instead of drm_dev_unregister() in order to make a device inaccessible to users as soon as it is unpluged. Follow that advice to make those possibly dangling pointers safe, protected by DRM layer from a user who is otherwise left pointing into possibly reused kernel memory after the driver has been unbound from the device. Once done, also cancel inflight operations immediately by calling i915_gem_set_wedged(). Signed-off-by: Janusz Krzysztofik Reviewed-by: Chris Wilson Reviewed-by: Daniel Vetter Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190405130235.7707-2-janusz.krzysztofik@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1ad88e6d7c04..5e2ae2300454 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1760,7 +1760,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) i915_pmu_unregister(dev_priv); i915_teardown_sysfs(dev_priv); - drm_dev_unregister(&dev_priv->drm); + drm_dev_unplug(&dev_priv->drm); i915_gem_shrinker_unregister(dev_priv); } -- cgit From 91cbdb83d3aee84b6aaaa3fc3b4a6084a35e19c1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Apr 2019 14:48:36 +0100 Subject: drm/i915: Track HAS_RPS alongside HAS_RC6 in the device info For consistency (and elegance!), add intel_device_info.has_rps. 
The immediate boon is that RPS support is now emitted along the other capabilities in the debug log and after errors. Signed-off-by: Chris Wilson Reviewed-by: Sagar Arun Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20190419134836.5626-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 5 +++++ drivers/gpu/drm/i915/intel_device_info.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 7 +++++-- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 066fd2a12851..71612e7fc8bc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2585,6 +2585,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_RC6p(dev_priv) (INTEL_INFO(dev_priv)->has_rc6p) #define HAS_RC6pp(dev_priv) (false) /* HW was never validated */ +#define HAS_RPS(dev_priv) (INTEL_INFO(dev_priv)->has_rps) + #define HAS_CSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_csr) #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index f893c2cbce15..ffa2ee70a03d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -370,6 +370,7 @@ static const struct intel_device_info intel_ironlake_m_info = { .has_llc = 1, \ .has_rc6 = 1, \ .has_rc6p = 1, \ + .has_rps = true, \ .ppgtt_type = INTEL_PPGTT_ALIASING, \ .ppgtt_size = 31, \ I9XX_PIPE_OFFSETS, \ @@ -417,6 +418,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { .has_llc = 1, \ .has_rc6 = 1, \ .has_rc6p = 1, \ + .has_rps = true, \ .ppgtt_type = INTEL_PPGTT_FULL, \ .ppgtt_size = 31, \ IVB_PIPE_OFFSETS, \ @@ -470,6 +472,7 @@ static const struct intel_device_info intel_valleyview_info = { .num_pipes = 2, .has_runtime_pm = 1, .has_rc6 = 1, + .has_rps = true, .display.has_gmch = 1, .display.has_hotplug = 1, .ppgtt_type = INTEL_PPGTT_FULL, @@ -565,6 +568,7 @@ static const struct intel_device_info intel_cherryview_info = { .has_64bit_reloc = 1, .has_runtime_pm = 1, .has_rc6 = 1, + .has_rps = true, .has_logical_ring_contexts = 1, .display.has_gmch = 1, .ppgtt_type = INTEL_PPGTT_FULL, @@ -640,6 +644,7 @@ static const struct intel_device_info intel_skylake_gt4_info = { .has_runtime_pm = 1, \ .display.has_csr = 1, \ .has_rc6 = 1, \ + .has_rps = true, \ .display.has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ .has_logical_ring_preemption = 1, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 0e579f158016..7a2f14eff699 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -118,6 +118,7 @@ enum intel_ppgtt_type { func(has_pooled_eu); \ func(has_rc6); \ func(has_rc6p); \ + func(has_rps); \ func(has_runtime_pm); \ func(has_snoop); \ func(has_coherent_ggtt); \ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 87f6fc6d5502..7aa9a8c12b54 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7013,8 +7013,10 @@ static bool sanitize_rc6(struct drm_i915_private *i915) struct intel_device_info *info = mkwrite_device_info(i915); /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(i915)) + if (intel_vgpu_active(i915)) { info->has_rc6 = 0; + info->has_rps = false; + } if (info->has_rc6 && IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) { @@ -8716,7 +8718,8 @@ void 
intel_enable_gt_powersave(struct drm_i915_private *dev_priv) if (HAS_RC6(dev_priv)) intel_enable_rc6(dev_priv); - intel_enable_rps(dev_priv); + if (HAS_RPS(dev_priv)) + intel_enable_rps(dev_priv); if (HAS_LLC(dev_priv)) intel_enable_llc_pstate(dev_priv); -- cgit From 7ce99d24ed7265b8f83e0213252aa4f65755f872 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Apr 2019 19:26:25 +0100 Subject: drm/i915: Expose the busyspin durations for i915_wait_request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An interesting discussion regarding "hybrid interrupt polling" for NVMe came to the conclusion that the ideal busyspin before sleeping was half of the expected request latency (and better if it was already halfway through that request). This suggested that we too should look again at our tradeoff between spinning and waiting. Currently, our spin simply tries to hide the cost of enabling the interrupt, which is good to avoid penalising nop requests (i.e. test throughput) and not much else. Studying real world workloads suggests that a spin of upto 500us can dramatically boost performance, but the suggestion is that this is not from avoiding interrupt latency per-se, but from secondary effects of sleeping such as allowing the CPU reduce cstate and context switch away. In a truly hybrid interrupt polling scheme, we would aim to sleep until just before the request completed and then wake up in advance of the interrupt and do a quick poll to handle completion. This is tricky for ourselves at the moment as we are not recording request times, and since we allow preemption, our requests are not on as a nicely ordered timeline as IO. However, the idea is interesting, for it will certainly help us decide when busyspinning is worthwhile. v2: Expose the spin setting via Kconfig options for easier adjustment and testing. v3: Don't get caught sneaking in a change to the busyspin parameters. v4: Explain more about the "hybrid interrupt polling" scheme that we want to migrate towards. 
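(Self-contained sketch of the spin-before-sleep pattern described above, in plain POSIX C rather than the i915 implementation; SPIN_US stands in for CONFIG_DRM_I915_SPIN_REQUEST and the request type is invented for the example.)

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <time.h>

#define SPIN_US 5ull /* plays the role of CONFIG_DRM_I915_SPIN_REQUEST */

struct request {
	atomic_bool done;
	pthread_mutex_t lock;
	pthread_cond_t cond;
};

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

void wait_request(struct request *rq)
{
	const uint64_t deadline = now_ns() + SPIN_US * 1000;

	/* Optimistic spin: if the request completes within roughly the time
	 * it takes to arm a wakeup, we never pay for the sleep at all. */
	while (now_ns() < deadline)
		if (atomic_load_explicit(&rq->done, memory_order_acquire))
			return;

	/* Otherwise fall back to sleeping until the producer signals us. */
	pthread_mutex_lock(&rq->lock);
	while (!atomic_load_explicit(&rq->done, memory_order_acquire))
		pthread_cond_wait(&rq->cond, &rq->lock);
	pthread_mutex_unlock(&rq->lock);
}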
Suggested-by: Sagar Kamble References: http://events.linuxfoundation.org/sites/events/files/slides/lemoal-nvme-polling-vault-2017-final_0.pdf Signed-off-by: Chris Wilson Cc: Sagar Kamble Cc: Eero Tamminen Cc: Tvrtko Ursulin Cc: Ben Widawsky Cc: Joonas Lahtinen Cc: Michał Winiarski Reviewed-by: Sagar Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20190419182625.11186-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig | 6 ++++++ drivers/gpu/drm/i915/Kconfig.profile | 13 +++++++++++++ drivers/gpu/drm/i915/i915_request.c | 27 +++++++++++++++++++++++++-- 3 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/i915/Kconfig.profile diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 148be8e1a090..f0556310b851 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -133,3 +133,9 @@ depends on DRM_I915 depends on EXPERT source "drivers/gpu/drm/i915/Kconfig.debug" endmenu + +menu "drm/i915 Profile Guided Optimisation" + visible if EXPERT + depends on DRM_I915 + source "drivers/gpu/drm/i915/Kconfig.profile" +endmenu diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile new file mode 100644 index 000000000000..0e5db98da8f3 --- /dev/null +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -0,0 +1,13 @@ +config DRM_I915_SPIN_REQUEST + int + default 5 # microseconds + help + Before sleeping waiting for a request (GPU operation) to complete, + we may spend some time polling for its completion. As the IRQ may + take a non-negligible time to setup, we do a short spin first to + check if the request will complete in the time it would have taken + us to enable the interrupt. + + May be 0 to disable the initial spin. In practice, we estimate + the cost of enabling the interrupt (if currently disabled) to be + a few microseconds. diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index b836721d3b13..b1f00b59bb95 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1340,8 +1340,31 @@ long i915_request_wait(struct i915_request *rq, trace_i915_request_wait_begin(rq, flags); - /* Optimistic short spin before touching IRQs */ - if (__i915_spin_request(rq, state, 5)) + /* + * Optimistic spin before touching IRQs. + * + * We may use a rather large value here to offset the penalty of + * switching away from the active task. Frequently, the client will + * wait upon an old swapbuffer to throttle itself to remain within a + * frame of the gpu. If the client is running in lockstep with the gpu, + * then it should not be waiting long at all, and a sleep now will incur + * extra scheduler latency in producing the next frame. To try to + * avoid adding the cost of enabling/disabling the interrupt to the + * short wait, we first spin to see if the request would have completed + * in the time taken to setup the interrupt. + * + * We need upto 5us to enable the irq, and upto 20us to hide the + * scheduler latency of a context switch, ignoring the secondary + * impacts from a context switch such as cache eviction. + * + * The scheme used for low-latency IO is called "hybrid interrupt + * polling". The suggestion there is to sleep until just before you + * expect to be woken by the device interrupt and then poll for its + * completion. That requires having a good predictor for the request + * duration, which we currently lack. 
+ */ + if (CONFIG_DRM_I915_SPIN_REQUEST && + __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) goto out; /* -- cgit From b972fffa114b18a120a7bbde105d69a080d24970 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 17 Apr 2019 13:25:24 +0200 Subject: drm/i915: remove DRM_AUTH from IOCTLs which also have DRM_RENDER_ALLOW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is to work around problems with libva and vainfo. Signed-off-by: Christian König Reviewed-by: Chris Wilson Reviewed-by: Daniel Vetter Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190417112525.16848-1-christian.koenig@amd.com --- drivers/gpu/drm/i915/i915_drv.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5e2ae2300454..6354c68c94b3 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -3098,7 +3098,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_BATCHBUFFER, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_IRQ_EMIT, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_IRQ_WAIT, drm_noop, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_SETPARAM, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_ALLOC, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_FREE, drm_noop, DRM_AUTH), @@ -3111,13 +3111,13 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer_ioctl, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), - DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_SET_CACHING, i915_gem_set_caching_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_GET_CACHING, i915_gem_get_caching_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_RENDER_ALLOW), @@ -3136,7 +3136,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER), - DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE_EXT, i915_gem_context_create_ioctl, 
DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_RENDER_ALLOW), -- cgit From 267e80ee6a341bc694406ef7c4f30fa2721610b7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Apr 2019 21:12:07 +0100 Subject: drm/i915/gtt: Skip clearing the GGTT under gen6+ full-ppgtt If we know that the user cannot access the GGTT, by virtue of having a segregated memory area, we can skip clearing the unused entries as they cannot be accessed. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190419201207.5477-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8f460cc4cc1f..10558bc8bf90 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3280,7 +3280,9 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) size = gen6_get_total_gtt_size(snb_gmch_ctl); ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; - ggtt->vm.clear_range = gen6_ggtt_clear_range; + ggtt->vm.clear_range = nop_clear_range; + if (!HAS_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) + ggtt->vm.clear_range = gen6_ggtt_clear_range; ggtt->vm.insert_page = gen6_ggtt_insert_page; ggtt->vm.insert_entries = gen6_ggtt_insert_entries; ggtt->vm.cleanup = gen6_gmch_remove; -- cgit From 95ebcda3ef4fa2c928e2e0dbe0f707ca90852110 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:11 -0700 Subject: drm/i915/uc: Rename uC firmware init/fini functions he uC firmware init function is called during GuC/HuC init early phases. Rename to include "_early" and properly reflect which phase we are at. The uC firmware fini function is cleaning up the state set/created on firmware fetch. Replace "_fini" with "_cleanup_fetch". 
v2: also rename uC fw fini function Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-2-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/intel_guc.c | 6 +++--- drivers/gpu/drm/i915/intel_guc_fw.c | 2 +- drivers/gpu/drm/i915/intel_huc.h | 2 +- drivers/gpu/drm/i915/intel_huc_fw.c | 2 +- drivers/gpu/drm/i915/intel_uc_fw.c | 4 ++-- drivers/gpu/drm/i915/intel_uc_fw.h | 5 +++-- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 3aabfa2d9198..d81a02b0f525 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -154,7 +154,7 @@ int intel_guc_init_misc(struct intel_guc *guc) void intel_guc_fini_misc(struct intel_guc *guc) { - intel_uc_fw_fini(&guc->fw); + intel_uc_fw_cleanup_fetch(&guc->fw); guc_fini_wq(guc); } @@ -221,7 +221,7 @@ err_log: err_shared: guc_shared_data_destroy(guc); err_fetch: - intel_uc_fw_fini(&guc->fw); + intel_uc_fw_cleanup_fetch(&guc->fw); return ret; } @@ -237,7 +237,7 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ads_destroy(guc); intel_guc_log_destroy(&guc->log); guc_shared_data_destroy(guc); - intel_uc_fw_fini(&guc->fw); + intel_uc_fw_cleanup_fetch(&guc->fw); } static u32 guc_ctl_debug_flags(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index 792a551450c7..4385d9ef02bb 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -90,7 +90,7 @@ void intel_guc_fw_init_early(struct intel_guc *guc) { struct intel_uc_fw *guc_fw = &guc->fw; - intel_uc_fw_init(guc_fw, INTEL_UC_FW_TYPE_GUC); + intel_uc_fw_init_early(guc_fw, INTEL_UC_FW_TYPE_GUC); guc_fw_select(guc_fw); } diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index 7e41d870b509..ce129e301961 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -42,7 +42,7 @@ int intel_huc_check_status(struct intel_huc *huc); static inline void intel_huc_fini_misc(struct intel_huc *huc) { - intel_uc_fw_fini(&huc->fw); + intel_uc_fw_cleanup_fetch(&huc->fw); } static inline int intel_huc_sanitize(struct intel_huc *huc) diff --git a/drivers/gpu/drm/i915/intel_huc_fw.c b/drivers/gpu/drm/i915/intel_huc_fw.c index 68d47c105939..80a176d91edc 100644 --- a/drivers/gpu/drm/i915/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/intel_huc_fw.c @@ -89,7 +89,7 @@ void intel_huc_fw_init_early(struct intel_huc *huc) { struct intel_uc_fw *huc_fw = &huc->fw; - intel_uc_fw_init(huc_fw, INTEL_UC_FW_TYPE_HUC); + intel_uc_fw_init_early(huc_fw, INTEL_UC_FW_TYPE_HUC); huc_fw_select(huc_fw); } diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index becf05ebae4d..e3e74207a102 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -274,13 +274,13 @@ fail: } /** - * intel_uc_fw_fini - cleanup uC firmware + * intel_uc_fw_cleanup_fetch - cleanup uC firmware * * @uc_fw: uC firmware * * Cleans up uC firmware by releasing the firmware GEM obj. 
*/ -void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) +void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index 0e3bd580e267..e6fa8599757c 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -102,7 +102,8 @@ static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) } static inline -void intel_uc_fw_init(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) +void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, + enum intel_uc_fw_type type) { uc_fw->path = NULL; uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; @@ -144,10 +145,10 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, struct intel_uc_fw *uc_fw); +void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw); int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, int (*xfer)(struct intel_uc_fw *uc_fw, struct i915_vma *vma)); -void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p); #endif -- cgit From 911800765ef6cdcb9103da7557aa5dd9ebb4cda0 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:12 -0700 Subject: drm/i915/uc: Reserve upper range of GGTT GuC and HuC depend on struct_mutex for device reinitialization. Moving away from this dependency requires perma-pinning the firmware images in GGTT. The upper portion of the GuC address space has a sizeable hole (several MB) that is inaccessible by GuC. Reserve this range within GGTT as it can comfortably hold GuC/HuC firmware images. v2: Reserve node rather than insert (Chris) Simpler determination of node start/size (Daniele) Move reserve/release out to intel_guc.* files v3: Reserve starting at GUC_GGTT_TOP only and bail if this fails (Chris) Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-3-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 25 ++++++++++++------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/intel_guc.c | 27 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_guc.h | 2 ++ 4 files changed, 42 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 10558bc8bf90..3557233de0f5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2752,6 +2752,12 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) if (ret) return ret; + if (USES_GUC(dev_priv)) { + ret = intel_guc_reserve_ggtt_top(&dev_priv->guc); + if (ret) + goto err_reserve; + } + /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", @@ -2766,12 +2772,14 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) if (INTEL_PPGTT(dev_priv) == INTEL_PPGTT_ALIASING) { ret = i915_gem_init_aliasing_ppgtt(dev_priv); if (ret) - goto err; + goto err_appgtt; } return 0; -err: +err_appgtt: + intel_guc_release_ggtt_top(&dev_priv->guc); +err_reserve: drm_mm_remove_node(&ggtt->error_capture); return ret; } @@ -2797,6 +2805,8 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); + 
intel_guc_release_ggtt_top(&dev_priv->guc); + if (drm_mm_initialized(&ggtt->vm.mm)) { intel_vgt_deballoon(dev_priv); i915_address_space_fini(&ggtt->vm); @@ -3371,17 +3381,6 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) if (ret) return ret; - /* Trim the GGTT to fit the GuC mappable upper range (when enabled). - * This is easier than doing range restriction on the fly, as we - * currently don't have any bits spare to pass in this upper - * restriction! - */ - if (USES_GUC(dev_priv)) { - ggtt->vm.total = min_t(u64, ggtt->vm.total, GUC_GGTT_TOP); - ggtt->mappable_end = - min_t(u64, ggtt->mappable_end, ggtt->vm.total); - } - if ((ggtt->vm.total - 1) >> 32) { DRM_ERROR("We never expected a Global GTT with more than 32bits" " of address space! Found %lldM!\n", diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f597f35b109b..b51e779732c3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -384,6 +384,7 @@ struct i915_ggtt { u32 pin_bias; struct drm_mm_node error_capture; + struct drm_mm_node uc_fw; }; struct i915_hw_ppgtt { diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index d81a02b0f525..a10a68e0ffce 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -721,3 +721,30 @@ u32 intel_guc_reserved_gtt_size(struct intel_guc *guc) { return guc_to_i915(guc)->wopcm.guc.size; } + +int intel_guc_reserve_ggtt_top(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_i915(guc); + struct i915_ggtt *ggtt = &i915->ggtt; + u64 size; + int ret; + + size = ggtt->vm.total - GUC_GGTT_TOP; + + ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size, + GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, + PIN_NOEVICT); + if (ret) + DRM_DEBUG_DRIVER("GuC: failed to reserve top of ggtt\n"); + + return ret; +} + +void intel_guc_release_ggtt_top(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_i915(guc); + struct i915_ggtt *ggtt = &i915->ggtt; + + if (drm_mm_node_allocated(&ggtt->uc_fw)) + drm_mm_remove_node(&ggtt->uc_fw); +} diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 2c59ff8d9f39..2494e84831a2 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -173,6 +173,8 @@ int intel_guc_suspend(struct intel_guc *guc); int intel_guc_resume(struct intel_guc *guc); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); u32 intel_guc_reserved_gtt_size(struct intel_guc *guc); +int intel_guc_reserve_ggtt_top(struct intel_guc *guc); +void intel_guc_release_ggtt_top(struct intel_guc *guc); static inline int intel_guc_sanitize(struct intel_guc *guc) { -- cgit From fc488b59034aa4519f4971f4b2b842718e56af79 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:13 -0700 Subject: drm/i915/uc: Place uC firmware in upper range of GGTT Currently we pin the GuC or HuC firmware image just before uploading. Perma-pin during uC initialization instead and use the range reserved at the top of the address space. Moving the firmware resulted in needing to: - use an additional pinning for the rsa signature which will be used during HuC auth as addresses above GUC_GGTT_TOP do not map through GTT. 
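(Rough sketch of the address split this relies on, in plain C with illustrative constants rather than the i915 code: the firmware image is perma-pinned at or above GUC_GGTT_TOP, which GuC itself cannot map, so anything GuC must read directly, such as the RSA signature used for HuC auth, needs a separate pinning below that boundary.)

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define GUC_GGTT_TOP 0xFEE00000ull /* illustrative value for this example */

/* Size of the reserved node that starts at GUC_GGTT_TOP. */
uint64_t reserved_fw_size(uint64_t ggtt_total)
{
	assert(ggtt_total > GUC_GGTT_TOP);
	return ggtt_total - GUC_GGTT_TOP;
}

/* Offsets handed to GuC (e.g. for the rsa_data vma) must pass this check. */
bool guc_can_access(uint64_t ggtt_offset)
{
	return ggtt_offset < GUC_GGTT_TOP;
}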
v2: Remove call to set to gtt domain Do not restore fw gtt mapping unconditionally Separate out pin/unpin functions and drop usage of pin/unpin Use uc_fw init/fini functions to bind/unbind fw object v3: Bind is only needed during xfer (Chris) Remove attempts to bind outside of xfer (Chris) Mark fw bind/unbind static Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-4-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/intel_guc.c | 9 +++- drivers/gpu/drm/i915/intel_guc_fw.c | 18 ++++--- drivers/gpu/drm/i915/intel_huc.c | 74 +++++++++++++++++++++------ drivers/gpu/drm/i915/intel_huc.h | 4 ++ drivers/gpu/drm/i915/intel_huc_fw.c | 47 +++++++++++++----- drivers/gpu/drm/i915/intel_uc.c | 23 +++++++-- drivers/gpu/drm/i915/intel_uc_fw.c | 99 ++++++++++++++++++++++++++----------- drivers/gpu/drm/i915/intel_uc_fw.h | 7 +-- 8 files changed, 208 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index a10a68e0ffce..c4ac29309fcc 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -189,9 +189,13 @@ int intel_guc_init(struct intel_guc *guc) struct drm_i915_private *dev_priv = guc_to_i915(guc); int ret; - ret = guc_shared_data_create(guc); + ret = intel_uc_fw_init(&guc->fw); if (ret) goto err_fetch; + + ret = guc_shared_data_create(guc); + if (ret) + goto err_fw; GEM_BUG_ON(!guc->shared_data); ret = intel_guc_log_create(&guc->log); @@ -220,6 +224,8 @@ err_log: intel_guc_log_destroy(&guc->log); err_shared: guc_shared_data_destroy(guc); +err_fw: + intel_uc_fw_fini(&guc->fw); err_fetch: intel_uc_fw_cleanup_fetch(&guc->fw); return ret; @@ -237,6 +243,7 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ads_destroy(guc); intel_guc_log_destroy(&guc->log); guc_shared_data_destroy(guc); + intel_uc_fw_fini(&guc->fw); intel_uc_fw_cleanup_fetch(&guc->fw); } diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index 4385d9ef02bb..8b2dcc70b956 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -122,14 +122,16 @@ static void guc_prepare_xfer(struct intel_guc *guc) } /* Copy RSA signature from the fw image to HW for verification */ -static void guc_xfer_rsa(struct intel_guc *guc, struct i915_vma *vma) +static void guc_xfer_rsa(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_uc_fw *fw = &guc->fw; + struct sg_table *pages = fw->obj->mm.pages; u32 rsa[UOS_RSA_SCRATCH_COUNT]; int i; - sg_pcopy_to_buffer(vma->pages->sgl, vma->pages->nents, - rsa, sizeof(rsa), guc->fw.rsa_offset); + sg_pcopy_to_buffer(pages->sgl, pages->nents, + rsa, sizeof(rsa), fw->rsa_offset); for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]); @@ -201,7 +203,7 @@ static int guc_wait_ucode(struct intel_guc *guc) * transfer between GTT locations. This functionality is left out of the API * for now as there is no need for it. 
*/ -static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) +static int guc_xfer_ucode(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct intel_uc_fw *guc_fw = &guc->fw; @@ -214,7 +216,7 @@ static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size); /* Set the source address for the new blob */ - offset = intel_guc_ggtt_offset(guc, vma) + guc_fw->header_offset; + offset = intel_uc_fw_ggtt_offset(guc_fw) + guc_fw->header_offset; I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); @@ -233,7 +235,7 @@ static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) /* * Load the GuC firmware blob into the MinuteIA. */ -static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) +static int guc_fw_xfer(struct intel_uc_fw *guc_fw) { struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -250,9 +252,9 @@ static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) * by the DMA engine in one operation, whereas the RSA signature is * loaded via MMIO. */ - guc_xfer_rsa(guc, vma); + guc_xfer_rsa(guc); - ret = guc_xfer_ucode(guc, vma); + ret = guc_xfer_ucode(guc); intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 94c04f16a2ad..1ff1fb015e58 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -40,6 +40,61 @@ int intel_huc_init_misc(struct intel_huc *huc) return 0; } +static int intel_huc_rsa_data_create(struct intel_huc *huc) +{ + struct drm_i915_private *i915 = huc_to_i915(huc); + struct intel_guc *guc = &i915->guc; + struct i915_vma *vma; + void *vaddr; + + /* + * HuC firmware will sit above GUC_GGTT_TOP and will not map + * through GTT. Unfortunately, this means GuC cannot perform + * the HuC auth. as the rsa offset now falls within the GuC + * inaccessible range. We resort to perma-pinning an additional + * vma within the accessible range that only contains the rsa + * signature. The GuC can use this extra pinning to perform + * the authentication since its GGTT offset will be GuC + * accessible. 
+ */ + vma = intel_guc_allocate_vma(guc, PAGE_SIZE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma, 0); + return PTR_ERR(vaddr); + } + + huc->rsa_data = vma; + huc->rsa_data_vaddr = vaddr; + + return 0; +} + +static void intel_huc_rsa_data_destroy(struct intel_huc *huc) +{ + i915_vma_unpin_and_release(&huc->rsa_data, I915_VMA_RELEASE_MAP); +} + +int intel_huc_init(struct intel_huc *huc) +{ + int err; + + err = intel_huc_rsa_data_create(huc); + if (err) + return err; + + return intel_uc_fw_init(&huc->fw); +} + +void intel_huc_fini(struct intel_huc *huc) +{ + intel_uc_fw_fini(&huc->fw); + intel_huc_rsa_data_destroy(huc); +} + /** * intel_huc_auth() - Authenticate HuC uCode * @huc: intel_huc structure @@ -55,27 +110,17 @@ int intel_huc_auth(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_i915(huc); struct intel_guc *guc = &i915->guc; - struct i915_vma *vma; u32 status; int ret; if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) return -ENOEXEC; - vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0, - PIN_OFFSET_BIAS | i915->ggtt.pin_bias); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - DRM_ERROR("HuC: Failed to pin huc fw object %d\n", ret); - goto fail; - } - ret = intel_guc_auth_huc(guc, - intel_guc_ggtt_offset(guc, vma) + - huc->fw.rsa_offset); + intel_guc_ggtt_offset(guc, huc->rsa_data)); if (ret) { DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); - goto fail_unpin; + goto fail; } /* Check authentication status, it should be done by now */ @@ -86,14 +131,11 @@ int intel_huc_auth(struct intel_huc *huc) 2, 50, &status); if (ret) { DRM_ERROR("HuC: Firmware not verified %#x\n", status); - goto fail_unpin; + goto fail; } - i915_vma_unpin(vma); return 0; -fail_unpin: - i915_vma_unpin(vma); fail: huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL; diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index ce129e301961..a0c21ae02a99 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -33,10 +33,14 @@ struct intel_huc { struct intel_uc_fw fw; /* HuC-specific additions */ + struct i915_vma *rsa_data; + void *rsa_data_vaddr; }; void intel_huc_init_early(struct intel_huc *huc); int intel_huc_init_misc(struct intel_huc *huc); +int intel_huc_init(struct intel_huc *huc); +void intel_huc_fini(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc); int intel_huc_check_status(struct intel_huc *huc); diff --git a/drivers/gpu/drm/i915/intel_huc_fw.c b/drivers/gpu/drm/i915/intel_huc_fw.c index 80a176d91edc..44c559526072 100644 --- a/drivers/gpu/drm/i915/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/intel_huc_fw.c @@ -93,18 +93,24 @@ void intel_huc_fw_init_early(struct intel_huc *huc) huc_fw_select(huc_fw); } -/** - * huc_fw_xfer() - DMA's the firmware - * @huc_fw: the firmware descriptor - * @vma: the firmware image (bound into the GGTT) - * - * Transfer the firmware image to RAM for execution by the microcontroller. - * - * Return: 0 on success, non-zero on failure - */ -static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) +static void huc_xfer_rsa(struct intel_huc *huc) { - struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); + struct intel_uc_fw *fw = &huc->fw; + struct sg_table *pages = fw->obj->mm.pages; + + /* + * HuC firmware image is outside GuC accessible range. 
+ * Copy the RSA signature out of the image into + * the perma-pinned region set aside for it + */ + sg_pcopy_to_buffer(pages->sgl, pages->nents, + huc->rsa_data_vaddr, fw->rsa_size, + fw->rsa_offset); +} + +static int huc_xfer_ucode(struct intel_huc *huc) +{ + struct intel_uc_fw *huc_fw = &huc->fw; struct drm_i915_private *dev_priv = huc_to_i915(huc); struct intel_uncore *uncore = &dev_priv->uncore; unsigned long offset = 0; @@ -116,7 +122,7 @@ static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); /* Set the source address for the uCode */ - offset = intel_guc_ggtt_offset(&dev_priv->guc, vma) + + offset = intel_uc_fw_ggtt_offset(huc_fw) + huc_fw->header_offset; intel_uncore_write(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset)); @@ -150,6 +156,23 @@ static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) return ret; } +/** + * huc_fw_xfer() - DMA's the firmware + * @huc_fw: the firmware descriptor + * + * Transfer the firmware image to RAM for execution by the microcontroller. + * + * Return: 0 on success, non-zero on failure + */ +static int huc_fw_xfer(struct intel_uc_fw *huc_fw) +{ + struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); + + huc_xfer_rsa(huc); + + return huc_xfer_ucode(huc); +} + /** * intel_huc_fw_upload() - load HuC uCode to device * @huc: intel_huc structure diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 25b80ffe71ad..488dffba04d2 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -280,6 +280,7 @@ void intel_uc_fini_misc(struct drm_i915_private *i915) int intel_uc_init(struct drm_i915_private *i915) { struct intel_guc *guc = &i915->guc; + struct intel_huc *huc = &i915->huc; int ret; if (!USES_GUC(i915)) @@ -292,19 +293,30 @@ int intel_uc_init(struct drm_i915_private *i915) if (ret) return ret; + if (USES_HUC(i915)) { + ret = intel_huc_init(huc); + if (ret) + goto err_guc; + } + if (USES_GUC_SUBMISSION(i915)) { /* * This is stuff we need to have available at fw load time * if we are planning to enable submission later */ ret = intel_guc_submission_init(guc); - if (ret) { - intel_guc_fini(guc); - return ret; - } + if (ret) + goto err_huc; } return 0; + +err_huc: + if (USES_HUC(i915)) + intel_huc_fini(huc); +err_guc: + intel_guc_fini(guc); + return ret; } void intel_uc_fini(struct drm_i915_private *i915) @@ -319,6 +331,9 @@ void intel_uc_fini(struct drm_i915_private *i915) if (USES_GUC_SUBMISSION(i915)) intel_guc_submission_fini(guc); + if (USES_HUC(i915)) + intel_huc_fini(&i915->huc); + intel_guc_fini(guc); } diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index e3e74207a102..b9cb6fea9332 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -191,6 +191,35 @@ fail: release_firmware(fw); /* OK even if fw is NULL */ } +static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_gem_object *obj = uc_fw->obj; + struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; + struct i915_vma dummy = { + .node.start = intel_uc_fw_ggtt_offset(uc_fw), + .node.size = obj->base.size, + .pages = obj->mm.pages, + .vm = &ggtt->vm, + }; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size); + + /* uc_fw->obj cache domains were not controlled across suspend */ + drm_clflush_sg(dummy.pages); + + ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0); +} + +static void 
intel_uc_fw_ggtt_unbind(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_gem_object *obj = uc_fw->obj; + struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; + u64 start = intel_uc_fw_ggtt_offset(uc_fw); + + ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size); +} + /** * intel_uc_fw_upload - load uC firmware using custom loader * @uc_fw: uC firmware @@ -201,11 +230,8 @@ fail: * Return: 0 on success, non-zero on failure. */ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, - int (*xfer)(struct intel_uc_fw *uc_fw, - struct i915_vma *vma)) + int (*xfer)(struct intel_uc_fw *uc_fw)) { - struct i915_vma *vma; - u32 ggtt_pin_bias; int err; DRM_DEBUG_DRIVER("%s fw load %s\n", @@ -219,36 +245,15 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, intel_uc_fw_type_repr(uc_fw->type), intel_uc_fw_status_repr(uc_fw->load_status)); - /* Pin object with firmware */ - err = i915_gem_object_set_to_gtt_domain(uc_fw->obj, false); - if (err) { - DRM_DEBUG_DRIVER("%s fw set-domain err=%d\n", - intel_uc_fw_type_repr(uc_fw->type), err); - goto fail; - } - - ggtt_pin_bias = to_i915(uc_fw->obj->base.dev)->ggtt.pin_bias; - vma = i915_gem_object_ggtt_pin(uc_fw->obj, NULL, 0, 0, - PIN_OFFSET_BIAS | ggtt_pin_bias); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - DRM_DEBUG_DRIVER("%s fw ggtt-pin err=%d\n", - intel_uc_fw_type_repr(uc_fw->type), err); - goto fail; - } + intel_uc_fw_ggtt_bind(uc_fw); /* Call custom loader */ - err = xfer(uc_fw, vma); - - /* - * We keep the object pages for reuse during resume. But we can unpin it - * now that DMA has completed, so it doesn't continue to take up space. - */ - i915_vma_unpin(vma); - + err = xfer(uc_fw); if (err) goto fail; + intel_uc_fw_ggtt_unbind(uc_fw); + uc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; DRM_DEBUG_DRIVER("%s fw load %s\n", intel_uc_fw_type_repr(uc_fw->type), @@ -273,6 +278,42 @@ fail: return err; } +int intel_uc_fw_init(struct intel_uc_fw *uc_fw) +{ + int err; + + if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return -ENOEXEC; + + err = i915_gem_object_pin_pages(uc_fw->obj); + if (err) + DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + + return err; +} + +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) +{ + if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return; + + i915_gem_object_unpin_pages(uc_fw->obj); +} + +u32 intel_uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_private *i915 = to_i915(uc_fw->obj->base.dev); + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node *node = &ggtt->uc_fw; + + GEM_BUG_ON(!node->allocated); + GEM_BUG_ON(upper_32_bits(node->start)); + GEM_BUG_ON(upper_32_bits(node->start + node->size - 1)); + + return lower_32_bits(node->start); +} + /** * intel_uc_fw_cleanup_fetch - cleanup uC firmware * diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index e6fa8599757c..ff98f8661d72 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -27,7 +27,6 @@ struct drm_printer; struct drm_i915_private; -struct i915_vma; /* Home of GuC, HuC and DMC firmwares */ #define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915" @@ -147,8 +146,10 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, struct intel_uc_fw *uc_fw); void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw); int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, - int (*xfer)(struct intel_uc_fw *uc_fw, - struct i915_vma *vma)); + int (*xfer)(struct intel_uc_fw 
*uc_fw)); +int intel_uc_fw_init(struct intel_uc_fw *uc_fw); +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); +u32 intel_uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw); void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p); #endif -- cgit From 40d211ef62de3efdfb0a78894fc0bc3b24061b40 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:14 -0700 Subject: Revert "drm/i915/guc: Disable global reset" We have now prepared the guc reset paths to avoid taking struct_mutex, or any other lock, and so it is now safe to re-enable. References: fe62365f9f80 ("drm/i915/guc: Disable global reset") Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-5-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/i915_reset.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 677d59304e78..1092d16c289c 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -641,9 +641,6 @@ int intel_gpu_reset(struct drm_i915_private *i915, bool intel_has_gpu_reset(struct drm_i915_private *i915) { - if (USES_GUC(i915)) - return false; - if (!i915_modparams.reset) return NULL; -- cgit From f3c2b76ef25e73e2065614108fe33bf2d790cac3 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:15 -0700 Subject: drm/i915/selftests: Check that gpu reset is usable from atomic context GPU reset is now available with GuC enabled, so re-enable our check that this reset is usable from atomic context. Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-6-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 050bd1e19e02..2fd33aad8683 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -1814,9 +1814,6 @@ static int igt_atomic_reset(void *arg) /* Check that the resets are usable from atomic context */ - if (USES_GUC_SUBMISSION(i915)) - return 0; /* guc is dead; long live the guc */ - igt_global_reset_lock(i915); mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(i915); @@ -1846,6 +1843,9 @@ static int igt_atomic_reset(void *arg) force_reset(i915); } + if (USES_GUC_SUBMISSION(i915)) + goto unlock; + if (intel_has_reset_engine(i915)) { struct intel_engine_cs *engine; enum intel_engine_id id; -- cgit From 2d6692e642e7ca02883524350038e2a431ef44e8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 20 Apr 2019 12:55:39 +0100 Subject: drm/i915: Start writeback from the shrinker When we are called to relieve mempressue via the shrinker, the only way we can make progress is either by discarding unwanted pages (those objects that userspace has marked MADV_DONTNEED) or by reclaiming the dirty objects via swap. As we know that is the only way to make further progress, we can initiate the writeback as we invalidate the objects. This means the objects we put onto the inactive anon lru list are already marked for reclaim+writeback and so will trigger a wait upon the writeback inside direct reclaim, greatly improving the success rate of direct reclaim on i915 objects. 
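In code terms, the new path boils down to the per-page loop below: a condensed sketch of the __start_writeback() helper this patch adds to i915_gem_shrinker.c (headers, the madvise/truncate cases and the I915_SHRINK_WRITEBACK flag check are omitted here; see the full hunk in the diff), not extra code to apply.

static void start_writeback_sketch(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = obj->base.filp->f_mapping;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,	/* opportunistic: queue, do not wait */
		.nr_to_write = SWAP_CLUSTER_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
		.for_reclaim = 1,
	};
	unsigned long i;

	for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
		struct page *page;

		/* Returns the page locked and referenced; shadow/swap
		 * entries come back as xarray values and are skipped.
		 */
		page = find_lock_entry(mapping, i);
		if (!page || xa_is_value(page))
			continue;

		if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
			int ret;

			/* PG_reclaim rotates the page to the tail of the
			 * inactive list once writeback completes, so it is
			 * reclaimed promptly.
			 */
			SetPageReclaim(page);
			ret = mapping->a_ops->writepage(page, &wbc);
			if (!PageWriteback(page))
				ClearPageReclaim(page);
			if (!ret) {
				/* writepage() unlocked the page for us */
				put_page(page);
				continue;
			}
		}

		unlock_page(page);
		put_page(page);
	}
}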
The corollary is that we may start a slow swap on opportunistic mempressure from the likes of the compaction + migration kthreads. This is limited by those threads only being allowed to shrink idle pages, but also that if we reactivate the page before it is swapped out by gpu activity, we only pay the cost of repinning the page. The cost is most felt when an object is reused after mempressure, which hopefully excludes the latency sensitive tasks (as we are just extending the impact of swap thrashing to them). Apparently this is not the first time we've had this idea. Back in commit 5537252b6b6d ("drm/i915: Invalidate our pages under memory pressure") we wanted to start writeback but settled on invalidate after Hugh Dickins warned us about a possibility of a deadlock within shmemfs if we started writeback from shrink_slab. Looking at the callchain, using writeback from i915_gem_shrink should be equivalent to the pageout also employed by shrink_slab, i.e. it should not be any riskier afaict. v2: Leave mmappings intact. At this point, the only mmappings of our objects will be via CPU mmaps on the shmemfs filp, which are out-of-scope for our LRU tracking. Instead leave those pages to the inactive anon LRU page list for aging and pageout as normal. v3: Be selective on which paths trigger writeback, in particular excluding paths shrinking just to reclaim vm space (e.g. mmap, vmap reapers) and avoid starting writeback on the entire process space from within the pm freezer. References: https://bugs.freedesktop.org/show_bug.cgi?id=108686 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Matthew Auld Cc: Daniel Vetter Cc: Michal Hocko Reviewed-by: Joonas Lahtinen #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20190420115539.29081-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 13 +++--- drivers/gpu/drm/i915/i915_gem.c | 27 +----------- drivers/gpu/drm/i915/i915_gem_shrinker.c | 75 +++++++++++++++++++++++++++++--- 3 files changed, 79 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 71612e7fc8bc..dc74d33c20aa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3007,7 +3007,7 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, enum i915_mm_subclass subclass); -void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj); +void __i915_gem_object_truncate(struct drm_i915_gem_object *obj); enum i915_map_type { I915_MAP_WB = 0, @@ -3268,11 +3268,12 @@ unsigned long i915_gem_shrink(struct drm_i915_private *i915, unsigned long target, unsigned long *nr_scanned, unsigned flags); -#define I915_SHRINK_PURGEABLE 0x1 -#define I915_SHRINK_UNBOUND 0x2 -#define I915_SHRINK_BOUND 0x4 -#define I915_SHRINK_ACTIVE 0x8 -#define I915_SHRINK_VMAPS 0x10 +#define I915_SHRINK_PURGEABLE BIT(0) +#define I915_SHRINK_UNBOUND BIT(1) +#define I915_SHRINK_BOUND BIT(2) +#define I915_SHRINK_ACTIVE BIT(3) +#define I915_SHRINK_VMAPS BIT(4) +#define I915_SHRINK_WRITEBACK BIT(5) unsigned long i915_gem_shrink_all(struct drm_i915_private *i915); void i915_gem_shrinker_register(struct drm_i915_private *i915); void i915_gem_shrinker_unregister(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e5462639de0b..a2bf94c3cfca 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2143,8 
+2143,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, } /* Immediately discard the backing storage */ -static void -i915_gem_object_truncate(struct drm_i915_gem_object *obj) +void __i915_gem_object_truncate(struct drm_i915_gem_object *obj) { i915_gem_object_free_mmap_offset(obj); @@ -2161,28 +2160,6 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) obj->mm.pages = ERR_PTR(-EFAULT); } -/* Try to discard unwanted pages */ -void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) -{ - struct address_space *mapping; - - lockdep_assert_held(&obj->mm.lock); - GEM_BUG_ON(i915_gem_object_has_pages(obj)); - - switch (obj->mm.madv) { - case I915_MADV_DONTNEED: - i915_gem_object_truncate(obj); - case __I915_MADV_PURGED: - return; - } - - if (obj->base.filp == NULL) - return; - - mapping = obj->base.filp->f_mapping, - invalidate_mapping_pages(mapping, 0, (loff_t)-1); -} - /* * Move pages to appropriate lru and release the pagevec, decrementing the * ref count of those pages. @@ -4023,7 +4000,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, /* if the object is no longer attached, discard its backing storage */ if (obj->mm.madv == I915_MADV_DONTNEED && !i915_gem_object_has_pages(obj)) - i915_gem_object_truncate(obj); + __i915_gem_object_truncate(obj); args->retained = obj->mm.madv != __I915_MADV_PURGED; mutex_unlock(&obj->mm.lock); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 6da795c7e62e..588e3898b120 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -114,6 +114,67 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) return !i915_gem_object_has_pages(obj); } +static void __start_writeback(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + struct address_space *mapping; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .nr_to_write = SWAP_CLUSTER_MAX, + .range_start = 0, + .range_end = LLONG_MAX, + .for_reclaim = 1, + }; + unsigned long i; + + lockdep_assert_held(&obj->mm.lock); + GEM_BUG_ON(i915_gem_object_has_pages(obj)); + + switch (obj->mm.madv) { + case I915_MADV_DONTNEED: + __i915_gem_object_truncate(obj); + case __I915_MADV_PURGED: + return; + } + + if (!obj->base.filp) + return; + + if (!(flags & I915_SHRINK_WRITEBACK)) + return; + + /* + * Leave mmapings intact (GTT will have been revoked on unbinding, + * leaving only CPU mmapings around) and add those pages to the LRU + * instead of invoking writeback so they are aged and paged out + * as normal. 
+ */ + mapping = obj->base.filp->f_mapping; + + /* Begin writeback on each dirty page */ + for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) { + struct page *page; + + page = find_lock_entry(mapping, i); + if (!page || xa_is_value(page)) + continue; + + if (!page_mapped(page) && clear_page_dirty_for_io(page)) { + int ret; + + SetPageReclaim(page); + ret = mapping->a_ops->writepage(page, &wbc); + if (!PageWriteback(page)) + ClearPageReclaim(page); + if (!ret) + goto put; + } + unlock_page(page); +put: + put_page(page); + } +} + /** * i915_gem_shrink - Shrink buffer object caches * @i915: i915 device @@ -254,7 +315,7 @@ i915_gem_shrink(struct drm_i915_private *i915, mutex_lock_nested(&obj->mm.lock, I915_MM_SHRINKER); if (!i915_gem_object_has_pages(obj)) { - __i915_gem_object_invalidate(obj); + __start_writeback(obj, flags); count += obj->base.size >> PAGE_SHIFT; } mutex_unlock(&obj->mm.lock); @@ -366,13 +427,15 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) &sc->nr_scanned, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | - I915_SHRINK_PURGEABLE); + I915_SHRINK_PURGEABLE | + I915_SHRINK_WRITEBACK); if (sc->nr_scanned < sc->nr_to_scan) freed += i915_gem_shrink(i915, sc->nr_to_scan - sc->nr_scanned, &sc->nr_scanned, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { intel_wakeref_t wakeref; @@ -382,7 +445,8 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) &sc->nr_scanned, I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); } } @@ -404,7 +468,8 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) with_intel_runtime_pm(i915, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); /* Because we may be allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not -- cgit From 9c11b12184bb01d8ba2c48e655509b184f02c769 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 19 Apr 2019 10:10:26 +0300 Subject: drm/i915/icl: Fix MG_DP_MODE() register programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the order of lane, port parameters passed to the register macro. Note that this was already partly fixed by commit 37fc7845df7b6 ("drm/i915: Call MG_DP_MODE() macro with the right parameters order") While at it simplify things by using the macro directly instead of an unnecessary redirection via an array. v2: - Add a note the commit message about simplifying things. 
(José) Fixes: 58106b7d816e1 ("drm/i915: Make MG PHY macros semantically consistent") Cc: José Roberto de Souza Cc: Lucas De Marchi Cc: Aditya Swarup Signed-off-by: Imre Deak Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190419071026.32370-1-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 24f9106efcc6..f181c26f62fd 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2905,21 +2905,20 @@ static void icl_enable_phy_clock_gating(struct intel_digital_port *dig_port) struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); enum port port = dig_port->base.port; enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - i915_reg_t mg_regs[2] = { MG_DP_MODE(0, port), MG_DP_MODE(1, port) }; u32 val; - int i; + int ln; if (tc_port == PORT_TC_NONE) return; - for (i = 0; i < ARRAY_SIZE(mg_regs); i++) { - val = I915_READ(mg_regs[i]); + for (ln = 0; ln < 2; ln++) { + val = I915_READ(MG_DP_MODE(ln, port)); val |= MG_DP_MODE_CFG_TR2PWR_GATING | MG_DP_MODE_CFG_TRPWR_GATING | MG_DP_MODE_CFG_CLNPWR_GATING | MG_DP_MODE_CFG_DIGPWR_GATING | MG_DP_MODE_CFG_GAONPWR_GATING; - I915_WRITE(mg_regs[i], val); + I915_WRITE(MG_DP_MODE(ln, port), val); } val = I915_READ(MG_MISC_SUS0(tc_port)); @@ -2938,21 +2937,20 @@ static void icl_disable_phy_clock_gating(struct intel_digital_port *dig_port) struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); enum port port = dig_port->base.port; enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - i915_reg_t mg_regs[2] = { MG_DP_MODE(port, 0), MG_DP_MODE(port, 1) }; u32 val; - int i; + int ln; if (tc_port == PORT_TC_NONE) return; - for (i = 0; i < ARRAY_SIZE(mg_regs); i++) { - val = I915_READ(mg_regs[i]); + for (ln = 0; ln < 2; ln++) { + val = I915_READ(MG_DP_MODE(ln, port)); val &= ~(MG_DP_MODE_CFG_TR2PWR_GATING | MG_DP_MODE_CFG_TRPWR_GATING | MG_DP_MODE_CFG_CLNPWR_GATING | MG_DP_MODE_CFG_DIGPWR_GATING | MG_DP_MODE_CFG_GAONPWR_GATING); - I915_WRITE(mg_regs[i], val); + I915_WRITE(MG_DP_MODE(ln, port), val); } val = I915_READ(MG_MISC_SUS0(tc_port)); -- cgit From 372b9ffb5799a3d5f4a9c51ac35cf65ed8f40101 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 18 Apr 2019 22:59:07 +0300 Subject: drm/i915: Fix skl+ max plane width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spec has changed since skl_max_plane_width() was written. Now the SKL limits are lower than what they were initially, and GLK and ICL have different limits. Update the code to match the spec. 
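For quick reference, the limits encoded by the new helpers work out as summarised below (a restatement of the hunks that follow, not additional code):

	/*
	 * Main surface max width (pixels):
	 *
	 *                        linear / X-tiled           Y / Yf-tiled (incl. CCS)
	 *   gen9 (SKL/BXT/KBL)   4096                       4096 (2048 if cpp == 8)
	 *   GLK / CNL            5120 (4096 if cpp == 8)    5120 (2048 if cpp == 8)
	 *   ICL+                 5120                       5120
	 *
	 * skl_check_main_surface() selects the helper by platform: gen11+ uses
	 * icl_max_plane_width(), gen10 and GLK use glk_max_plane_width(), and
	 * the remaining gen9 parts keep skl_max_plane_width().
	 */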
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190418195907.23912-1-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/intel_display.c | 72 +++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3bd40a4a6739..da9285228557 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2964,41 +2964,55 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, switch (fb->modifier) { case DRM_FORMAT_MOD_LINEAR: case I915_FORMAT_MOD_X_TILED: - switch (cpp) { - case 8: - return 4096; - case 4: - case 2: - case 1: - return 8192; - default: - MISSING_CASE(cpp); - break; - } - break; + return 4096; case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Yf_TILED_CCS: /* FIXME AUX plane? */ case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: - switch (cpp) { - case 8: + if (cpp == 8) return 2048; - case 4: + else return 4096; - case 2: - case 1: - return 8192; - default: - MISSING_CASE(cpp); - break; - } - break; default: MISSING_CASE(fb->modifier); + return 2048; } +} - return 2048; +static int glk_max_plane_width(const struct drm_framebuffer *fb, + int color_plane, + unsigned int rotation) +{ + int cpp = fb->format->cpp[color_plane]; + + switch (fb->modifier) { + case DRM_FORMAT_MOD_LINEAR: + case I915_FORMAT_MOD_X_TILED: + if (cpp == 8) + return 4096; + else + return 5120; + case I915_FORMAT_MOD_Y_TILED_CCS: + case I915_FORMAT_MOD_Yf_TILED_CCS: + /* FIXME AUX plane? */ + case I915_FORMAT_MOD_Y_TILED: + case I915_FORMAT_MOD_Yf_TILED: + if (cpp == 8) + return 2048; + else + return 5120; + default: + MISSING_CASE(fb->modifier); + return 2048; + } +} + +static int icl_max_plane_width(const struct drm_framebuffer *fb, + int color_plane, + unsigned int rotation) +{ + return 5120; } static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state, @@ -3041,16 +3055,24 @@ static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state static int skl_check_main_surface(struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; int x = plane_state->base.src.x1 >> 16; int y = plane_state->base.src.y1 >> 16; int w = drm_rect_width(&plane_state->base.src) >> 16; int h = drm_rect_height(&plane_state->base.src) >> 16; - int max_width = skl_max_plane_width(fb, 0, rotation); + int max_width; int max_height = 4096; u32 alignment, offset, aux_offset = plane_state->color_plane[1].offset; + if (INTEL_GEN(dev_priv) >= 11) + max_width = icl_max_plane_width(fb, 0, rotation); + else if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + max_width = glk_max_plane_width(fb, 0, rotation); + else + max_width = skl_max_plane_width(fb, 0, rotation); + if (w > max_width || h > max_height) { DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n", w, h, max_width, max_height); -- cgit From 51eb1a1de7a92a812a3834986260834d5f52e566 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Wed, 17 Apr 2019 11:59:01 -0700 Subject: drm/i915/icl: Fix clockgating issue when using scalers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the clock-gating issue when pipe scaling is enabled. 
(Lineage #2006604312) V2: Fix typo in headline(Chris) Handle the non double buffered nature of the register(Ville) V3: Fix checkpatch warning. BAT failure for V2 on gen3 looks unrelated. V4: Split the icl and skl wa's(Ville) V5: Split the checks for icl and skl(Ville) V6: Correct the flipped checks in intel_pre_plane_update(Ville) V7: Use enum for pipe and extend the WA for plane scalers(Ville) V8: Eliminate the redundant use of pch_pfit(Ville) Cc: Chris Wilson Cc: Ville Syrjala Cc: Rodrigo Vivi Cc: Clint Taylor Cc: Aditya Swarup Signed-off-by: Radhakrishna Sripada Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190417185901.14833-1-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/intel_display.c | 40 +++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index da9285228557..62d663e506ab 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -476,6 +476,7 @@ static const struct intel_limit intel_limits_bxt = { .p2 = { .p2_slow = 1, .p2_fast = 20 }, }; +/* WA Display #0827: Gen9:all */ static void skl_wa_827(struct drm_i915_private *dev_priv, int pipe, bool enable) { @@ -489,6 +490,19 @@ skl_wa_827(struct drm_i915_private *dev_priv, int pipe, bool enable) ~(DUPS1_GATING_DIS | DUPS2_GATING_DIS)); } +/* Wa_2006604312:icl */ +static void +icl_wa_scalerclkgating(struct drm_i915_private *dev_priv, enum pipe pipe, + bool enable) +{ + if (enable) + I915_WRITE(CLKGATE_DIS_PSL(pipe), + I915_READ(CLKGATE_DIS_PSL(pipe)) | DPFR_GATING_DIS); + else + I915_WRITE(CLKGATE_DIS_PSL(pipe), + I915_READ(CLKGATE_DIS_PSL(pipe)) & ~DPFR_GATING_DIS); +} + static bool needs_modeset(const struct drm_crtc_state *state) { @@ -5527,6 +5541,16 @@ static bool needs_nv12_wa(struct drm_i915_private *dev_priv, return false; } +static bool needs_scalerclk_wa(struct drm_i915_private *dev_priv, + const struct intel_crtc_state *crtc_state) +{ + /* Wa_2006604312:icl */ + if (crtc_state->scaler_state.scaler_users > 0 && IS_ICELAKE(dev_priv)) + return true; + + return false; +} + static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); @@ -5560,11 +5584,13 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) intel_post_enable_primary(&crtc->base, pipe_config); } - /* Display WA 827 */ if (needs_nv12_wa(dev_priv, old_crtc_state) && - !needs_nv12_wa(dev_priv, pipe_config)) { + !needs_nv12_wa(dev_priv, pipe_config)) skl_wa_827(dev_priv, crtc->pipe, false); - } + + if (needs_scalerclk_wa(dev_priv, old_crtc_state) && + !needs_scalerclk_wa(dev_priv, pipe_config)) + icl_wa_scalerclkgating(dev_priv, crtc->pipe, false); } static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, @@ -5601,9 +5627,13 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, /* Display WA 827 */ if (!needs_nv12_wa(dev_priv, old_crtc_state) && - needs_nv12_wa(dev_priv, pipe_config)) { + needs_nv12_wa(dev_priv, pipe_config)) skl_wa_827(dev_priv, crtc->pipe, true); - } + + /* Wa_2006604312:icl */ + if (!needs_scalerclk_wa(dev_priv, old_crtc_state) && + needs_scalerclk_wa(dev_priv, pipe_config)) + icl_wa_scalerclkgating(dev_priv, crtc->pipe, true); /* * Vblank time updates from the shadow to live plane control register -- cgit From 09407579abf55a8f472c221325dda81cca324326 Mon Sep 17 00:00:00 2001 From: Chris 
Wilson Date: Wed, 24 Apr 2019 10:51:34 +0100 Subject: drm/i915: Store the default sseu setup on the engine As we push for better compartmentalisation, it is more convenient to copy the default sseu configuration from the engine into the derived logical context, than it is to dig it out from i915->runtime_info. v2: Use intel_sseu_from_device_info() to describe the converter Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190424095134.30249-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/Makefile.header-test | 1 + drivers/gpu/drm/i915/i915_drv.h | 14 --- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/i915_perf.c | 2 +- drivers/gpu/drm/i915/intel_context.c | 4 +- drivers/gpu/drm/i915/intel_context_types.h | 11 +- drivers/gpu/drm/i915/intel_device_info.h | 28 +---- drivers/gpu/drm/i915/intel_engine_cs.c | 4 + drivers/gpu/drm/i915/intel_engine_types.h | 3 + drivers/gpu/drm/i915/intel_lrc.c | 134 +------------------- drivers/gpu/drm/i915/intel_lrc.h | 2 - drivers/gpu/drm/i915/intel_sseu.c | 142 ++++++++++++++++++++++ drivers/gpu/drm/i915/intel_sseu.h | 67 ++++++++++ drivers/gpu/drm/i915/selftests/i915_gem_context.c | 5 +- 15 files changed, 226 insertions(+), 194 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_sseu.c create mode 100644 drivers/gpu/drm/i915/intel_sseu.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index fbcb0904f4a8..53ff209b91bb 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -95,6 +95,7 @@ i915-y += \ intel_lrc.o \ intel_mocs.o \ intel_ringbuffer.o \ + intel_sseu.o \ intel_uncore.o \ intel_wopcm.o diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index c1c391816fa7..5bcc78d7ac96 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -33,6 +33,7 @@ header_test := \ intel_psr.h \ intel_sdvo.h \ intel_sprite.h \ + intel_sseu.h \ intel_tv.h \ intel_workarounds_types.h diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dc74d33c20aa..e6f9a5ddac3d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3390,20 +3390,6 @@ mkwrite_device_info(struct drm_i915_private *dev_priv) return (struct intel_device_info *)INTEL_INFO(dev_priv); } -static inline struct intel_sseu -intel_device_default_sseu(struct drm_i915_private *i915) -{ - const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; - struct intel_sseu value = { - .slice_mask = sseu->slice_mask, - .subslice_mask = sseu->subslice_mask[0], - .min_eus_per_subslice = sseu->max_eus_per_subslice, - .max_eus_per_subslice = sseu->max_eus_per_subslice, - }; - - return value; -} - /* modesetting */ extern void intel_modeset_init_hw(struct drm_device *dev); extern int intel_modeset_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index dd728b26b5aa..c02a30612df9 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -1156,7 +1156,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); - *cs++ = gen8_make_rpcs(rq->i915, &sseu); + *cs++ = intel_sseu_make_rpcs(rq->i915, &sseu); intel_ring_advance(rq, cs); diff --git 
a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 39a4804091d7..56da457bed21 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1679,7 +1679,7 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - gen8_make_rpcs(i915, &ce->sseu)); + intel_sseu_make_rpcs(i915, &ce->sseu)); } /* diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c index 8931e0fee873..961d1445833d 100644 --- a/drivers/gpu/drm/i915/intel_context.c +++ b/drivers/gpu/drm/i915/intel_context.c @@ -230,15 +230,13 @@ intel_context_init(struct intel_context *ce, ce->gem_context = ctx; ce->engine = engine; ce->ops = engine->cops; + ce->sseu = engine->sseu; INIT_LIST_HEAD(&ce->signal_link); INIT_LIST_HEAD(&ce->signals); mutex_init(&ce->pin_mutex); - /* Use the whole device by default */ - ce->sseu = intel_device_default_sseu(ctx->i915); - i915_active_request_init(&ce->active_tracker, NULL, intel_context_retire); } diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h index 68b4ca1611e0..9ec4f787c908 100644 --- a/drivers/gpu/drm/i915/intel_context_types.h +++ b/drivers/gpu/drm/i915/intel_context_types.h @@ -14,6 +14,7 @@ #include #include "i915_active_types.h" +#include "intel_sseu.h" struct i915_gem_context; struct i915_vma; @@ -28,16 +29,6 @@ struct intel_context_ops { void (*destroy)(struct kref *kref); }; -/* - * Powergating configuration for a particular (context,engine). - */ -struct intel_sseu { - u8 slice_mask; - u8 subslice_mask; - u8 min_eus_per_subslice; - u8 max_eus_per_subslice; -}; - struct intel_context { struct kref ref; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 7a2f14eff699..1598c7079ffd 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -29,6 +29,7 @@ #include "intel_engine_types.h" #include "intel_display.h" +#include "intel_sseu.h" struct drm_printer; struct drm_i915_private; @@ -140,33 +141,6 @@ enum intel_ppgtt_type { func(overlay_needs_physical); \ func(supports_tv); -#define GEN_MAX_SLICES (6) /* CNL upper bound */ -#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ - -struct sseu_dev_info { - u8 slice_mask; - u8 subslice_mask[GEN_MAX_SLICES]; - u16 eu_total; - u8 eu_per_subslice; - u8 min_eu_in_pool; - /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ - u8 subslice_7eu[3]; - u8 has_slice_pg:1; - u8 has_subslice_pg:1; - u8 has_eu_pg:1; - - /* Topology fields */ - u8 max_slices; - u8 max_subslices; - u8 max_eus_per_subslice; - - /* We don't have more than 8 eus per subslice at the moment and as we - * store eus enabled using bits, no need to multiply by eus per - * subslice. 
- */ - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; -}; - struct intel_device_info { u16 gen_mask; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index eea9bec04f1b..202b4b7a24f1 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -588,6 +588,10 @@ int intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); + /* Use the whole device by default */ + engine->sseu = + intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu); + return 0; err_hwsp: diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h index 1f970c76b6a6..d07a01b3ed0b 100644 --- a/drivers/gpu/drm/i915/intel_engine_types.h +++ b/drivers/gpu/drm/i915/intel_engine_types.h @@ -17,6 +17,7 @@ #include "i915_priolist_types.h" #include "i915_selftest.h" #include "i915_timeline_types.h" +#include "intel_sseu.h" #include "intel_workarounds_types.h" #include "i915_gem_batch_pool.h" @@ -278,6 +279,8 @@ struct intel_engine_cs { u32 context_size; u32 mmio_base; + struct intel_sseu sseu; + struct intel_ring *buffer; struct i915_timeline timeline; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 4e0a351bfbca..18a9dc6ca877 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1232,7 +1232,7 @@ __execlists_update_reg_state(struct intel_context *ce, /* RPCS */ if (engine->class == RENDER_CLASS) regs[CTX_R_PWR_CLK_STATE + 1] = - gen8_make_rpcs(engine->i915, &ce->sseu); + intel_sseu_make_rpcs(engine->i915, &ce->sseu); } static int @@ -2551,138 +2551,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine) return logical_ring_init(engine); } -u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu) -{ - const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; - bool subslice_pg = sseu->has_subslice_pg; - struct intel_sseu ctx_sseu; - u8 slices, subslices; - u32 rpcs = 0; - - /* - * No explicit RPCS request is needed to ensure full - * slice/subslice/EU enablement prior to Gen9. - */ - if (INTEL_GEN(i915) < 9) - return 0; - - /* - * If i915/perf is active, we want a stable powergating configuration - * on the system. - * - * We could choose full enablement, but on ICL we know there are use - * cases which disable slices for functional, apart for performance - * reasons. So in this case we select a known stable subset. - */ - if (!i915->perf.oa.exclusive_stream) { - ctx_sseu = *req_sseu; - } else { - ctx_sseu = intel_device_default_sseu(i915); - - if (IS_GEN(i915, 11)) { - /* - * We only need subslice count so it doesn't matter - * which ones we select - just turn off low bits in the - * amount of half of all available subslices per slice. - */ - ctx_sseu.subslice_mask = - ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2)); - ctx_sseu.slice_mask = 0x1; - } - } - - slices = hweight8(ctx_sseu.slice_mask); - subslices = hweight8(ctx_sseu.subslice_mask); - - /* - * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits - * wide and Icelake has up to eight subslices, specfial programming is - * needed in order to correctly enable all subslices. - * - * According to documentation software must consider the configuration - * as 2x4x8 and hardware will translate this to 1x8x8. - * - * Furthemore, even though SScount is three bits, maximum documented - * value for it is four. 
From this some rules/restrictions follow: - * - * 1. - * If enabled subslice count is greater than four, two whole slices must - * be enabled instead. - * - * 2. - * When more than one slice is enabled, hardware ignores the subslice - * count altogether. - * - * From these restrictions it follows that it is not possible to enable - * a count of subslices between the SScount maximum of four restriction, - * and the maximum available number on a particular SKU. Either all - * subslices are enabled, or a count between one and four on the first - * slice. - */ - if (IS_GEN(i915, 11) && - slices == 1 && - subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) { - GEM_BUG_ON(subslices & 1); - - subslice_pg = false; - slices *= 2; - } - - /* - * Starting in Gen9, render power gating can leave - * slice/subslice/EU in a partially enabled state. We - * must make an explicit request through RPCS for full - * enablement. - */ - if (sseu->has_slice_pg) { - u32 mask, val = slices; - - if (INTEL_GEN(i915) >= 11) { - mask = GEN11_RPCS_S_CNT_MASK; - val <<= GEN11_RPCS_S_CNT_SHIFT; - } else { - mask = GEN8_RPCS_S_CNT_MASK; - val <<= GEN8_RPCS_S_CNT_SHIFT; - } - - GEM_BUG_ON(val & ~mask); - val &= mask; - - rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val; - } - - if (subslice_pg) { - u32 val = subslices; - - val <<= GEN8_RPCS_SS_CNT_SHIFT; - - GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK); - val &= GEN8_RPCS_SS_CNT_MASK; - - rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val; - } - - if (sseu->has_eu_pg) { - u32 val; - - val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; - GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); - val &= GEN8_RPCS_EU_MIN_MASK; - - rpcs |= val; - - val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; - GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); - val &= GEN8_RPCS_EU_MAX_MASK; - - rpcs |= val; - - rpcs |= GEN8_RPCS_ENABLE; - } - - return rpcs; -} - static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) { u32 indirect_ctx_offset; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 84aa230ea27b..99f75ee9d087 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -115,6 +115,4 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, const char *prefix), unsigned int max); -u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu); - #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_sseu.c b/drivers/gpu/drm/i915/intel_sseu.c new file mode 100644 index 000000000000..7f448f3bea0b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_sseu.c @@ -0,0 +1,142 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_lrc_reg.h" +#include "intel_sseu.h" + +u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, + const struct intel_sseu *req_sseu) +{ + const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; + bool subslice_pg = sseu->has_subslice_pg; + struct intel_sseu ctx_sseu; + u8 slices, subslices; + u32 rpcs = 0; + + /* + * No explicit RPCS request is needed to ensure full + * slice/subslice/EU enablement prior to Gen9. + */ + if (INTEL_GEN(i915) < 9) + return 0; + + /* + * If i915/perf is active, we want a stable powergating configuration + * on the system. + * + * We could choose full enablement, but on ICL we know there are use + * cases which disable slices for functional, apart for performance + * reasons. 
So in this case we select a known stable subset. + */ + if (!i915->perf.oa.exclusive_stream) { + ctx_sseu = *req_sseu; + } else { + ctx_sseu = intel_sseu_from_device_info(sseu); + + if (IS_GEN(i915, 11)) { + /* + * We only need subslice count so it doesn't matter + * which ones we select - just turn off low bits in the + * amount of half of all available subslices per slice. + */ + ctx_sseu.subslice_mask = + ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2)); + ctx_sseu.slice_mask = 0x1; + } + } + + slices = hweight8(ctx_sseu.slice_mask); + subslices = hweight8(ctx_sseu.subslice_mask); + + /* + * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits + * wide and Icelake has up to eight subslices, specfial programming is + * needed in order to correctly enable all subslices. + * + * According to documentation software must consider the configuration + * as 2x4x8 and hardware will translate this to 1x8x8. + * + * Furthemore, even though SScount is three bits, maximum documented + * value for it is four. From this some rules/restrictions follow: + * + * 1. + * If enabled subslice count is greater than four, two whole slices must + * be enabled instead. + * + * 2. + * When more than one slice is enabled, hardware ignores the subslice + * count altogether. + * + * From these restrictions it follows that it is not possible to enable + * a count of subslices between the SScount maximum of four restriction, + * and the maximum available number on a particular SKU. Either all + * subslices are enabled, or a count between one and four on the first + * slice. + */ + if (IS_GEN(i915, 11) && + slices == 1 && + subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) { + GEM_BUG_ON(subslices & 1); + + subslice_pg = false; + slices *= 2; + } + + /* + * Starting in Gen9, render power gating can leave + * slice/subslice/EU in a partially enabled state. We + * must make an explicit request through RPCS for full + * enablement. 
+ */ + if (sseu->has_slice_pg) { + u32 mask, val = slices; + + if (INTEL_GEN(i915) >= 11) { + mask = GEN11_RPCS_S_CNT_MASK; + val <<= GEN11_RPCS_S_CNT_SHIFT; + } else { + mask = GEN8_RPCS_S_CNT_MASK; + val <<= GEN8_RPCS_S_CNT_SHIFT; + } + + GEM_BUG_ON(val & ~mask); + val &= mask; + + rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val; + } + + if (subslice_pg) { + u32 val = subslices; + + val <<= GEN8_RPCS_SS_CNT_SHIFT; + + GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK); + val &= GEN8_RPCS_SS_CNT_MASK; + + rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val; + } + + if (sseu->has_eu_pg) { + u32 val; + + val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; + GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); + val &= GEN8_RPCS_EU_MIN_MASK; + + rpcs |= val; + + val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; + GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); + val &= GEN8_RPCS_EU_MAX_MASK; + + rpcs |= val; + + rpcs |= GEN8_RPCS_ENABLE; + } + + return rpcs; +} diff --git a/drivers/gpu/drm/i915/intel_sseu.h b/drivers/gpu/drm/i915/intel_sseu.h new file mode 100644 index 000000000000..73bc824094e8 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_sseu.h @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_SSEU_H__ +#define __INTEL_SSEU_H__ + +#include + +struct drm_i915_private; + +#define GEN_MAX_SLICES (6) /* CNL upper bound */ +#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ + +struct sseu_dev_info { + u8 slice_mask; + u8 subslice_mask[GEN_MAX_SLICES]; + u16 eu_total; + u8 eu_per_subslice; + u8 min_eu_in_pool; + /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ + u8 subslice_7eu[3]; + u8 has_slice_pg:1; + u8 has_subslice_pg:1; + u8 has_eu_pg:1; + + /* Topology fields */ + u8 max_slices; + u8 max_subslices; + u8 max_eus_per_subslice; + + /* We don't have more than 8 eus per subslice at the moment and as we + * store eus enabled using bits, no need to multiply by eus per + * subslice. + */ + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; +}; + +/* + * Powergating configuration for a particular (context,engine). 
+ */ +struct intel_sseu { + u8 slice_mask; + u8 subslice_mask; + u8 min_eus_per_subslice; + u8 max_eus_per_subslice; +}; + +static inline struct intel_sseu +intel_sseu_from_device_info(const struct sseu_dev_info *sseu) +{ + struct intel_sseu value = { + .slice_mask = sseu->slice_mask, + .subslice_mask = sseu->subslice_mask[0], + .min_eus_per_subslice = sseu->max_eus_per_subslice, + .max_eus_per_subslice = sseu->max_eus_per_subslice, + }; + + return value; +} + +u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, + const struct intel_sseu *req_sseu); + +#endif /* __INTEL_SSEU_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 4e1b6efc6b22..e1cb22f03e8e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -962,8 +962,7 @@ __sseu_finish(struct drm_i915_private *i915, unsigned int expected, struct igt_spinner *spin) { - unsigned int slices = - hweight32(intel_device_default_sseu(i915).slice_mask); + unsigned int slices = hweight32(engine->sseu.slice_mask); u32 rpcs = 0; int ret = 0; @@ -1047,8 +1046,8 @@ __igt_ctx_sseu(struct drm_i915_private *i915, const char *name, unsigned int flags) { - struct intel_sseu default_sseu = intel_device_default_sseu(i915); struct intel_engine_cs *engine = i915->engine[RCS0]; + struct intel_sseu default_sseu = engine->sseu; struct drm_i915_gem_object *obj; struct i915_gem_context *ctx; struct intel_sseu pg_sseu; -- cgit From 86554f48e511faa58f729cc077b1733179882804 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 12:09:41 +0100 Subject: drm/i915/selftests: Verify whitelist of context registers The RING_NONPRIV allows us to add registers to a whitelist that allows userspace to modify them. Ideally such registers should be safe and saved within the context such that they do not impact system behaviour for other users. This selftest verifies that those registers we do add are (a) then writable by userspace and (b) only affect a single client. Opens: - Is GEN9_SLICE_COMMON_ECO_CHICKEN1 really write-only? v2: Remove the blatant copy-paste. v3: Emulate userspace register writes via the batch again. 
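The flow of the new test, condensed from the code added below (setup and error handling omitted; names match the added functions):

	/*
	 * for each engine with a non-empty whitelist:
	 *   read_whitelisted_registers(client[0].ctx)  -> A0   (defaults, read via SRM)
	 *   scrub_whitelisted_registers(client[0].ctx)         (LRI 0xffffffff from a user batch)
	 *   read_whitelisted_registers(client[1].ctx)  -> B0
	 *   check_whitelisted_registers(A0, B0, result_eq)     (second context still sees defaults)
	 *   read_whitelisted_registers(client[0].ctx)  -> A1
	 *   check_whitelisted_registers(A0, A1, result_neq)    (first context's writes stuck)
	 */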
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190424110941.9869-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/intel_workarounds.c | 312 +++++++++++++++++++++ 1 file changed, 312 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index a363748a7a4f..aa841e4d3031 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -700,6 +700,317 @@ out: return err; } +static int read_whitelisted_registers(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct i915_vma *results) +{ + intel_wakeref_t wakeref; + struct i915_request *rq; + int i, err = 0; + u32 srm, *cs; + + rq = ERR_PTR(-ENODEV); + with_intel_runtime_pm(engine->i915, wakeref) + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + srm = MI_STORE_REGISTER_MEM; + if (INTEL_GEN(ctx->i915) >= 8) + srm++; + + cs = intel_ring_begin(rq, 4 * engine->whitelist.count); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_req; + } + + for (i = 0; i < engine->whitelist.count; i++) { + u64 offset = results->node.start + sizeof(u32) * i; + + *cs++ = srm; + *cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + } + intel_ring_advance(rq, cs); + +err_req: + i915_request_add(rq); + + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) + err = -EIO; + + return err; +} + +static int scrub_whitelisted_registers(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + intel_wakeref_t wakeref; + struct i915_request *rq; + struct i915_vma *batch; + int i, err = 0; + u32 *cs; + + batch = create_batch(ctx); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_batch; + } + + *cs++ = MI_LOAD_REGISTER_IMM(engine->whitelist.count); + for (i = 0; i < engine->whitelist.count; i++) { + *cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg); + *cs++ = 0xffffffff; + } + *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(batch->obj); + i915_gem_chipset_flush(ctx->i915); + + rq = ERR_PTR(-ENODEV); + with_intel_runtime_pm(engine->i915, wakeref) + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + if (engine->emit_init_breadcrumb) { /* Be nice if we hang */ + err = engine->emit_init_breadcrumb(rq); + if (err) + goto err_request; + } + + /* Perform the writes from an unprivileged "user" batch */ + err = engine->emit_bb_start(rq, batch->node.start, 0, 0); + +err_request: + i915_request_add(rq); + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) + err = -EIO; + +err_unpin: + i915_gem_object_unpin_map(batch->obj); +err_batch: + i915_vma_unpin_and_release(&batch, 0); + return err; +} + +struct regmask { + i915_reg_t reg; + unsigned long gen_mask; +}; + +static bool find_reg(struct drm_i915_private *i915, + i915_reg_t reg, + const struct regmask *tbl, + unsigned long count) +{ + u32 offset = i915_mmio_reg_offset(reg); + + while (count--) { + if (INTEL_INFO(i915)->gen_mask & tbl->gen_mask && + i915_mmio_reg_offset(tbl->reg) == offset) + return true; + tbl++; + } + + return false; +} + +static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg) +{ + /* Alas, we must pardon some whitelists. 
Mistakes already made */ + static const struct regmask pardon[] = { + { GEN9_CTX_PREEMPT_REG, INTEL_GEN_MASK(9, 9) }, + { GEN8_L3SQCREG4, INTEL_GEN_MASK(9, 9) }, + }; + + return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon)); +} + +static bool result_eq(struct intel_engine_cs *engine, + u32 a, u32 b, i915_reg_t reg) +{ + if (a != b && !pardon_reg(engine->i915, reg)) { + pr_err("Whitelisted register 0x%4x not context saved: A=%08x, B=%08x\n", + i915_mmio_reg_offset(reg), a, b); + return false; + } + + return true; +} + +static bool writeonly_reg(struct drm_i915_private *i915, i915_reg_t reg) +{ + /* Some registers do not seem to behave and our writes unreadable */ + static const struct regmask wo[] = { + { GEN9_SLICE_COMMON_ECO_CHICKEN1, INTEL_GEN_MASK(9, 9) }, + }; + + return find_reg(i915, reg, wo, ARRAY_SIZE(wo)); +} + +static bool result_neq(struct intel_engine_cs *engine, + u32 a, u32 b, i915_reg_t reg) +{ + if (a == b && !writeonly_reg(engine->i915, reg)) { + pr_err("Whitelist register 0x%4x:%08x was unwritable\n", + i915_mmio_reg_offset(reg), a); + return false; + } + + return true; +} + +static int +check_whitelisted_registers(struct intel_engine_cs *engine, + struct i915_vma *A, + struct i915_vma *B, + bool (*fn)(struct intel_engine_cs *engine, + u32 a, u32 b, + i915_reg_t reg)) +{ + u32 *a, *b; + int i, err; + + a = i915_gem_object_pin_map(A->obj, I915_MAP_WB); + if (IS_ERR(a)) + return PTR_ERR(a); + + b = i915_gem_object_pin_map(B->obj, I915_MAP_WB); + if (IS_ERR(b)) { + err = PTR_ERR(b); + goto err_a; + } + + err = 0; + for (i = 0; i < engine->whitelist.count; i++) { + if (!fn(engine, a[i], b[i], engine->whitelist.list[i].reg)) + err = -EINVAL; + } + + i915_gem_object_unpin_map(B->obj); +err_a: + i915_gem_object_unpin_map(A->obj); + return err; +} + +static int live_isolated_whitelist(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct { + struct i915_gem_context *ctx; + struct i915_vma *scratch[2]; + } client[2] = {}; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int i, err = 0; + + /* + * Check that a write into a whitelist register works, but + * invisible to a second context. 
+ */ + + if (!intel_engines_has_context_isolation(i915)) + return 0; + + if (!i915->kernel_context->ppgtt) + return 0; + + for (i = 0; i < ARRAY_SIZE(client); i++) { + struct i915_gem_context *c; + + c = kernel_context(i915); + if (IS_ERR(c)) { + err = PTR_ERR(c); + goto err; + } + + client[i].scratch[0] = create_scratch(&c->ppgtt->vm, 1024); + if (IS_ERR(client[i].scratch[0])) { + err = PTR_ERR(client[i].scratch[0]); + kernel_context_close(c); + goto err; + } + + client[i].scratch[1] = create_scratch(&c->ppgtt->vm, 1024); + if (IS_ERR(client[i].scratch[1])) { + err = PTR_ERR(client[i].scratch[1]); + i915_vma_unpin_and_release(&client[i].scratch[0], 0); + kernel_context_close(c); + goto err; + } + + client[i].ctx = c; + } + + for_each_engine(engine, i915, id) { + if (!engine->whitelist.count) + continue; + + /* Read default values */ + err = read_whitelisted_registers(client[0].ctx, engine, + client[0].scratch[0]); + if (err) + goto err; + + /* Try to overwrite registers (should only affect ctx0) */ + err = scrub_whitelisted_registers(client[0].ctx, engine); + if (err) + goto err; + + /* Read values from ctx1, we expect these to be defaults */ + err = read_whitelisted_registers(client[1].ctx, engine, + client[1].scratch[0]); + if (err) + goto err; + + /* Verify that both reads return the same default values */ + err = check_whitelisted_registers(engine, + client[0].scratch[0], + client[1].scratch[0], + result_eq); + if (err) + goto err; + + /* Read back the updated values in ctx0 */ + err = read_whitelisted_registers(client[0].ctx, engine, + client[0].scratch[1]); + if (err) + goto err; + + /* User should be granted privilege to overwhite regs */ + err = check_whitelisted_registers(engine, + client[0].scratch[0], + client[0].scratch[1], + result_neq); + if (err) + goto err; + } + +err: + for (i = 0; i < ARRAY_SIZE(client); i++) { + if (!client[i].ctx) + break; + + i915_vma_unpin_and_release(&client[i].scratch[1], 0); + i915_vma_unpin_and_release(&client[i].scratch[0], 0); + kernel_context_close(client[i].ctx); + } + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + return err; +} + static bool verify_gt_engine_wa(struct drm_i915_private *i915, struct wa_lists *lists, const char *str) { @@ -844,6 +1155,7 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(live_dirty_whitelist), SUBTEST(live_reset_whitelist), + SUBTEST(live_isolated_whitelist), SUBTEST(live_gpu_reset_gt_engine_workarounds), SUBTEST(live_engine_reset_gt_engine_workarounds), }; -- cgit From 112ed2d31a46f4704085ad925435b77e62b8abee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 18:48:39 +0100 Subject: drm/i915: Move GraphicsTechnology files under gt/ Start partitioning off the code that talks to the hardware (GT) from the uapi layers and move the device facing code under gt/ One casualty is s/intel_ringbuffer.h/intel_engine.h/ with the plan to subdivide that header and body further (and split out the submission code from the ringbuffer and logical context handling). This patch aims to be simple motion so git can fixup inflight patches with little mess. 
Signed-off-by: Chris Wilson Acked-by: Joonas Lahtinen Acked-by: Jani Nikula Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20190424174839.7141-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 46 +- drivers/gpu/drm/i915/Makefile.header-test | 6 +- drivers/gpu/drm/i915/gt/Makefile | 2 + drivers/gpu/drm/i915/gt/Makefile.header-test | 16 + drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 337 +++ drivers/gpu/drm/i915/gt/intel_context.c | 268 ++ drivers/gpu/drm/i915/gt/intel_context.h | 87 + drivers/gpu/drm/i915/gt/intel_context_types.h | 65 + drivers/gpu/drm/i915/gt/intel_engine.h | 583 ++++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1761 ++++++++++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 548 ++++ drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 278 ++ drivers/gpu/drm/i915/gt/intel_hangcheck.c | 334 +++ drivers/gpu/drm/i915/gt/intel_lrc.c | 2908 +++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_lrc.h | 116 + drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 68 + drivers/gpu/drm/i915/gt/intel_mocs.c | 566 ++++ drivers/gpu/drm/i915/gt/intel_mocs.h | 60 + drivers/gpu/drm/i915/gt/intel_reset.c | 1471 ++++++++++ drivers/gpu/drm/i915/gt/intel_reset.h | 69 + drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 2338 ++++++++++++++++ drivers/gpu/drm/i915/gt/intel_sseu.c | 142 + drivers/gpu/drm/i915/gt/intel_sseu.h | 67 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 1402 ++++++++++ drivers/gpu/drm/i915/gt/intel_workarounds.h | 40 + drivers/gpu/drm/i915/gt/intel_workarounds_types.h | 28 + drivers/gpu/drm/i915/gt/mock_engine.c | 325 +++ drivers/gpu/drm/i915/gt/mock_engine.h | 49 + drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 58 + drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 1919 +++++++++++++ drivers/gpu/drm/i915/gt/selftest_lrc.c | 1324 +++++++++ drivers/gpu/drm/i915/gt/selftest_workarounds.c | 1172 ++++++++ drivers/gpu/drm/i915/i915_cmd_parser.c | 3 +- drivers/gpu/drm/i915/i915_debugfs.c | 3 +- drivers/gpu/drm/i915/i915_drv.c | 5 +- drivers/gpu/drm/i915/i915_drv.h | 7 +- drivers/gpu/drm/i915/i915_gem.c | 7 +- drivers/gpu/drm/i915/i915_gem_context.c | 7 +- drivers/gpu/drm/i915/i915_gem_context.h | 3 +- drivers/gpu/drm/i915/i915_gem_context_types.h | 3 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 1 - drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +- drivers/gpu/drm/i915/i915_gpu_error.h | 3 +- drivers/gpu/drm/i915/i915_perf.c | 3 +- drivers/gpu/drm/i915/i915_pmu.c | 4 +- drivers/gpu/drm/i915/i915_request.c | 1 - drivers/gpu/drm/i915/i915_reset.c | 1471 ---------- drivers/gpu/drm/i915/i915_reset.h | 69 - drivers/gpu/drm/i915/i915_scheduler_types.h | 2 +- drivers/gpu/drm/i915/i915_trace.h | 3 +- drivers/gpu/drm/i915/i915_vma.c | 3 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 337 --- drivers/gpu/drm/i915/intel_context.c | 267 -- drivers/gpu/drm/i915/intel_context.h | 87 - drivers/gpu/drm/i915/intel_context_types.h | 65 - drivers/gpu/drm/i915/intel_device_info.h | 6 +- drivers/gpu/drm/i915/intel_display.c | 1 - drivers/gpu/drm/i915/intel_engine_cs.c | 1761 ------------ drivers/gpu/drm/i915/intel_engine_types.h | 549 ---- drivers/gpu/drm/i915/intel_gpu_commands.h | 278 -- drivers/gpu/drm/i915/intel_guc_submission.c | 3 +- drivers/gpu/drm/i915/intel_guc_submission.h | 3 +- drivers/gpu/drm/i915/intel_hangcheck.c | 334 --- drivers/gpu/drm/i915/intel_lrc.c | 2909 -------------------- drivers/gpu/drm/i915/intel_lrc.h | 118 - drivers/gpu/drm/i915/intel_lrc_reg.h | 68 - drivers/gpu/drm/i915/intel_mocs.c | 564 ---- drivers/gpu/drm/i915/intel_mocs.h | 58 - 
drivers/gpu/drm/i915/intel_ringbuffer.c | 2339 ---------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 583 ---- drivers/gpu/drm/i915/intel_sseu.c | 142 - drivers/gpu/drm/i915/intel_sseu.h | 67 - drivers/gpu/drm/i915/intel_uc.c | 2 +- drivers/gpu/drm/i915/intel_workarounds.c | 1402 ---------- drivers/gpu/drm/i915/intel_workarounds.h | 36 - drivers/gpu/drm/i915/intel_workarounds_types.h | 28 - drivers/gpu/drm/i915/selftests/i915_gem_context.c | 5 +- drivers/gpu/drm/i915/selftests/igt_reset.c | 3 +- drivers/gpu/drm/i915/selftests/igt_spinner.h | 3 +- drivers/gpu/drm/i915/selftests/intel_engine_cs.c | 58 - drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 1919 ------------- drivers/gpu/drm/i915/selftests/intel_lrc.c | 1326 --------- drivers/gpu/drm/i915/selftests/intel_workarounds.c | 1172 -------- drivers/gpu/drm/i915/selftests/mock_engine.c | 321 --- drivers/gpu/drm/i915/selftests/mock_engine.h | 49 - drivers/gpu/drm/i915/selftests/mock_gem_device.c | 3 +- drivers/gpu/drm/i915/selftests/mock_request.c | 3 +- 87 files changed, 18488 insertions(+), 18434 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/Makefile create mode 100644 drivers/gpu/drm/i915/gt/Makefile.header-test create mode 100644 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c create mode 100644 drivers/gpu/drm/i915/gt/intel_context.c create mode 100644 drivers/gpu/drm/i915/gt/intel_context.h create mode 100644 drivers/gpu/drm/i915/gt/intel_context_types.h create mode 100644 drivers/gpu/drm/i915/gt/intel_engine.h create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_cs.c create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_types.h create mode 100644 drivers/gpu/drm/i915/gt/intel_gpu_commands.h create mode 100644 drivers/gpu/drm/i915/gt/intel_hangcheck.c create mode 100644 drivers/gpu/drm/i915/gt/intel_lrc.c create mode 100644 drivers/gpu/drm/i915/gt/intel_lrc.h create mode 100644 drivers/gpu/drm/i915/gt/intel_lrc_reg.h create mode 100644 drivers/gpu/drm/i915/gt/intel_mocs.c create mode 100644 drivers/gpu/drm/i915/gt/intel_mocs.h create mode 100644 drivers/gpu/drm/i915/gt/intel_reset.c create mode 100644 drivers/gpu/drm/i915/gt/intel_reset.h create mode 100644 drivers/gpu/drm/i915/gt/intel_ringbuffer.c create mode 100644 drivers/gpu/drm/i915/gt/intel_sseu.c create mode 100644 drivers/gpu/drm/i915/gt/intel_sseu.h create mode 100644 drivers/gpu/drm/i915/gt/intel_workarounds.c create mode 100644 drivers/gpu/drm/i915/gt/intel_workarounds.h create mode 100644 drivers/gpu/drm/i915/gt/intel_workarounds_types.h create mode 100644 drivers/gpu/drm/i915/gt/mock_engine.c create mode 100644 drivers/gpu/drm/i915/gt/mock_engine.h create mode 100644 drivers/gpu/drm/i915/gt/selftest_engine_cs.c create mode 100644 drivers/gpu/drm/i915/gt/selftest_hangcheck.c create mode 100644 drivers/gpu/drm/i915/gt/selftest_lrc.c create mode 100644 drivers/gpu/drm/i915/gt/selftest_workarounds.c delete mode 100644 drivers/gpu/drm/i915/i915_reset.c delete mode 100644 drivers/gpu/drm/i915/i915_reset.h delete mode 100644 drivers/gpu/drm/i915/intel_breadcrumbs.c delete mode 100644 drivers/gpu/drm/i915/intel_context.c delete mode 100644 drivers/gpu/drm/i915/intel_context.h delete mode 100644 drivers/gpu/drm/i915/intel_context_types.h delete mode 100644 drivers/gpu/drm/i915/intel_engine_cs.c delete mode 100644 drivers/gpu/drm/i915/intel_engine_types.h delete mode 100644 drivers/gpu/drm/i915/intel_gpu_commands.h delete mode 100644 drivers/gpu/drm/i915/intel_hangcheck.c delete mode 100644 drivers/gpu/drm/i915/intel_lrc.c delete mode 100644 
drivers/gpu/drm/i915/intel_lrc.h delete mode 100644 drivers/gpu/drm/i915/intel_lrc_reg.h delete mode 100644 drivers/gpu/drm/i915/intel_mocs.c delete mode 100644 drivers/gpu/drm/i915/intel_mocs.h delete mode 100644 drivers/gpu/drm/i915/intel_ringbuffer.c delete mode 100644 drivers/gpu/drm/i915/intel_ringbuffer.h delete mode 100644 drivers/gpu/drm/i915/intel_sseu.c delete mode 100644 drivers/gpu/drm/i915/intel_sseu.h delete mode 100644 drivers/gpu/drm/i915/intel_workarounds.c delete mode 100644 drivers/gpu/drm/i915/intel_workarounds.h delete mode 100644 drivers/gpu/drm/i915/intel_workarounds_types.h delete mode 100644 drivers/gpu/drm/i915/selftests/intel_engine_cs.c delete mode 100644 drivers/gpu/drm/i915/selftests/intel_hangcheck.c delete mode 100644 drivers/gpu/drm/i915/selftests/intel_lrc.c delete mode 100644 drivers/gpu/drm/i915/selftests/intel_workarounds.c delete mode 100644 drivers/gpu/drm/i915/selftests/mock_engine.c delete mode 100644 drivers/gpu/drm/i915/selftests/mock_engine.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 53ff209b91bb..40130cf5c003 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -35,32 +35,53 @@ subdir-ccflags-y += \ # Extra header tests include $(src)/Makefile.header-test +subdir-ccflags-y += -I$(src) + # Please keep these build lists sorted! # core driver code i915-y += i915_drv.o \ i915_irq.o \ - i915_memcpy.o \ - i915_mm.o \ i915_params.o \ i915_pci.o \ - i915_reset.o \ i915_suspend.o \ - i915_sw_fence.o \ - i915_syncmap.o \ i915_sysfs.o \ - i915_user_extensions.o \ intel_csr.o \ intel_device_info.o \ intel_pm.o \ intel_runtime_pm.o \ - intel_workarounds.o + intel_uncore.o + +# core library code +i915-y += \ + i915_memcpy.o \ + i915_mm.o \ + i915_sw_fence.o \ + i915_syncmap.o \ + i915_user_extensions.o i915-$(CONFIG_COMPAT) += i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o -# GEM code +# "Graphics Technology" (aka we talk to the gpu) +obj-y += gt/ +gt-y += \ + gt/intel_breadcrumbs.o \ + gt/intel_context.o \ + gt/intel_engine_cs.o \ + gt/intel_hangcheck.o \ + gt/intel_lrc.o \ + gt/intel_reset.o \ + gt/intel_ringbuffer.o \ + gt/intel_mocs.o \ + gt/intel_sseu.o \ + gt/intel_workarounds.o +gt-$(CONFIG_DRM_I915_SELFTEST) += \ + gt/mock_engine.o +i915-y += $(gt-y) + +# GEM (Graphics Execution Management) code i915-y += \ i915_active.o \ i915_cmd_parser.o \ @@ -88,15 +109,6 @@ i915-y += \ i915_timeline.o \ i915_trace_points.o \ i915_vma.o \ - intel_breadcrumbs.o \ - intel_context.o \ - intel_engine_cs.o \ - intel_hangcheck.o \ - intel_lrc.o \ - intel_mocs.o \ - intel_ringbuffer.o \ - intel_sseu.o \ - intel_uncore.o \ intel_wopcm.o # general-purpose microcontroller (GuC) support diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index 5bcc78d7ac96..96a5d90629ec 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -13,13 +13,11 @@ header_test := \ intel_cdclk.h \ intel_color.h \ intel_connector.h \ - intel_context_types.h \ intel_crt.h \ intel_csr.h \ intel_ddi.h \ intel_dp.h \ intel_dvo.h \ - intel_engine_types.h \ intel_fbc.h \ intel_fbdev.h \ intel_frontbuffer.h \ @@ -33,9 +31,7 @@ header_test := \ intel_psr.h \ intel_sdvo.h \ intel_sprite.h \ - intel_sseu.h \ - intel_tv.h \ - intel_workarounds_types.h + intel_tv.h quiet_cmd_header_test = HDRTEST $@ cmd_header_test = echo "\#include \"$( $@ diff --git 
a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile new file mode 100644 index 000000000000..1c75b5c9790c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/Makefile @@ -0,0 +1,2 @@ +# Extra header tests +include $(src)/Makefile.header-test diff --git a/drivers/gpu/drm/i915/gt/Makefile.header-test b/drivers/gpu/drm/i915/gt/Makefile.header-test new file mode 100644 index 000000000000..61e06cbb4b32 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/Makefile.header-test @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: MIT +# Copyright © 2019 Intel Corporation + +# Test the headers are compilable as standalone units +header_test := $(notdir $(wildcard $(src)/*.h)) + +quiet_cmd_header_test = HDRTEST $@ + cmd_header_test = echo "\#include \"$(<F)\"" > $@ + +header_test_%.c: %.h + $(call cmd,header_test) + +extra-$(CONFIG_DRM_I915_WERROR) += \ + $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h))) + +clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h))) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c new file mode 100644 index 000000000000..3cbffd400b1b --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -0,0 +1,337 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include +#include + +#include "i915_drv.h" + +static void irq_enable(struct intel_engine_cs *engine) +{ + if (!engine->irq_enable) + return; + + /* Caller disables interrupts */ + spin_lock(&engine->i915->irq_lock); + engine->irq_enable(engine); + spin_unlock(&engine->i915->irq_lock); +} + +static void irq_disable(struct intel_engine_cs *engine) +{ + if (!engine->irq_disable) + return; + + /* Caller disables interrupts */ + spin_lock(&engine->i915->irq_lock); + engine->irq_disable(engine); + spin_unlock(&engine->i915->irq_lock); +} + +static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) +{ + lockdep_assert_held(&b->irq_lock); + + GEM_BUG_ON(!b->irq_enabled); + if (!--b->irq_enabled) + irq_disable(container_of(b, + struct intel_engine_cs, + breadcrumbs)); + + b->irq_armed = false; +} + +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + if (!b->irq_armed) + return; + + spin_lock_irq(&b->irq_lock); + if (b->irq_armed) + __intel_breadcrumbs_disarm_irq(b); + spin_unlock_irq(&b->irq_lock); +} + +static inline bool __request_completed(const struct i915_request *rq) +{ + return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); +} + +void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_context *ce, *cn; + struct list_head *pos, *next; + LIST_HEAD(signal); + + spin_lock(&b->irq_lock); + + if (b->irq_armed && list_empty(&b->signalers)) + __intel_breadcrumbs_disarm_irq(b); + + list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { + GEM_BUG_ON(list_empty(&ce->signals)); + + list_for_each_safe(pos, next, &ce->signals) { + struct i915_request *rq = + list_entry(pos, typeof(*rq), signal_link); + + if (!__request_completed(rq)) + break; + + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, + &rq->fence.flags)); + + /* + * Queue for execution after dropping the signaling + * spinlock as the callback chain may end up adding + * more signalers to the same context or engine. + */ + i915_request_get(rq); + + /* + * We may race with direct invocation of + * dma_fence_signal(), e.g. i915_request_retire(), + * so we need to acquire our reference to the request + * before we cancel the breadcrumb. + */ + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + list_add_tail(&rq->signal_link, &signal); + } + + /* + * We process the list deletion in bulk, only using a list_add + * (not list_move) above but keeping the status of + * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit. 
+ */ + if (!list_is_first(pos, &ce->signals)) { + /* Advance the list to the first incomplete request */ + __list_del_many(&ce->signals, pos); + if (&ce->signals == pos) /* now empty */ + list_del_init(&ce->signal_link); + } + } + + spin_unlock(&b->irq_lock); + + list_for_each_safe(pos, next, &signal) { + struct i915_request *rq = + list_entry(pos, typeof(*rq), signal_link); + + dma_fence_signal(&rq->fence); + i915_request_put(rq); + } +} + +void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) +{ + local_irq_disable(); + intel_engine_breadcrumbs_irq(engine); + local_irq_enable(); +} + +static void signal_irq_work(struct irq_work *work) +{ + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), breadcrumbs.irq_work); + + intel_engine_breadcrumbs_irq(engine); +} + +void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + spin_lock_irq(&b->irq_lock); + if (!b->irq_enabled++) + irq_enable(engine); + GEM_BUG_ON(!b->irq_enabled); /* no overflow! */ + spin_unlock_irq(&b->irq_lock); +} + +void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + spin_lock_irq(&b->irq_lock); + GEM_BUG_ON(!b->irq_enabled); /* no underflow! */ + if (!--b->irq_enabled) + irq_disable(engine); + spin_unlock_irq(&b->irq_lock); +} + +static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) +{ + struct intel_engine_cs *engine = + container_of(b, struct intel_engine_cs, breadcrumbs); + + lockdep_assert_held(&b->irq_lock); + if (b->irq_armed) + return; + + /* + * The breadcrumb irq will be disarmed on the interrupt after the + * waiters are signaled. This gives us a single interrupt window in + * which we can add a new waiter and avoid the cost of re-enabling + * the irq. + */ + b->irq_armed = true; + + /* + * Since we are waiting on a request, the GPU should be busy + * and should have its own rpm reference. This is tracked + * by i915->gt.awake, we can forgo holding our own wakref + * for the interrupt as before i915->gt.awake is released (when + * the driver is idle) we disarm the breadcrumbs. 
+ */ + + if (!b->irq_enabled++) + irq_enable(engine); +} + +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + spin_lock_init(&b->irq_lock); + INIT_LIST_HEAD(&b->signalers); + + init_irq_work(&b->irq_work, signal_irq_work); +} + +void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; + + spin_lock_irqsave(&b->irq_lock, flags); + + if (b->irq_enabled) + irq_enable(engine); + else + irq_disable(engine); + + spin_unlock_irqrestore(&b->irq_lock, flags); +} + +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) +{ +} + +bool i915_request_enable_breadcrumb(struct i915_request *rq) +{ + struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; + + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); + + if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) + return true; + + spin_lock(&b->irq_lock); + if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) && + !__request_completed(rq)) { + struct intel_context *ce = rq->hw_context; + struct list_head *pos; + + __intel_breadcrumbs_arm_irq(b); + + /* + * We keep the seqno in retirement order, so we can break + * inside intel_engine_breadcrumbs_irq as soon as we've passed + * the last completed request (or seen a request that hasn't + * event started). We could iterate the timeline->requests list, + * but keeping a separate signalers_list has the advantage of + * hopefully being much smaller than the full list and so + * provides faster iteration and detection when there are no + * more interrupts required for this context. + * + * We typically expect to add new signalers in order, so we + * start looking for our insertion point from the tail of + * the list. + */ + list_for_each_prev(pos, &ce->signals) { + struct i915_request *it = + list_entry(pos, typeof(*it), signal_link); + + if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno)) + break; + } + list_add(&rq->signal_link, pos); + if (pos == &ce->signals) /* catch transitions from empty list */ + list_move_tail(&ce->signal_link, &b->signalers); + + set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + } + spin_unlock(&b->irq_lock); + + return !__request_completed(rq); +} + +void i915_request_cancel_breadcrumb(struct i915_request *rq) +{ + struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; + + if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) + return; + + spin_lock(&b->irq_lock); + if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { + struct intel_context *ce = rq->hw_context; + + list_del(&rq->signal_link); + if (list_empty(&ce->signals)) + list_del_init(&ce->signal_link); + + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + } + spin_unlock(&b->irq_lock); +} + +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_context *ce; + struct i915_request *rq; + + if (list_empty(&b->signalers)) + return; + + drm_printf(p, "Signals:\n"); + + spin_lock_irq(&b->irq_lock); + list_for_each_entry(ce, &b->signalers, signal_link) { + list_for_each_entry(rq, &ce->signals, signal_link) { + drm_printf(p, "\t[%llx:%llx%s] @ %dms\n", + rq->fence.context, rq->fence.seqno, + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? 
"*" : + "", + jiffies_to_msecs(jiffies - rq->emitted_jiffies)); + } + } + spin_unlock_irq(&b->irq_lock); +} diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c new file mode 100644 index 000000000000..ebd1e5919a4a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -0,0 +1,268 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_gem_context.h" +#include "i915_globals.h" + +#include "intel_context.h" +#include "intel_engine.h" + +static struct i915_global_context { + struct i915_global base; + struct kmem_cache *slab_ce; +} global; + +struct intel_context *intel_context_alloc(void) +{ + return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL); +} + +void intel_context_free(struct intel_context *ce) +{ + kmem_cache_free(global.slab_ce, ce); +} + +struct intel_context * +intel_context_lookup(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct intel_context *ce = NULL; + struct rb_node *p; + + spin_lock(&ctx->hw_contexts_lock); + p = ctx->hw_contexts.rb_node; + while (p) { + struct intel_context *this = + rb_entry(p, struct intel_context, node); + + if (this->engine == engine) { + GEM_BUG_ON(this->gem_context != ctx); + ce = this; + break; + } + + if (this->engine < engine) + p = p->rb_right; + else + p = p->rb_left; + } + spin_unlock(&ctx->hw_contexts_lock); + + return ce; +} + +struct intel_context * +__intel_context_insert(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_context *ce) +{ + struct rb_node **p, *parent; + int err = 0; + + spin_lock(&ctx->hw_contexts_lock); + + parent = NULL; + p = &ctx->hw_contexts.rb_node; + while (*p) { + struct intel_context *this; + + parent = *p; + this = rb_entry(parent, struct intel_context, node); + + if (this->engine == engine) { + err = -EEXIST; + ce = this; + break; + } + + if (this->engine < engine) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + if (!err) { + rb_link_node(&ce->node, parent, p); + rb_insert_color(&ce->node, &ctx->hw_contexts); + } + + spin_unlock(&ctx->hw_contexts_lock); + + return ce; +} + +void __intel_context_remove(struct intel_context *ce) +{ + struct i915_gem_context *ctx = ce->gem_context; + + spin_lock(&ctx->hw_contexts_lock); + rb_erase(&ce->node, &ctx->hw_contexts); + spin_unlock(&ctx->hw_contexts_lock); +} + +static struct intel_context * +intel_context_instance(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct intel_context *ce, *pos; + + ce = intel_context_lookup(ctx, engine); + if (likely(ce)) + return ce; + + ce = intel_context_alloc(); + if (!ce) + return ERR_PTR(-ENOMEM); + + intel_context_init(ce, ctx, engine); + + pos = __intel_context_insert(ctx, engine, ce); + if (unlikely(pos != ce)) /* Beaten! 
Use their HW context instead */ + intel_context_free(ce); + + GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos); + return pos; +} + +struct intel_context * +intel_context_pin_lock(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) + __acquires(ce->pin_mutex) +{ + struct intel_context *ce; + + ce = intel_context_instance(ctx, engine); + if (IS_ERR(ce)) + return ce; + + if (mutex_lock_interruptible(&ce->pin_mutex)) + return ERR_PTR(-EINTR); + + return ce; +} + +struct intel_context * +intel_context_pin(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct intel_context *ce; + int err; + + ce = intel_context_instance(ctx, engine); + if (IS_ERR(ce)) + return ce; + + if (likely(atomic_inc_not_zero(&ce->pin_count))) + return ce; + + if (mutex_lock_interruptible(&ce->pin_mutex)) + return ERR_PTR(-EINTR); + + if (likely(!atomic_read(&ce->pin_count))) { + err = ce->ops->pin(ce); + if (err) + goto err; + + i915_gem_context_get(ctx); + GEM_BUG_ON(ce->gem_context != ctx); + + mutex_lock(&ctx->mutex); + list_add(&ce->active_link, &ctx->active_engines); + mutex_unlock(&ctx->mutex); + + intel_context_get(ce); + smp_mb__before_atomic(); /* flush pin before it is visible */ + } + + atomic_inc(&ce->pin_count); + GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */ + + mutex_unlock(&ce->pin_mutex); + return ce; + +err: + mutex_unlock(&ce->pin_mutex); + return ERR_PTR(err); +} + +void intel_context_unpin(struct intel_context *ce) +{ + if (likely(atomic_add_unless(&ce->pin_count, -1, 1))) + return; + + /* We may be called from inside intel_context_pin() to evict another */ + intel_context_get(ce); + mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING); + + if (likely(atomic_dec_and_test(&ce->pin_count))) { + ce->ops->unpin(ce); + + mutex_lock(&ce->gem_context->mutex); + list_del(&ce->active_link); + mutex_unlock(&ce->gem_context->mutex); + + i915_gem_context_put(ce->gem_context); + intel_context_put(ce); + } + + mutex_unlock(&ce->pin_mutex); + intel_context_put(ce); +} + +static void intel_context_retire(struct i915_active_request *active, + struct i915_request *rq) +{ + struct intel_context *ce = + container_of(active, typeof(*ce), active_tracker); + + intel_context_unpin(ce); +} + +void +intel_context_init(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + kref_init(&ce->ref); + + ce->gem_context = ctx; + ce->engine = engine; + ce->ops = engine->cops; + ce->sseu = engine->sseu; + + INIT_LIST_HEAD(&ce->signal_link); + INIT_LIST_HEAD(&ce->signals); + + mutex_init(&ce->pin_mutex); + + i915_active_request_init(&ce->active_tracker, + NULL, intel_context_retire); +} + +static void i915_global_context_shrink(void) +{ + kmem_cache_shrink(global.slab_ce); +} + +static void i915_global_context_exit(void) +{ + kmem_cache_destroy(global.slab_ce); +} + +static struct i915_global_context global = { { + .shrink = i915_global_context_shrink, + .exit = i915_global_context_exit, +} }; + +int __init i915_global_context_init(void) +{ + global.slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN); + if (!global.slab_ce) + return -ENOMEM; + + i915_global_register(&global.base); + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h new file mode 100644 index 000000000000..ebc861b1a49e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -0,0 +1,87 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_CONTEXT_H__ 
+#define __INTEL_CONTEXT_H__ + +#include + +#include "intel_context_types.h" +#include "intel_engine_types.h" + +struct intel_context *intel_context_alloc(void); +void intel_context_free(struct intel_context *ce); + +void intel_context_init(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine); + +/** + * intel_context_lookup - Find the matching HW context for this (ctx, engine) + * @ctx - the parent GEM context + * @engine - the target HW engine + * + * May return NULL if the HW context hasn't been instantiated (i.e. unused). + */ +struct intel_context * +intel_context_lookup(struct i915_gem_context *ctx, + struct intel_engine_cs *engine); + +/** + * intel_context_pin_lock - Stablises the 'pinned' status of the HW context + * @ctx - the parent GEM context + * @engine - the target HW engine + * + * Acquire a lock on the pinned status of the HW context, such that the context + * can neither be bound to the GPU or unbound whilst the lock is held, i.e. + * intel_context_is_pinned() remains stable. + */ +struct intel_context * +intel_context_pin_lock(struct i915_gem_context *ctx, + struct intel_engine_cs *engine); + +static inline bool +intel_context_is_pinned(struct intel_context *ce) +{ + return atomic_read(&ce->pin_count); +} + +static inline void intel_context_pin_unlock(struct intel_context *ce) +__releases(ce->pin_mutex) +{ + mutex_unlock(&ce->pin_mutex); +} + +struct intel_context * +__intel_context_insert(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_context *ce); +void +__intel_context_remove(struct intel_context *ce); + +struct intel_context * +intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine); + +static inline void __intel_context_pin(struct intel_context *ce) +{ + GEM_BUG_ON(!intel_context_is_pinned(ce)); + atomic_inc(&ce->pin_count); +} + +void intel_context_unpin(struct intel_context *ce); + +static inline struct intel_context *intel_context_get(struct intel_context *ce) +{ + kref_get(&ce->ref); + return ce; +} + +static inline void intel_context_put(struct intel_context *ce) +{ + kref_put(&ce->ref, ce->ops->destroy); +} + +#endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h new file mode 100644 index 000000000000..9ec4f787c908 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -0,0 +1,65 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_CONTEXT_TYPES__ +#define __INTEL_CONTEXT_TYPES__ + +#include +#include +#include +#include +#include + +#include "i915_active_types.h" +#include "intel_sseu.h" + +struct i915_gem_context; +struct i915_vma; +struct intel_context; +struct intel_ring; + +struct intel_context_ops { + int (*pin)(struct intel_context *ce); + void (*unpin)(struct intel_context *ce); + + void (*reset)(struct intel_context *ce); + void (*destroy)(struct kref *kref); +}; + +struct intel_context { + struct kref ref; + + struct i915_gem_context *gem_context; + struct intel_engine_cs *engine; + struct intel_engine_cs *active; + + struct list_head active_link; + struct list_head signal_link; + struct list_head signals; + + struct i915_vma *state; + struct intel_ring *ring; + + u32 *lrc_reg_state; + u64 lrc_desc; + + atomic_t pin_count; + struct mutex pin_mutex; /* guards pinning and associated on-gpuing */ + + /** + * active_tracker: Active tracker for the external rq activity + * on this intel_context object. 
+ */ + struct i915_active_request active_tracker; + + const struct intel_context_ops *ops; + struct rb_node node; + + /** sseu: Control eu/slice partitioning */ + struct intel_sseu sseu; +}; + +#endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h new file mode 100644 index 000000000000..72c7c337ace9 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -0,0 +1,583 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _INTEL_RINGBUFFER_H_ +#define _INTEL_RINGBUFFER_H_ + +#include + +#include +#include +#include +#include + +#include "i915_gem_batch_pool.h" +#include "i915_pmu.h" +#include "i915_reg.h" +#include "i915_request.h" +#include "i915_selftest.h" +#include "i915_timeline.h" +#include "intel_engine_types.h" +#include "intel_gpu_commands.h" +#include "intel_workarounds.h" + +struct drm_printer; + +/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, + * but keeps the logic simple. Indeed, the whole purpose of this macro is just + * to give some inclination as to some of the magic values used in the various + * workarounds! + */ +#define CACHELINE_BYTES 64 +#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) + +/* + * The register defines to be used with the following macros need to accept a + * base param, e.g: + * + * REG_FOO(base) _MMIO((base) + ) + * ENGINE_READ(engine, REG_FOO); + * + * register arrays are to be defined and accessed as follows: + * + * REG_BAR(base, i) _MMIO((base) + + (i) * ) + * ENGINE_READ_IDX(engine, REG_BAR, i) + */ + +#define __ENGINE_REG_OP(op__, engine__, ...) \ + intel_uncore_##op__((engine__)->uncore, __VA_ARGS__) + +#define __ENGINE_READ_OP(op__, engine__, reg__) \ + __ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base)) + +#define ENGINE_READ16(...) __ENGINE_READ_OP(read16, __VA_ARGS__) +#define ENGINE_READ(...) __ENGINE_READ_OP(read, __VA_ARGS__) +#define ENGINE_READ_FW(...) __ENGINE_READ_OP(read_fw, __VA_ARGS__) +#define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read, __VA_ARGS__) + +#define ENGINE_READ64(engine__, lower_reg__, upper_reg__) \ + __ENGINE_REG_OP(read64_2x32, (engine__), \ + lower_reg__((engine__)->mmio_base), \ + upper_reg__((engine__)->mmio_base)) + +#define ENGINE_READ_IDX(engine__, reg__, idx__) \ + __ENGINE_REG_OP(read, (engine__), reg__((engine__)->mmio_base, (idx__))) + +#define __ENGINE_WRITE_OP(op__, engine__, reg__, val__) \ + __ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base), (val__)) + +#define ENGINE_WRITE16(...) __ENGINE_WRITE_OP(write16, __VA_ARGS__) +#define ENGINE_WRITE(...) __ENGINE_WRITE_OP(write, __VA_ARGS__) +#define ENGINE_WRITE_FW(...) __ENGINE_WRITE_OP(write_fw, __VA_ARGS__) + +/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to + * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. 
+ */ +enum intel_engine_hangcheck_action { + ENGINE_IDLE = 0, + ENGINE_WAIT, + ENGINE_ACTIVE_SEQNO, + ENGINE_ACTIVE_HEAD, + ENGINE_ACTIVE_SUBUNITS, + ENGINE_WAIT_KICK, + ENGINE_DEAD, +}; + +static inline const char * +hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) +{ + switch (a) { + case ENGINE_IDLE: + return "idle"; + case ENGINE_WAIT: + return "wait"; + case ENGINE_ACTIVE_SEQNO: + return "active seqno"; + case ENGINE_ACTIVE_HEAD: + return "active head"; + case ENGINE_ACTIVE_SUBUNITS: + return "active subunits"; + case ENGINE_WAIT_KICK: + return "wait kick"; + case ENGINE_DEAD: + return "dead"; + } + + return "unknown"; +} + +void intel_engines_set_scheduler_caps(struct drm_i915_private *i915); + +static inline bool __execlists_need_preempt(int prio, int last) +{ + /* + * Allow preemption of low -> normal -> high, but we do + * not allow low priority tasks to preempt other low priority + * tasks under the impression that latency for low priority + * tasks does not matter (as much as background throughput), + * so kiss. + * + * More naturally we would write + * prio >= max(0, last); + * except that we wish to prevent triggering preemption at the same + * priority level: the task that is running should remain running + * to preserve FIFO ordering of dependencies. + */ + return prio > max(I915_PRIORITY_NORMAL - 1, last); +} + +static inline void +execlists_set_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __set_bit(bit, (unsigned long *)&execlists->active); +} + +static inline bool +execlists_set_active_once(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + return !__test_and_set_bit(bit, (unsigned long *)&execlists->active); +} + +static inline void +execlists_clear_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __clear_bit(bit, (unsigned long *)&execlists->active); +} + +static inline void +execlists_clear_all_active(struct intel_engine_execlists *execlists) +{ + execlists->active = 0; +} + +static inline bool +execlists_is_active(const struct intel_engine_execlists *execlists, + unsigned int bit) +{ + return test_bit(bit, (unsigned long *)&execlists->active); +} + +void execlists_user_begin(struct intel_engine_execlists *execlists, + const struct execlist_port *port); +void execlists_user_end(struct intel_engine_execlists *execlists); + +void +execlists_cancel_port_requests(struct intel_engine_execlists * const execlists); + +struct i915_request * +execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); + +static inline unsigned int +execlists_num_ports(const struct intel_engine_execlists * const execlists) +{ + return execlists->port_mask + 1; +} + +static inline struct execlist_port * +execlists_port_complete(struct intel_engine_execlists * const execlists, + struct execlist_port * const port) +{ + const unsigned int m = execlists->port_mask; + + GEM_BUG_ON(port_index(port, execlists) != 0); + GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); + + memmove(port, port + 1, m * sizeof(struct execlist_port)); + memset(port + m, 0, sizeof(struct execlist_port)); + + return port; +} + +static inline u32 +intel_read_status_page(const struct intel_engine_cs *engine, int reg) +{ + /* Ensure that the compiler doesn't optimize away the load. */ + return READ_ONCE(engine->status_page.addr[reg]); +} + +static inline void +intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) +{ + /* Writing into the status page should be done sparingly. 
Since + * we do when we are uncertain of the device state, we take a bit + * of extra paranoia to try and ensure that the HWS takes the value + * we give and that it doesn't end up trapped inside the CPU! + */ + if (static_cpu_has(X86_FEATURE_CLFLUSH)) { + mb(); + clflush(&engine->status_page.addr[reg]); + engine->status_page.addr[reg] = value; + clflush(&engine->status_page.addr[reg]); + mb(); + } else { + WRITE_ONCE(engine->status_page.addr[reg], value); + } +} + +/* + * Reads a dword out of the status page, which is written to from the command + * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or + * MI_STORE_DATA_IMM. + * + * The following dwords have a reserved meaning: + * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes. + * 0x04: ring 0 head pointer + * 0x05: ring 1 head pointer (915-class) + * 0x06: ring 2 head pointer (915-class) + * 0x10-0x1b: Context status DWords (GM45) + * 0x1f: Last written status offset. (GM45) + * 0x20-0x2f: Reserved (Gen6+) + * + * The area from dword 0x30 to 0x3ff is available for driver usage. + */ +#define I915_GEM_HWS_PREEMPT 0x32 +#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) +#define I915_GEM_HWS_HANGCHECK 0x34 +#define I915_GEM_HWS_HANGCHECK_ADDR (I915_GEM_HWS_HANGCHECK * sizeof(u32)) +#define I915_GEM_HWS_SEQNO 0x40 +#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) +#define I915_GEM_HWS_SCRATCH 0x80 +#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32)) + +#define I915_HWS_CSB_BUF0_INDEX 0x10 +#define I915_HWS_CSB_WRITE_INDEX 0x1f +#define CNL_HWS_CSB_WRITE_INDEX 0x2f + +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, + struct i915_timeline *timeline, + int size); +int intel_ring_pin(struct intel_ring *ring); +void intel_ring_reset(struct intel_ring *ring, u32 tail); +unsigned int intel_ring_update_space(struct intel_ring *ring); +void intel_ring_unpin(struct intel_ring *ring); +void intel_ring_free(struct kref *ref); + +static inline struct intel_ring *intel_ring_get(struct intel_ring *ring) +{ + kref_get(&ring->ref); + return ring; +} + +static inline void intel_ring_put(struct intel_ring *ring) +{ + kref_put(&ring->ref, intel_ring_free); +} + +void intel_engine_stop(struct intel_engine_cs *engine); +void intel_engine_cleanup(struct intel_engine_cs *engine); + +int __must_check intel_ring_cacheline_align(struct i915_request *rq); + +u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n); + +static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) +{ + /* Dummy function. + * + * This serves as a placeholder in the code so that the reader + * can compare against the preceding intel_ring_begin() and + * check that the number of dwords emitted matches the space + * reserved for the command packet (i.e. the value passed to + * intel_ring_begin()). + */ + GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); +} + +static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) +{ + return pos & (ring->size - 1); +} + +static inline bool +intel_ring_offset_valid(const struct intel_ring *ring, + unsigned int pos) +{ + if (pos & -ring->size) /* must be strictly within the ring */ + return false; + + if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */ + return false; + + return true; +} + +static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) +{ + /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. 
*/ + u32 offset = addr - rq->ring->vaddr; + GEM_BUG_ON(offset > rq->ring->size); + return intel_ring_wrap(rq->ring, offset); +} + +static inline void +assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) +{ + GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); + + /* + * "Ring Buffer Use" + * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 + * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 + * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + * + * We use ring->head as the last known location of the actual RING_HEAD, + * it may have advanced but in the worst case it is equally the same + * as ring->head and so we should never program RING_TAIL to advance + * into the same cacheline as ring->head. + */ +#define cacheline(a) round_down(a, CACHELINE_BYTES) + GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && + tail < ring->head); +#undef cacheline +} + +static inline unsigned int +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) +{ + /* Whilst writes to the tail are strictly order, there is no + * serialisation between readers and the writers. The tail may be + * read by i915_request_retire() just as it is being updated + * by execlists, as although the breadcrumb is complete, the context + * switch hasn't been seen. + */ + assert_ring_tail_valid(ring, tail); + ring->tail = tail; + return tail; +} + +static inline unsigned int +__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) +{ + /* + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." 
+ */ + GEM_BUG_ON(!is_power_of_2(size)); + return (head - tail - CACHELINE_BYTES) & (size - 1); +} + +int intel_engine_setup_common(struct intel_engine_cs *engine); +int intel_engine_init_common(struct intel_engine_cs *engine); +void intel_engine_cleanup_common(struct intel_engine_cs *engine); + +int intel_init_render_ring_buffer(struct intel_engine_cs *engine); +int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); +int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); +int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); + +int intel_engine_stop_cs(struct intel_engine_cs *engine); +void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine); + +void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask); + +u64 intel_engine_get_active_head(const struct intel_engine_cs *engine); +u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine); + +void intel_engine_get_instdone(struct intel_engine_cs *engine, + struct intel_instdone *instdone); + +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); + +void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); +void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); + +void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); + +static inline void +intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine) +{ + irq_work_queue(&engine->breadcrumbs.irq_work); +} + +void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine); + +void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); + +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p); + +static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) +{ + memset(batch, 0, 6 * sizeof(u32)); + + batch[0] = GFX_OP_PIPE_CONTROL(6); + batch[1] = flags; + batch[2] = offset; + + return batch + 6; +} + +static inline u32 * +gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +{ + /* We're using qword write, offset should be aligned to 8 bytes. */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + /* w/a for post sync ops following a GPGPU operation we + * need a prior CS_STALL, which is emitted by the flush + * following the batch. + */ + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; + *cs++ = gtt_offset; + *cs++ = 0; + *cs++ = value; + /* We're thrashing one dword of HWS. */ + *cs++ = 0; + + return cs; +} + +static inline u32 * +gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +{ + /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. 
*/ + GEM_BUG_ON(gtt_offset & (1 << 5)); + /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags; + *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + *cs++ = value; + + return cs; +} + +static inline void intel_engine_reset(struct intel_engine_cs *engine, + bool stalled) +{ + if (engine->reset.reset) + engine->reset.reset(engine, stalled); +} + +void intel_engines_sanitize(struct drm_i915_private *i915, bool force); +void intel_gt_resume(struct drm_i915_private *i915); + +bool intel_engine_is_idle(struct intel_engine_cs *engine); +bool intel_engines_are_idle(struct drm_i915_private *dev_priv); + +void intel_engine_lost_context(struct intel_engine_cs *engine); + +void intel_engines_park(struct drm_i915_private *i915); +void intel_engines_unpark(struct drm_i915_private *i915); + +void intel_engines_reset_default_submission(struct drm_i915_private *i915); +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); + +bool intel_engine_can_store_dword(struct intel_engine_cs *engine); + +__printf(3, 4) +void intel_engine_dump(struct intel_engine_cs *engine, + struct drm_printer *m, + const char *header, ...); + +struct intel_engine_cs * +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance); + +static inline void intel_engine_context_in(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + if (engine->stats.active++ == 0) + engine->stats.start = ktime_get(); + GEM_BUG_ON(engine->stats.active == 0); + } + + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + +static inline void intel_engine_context_out(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + ktime_t last; + + if (engine->stats.active && --engine->stats.active == 0) { + /* + * Decrement the active context count and in case GPU + * is now idle add up to the running total. + */ + last = ktime_sub(ktime_get(), engine->stats.start); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } else if (engine->stats.active == 0) { + /* + * After turning on engine stats, context out might be + * the first event in which case we account from the + * time stats gathering was turned on. 
+ */ + last = ktime_sub(ktime_get(), engine->stats.enabled_at); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } + } + + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + +int intel_enable_engine_stats(struct intel_engine_cs *engine); +void intel_disable_engine_stats(struct intel_engine_cs *engine); + +ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); + +struct i915_request * +intel_engine_find_active_request(struct intel_engine_cs *engine); + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) + +static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists) +{ + if (!execlists->preempt_hang.inject_hang) + return false; + + complete(&execlists->preempt_hang.completion); + return true; +} + +#else + +static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists) +{ + return false; +} + +#endif + +static inline u32 +intel_engine_next_hangcheck_seqno(struct intel_engine_cs *engine) +{ + return engine->hangcheck.next_seqno = + next_pseudo_random32(engine->hangcheck.next_seqno); +} + +static inline u32 +intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine) +{ + return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK); +} + +#endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c new file mode 100644 index 000000000000..79ac56748b90 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -0,0 +1,1761 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "i915_drv.h" + +#include "intel_engine.h" +#include "intel_lrc.h" +#include "intel_reset.h" + +/* Haswell does have the CXT_SIZE register however it does not appear to be + * valid. Now, docs explain in dwords what is in the context object. The full + * size is 70720 bytes, however, the power context and execlist context will + * never be saved (power context is stored elsewhere, and execlists don't work + * on HSW) - so the final size, including the extra state required for the + * Resource Streamer, is 66944 bytes, which rounds to 17 pages. 
+ */ +#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) + +#define DEFAULT_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) +#define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE) +#define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE) + +#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) + +struct engine_class_info { + const char *name; + int (*init_legacy)(struct intel_engine_cs *engine); + int (*init_execlists)(struct intel_engine_cs *engine); + + u8 uabi_class; +}; + +static const struct engine_class_info intel_engine_classes[] = { + [RENDER_CLASS] = { + .name = "rcs", + .init_execlists = logical_render_ring_init, + .init_legacy = intel_init_render_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_RENDER, + }, + [COPY_ENGINE_CLASS] = { + .name = "bcs", + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_blt_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_COPY, + }, + [VIDEO_DECODE_CLASS] = { + .name = "vcs", + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_bsd_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_VIDEO, + }, + [VIDEO_ENHANCEMENT_CLASS] = { + .name = "vecs", + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_vebox_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE, + }, +}; + +#define MAX_MMIO_BASES 3 +struct engine_info { + unsigned int hw_id; + u8 class; + u8 instance; + /* mmio bases table *must* be sorted in reverse gen order */ + struct engine_mmio_base { + u32 gen : 8; + u32 base : 24; + } mmio_bases[MAX_MMIO_BASES]; +}; + +static const struct engine_info intel_engines[] = { + [RCS0] = { + .hw_id = RCS0_HW, + .class = RENDER_CLASS, + .instance = 0, + .mmio_bases = { + { .gen = 1, .base = RENDER_RING_BASE } + }, + }, + [BCS0] = { + .hw_id = BCS0_HW, + .class = COPY_ENGINE_CLASS, + .instance = 0, + .mmio_bases = { + { .gen = 6, .base = BLT_RING_BASE } + }, + }, + [VCS0] = { + .hw_id = VCS0_HW, + .class = VIDEO_DECODE_CLASS, + .instance = 0, + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD_RING_BASE }, + { .gen = 6, .base = GEN6_BSD_RING_BASE }, + { .gen = 4, .base = BSD_RING_BASE } + }, + }, + [VCS1] = { + .hw_id = VCS1_HW, + .class = VIDEO_DECODE_CLASS, + .instance = 1, + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD2_RING_BASE }, + { .gen = 8, .base = GEN8_BSD2_RING_BASE } + }, + }, + [VCS2] = { + .hw_id = VCS2_HW, + .class = VIDEO_DECODE_CLASS, + .instance = 2, + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD3_RING_BASE } + }, + }, + [VCS3] = { + .hw_id = VCS3_HW, + .class = VIDEO_DECODE_CLASS, + .instance = 3, + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD4_RING_BASE } + }, + }, + [VECS0] = { + .hw_id = VECS0_HW, + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 0, + .mmio_bases = { + { .gen = 11, .base = GEN11_VEBOX_RING_BASE }, + { .gen = 7, .base = VEBOX_RING_BASE } + }, + }, + [VECS1] = { + .hw_id = VECS1_HW, + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 1, + .mmio_bases = { + { .gen = 11, .base = GEN11_VEBOX2_RING_BASE } + }, + }, +}; + +/** + * ___intel_engine_context_size() - return the size of the context for an engine + * @dev_priv: i915 device private + * @class: engine class + * + * Each engine class may require a different amount of space for a context + * image. 
+ * + * Return: size (in bytes) of an engine class specific context image + * + * Note: this size includes the HWSP, which is part of the context image + * in LRC mode, but does not include the "shared data page" used with + * GuC submission. The caller should account for this if using the GuC. + */ +static u32 +__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) +{ + u32 cxt_size; + + BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE); + + switch (class) { + case RENDER_CLASS: + switch (INTEL_GEN(dev_priv)) { + default: + MISSING_CASE(INTEL_GEN(dev_priv)); + return DEFAULT_LR_CONTEXT_RENDER_SIZE; + case 11: + return GEN11_LR_CONTEXT_RENDER_SIZE; + case 10: + return GEN10_LR_CONTEXT_RENDER_SIZE; + case 9: + return GEN9_LR_CONTEXT_RENDER_SIZE; + case 8: + return GEN8_LR_CONTEXT_RENDER_SIZE; + case 7: + if (IS_HASWELL(dev_priv)) + return HSW_CXT_TOTAL_SIZE; + + cxt_size = I915_READ(GEN7_CXT_SIZE); + return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64, + PAGE_SIZE); + case 6: + cxt_size = I915_READ(CXT_SIZE); + return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64, + PAGE_SIZE); + case 5: + case 4: + case 3: + case 2: + /* For the special day when i810 gets merged. */ + case 1: + return 0; + } + break; + default: + MISSING_CASE(class); + /* fall through */ + case VIDEO_DECODE_CLASS: + case VIDEO_ENHANCEMENT_CLASS: + case COPY_ENGINE_CLASS: + if (INTEL_GEN(dev_priv) < 8) + return 0; + return GEN8_LR_CONTEXT_OTHER_SIZE; + } +} + +static u32 __engine_mmio_base(struct drm_i915_private *i915, + const struct engine_mmio_base *bases) +{ + int i; + + for (i = 0; i < MAX_MMIO_BASES; i++) + if (INTEL_GEN(i915) >= bases[i].gen) + break; + + GEM_BUG_ON(i == MAX_MMIO_BASES); + GEM_BUG_ON(!bases[i].base); + + return bases[i].base; +} + +static void __sprint_engine_name(char *name, const struct engine_info *info) +{ + WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u", + intel_engine_classes[info->class].name, + info->instance) >= INTEL_ENGINE_CS_MAX_NAME); +} + +void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask) +{ + /* + * Though they added more rings on g4x/ilk, they did not add + * per-engine HWSTAM until gen6. 
+ */ + if (INTEL_GEN(engine->i915) < 6 && engine->class != RENDER_CLASS) + return; + + if (INTEL_GEN(engine->i915) >= 3) + ENGINE_WRITE(engine, RING_HWSTAM, mask); + else + ENGINE_WRITE16(engine, RING_HWSTAM, mask); +} + +static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine) +{ + /* Mask off all writes into the unknown HWSP */ + intel_engine_set_hwsp_writemask(engine, ~0u); +} + +static int +intel_engine_setup(struct drm_i915_private *dev_priv, + enum intel_engine_id id) +{ + const struct engine_info *info = &intel_engines[id]; + struct intel_engine_cs *engine; + + GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); + + BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); + BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); + + if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS)) + return -EINVAL; + + if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE)) + return -EINVAL; + + if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance])) + return -EINVAL; + + GEM_BUG_ON(dev_priv->engine[id]); + engine = kzalloc(sizeof(*engine), GFP_KERNEL); + if (!engine) + return -ENOMEM; + + BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES); + + engine->id = id; + engine->mask = BIT(id); + engine->i915 = dev_priv; + engine->uncore = &dev_priv->uncore; + __sprint_engine_name(engine->name, info); + engine->hw_id = engine->guc_id = info->hw_id; + engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases); + engine->class = info->class; + engine->instance = info->instance; + + engine->uabi_class = intel_engine_classes[info->class].uabi_class; + + engine->context_size = __intel_engine_context_size(dev_priv, + engine->class); + if (WARN_ON(engine->context_size > BIT(20))) + engine->context_size = 0; + if (engine->context_size) + DRIVER_CAPS(dev_priv)->has_logical_contexts = true; + + /* Nothing to do here, execute in order of dependencies */ + engine->schedule = NULL; + + seqlock_init(&engine->stats.lock); + + ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); + + /* Scrub mmio state on takeover */ + intel_engine_sanitize_mmio(engine); + + dev_priv->engine_class[info->class][info->instance] = engine; + dev_priv->engine[id] = engine; + return 0; +} + +/** + * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers + * @dev_priv: i915 device private + * + * Return: non-zero if the initialization failed. + */ +int intel_engines_init_mmio(struct drm_i915_private *dev_priv) +{ + struct intel_device_info *device_info = mkwrite_device_info(dev_priv); + const unsigned int engine_mask = INTEL_INFO(dev_priv)->engine_mask; + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int mask = 0; + unsigned int i; + int err; + + WARN_ON(engine_mask == 0); + WARN_ON(engine_mask & + GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES)); + + if (i915_inject_load_failure()) + return -ENODEV; + + for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { + if (!HAS_ENGINE(dev_priv, i)) + continue; + + err = intel_engine_setup(dev_priv, i); + if (err) + goto cleanup; + + mask |= BIT(i); + } + + /* + * Catch failures to update intel_engines table when the new engines + * are added to the driver by a warning and disabling the forgotten + * engines. 
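The mask bookkeeping described above reduces to a simple pattern: OR together a bit per engine that actually initialised, compare against the mask the platform claims, and trust the smaller set on mismatch. A standalone sketch with a made-up engine list and one deliberately missing engine:

#include <stdio.h>

enum { RCS0, BCS0, VCS0, VECS0, NUM_ENGINES };

#define BIT(n) (1u << (n))

int main(void)
{
	/* Mask the (hypothetical) platform claims to support. */
	unsigned int expected = BIT(RCS0) | BIT(BCS0) | BIT(VCS0) | BIT(VECS0);
	unsigned int mask = 0;
	int i;

	for (i = 0; i < NUM_ENGINES; i++) {
		if (i == VECS0)
			continue;	/* pretend this engine failed setup */
		mask |= BIT(i);
	}

	if (mask != expected) {
		/* Like the WARN_ON() above: trust what actually initialised. */
		printf("engine mask mismatch: expected %#x, got %#x\n",
		       expected, mask);
		expected = mask;
	}

	printf("running with %d engines (%#x)\n",
	       __builtin_popcount(expected), expected);
	return 0;
}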
+ */ + if (WARN_ON(mask != engine_mask)) + device_info->engine_mask = mask; + + /* We always presume we have at least RCS available for later probing */ + if (WARN_ON(!HAS_ENGINE(dev_priv, RCS0))) { + err = -ENODEV; + goto cleanup; + } + + RUNTIME_INFO(dev_priv)->num_engines = hweight32(mask); + + i915_check_and_clear_faults(dev_priv); + + return 0; + +cleanup: + for_each_engine(engine, dev_priv, id) + kfree(engine); + return err; +} + +/** + * intel_engines_init() - init the Engine Command Streamers + * @dev_priv: i915 device private + * + * Return: non-zero if the initialization failed. + */ +int intel_engines_init(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id, err_id; + int err; + + for_each_engine(engine, dev_priv, id) { + const struct engine_class_info *class_info = + &intel_engine_classes[engine->class]; + int (*init)(struct intel_engine_cs *engine); + + if (HAS_EXECLISTS(dev_priv)) + init = class_info->init_execlists; + else + init = class_info->init_legacy; + + err = -EINVAL; + err_id = id; + + if (GEM_DEBUG_WARN_ON(!init)) + goto cleanup; + + err = init(engine); + if (err) + goto cleanup; + + GEM_BUG_ON(!engine->submit_request); + } + + return 0; + +cleanup: + for_each_engine(engine, dev_priv, id) { + if (id >= err_id) { + kfree(engine); + dev_priv->engine[id] = NULL; + } else { + dev_priv->gt.cleanup_engine(engine); + } + } + return err; +} + +static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) +{ + i915_gem_batch_pool_init(&engine->batch_pool, engine); +} + +static void intel_engine_init_execlist(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + + execlists->port_mask = 1; + GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists))); + GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); + + execlists->queue_priority_hint = INT_MIN; + execlists->queue = RB_ROOT_CACHED; +} + +static void cleanup_status_page(struct intel_engine_cs *engine) +{ + struct i915_vma *vma; + + /* Prevent writes into HWSP after returning the page to the system */ + intel_engine_set_hwsp_writemask(engine, ~0u); + + vma = fetch_and_zero(&engine->status_page.vma); + if (!vma) + return; + + if (!HWS_NEEDS_PHYSICAL(engine->i915)) + i915_vma_unpin(vma); + + i915_gem_object_unpin_map(vma->obj); + __i915_gem_object_release_unless_active(vma->obj); +} + +static int pin_ggtt_status_page(struct intel_engine_cs *engine, + struct i915_vma *vma) +{ + unsigned int flags; + + flags = PIN_GLOBAL; + if (!HAS_LLC(engine->i915)) + /* + * On g33, we cannot place HWS above 256MiB, so + * restrict its pinning to the low mappable arena. + * Though this restriction is not documented for + * gen4, gen5, or byt, they also behave similarly + * and hang if the HWS is placed at the top of the + * GTT. To generalise, it appears that all !llc + * platforms have issues with us placing the HWS + * above the mappable region (even though we never + * actually map it). + */ + flags |= PIN_MAPPABLE; + else + flags |= PIN_HIGH; + + return i915_vma_pin(vma, 0, 0, flags); +} + +static int init_status_page(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + void *vaddr; + int ret; + + /* + * Though the HWS register does support 36bit addresses, historically + * we have had hangs and corruption reported due to wild writes if + * the HWS is placed above 4G. 
We only allow objects to be allocated + * in GFP_DMA32 for i965, and no earlier physical address users had + * access to more than 4G. + */ + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate status page\n"); + return PTR_ERR(obj); + } + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + goto err; + } + + engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE); + engine->status_page.vma = vma; + + if (!HWS_NEEDS_PHYSICAL(engine->i915)) { + ret = pin_ggtt_status_page(engine, vma); + if (ret) + goto err_unpin; + } + + return 0; + +err_unpin: + i915_gem_object_unpin_map(obj); +err: + i915_gem_object_put(obj); + return ret; +} + +/** + * intel_engines_setup_common - setup engine state not requiring hw access + * @engine: Engine to setup. + * + * Initializes @engine@ structure members shared between legacy and execlists + * submission modes which do not require hardware access. + * + * Typically done early in the submission mode specific engine setup stage. + */ +int intel_engine_setup_common(struct intel_engine_cs *engine) +{ + int err; + + err = init_status_page(engine); + if (err) + return err; + + err = i915_timeline_init(engine->i915, + &engine->timeline, + engine->status_page.vma); + if (err) + goto err_hwsp; + + i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); + + intel_engine_init_breadcrumbs(engine); + intel_engine_init_execlist(engine); + intel_engine_init_hangcheck(engine); + intel_engine_init_batch_pool(engine); + intel_engine_init_cmd_parser(engine); + + /* Use the whole device by default */ + engine->sseu = + intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu); + + return 0; + +err_hwsp: + cleanup_status_page(engine); + return err; +} + +void intel_engines_set_scheduler_caps(struct drm_i915_private *i915) +{ + static const struct { + u8 engine; + u8 sched; + } map[] = { +#define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) } + MAP(PREEMPTION, PREEMPTION), + MAP(SEMAPHORES, SEMAPHORES), +#undef MAP + }; + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 enabled, disabled; + + enabled = 0; + disabled = 0; + for_each_engine(engine, i915, id) { /* all engines must agree! 
*/ + int i; + + if (engine->schedule) + enabled |= (I915_SCHEDULER_CAP_ENABLED | + I915_SCHEDULER_CAP_PRIORITY); + else + disabled |= (I915_SCHEDULER_CAP_ENABLED | + I915_SCHEDULER_CAP_PRIORITY); + + for (i = 0; i < ARRAY_SIZE(map); i++) { + if (engine->flags & BIT(map[i].engine)) + enabled |= BIT(map[i].sched); + else + disabled |= BIT(map[i].sched); + } + } + + i915->caps.scheduler = enabled & ~disabled; + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED)) + i915->caps.scheduler = 0; +} + +struct measure_breadcrumb { + struct i915_request rq; + struct i915_timeline timeline; + struct intel_ring ring; + u32 cs[1024]; +}; + +static int measure_breadcrumb_dw(struct intel_engine_cs *engine) +{ + struct measure_breadcrumb *frame; + int dw = -ENOMEM; + + GEM_BUG_ON(!engine->i915->gt.scratch); + + frame = kzalloc(sizeof(*frame), GFP_KERNEL); + if (!frame) + return -ENOMEM; + + if (i915_timeline_init(engine->i915, + &frame->timeline, + engine->status_page.vma)) + goto out_frame; + + INIT_LIST_HEAD(&frame->ring.request_list); + frame->ring.timeline = &frame->timeline; + frame->ring.vaddr = frame->cs; + frame->ring.size = sizeof(frame->cs); + frame->ring.effective_size = frame->ring.size; + intel_ring_update_space(&frame->ring); + + frame->rq.i915 = engine->i915; + frame->rq.engine = engine; + frame->rq.ring = &frame->ring; + frame->rq.timeline = &frame->timeline; + + dw = i915_timeline_pin(&frame->timeline); + if (dw < 0) + goto out_timeline; + + dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; + + i915_timeline_unpin(&frame->timeline); + +out_timeline: + i915_timeline_fini(&frame->timeline); +out_frame: + kfree(frame); + return dw; +} + +static int pin_context(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_context **out) +{ + struct intel_context *ce; + + ce = intel_context_pin(ctx, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + *out = ce; + return 0; +} + +/** + * intel_engines_init_common - initialize cengine state which might require hw access + * @engine: Engine to initialize. + * + * Initializes @engine@ structure members shared between legacy and execlists + * submission modes which do require hardware access. + * + * Typcally done at later stages of submission mode specific engine setup. + * + * Returns zero on success or an error code on failure. + */ +int intel_engine_init_common(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + int ret; + + /* We may need to do things with the shrinker which + * require us to immediately switch back to the default + * context. This can cause a problem as pinning the + * default context also requires GTT space which may not + * be available. To avoid this we always pin the default + * context. + */ + ret = pin_context(i915->kernel_context, engine, + &engine->kernel_context); + if (ret) + return ret; + + /* + * Similarly the preempt context must always be available so that + * we can interrupt the engine at any time. However, as preemption + * is optional, we allow it to fail. 
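Returning to measure_breadcrumb_dw() above: the breadcrumb is sized simply by handing the emitter a scratch buffer and taking the pointer difference afterwards. A userspace sketch of that measuring trick, with a stand-in emitter whose opcode values are placeholders:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for emit_fini_breadcrumb(): writes some dwords, returns the new tail. */
static uint32_t *emit_fini_breadcrumb(uint32_t *cs)
{
	*cs++ = 0x04000000;	/* e.g. a flush */
	*cs++ = 0x10800000;	/* e.g. a store-dword */
	*cs++ = 0xdeadbeef;	/* payload */
	*cs++ = 0x01000000;	/* e.g. a user interrupt */
	return cs;
}

int main(void)
{
	uint32_t scratch[1024];
	/* Emit into scratch once, purely to count how many dwords it takes. */
	ptrdiff_t dw = emit_fini_breadcrumb(scratch) - scratch;

	printf("fini breadcrumb needs %td dwords\n", dw);
	return 0;
}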
+ */ + if (i915->preempt_context) + pin_context(i915->preempt_context, engine, + &engine->preempt_context); + + ret = measure_breadcrumb_dw(engine); + if (ret < 0) + goto err_unpin; + + engine->emit_fini_breadcrumb_dw = ret; + + engine->set_default_submission(engine); + + return 0; + +err_unpin: + if (engine->preempt_context) + intel_context_unpin(engine->preempt_context); + intel_context_unpin(engine->kernel_context); + return ret; +} + +void intel_gt_resume(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * After resume, we may need to poke into the pinned kernel + * contexts to paper over any damage caused by the sudden suspend. + * Only the kernel contexts should remain pinned over suspend, + * allowing us to fixup the user contexts on their first pin. + */ + for_each_engine(engine, i915, id) { + struct intel_context *ce; + + ce = engine->kernel_context; + if (ce) + ce->ops->reset(ce); + + ce = engine->preempt_context; + if (ce) + ce->ops->reset(ce); + } +} + +/** + * intel_engines_cleanup_common - cleans up the engine state created by + * the common initiailizers. + * @engine: Engine to cleanup. + * + * This cleans up everything created by the common helpers. + */ +void intel_engine_cleanup_common(struct intel_engine_cs *engine) +{ + cleanup_status_page(engine); + + intel_engine_fini_breadcrumbs(engine); + intel_engine_cleanup_cmd_parser(engine); + i915_gem_batch_pool_fini(&engine->batch_pool); + + if (engine->default_state) + i915_gem_object_put(engine->default_state); + + if (engine->preempt_context) + intel_context_unpin(engine->preempt_context); + intel_context_unpin(engine->kernel_context); + + i915_timeline_fini(&engine->timeline); + + intel_wa_list_free(&engine->ctx_wa_list); + intel_wa_list_free(&engine->wa_list); + intel_wa_list_free(&engine->whitelist); +} + +u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + u64 acthd; + + if (INTEL_GEN(i915) >= 8) + acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW); + else if (INTEL_GEN(i915) >= 4) + acthd = ENGINE_READ(engine, RING_ACTHD); + else + acthd = ENGINE_READ(engine, ACTHD); + + return acthd; +} + +u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine) +{ + u64 bbaddr; + + if (INTEL_GEN(engine->i915) >= 8) + bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW); + else + bbaddr = ENGINE_READ(engine, RING_BBADDR); + + return bbaddr; +} + +int intel_engine_stop_cs(struct intel_engine_cs *engine) +{ + struct intel_uncore *uncore = engine->uncore; + const u32 base = engine->mmio_base; + const i915_reg_t mode = RING_MI_MODE(base); + int err; + + if (INTEL_GEN(engine->i915) < 3) + return -ENODEV; + + GEM_TRACE("%s\n", engine->name); + + intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); + + err = 0; + if (__intel_wait_for_register_fw(uncore, + mode, MODE_IDLE, MODE_IDLE, + 1000, 0, + NULL)) { + GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name); + err = -ETIMEDOUT; + } + + /* A final mmio read to let GPU writes be hopefully flushed to memory */ + intel_uncore_posting_read_fw(uncore, mode); + + return err; +} + +void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine) +{ + GEM_TRACE("%s\n", engine->name); + + ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); +} + +const char *i915_cache_level_str(struct drm_i915_private *i915, int type) +{ + switch (type) { + case I915_CACHE_NONE: return " uncached"; + case 
I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped"; + case I915_CACHE_L3_LLC: return " L3+LLC"; + case I915_CACHE_WT: return " WT"; + default: return ""; + } +} + +u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv) +{ + const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; + u32 mcr_s_ss_select; + u32 slice = fls(sseu->slice_mask); + u32 subslice = fls(sseu->subslice_mask[slice]); + + if (IS_GEN(dev_priv, 10)) + mcr_s_ss_select = GEN8_MCR_SLICE(slice) | + GEN8_MCR_SUBSLICE(subslice); + else if (INTEL_GEN(dev_priv) >= 11) + mcr_s_ss_select = GEN11_MCR_SLICE(slice) | + GEN11_MCR_SUBSLICE(subslice); + else + mcr_s_ss_select = 0; + + return mcr_s_ss_select; +} + +static inline u32 +read_subslice_reg(struct drm_i915_private *dev_priv, int slice, + int subslice, i915_reg_t reg) +{ + struct intel_uncore *uncore = &dev_priv->uncore; + u32 mcr_slice_subslice_mask; + u32 mcr_slice_subslice_select; + u32 default_mcr_s_ss_select; + u32 mcr; + u32 ret; + enum forcewake_domains fw_domains; + + if (INTEL_GEN(dev_priv) >= 11) { + mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK | + GEN11_MCR_SUBSLICE_MASK; + mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) | + GEN11_MCR_SUBSLICE(subslice); + } else { + mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK | + GEN8_MCR_SUBSLICE_MASK; + mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) | + GEN8_MCR_SUBSLICE(subslice); + } + + default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(dev_priv); + + fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, + FW_REG_READ); + fw_domains |= intel_uncore_forcewake_for_reg(uncore, + GEN8_MCR_SELECTOR, + FW_REG_READ | FW_REG_WRITE); + + spin_lock_irq(&uncore->lock); + intel_uncore_forcewake_get__locked(uncore, fw_domains); + + mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + + WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) != + default_mcr_s_ss_select); + + mcr &= ~mcr_slice_subslice_mask; + mcr |= mcr_slice_subslice_select; + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + + ret = intel_uncore_read_fw(uncore, reg); + + mcr &= ~mcr_slice_subslice_mask; + mcr |= default_mcr_s_ss_select; + + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + + intel_uncore_forcewake_put__locked(uncore, fw_domains); + spin_unlock_irq(&uncore->lock); + + return ret; +} + +/* NB: please notice the memset */ +void intel_engine_get_instdone(struct intel_engine_cs *engine, + struct intel_instdone *instdone) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct intel_uncore *uncore = engine->uncore; + u32 mmio_base = engine->mmio_base; + int slice; + int subslice; + + memset(instdone, 0, sizeof(*instdone)); + + switch (INTEL_GEN(dev_priv)) { + default: + instdone->instdone = + intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); + + if (engine->id != RCS0) + break; + + instdone->slice_common = + intel_uncore_read(uncore, GEN7_SC_INSTDONE); + for_each_instdone_slice_subslice(dev_priv, slice, subslice) { + instdone->sampler[slice][subslice] = + read_subslice_reg(dev_priv, slice, subslice, + GEN7_SAMPLER_INSTDONE); + instdone->row[slice][subslice] = + read_subslice_reg(dev_priv, slice, subslice, + GEN7_ROW_INSTDONE); + } + break; + case 7: + instdone->instdone = + intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); + + if (engine->id != RCS0) + break; + + instdone->slice_common = + intel_uncore_read(uncore, GEN7_SC_INSTDONE); + instdone->sampler[0][0] = + intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE); + instdone->row[0][0] = + intel_uncore_read(uncore, GEN7_ROW_INSTDONE); + + 
break; + case 6: + case 5: + case 4: + instdone->instdone = + intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); + if (engine->id == RCS0) + /* HACK: Using the wrong struct member */ + instdone->slice_common = + intel_uncore_read(uncore, GEN4_INSTDONE1); + break; + case 3: + case 2: + instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE); + break; + } +} + +static bool ring_is_idle(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + intel_wakeref_t wakeref; + bool idle = true; + + if (I915_SELFTEST_ONLY(!engine->mmio_base)) + return true; + + /* If the whole device is asleep, the engine must be idle */ + wakeref = intel_runtime_pm_get_if_in_use(dev_priv); + if (!wakeref) + return true; + + /* First check that no commands are left in the ring */ + if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) != + (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR)) + idle = false; + + /* No bit for gen2, so assume the CS parser is idle */ + if (INTEL_GEN(dev_priv) > 2 && + !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE)) + idle = false; + + intel_runtime_pm_put(dev_priv, wakeref); + + return idle; +} + +/** + * intel_engine_is_idle() - Report if the engine has finished process all work + * @engine: the intel_engine_cs + * + * Return true if there are no requests pending, nothing left to be submitted + * to hardware, and that the engine is idle. + */ +bool intel_engine_is_idle(struct intel_engine_cs *engine) +{ + /* More white lies, if wedged, hw state is inconsistent */ + if (i915_reset_failed(engine->i915)) + return true; + + /* Waiting to drain ELSP? */ + if (READ_ONCE(engine->execlists.active)) { + struct tasklet_struct *t = &engine->execlists.tasklet; + + local_bh_disable(); + if (tasklet_trylock(t)) { + /* Must wait for any GPU reset in progress. */ + if (__tasklet_is_enabled(t)) + t->func(t->data); + tasklet_unlock(t); + } + local_bh_enable(); + + /* Otherwise flush the tasklet if it was on another cpu */ + tasklet_unlock_wait(t); + + if (READ_ONCE(engine->execlists.active)) + return false; + } + + /* ELSP is empty, but there are ready requests? E.g. after reset */ + if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) + return false; + + /* Ring stopped? */ + return ring_is_idle(engine); +} + +bool intel_engines_are_idle(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * If the driver is wedged, HW state may be very inconsistent and + * report that it is still busy, even though we have stopped using it. 
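Looking back at ring_is_idle() above, the ring is treated as drained when HEAD and TAIL agree under their address masks and, on gen3+, when the MI_MODE idle bit is set. A compact sketch of those two checks; the mask and bit values here are illustrative copies, the authoritative ones live in i915_reg.h:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative masks/bits standing in for the i915_reg.h definitions. */
#define HEAD_ADDR	0x001ffffcu
#define TAIL_ADDR	0x001ffff8u
#define MODE_IDLE	(1u << 9)

static bool ring_is_idle(uint32_t ring_head, uint32_t ring_tail,
			 uint32_t mi_mode, int gen)
{
	/* Commands left between head and tail mean the ring is busy. */
	if ((ring_head & HEAD_ADDR) != (ring_tail & TAIL_ADDR))
		return false;

	/* Gen2 has no idle bit, so assume the CS parser is idle there. */
	if (gen > 2 && !(mi_mode & MODE_IDLE))
		return false;

	return true;
}

int main(void)
{
	printf("%d\n", ring_is_idle(0x1000, 0x1000, MODE_IDLE, 9)); /* 1: idle */
	printf("%d\n", ring_is_idle(0x1000, 0x1040, MODE_IDLE, 9)); /* 0: busy */
	return 0;
}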
+ */ + if (i915_reset_failed(i915)) + return true; + + /* Already parked (and passed an idleness test); must still be idle */ + if (!READ_ONCE(i915->gt.awake)) + return true; + + for_each_engine(engine, i915, id) { + if (!intel_engine_is_idle(engine)) + return false; + } + + return true; +} + +void intel_engines_reset_default_submission(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + engine->set_default_submission(engine); +} + +static bool reset_engines(struct drm_i915_private *i915) +{ + if (INTEL_INFO(i915)->gpu_reset_clobbers_display) + return false; + + return intel_gpu_reset(i915, ALL_ENGINES) == 0; +} + +/** + * intel_engines_sanitize: called after the GPU has lost power + * @i915: the i915 device + * @force: ignore a failed reset and sanitize engine state anyway + * + * Anytime we reset the GPU, either with an explicit GPU reset or through a + * PCI power cycle, the GPU loses state and we must reset our state tracking + * to match. Note that calling intel_engines_sanitize() if the GPU has not + * been reset results in much confusion! + */ +void intel_engines_sanitize(struct drm_i915_private *i915, bool force) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + GEM_TRACE("\n"); + + if (!reset_engines(i915) && !force) + return; + + for_each_engine(engine, i915, id) + intel_engine_reset(engine, false); +} + +/** + * intel_engines_park: called when the GT is transitioning from busy->idle + * @i915: the i915 device + * + * The GT is now idle and about to go to sleep (maybe never to wake again?). + * Time for us to tidy and put away our toys (release resources back to the + * system). + */ +void intel_engines_park(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + /* Flush the residual irq tasklets first. */ + intel_engine_disarm_breadcrumbs(engine); + tasklet_kill(&engine->execlists.tasklet); + + /* + * We are committed now to parking the engines, make sure there + * will be no more interrupts arriving later and the engines + * are truly idle. + */ + if (wait_for(intel_engine_is_idle(engine), 10)) { + struct drm_printer p = drm_debug_printer(__func__); + + dev_err(i915->drm.dev, + "%s is not idle before parking\n", + engine->name); + intel_engine_dump(engine, &p, NULL); + } + + /* Must be reset upon idling, or we may miss the busy wakeup. */ + GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); + + if (engine->park) + engine->park(engine); + + if (engine->pinned_default_state) { + i915_gem_object_unpin_map(engine->default_state); + engine->pinned_default_state = NULL; + } + + i915_gem_batch_pool_fini(&engine->batch_pool); + engine->execlists.no_priolist = false; + } + + i915->gt.active_engines = 0; +} + +/** + * intel_engines_unpark: called when the GT is transitioning from idle->busy + * @i915: the i915 device + * + * The GT was idle and now about to fire up with some new user requests. + */ +void intel_engines_unpark(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + void *map; + + /* Pin the default state for fast resets from atomic context. 
*/ + map = NULL; + if (engine->default_state) + map = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (!IS_ERR_OR_NULL(map)) + engine->pinned_default_state = map; + + if (engine->unpark) + engine->unpark(engine); + + intel_engine_init_hangcheck(engine); + } +} + +/** + * intel_engine_lost_context: called when the GPU is reset into unknown state + * @engine: the engine + * + * We have either reset the GPU or otherwise about to lose state tracking of + * the current GPU logical state (e.g. suspend). On next use, it is therefore + * imperative that we make no presumptions about the current state and load + * from scratch. + */ +void intel_engine_lost_context(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + + lockdep_assert_held(&engine->i915->drm.struct_mutex); + + ce = fetch_and_zero(&engine->last_retired_context); + if (ce) + intel_context_unpin(ce); +} + +bool intel_engine_can_store_dword(struct intel_engine_cs *engine) +{ + switch (INTEL_GEN(engine->i915)) { + case 2: + return false; /* uses physical not virtual addresses */ + case 3: + /* maybe only uses physical not virtual addresses */ + return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915)); + case 6: + return engine->class != VIDEO_DECODE_CLASS; /* b0rked */ + default: + return true; + } +} + +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int which; + + which = 0; + for_each_engine(engine, i915, id) + if (engine->default_state) + which |= BIT(engine->uabi_class); + + return which; +} + +static int print_sched_attr(struct drm_i915_private *i915, + const struct i915_sched_attr *attr, + char *buf, int x, int len) +{ + if (attr->priority == I915_PRIORITY_INVALID) + return x; + + x += snprintf(buf + x, len - x, + " prio=%d", attr->priority); + + return x; +} + +static void print_request(struct drm_printer *m, + struct i915_request *rq, + const char *prefix) +{ + const char *name = rq->fence.ops->get_timeline_name(&rq->fence); + char buf[80] = ""; + int x = 0; + + x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); + + drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n", + prefix, + rq->fence.context, rq->fence.seqno, + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &rq->fence.flags) ? 
"+" : "", + buf, + jiffies_to_msecs(jiffies - rq->emitted_jiffies), + name); +} + +static void hexdump(struct drm_printer *m, const void *buf, size_t len) +{ + const size_t rowsize = 8 * sizeof(u32); + const void *prev = NULL; + bool skip = false; + size_t pos; + + for (pos = 0; pos < len; pos += rowsize) { + char line[128]; + + if (prev && !memcmp(prev, buf + pos, rowsize)) { + if (!skip) { + drm_printf(m, "*\n"); + skip = true; + } + continue; + } + + WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, + rowsize, sizeof(u32), + line, sizeof(line), + false) >= sizeof(line)); + drm_printf(m, "[%04zx] %s\n", pos, line); + + prev = buf + pos; + skip = false; + } +} + +static void intel_engine_print_registers(const struct intel_engine_cs *engine, + struct drm_printer *m) +{ + struct drm_i915_private *dev_priv = engine->i915; + const struct intel_engine_execlists * const execlists = + &engine->execlists; + u64 addr; + + if (engine->id == RCS0 && IS_GEN_RANGE(dev_priv, 4, 7)) + drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID)); + drm_printf(m, "\tRING_START: 0x%08x\n", + ENGINE_READ(engine, RING_START)); + drm_printf(m, "\tRING_HEAD: 0x%08x\n", + ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR); + drm_printf(m, "\tRING_TAIL: 0x%08x\n", + ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR); + drm_printf(m, "\tRING_CTL: 0x%08x%s\n", + ENGINE_READ(engine, RING_CTL), + ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : ""); + if (INTEL_GEN(engine->i915) > 2) { + drm_printf(m, "\tRING_MODE: 0x%08x%s\n", + ENGINE_READ(engine, RING_MI_MODE), + ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : ""); + } + + if (INTEL_GEN(dev_priv) >= 6) { + drm_printf(m, "\tRING_IMR: %08x\n", + ENGINE_READ(engine, RING_IMR)); + } + + addr = intel_engine_get_active_head(engine); + drm_printf(m, "\tACTHD: 0x%08x_%08x\n", + upper_32_bits(addr), lower_32_bits(addr)); + addr = intel_engine_get_last_batch_head(engine); + drm_printf(m, "\tBBADDR: 0x%08x_%08x\n", + upper_32_bits(addr), lower_32_bits(addr)); + if (INTEL_GEN(dev_priv) >= 8) + addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW); + else if (INTEL_GEN(dev_priv) >= 4) + addr = ENGINE_READ(engine, RING_DMA_FADD); + else + addr = ENGINE_READ(engine, DMA_FADD_I8XX); + drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n", + upper_32_bits(addr), lower_32_bits(addr)); + if (INTEL_GEN(dev_priv) >= 4) { + drm_printf(m, "\tIPEIR: 0x%08x\n", + ENGINE_READ(engine, RING_IPEIR)); + drm_printf(m, "\tIPEHR: 0x%08x\n", + ENGINE_READ(engine, RING_IPEHR)); + } else { + drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR)); + drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR)); + } + + if (HAS_EXECLISTS(dev_priv)) { + const u32 *hws = + &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; + const u8 num_entries = execlists->csb_size; + unsigned int idx; + u8 read, write; + + drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n", + ENGINE_READ(engine, RING_EXECLIST_STATUS_LO), + ENGINE_READ(engine, RING_EXECLIST_STATUS_HI), + num_entries); + + read = execlists->csb_head; + write = READ_ONCE(*execlists->csb_write); + + drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? 
%s (%s)\n", + read, write, + yesno(test_bit(TASKLET_STATE_SCHED, + &engine->execlists.tasklet.state)), + enableddisabled(!atomic_read(&engine->execlists.tasklet.count))); + if (read >= num_entries) + read = 0; + if (write >= num_entries) + write = 0; + if (read > write) + write += num_entries; + while (read < write) { + idx = ++read % num_entries; + drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n", + idx, hws[idx * 2], hws[idx * 2 + 1]); + } + + rcu_read_lock(); + for (idx = 0; idx < execlists_num_ports(execlists); idx++) { + struct i915_request *rq; + unsigned int count; + + rq = port_unpack(&execlists->port[idx], &count); + if (rq) { + char hdr[80]; + + snprintf(hdr, sizeof(hdr), + "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ", + idx, count, + i915_ggtt_offset(rq->ring->vma), + rq->timeline->hwsp_offset, + hwsp_seqno(rq)); + print_request(m, rq, hdr); + } else { + drm_printf(m, "\t\tELSP[%d] idle\n", idx); + } + } + drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active); + rcu_read_unlock(); + } else if (INTEL_GEN(dev_priv) > 6) { + drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", + ENGINE_READ(engine, RING_PP_DIR_BASE)); + drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", + ENGINE_READ(engine, RING_PP_DIR_BASE_READ)); + drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", + ENGINE_READ(engine, RING_PP_DIR_DCLV)); + } +} + +static void print_request_ring(struct drm_printer *m, struct i915_request *rq) +{ + void *ring; + int size; + + drm_printf(m, + "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n", + rq->head, rq->postfix, rq->tail, + rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, + rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); + + size = rq->tail - rq->head; + if (rq->tail < rq->head) + size += rq->ring->size; + + ring = kmalloc(size, GFP_ATOMIC); + if (ring) { + const void *vaddr = rq->ring->vaddr; + unsigned int head = rq->head; + unsigned int len = 0; + + if (rq->tail < head) { + len = rq->ring->size - head; + memcpy(ring, vaddr + head, len); + head = 0; + } + memcpy(ring + len, vaddr + head, size - len); + + hexdump(m, ring, size); + kfree(ring); + } +} + +void intel_engine_dump(struct intel_engine_cs *engine, + struct drm_printer *m, + const char *header, ...) 
+{ + struct i915_gpu_error * const error = &engine->i915->gpu_error; + struct i915_request *rq; + intel_wakeref_t wakeref; + + if (header) { + va_list ap; + + va_start(ap, header); + drm_vprintf(m, header, &ap); + va_end(ap); + } + + if (i915_reset_failed(engine->i915)) + drm_printf(m, "*** WEDGED ***\n"); + + drm_printf(m, "\tHangcheck %x:%x [%d ms]\n", + engine->hangcheck.last_seqno, + engine->hangcheck.next_seqno, + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); + drm_printf(m, "\tReset count: %d (global %d)\n", + i915_reset_engine_count(error, engine), + i915_reset_count(error)); + + rcu_read_lock(); + + drm_printf(m, "\tRequests:\n"); + + rq = list_first_entry(&engine->timeline.requests, + struct i915_request, link); + if (&rq->link != &engine->timeline.requests) + print_request(m, rq, "\t\tfirst "); + + rq = list_last_entry(&engine->timeline.requests, + struct i915_request, link); + if (&rq->link != &engine->timeline.requests) + print_request(m, rq, "\t\tlast "); + + rq = intel_engine_find_active_request(engine); + if (rq) { + print_request(m, rq, "\t\tactive "); + + drm_printf(m, "\t\tring->start: 0x%08x\n", + i915_ggtt_offset(rq->ring->vma)); + drm_printf(m, "\t\tring->head: 0x%08x\n", + rq->ring->head); + drm_printf(m, "\t\tring->tail: 0x%08x\n", + rq->ring->tail); + drm_printf(m, "\t\tring->emit: 0x%08x\n", + rq->ring->emit); + drm_printf(m, "\t\tring->space: 0x%08x\n", + rq->ring->space); + drm_printf(m, "\t\tring->hwsp: 0x%08x\n", + rq->timeline->hwsp_offset); + + print_request_ring(m, rq); + } + + rcu_read_unlock(); + + wakeref = intel_runtime_pm_get_if_in_use(engine->i915); + if (wakeref) { + intel_engine_print_registers(engine, m); + intel_runtime_pm_put(engine->i915, wakeref); + } else { + drm_printf(m, "\tDevice is asleep; skipping register dump\n"); + } + + intel_execlists_show_requests(engine, m, print_request, 8); + + drm_printf(m, "HWSP:\n"); + hexdump(m, engine->status_page.addr, PAGE_SIZE); + + drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); + + intel_engine_print_breadcrumbs(engine, m); +} + +static u8 user_class_map[] = { + [I915_ENGINE_CLASS_RENDER] = RENDER_CLASS, + [I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS, + [I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS, + [I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS, +}; + +struct intel_engine_cs * +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) +{ + if (class >= ARRAY_SIZE(user_class_map)) + return NULL; + + class = user_class_map[class]; + + GEM_BUG_ON(class > MAX_ENGINE_CLASS); + + if (instance > MAX_ENGINE_INSTANCE) + return NULL; + + return i915->engine_class[class][instance]; +} + +/** + * intel_enable_engine_stats() - Enable engine busy tracking on engine + * @engine: engine to enable stats collection + * + * Start collecting the engine busyness data for @engine. + * + * Returns 0 on success or a negative error code. 
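The stats machinery defined below boils down to: stamp a start time on the idle-to-active transition, fold now - start into a running total when the engine idles or is sampled while busy, and let readers retry under a sequence count rather than block the writer. A simplified, single-threaded sketch of that accounting; the seqlock is only mimicked by a plain sequence variable, so the real locking and memory barriers are not shown:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct engine_stats {
	unsigned int seq;	/* even = stable, odd = writer in progress */
	unsigned int active;	/* contexts currently scheduled in */
	uint64_t start;		/* timestamp of last idle->active transition */
	uint64_t total;		/* busy time accumulated while idle */
};

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

static void context_in(struct engine_stats *s)
{
	s->seq++;			/* begin "write side" */
	if (s->active++ == 0)
		s->start = now_ns();	/* idle -> active: stamp the start */
	s->seq++;			/* end "write side" */
}

static void context_out(struct engine_stats *s)
{
	s->seq++;
	if (--s->active == 0)
		s->total += now_ns() - s->start;	/* fold the busy block in */
	s->seq++;
}

static uint64_t get_busy_time(const struct engine_stats *s)
{
	unsigned int seq;
	uint64_t total;

	do {	/* seqlock-style retry loop */
		seq = s->seq;
		total = s->total;
		if (s->active)	/* still running: add the open-ended block */
			total += now_ns() - s->start;
	} while ((seq & 1) || seq != s->seq);

	return total;
}

int main(void)
{
	struct engine_stats s = {0};

	context_in(&s);
	context_out(&s);
	printf("busy for ~%llu ns\n", (unsigned long long)get_busy_time(&s));
	return 0;
}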
+ */ +int intel_enable_engine_stats(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *execlists = &engine->execlists; + unsigned long flags; + int err = 0; + + if (!intel_engine_supports_stats(engine)) + return -ENODEV; + + spin_lock_irqsave(&engine->timeline.lock, flags); + write_seqlock(&engine->stats.lock); + + if (unlikely(engine->stats.enabled == ~0)) { + err = -EBUSY; + goto unlock; + } + + if (engine->stats.enabled++ == 0) { + const struct execlist_port *port = execlists->port; + unsigned int num_ports = execlists_num_ports(execlists); + + engine->stats.enabled_at = ktime_get(); + + /* XXX submission method oblivious? */ + while (num_ports-- && port_isset(port)) { + engine->stats.active++; + port++; + } + + if (engine->stats.active) + engine->stats.start = engine->stats.enabled_at; + } + +unlock: + write_sequnlock(&engine->stats.lock); + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + return err; +} + +static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) +{ + ktime_t total = engine->stats.total; + + /* + * If the engine is executing something at the moment + * add it to the total. + */ + if (engine->stats.active) + total = ktime_add(total, + ktime_sub(ktime_get(), engine->stats.start)); + + return total; +} + +/** + * intel_engine_get_busy_time() - Return current accumulated engine busyness + * @engine: engine to report on + * + * Returns accumulated time @engine was busy since engine stats were enabled. + */ +ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine) +{ + unsigned int seq; + ktime_t total; + + do { + seq = read_seqbegin(&engine->stats.lock); + total = __intel_engine_get_busy_time(engine); + } while (read_seqretry(&engine->stats.lock, seq)); + + return total; +} + +/** + * intel_disable_engine_stats() - Disable engine busy tracking on engine + * @engine: engine to disable stats collection + * + * Stops collecting the engine busyness data for @engine. + */ +void intel_disable_engine_stats(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (!intel_engine_supports_stats(engine)) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + WARN_ON_ONCE(engine->stats.enabled == 0); + if (--engine->stats.enabled == 0) { + engine->stats.total = __intel_engine_get_busy_time(engine); + engine->stats.active = 0; + } + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + +static bool match_ring(struct i915_request *rq) +{ + u32 ring = ENGINE_READ(rq->engine, RING_START); + + return ring == i915_ggtt_offset(rq->ring->vma); +} + +struct i915_request * +intel_engine_find_active_request(struct intel_engine_cs *engine) +{ + struct i915_request *request, *active = NULL; + unsigned long flags; + + /* + * We are called by the error capture, reset and to dump engine + * state at random points in time. In particular, note that neither is + * crucially ordered with an interrupt. After a hang, the GPU is dead + * and we assume that no more writes can happen (we waited long enough + * for all writes that were in transaction to be flushed) - adding an + * extra delay for a recent interrupt is pointless. Hence, we do + * not need an engine->irq_seqno_barrier() before the seqno reads. + * At all other times, we must assume the GPU is still running, but + * we only care about the snapshot of this moment. 
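The walk that follows implements a small selection rule: skip requests that have already completed, stop as soon as one has not even started, and otherwise report the first started-but-incomplete request as active. A list-free sketch over an array of toy requests; the struct and the sample timeline are invented, and the extra match_ring() check is left out:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct toy_request {
	int id;
	bool started;
	bool completed;
};

static const struct toy_request *
find_active(const struct toy_request *rq, size_t count)
{
	size_t i;

	for (i = 0; i < count; i++) {
		if (rq[i].completed)
			continue;	/* already retired */
		if (!rq[i].started)
			break;		/* nothing past this point has run */
		return &rq[i];		/* first started but incomplete */
	}

	return NULL;
}

int main(void)
{
	const struct toy_request timeline[] = {
		{ .id = 1, .started = true,  .completed = true  },
		{ .id = 2, .started = true,  .completed = false }, /* <- active */
		{ .id = 3, .started = false, .completed = false },
	};
	const struct toy_request *active =
		find_active(timeline, sizeof(timeline) / sizeof(timeline[0]));

	printf("active request: %d\n", active ? active->id : -1);
	return 0;
}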
+ */ + spin_lock_irqsave(&engine->timeline.lock, flags); + list_for_each_entry(request, &engine->timeline.requests, link) { + if (i915_request_completed(request)) + continue; + + if (!i915_request_started(request)) + break; + + /* More than one preemptible request may match! */ + if (!match_ring(request)) + break; + + active = request; + break; + } + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + return active; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_engine_cs.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h new file mode 100644 index 000000000000..3adf58da6d2c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -0,0 +1,548 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_ENGINE_TYPES__ +#define __INTEL_ENGINE_TYPES__ + +#include +#include +#include +#include +#include + +#include "i915_gem.h" +#include "i915_gem_batch_pool.h" +#include "i915_pmu.h" +#include "i915_priolist_types.h" +#include "i915_selftest.h" +#include "i915_timeline_types.h" +#include "intel_sseu.h" +#include "intel_workarounds_types.h" + +#define I915_MAX_SLICES 3 +#define I915_MAX_SUBSLICES 8 + +#define I915_CMD_HASH_ORDER 9 + +struct dma_fence; +struct drm_i915_reg_table; +struct i915_gem_context; +struct i915_request; +struct i915_sched_attr; +struct intel_uncore; + +typedef u8 intel_engine_mask_t; +#define ALL_ENGINES ((intel_engine_mask_t)~0ul) + +struct intel_hw_status_page { + struct i915_vma *vma; + u32 *addr; +}; + +struct intel_instdone { + u32 instdone; + /* The following exist only in the RCS engine */ + u32 slice_common; + u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; + u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; +}; + +struct intel_engine_hangcheck { + u64 acthd; + u32 last_seqno; + u32 next_seqno; + unsigned long action_timestamp; + struct intel_instdone instdone; +}; + +struct intel_ring { + struct kref ref; + struct i915_vma *vma; + void *vaddr; + + struct i915_timeline *timeline; + struct list_head request_list; + struct list_head active_link; + + u32 head; + u32 tail; + u32 emit; + + u32 space; + u32 size; + u32 effective_size; +}; + +/* + * we use a single page to load ctx workarounds so all of these + * values are referred in terms of dwords + * + * struct i915_wa_ctx_bb: + * offset: specifies batch starting position, also helpful in case + * if we want to have multiple batches at different offsets based on + * some criteria. It is not a requirement at the moment but provides + * an option for future use. + * size: size of the batch in DWORDS + */ +struct i915_ctx_workarounds { + struct i915_wa_ctx_bb { + u32 offset; + u32 size; + } indirect_ctx, per_ctx; + struct i915_vma *vma; +}; + +#define I915_MAX_VCS 4 +#define I915_MAX_VECS 2 + +/* + * Engine IDs definitions. + * Keep instances of the same type engine together. + */ +enum intel_engine_id { + RCS0 = 0, + BCS0, + VCS0, + VCS1, + VCS2, + VCS3, +#define _VCS(n) (VCS0 + (n)) + VECS0, + VECS1, +#define _VECS(n) (VECS0 + (n)) + I915_NUM_ENGINES +}; + +struct st_preempt_hang { + struct completion completion; + unsigned int count; + bool inject_hang; +}; + +/** + * struct intel_engine_execlists - execlist submission queue and port state + * + * The struct intel_engine_execlists represents the combined logical state of + * driver and the hardware state for execlist mode of submission. 
+ */ +struct intel_engine_execlists { + /** + * @tasklet: softirq tasklet for bottom handler + */ + struct tasklet_struct tasklet; + + /** + * @default_priolist: priority list for I915_PRIORITY_NORMAL + */ + struct i915_priolist default_priolist; + + /** + * @no_priolist: priority lists disabled + */ + bool no_priolist; + + /** + * @submit_reg: gen-specific execlist submission register + * set to the ExecList Submission Port (elsp) register pre-Gen11 and to + * the ExecList Submission Queue Contents register array for Gen11+ + */ + u32 __iomem *submit_reg; + + /** + * @ctrl_reg: the enhanced execlists control register, used to load the + * submit queue on the HW and to request preemptions to idle + */ + u32 __iomem *ctrl_reg; + + /** + * @port: execlist port states + * + * For each hardware ELSP (ExecList Submission Port) we keep + * track of the last request and the number of times we submitted + * that port to hw. We then count the number of times the hw reports + * a context completion or preemption. As only one context can + * be active on hw, we limit resubmission of context to port[0]. This + * is called Lite Restore, of the context. + */ + struct execlist_port { + /** + * @request_count: combined request and submission count + */ + struct i915_request *request_count; +#define EXECLIST_COUNT_BITS 2 +#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) +#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) +#define port_set(p, packed) ((p)->request_count = (packed)) +#define port_isset(p) ((p)->request_count) +#define port_index(p, execlists) ((p) - (execlists)->port) + + /** + * @context_id: context ID for port + */ + GEM_DEBUG_DECL(u32 context_id); + +#define EXECLIST_MAX_PORTS 2 + } port[EXECLIST_MAX_PORTS]; + + /** + * @active: is the HW active? We consider the HW as active after + * submitting any context for execution and until we have seen the + * last context completion event. After that, we do not expect any + * more events until we submit, and so can park the HW. + * + * As we have a small number of different sources from which we feed + * the HW, we track the state of each inside a single bitfield. + */ + unsigned int active; +#define EXECLISTS_ACTIVE_USER 0 +#define EXECLISTS_ACTIVE_PREEMPT 1 +#define EXECLISTS_ACTIVE_HWACK 2 + + /** + * @port_mask: number of execlist ports - 1 + */ + unsigned int port_mask; + + /** + * @queue_priority_hint: Highest pending priority. + * + * When we add requests into the queue, or adjust the priority of + * executing requests, we compute the maximum priority of those + * pending requests. We can then use this value to determine if + * we need to preempt the executing requests to service the queue. + * However, since the we may have recorded the priority of an inflight + * request we wanted to preempt but since completed, at the time of + * dequeuing the priority hint may no longer may match the highest + * available request priority. + */ + int queue_priority_hint; + + /** + * @queue: queue of requests, in priority lists + */ + struct rb_root_cached queue; + + /** + * @csb_write: control register for Context Switch buffer + * + * Note this register may be either mmio or HWSP shadow. 
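Back in the execlist_port definition above, the submission count is packed into the low bits of the request pointer (port_pack()/port_unpack(), EXECLIST_COUNT_BITS wide), trading on pointer alignment to keep those bits free. A standalone sketch of that pointer-tagging trick with a dummy request type:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define COUNT_BITS 2
#define COUNT_MASK ((1ul << COUNT_BITS) - 1)

struct dummy_request { int id; };

/* Pack a small counter into the low bits of a sufficiently aligned pointer. */
static uintptr_t port_pack(struct dummy_request *rq, unsigned int count)
{
	assert(((uintptr_t)rq & COUNT_MASK) == 0);	/* alignment keeps bits free */
	assert(count <= COUNT_MASK);
	return (uintptr_t)rq | count;
}

static struct dummy_request *port_unpack(uintptr_t packed, unsigned int *count)
{
	*count = packed & COUNT_MASK;
	return (struct dummy_request *)(packed & ~COUNT_MASK);
}

int main(void)
{
	static _Alignas(4) struct dummy_request rq = { .id = 42 };
	struct dummy_request *unpacked;
	unsigned int count;
	uintptr_t slot;

	slot = port_pack(&rq, 1);
	unpacked = port_unpack(slot, &count);
	printf("rq %d submitted %u time(s)\n", unpacked->id, count);
	return 0;
}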
+ */ + u32 *csb_write; + + /** + * @csb_status: status array for Context Switch buffer + * + * Note these register may be either mmio or HWSP shadow. + */ + u32 *csb_status; + + /** + * @preempt_complete_status: expected CSB upon completing preemption + */ + u32 preempt_complete_status; + + /** + * @csb_size: context status buffer FIFO size + */ + u8 csb_size; + + /** + * @csb_head: context status buffer head + */ + u8 csb_head; + + I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;) +}; + +#define INTEL_ENGINE_CS_MAX_NAME 8 + +struct intel_engine_cs { + struct drm_i915_private *i915; + struct intel_uncore *uncore; + char name[INTEL_ENGINE_CS_MAX_NAME]; + + enum intel_engine_id id; + unsigned int hw_id; + unsigned int guc_id; + intel_engine_mask_t mask; + + u8 uabi_class; + + u8 class; + u8 instance; + u32 context_size; + u32 mmio_base; + + struct intel_sseu sseu; + + struct intel_ring *buffer; + + struct i915_timeline timeline; + + struct intel_context *kernel_context; /* pinned */ + struct intel_context *preempt_context; /* pinned; optional */ + + struct drm_i915_gem_object *default_state; + void *pinned_default_state; + + /* Rather than have every client wait upon all user interrupts, + * with the herd waking after every interrupt and each doing the + * heavyweight seqno dance, we delegate the task (of being the + * bottom-half of the user interrupt) to the first client. After + * every interrupt, we wake up one client, who does the heavyweight + * coherent seqno read and either goes back to sleep (if incomplete), + * or wakes up all the completed clients in parallel, before then + * transferring the bottom-half status to the next client in the queue. + * + * Compared to walking the entire list of waiters in a single dedicated + * bottom-half, we reduce the latency of the first waiter by avoiding + * a context switch, but incur additional coherent seqno reads when + * following the chain of request breadcrumbs. Since it is most likely + * that we have a single client waiting on each seqno, then reducing + * the overhead of waking that client is much preferred. + */ + struct intel_breadcrumbs { + spinlock_t irq_lock; + struct list_head signalers; + + struct irq_work irq_work; /* for use from inside irq_lock */ + + unsigned int irq_enabled; + + bool irq_armed; + } breadcrumbs; + + struct intel_engine_pmu { + /** + * @enable: Bitmask of enable sample events on this engine. + * + * Bits correspond to sample event types, for instance + * I915_SAMPLE_QUEUED is bit 0 etc. + */ + u32 enable; + /** + * @enable_count: Reference count for the enabled samplers. + * + * Index number corresponds to @enum drm_i915_pmu_engine_sample. + */ + unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT]; + /** + * @sample: Counter values for sampling events. + * + * Our internal timer stores the current counters in this field. + * + * Index number corresponds to @enum drm_i915_pmu_engine_sample. + */ + struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; + } pmu; + + /* + * A pool of objects to use as shadow copies of client batch buffers + * when the command parser is enabled. Prevents the client from + * modifying the batch contents after software parsing. 
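The shadow-copy idea described above is a classic time-of-check/time-of-use defence: copy the user buffer into memory the user cannot touch, validate the copy, and execute only the copy. A minimal sketch in plain C; the validate() step and its forbidden-opcode rule are stand-ins for the real command parser:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for the command parser: pretend client code 0x7 is privileged. */
static bool validate(const uint32_t *cmds, size_t n)
{
	for (size_t i = 0; i < n; i++)
		if ((cmds[i] >> 29) == 0x7)
			return false;
	return true;
}

static int execute_user_batch(const uint32_t *user, size_t n)
{
	/* Shadow copy: after this point, changes to 'user' cannot affect us. */
	uint32_t *shadow = malloc(n * sizeof(*shadow));
	int ret = -1;

	if (!shadow)
		return -1;
	memcpy(shadow, user, n * sizeof(*shadow));

	if (validate(shadow, n)) {
		/* ...submit 'shadow', never 'user', to the engine... */
		ret = 0;
	}

	free(shadow);
	return ret;
}

int main(void)
{
	uint32_t batch[] = {
		0x00000000,	/* MI_NOOP */
		0x05000000,	/* MI_BATCH_BUFFER_END (0x0a << 23) */
	};

	printf("batch %s\n", execute_user_batch(batch, 2) ? "rejected" : "accepted");
	return 0;
}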
+ */ + struct i915_gem_batch_pool batch_pool; + + struct intel_hw_status_page status_page; + struct i915_ctx_workarounds wa_ctx; + struct i915_wa_list ctx_wa_list; + struct i915_wa_list wa_list; + struct i915_wa_list whitelist; + + u32 irq_keep_mask; /* always keep these interrupts */ + u32 irq_enable_mask; /* bitmask to enable ring interrupt */ + void (*irq_enable)(struct intel_engine_cs *engine); + void (*irq_disable)(struct intel_engine_cs *engine); + + int (*init_hw)(struct intel_engine_cs *engine); + + struct { + void (*prepare)(struct intel_engine_cs *engine); + void (*reset)(struct intel_engine_cs *engine, bool stalled); + void (*finish)(struct intel_engine_cs *engine); + } reset; + + void (*park)(struct intel_engine_cs *engine); + void (*unpark)(struct intel_engine_cs *engine); + + void (*set_default_submission)(struct intel_engine_cs *engine); + + const struct intel_context_ops *cops; + + int (*request_alloc)(struct i915_request *rq); + int (*init_context)(struct i915_request *rq); + + int (*emit_flush)(struct i915_request *request, u32 mode); +#define EMIT_INVALIDATE BIT(0) +#define EMIT_FLUSH BIT(1) +#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) + int (*emit_bb_start)(struct i915_request *rq, + u64 offset, u32 length, + unsigned int dispatch_flags); +#define I915_DISPATCH_SECURE BIT(0) +#define I915_DISPATCH_PINNED BIT(1) + int (*emit_init_breadcrumb)(struct i915_request *rq); + u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, + u32 *cs); + unsigned int emit_fini_breadcrumb_dw; + + /* Pass the request to the hardware queue (e.g. directly into + * the legacy ringbuffer or to the end of an execlist). + * + * This is called from an atomic context with irqs disabled; must + * be irq safe. + */ + void (*submit_request)(struct i915_request *rq); + + /* + * Call when the priority on a request has changed and it and its + * dependencies may need rescheduling. Note the request itself may + * not be ready to run! + */ + void (*schedule)(struct i915_request *request, + const struct i915_sched_attr *attr); + + /* + * Cancel all requests on the hardware, or queued for execution. + * This should only cancel the ready requests that have been + * submitted to the engine (via the engine->submit_request callback). + * This is called when marking the device as wedged. + */ + void (*cancel_requests)(struct intel_engine_cs *engine); + + void (*cleanup)(struct intel_engine_cs *engine); + + struct intel_engine_execlists execlists; + + /* Contexts are pinned whilst they are active on the GPU. The last + * context executed remains active whilst the GPU is idle - the + * switch away and write to the context object only occurs on the + * next execution. Contexts are only unpinned on retirement of the + * following request ensuring that we can always write to the object + * on the context switch even after idling. Across suspend, we switch + * to the kernel context and trash it as the save may not happen + * before the hardware is powered down. 
+ */ + struct intel_context *last_retired_context; + + /* status_notifier: list of callbacks for context-switch changes */ + struct atomic_notifier_head context_status_notifier; + + struct intel_engine_hangcheck hangcheck; + +#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) +#define I915_ENGINE_SUPPORTS_STATS BIT(1) +#define I915_ENGINE_HAS_PREEMPTION BIT(2) +#define I915_ENGINE_HAS_SEMAPHORES BIT(3) +#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) + unsigned int flags; + + /* + * Table of commands the command parser needs to know about + * for this engine. + */ + DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); + + /* + * Table of registers allowed in commands that read/write registers. + */ + const struct drm_i915_reg_table *reg_tables; + int reg_table_count; + + /* + * Returns the bitmask for the length field of the specified command. + * Return 0 for an unrecognized/invalid command. + * + * If the command parser finds an entry for a command in the engine's + * cmd_tables, it gets the command's length based on the table entry. + * If not, it calls this function to determine the per-engine length + * field encoding for the command (i.e. different opcode ranges use + * certain bits to encode the command length in the header). + */ + u32 (*get_cmd_length_mask)(u32 cmd_header); + + struct { + /** + * @lock: Lock protecting the below fields. + */ + seqlock_t lock; + /** + * @enabled: Reference count indicating number of listeners. + */ + unsigned int enabled; + /** + * @active: Number of contexts currently scheduled in. + */ + unsigned int active; + /** + * @enabled_at: Timestamp when busy stats were enabled. + */ + ktime_t enabled_at; + /** + * @start: Timestamp of the last idle to active transition. + * + * Idle is defined as active == 0, active is active > 0. + */ + ktime_t start; + /** + * @total: Total time this engine was busy. + * + * Accumulated time not counting the most recent block in cases + * where engine is currently busy (active > 0). + */ + ktime_t total; + } stats; +}; + +static inline bool +intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; +} + +static inline bool +intel_engine_supports_stats(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_SUPPORTS_STATS; +} + +static inline bool +intel_engine_has_preemption(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_HAS_PREEMPTION; +} + +static inline bool +intel_engine_has_semaphores(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_HAS_SEMAPHORES; +} + +static inline bool +intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; +} + +#define instdone_slice_mask(dev_priv__) \ + (IS_GEN(dev_priv__, 7) ? \ + 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) + +#define instdone_subslice_mask(dev_priv__) \ + (IS_GEN(dev_priv__, 7) ? \ + 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0]) + +#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ + for ((slice__) = 0, (subslice__) = 0; \ + (slice__) < I915_MAX_SLICES; \ + (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? 
(subslice__) + 1 : 0, \ + (slice__) += ((subslice__) == 0)) \ + for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \ + (BIT(subslice__) & instdone_subslice_mask(dev_priv__))) + +#endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h new file mode 100644 index 000000000000..a34ece53a771 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -0,0 +1,278 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright � 2003-2018 Intel Corporation + */ + +#ifndef _INTEL_GPU_COMMANDS_H_ +#define _INTEL_GPU_COMMANDS_H_ + +/* + * Instruction field definitions used by the command parser + */ +#define INSTR_CLIENT_SHIFT 29 +#define INSTR_MI_CLIENT 0x0 +#define INSTR_BC_CLIENT 0x2 +#define INSTR_RC_CLIENT 0x3 +#define INSTR_SUBCLIENT_SHIFT 27 +#define INSTR_SUBCLIENT_MASK 0x18000000 +#define INSTR_MEDIA_SUBCLIENT 0x2 +#define INSTR_26_TO_24_MASK 0x7000000 +#define INSTR_26_TO_24_SHIFT 24 + +/* + * Memory interface instructions used by the kernel + */ +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) +/* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */ +#define MI_GLOBAL_GTT (1<<22) + +#define MI_NOOP MI_INSTR(0, 0) +#define MI_USER_INTERRUPT MI_INSTR(0x02, 0) +#define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0) +#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16) +#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) +#define MI_WAIT_FOR_PLANE_A_SCANLINES (1<<1) +#define MI_FLUSH MI_INSTR(0x04, 0) +#define MI_READ_FLUSH (1 << 0) +#define MI_EXE_FLUSH (1 << 1) +#define MI_NO_WRITE_FLUSH (1 << 2) +#define MI_SCENE_COUNT (1 << 3) /* just increment scene count */ +#define MI_END_SCENE (1 << 4) /* flush binner and incr scene count */ +#define MI_INVALIDATE_ISP (1 << 5) /* invalidate indirect state pointers */ +#define MI_REPORT_HEAD MI_INSTR(0x07, 0) +#define MI_ARB_ON_OFF MI_INSTR(0x08, 0) +#define MI_ARB_ENABLE (1<<0) +#define MI_ARB_DISABLE (0<<0) +#define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0) +#define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0) +#define MI_SUSPEND_FLUSH_EN (1<<0) +#define MI_SET_APPID MI_INSTR(0x0e, 0) +#define MI_OVERLAY_FLIP MI_INSTR(0x11, 0) +#define MI_OVERLAY_CONTINUE (0x0<<21) +#define MI_OVERLAY_ON (0x1<<21) +#define MI_OVERLAY_OFF (0x2<<21) +#define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0) +#define MI_DISPLAY_FLIP MI_INSTR(0x14, 2) +#define MI_DISPLAY_FLIP_I915 MI_INSTR(0x14, 1) +#define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20) +/* IVB has funny definitions for which plane to flip. 
*/ +#define MI_DISPLAY_FLIP_IVB_PLANE_A (0 << 19) +#define MI_DISPLAY_FLIP_IVB_PLANE_B (1 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19) +#define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19) +/* SKL ones */ +#define MI_DISPLAY_FLIP_SKL_PLANE_1_A (0 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_1_B (1 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_1_C (2 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_2_A (4 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_2_B (5 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_2_C (6 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_3_A (7 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_3_B (8 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_3_C (9 << 8) +#define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6, gen7 */ +#define MI_SEMAPHORE_GLOBAL_GTT (1<<22) +#define MI_SEMAPHORE_UPDATE (1<<21) +#define MI_SEMAPHORE_COMPARE (1<<20) +#define MI_SEMAPHORE_REGISTER (1<<18) +#define MI_SEMAPHORE_SYNC_VR (0<<16) /* RCS wait for VCS (RVSYNC) */ +#define MI_SEMAPHORE_SYNC_VER (1<<16) /* RCS wait for VECS (RVESYNC) */ +#define MI_SEMAPHORE_SYNC_BR (2<<16) /* RCS wait for BCS (RBSYNC) */ +#define MI_SEMAPHORE_SYNC_BV (0<<16) /* VCS wait for BCS (VBSYNC) */ +#define MI_SEMAPHORE_SYNC_VEV (1<<16) /* VCS wait for VECS (VVESYNC) */ +#define MI_SEMAPHORE_SYNC_RV (2<<16) /* VCS wait for RCS (VRSYNC) */ +#define MI_SEMAPHORE_SYNC_RB (0<<16) /* BCS wait for RCS (BRSYNC) */ +#define MI_SEMAPHORE_SYNC_VEB (1<<16) /* BCS wait for VECS (BVESYNC) */ +#define MI_SEMAPHORE_SYNC_VB (2<<16) /* BCS wait for VCS (BVSYNC) */ +#define MI_SEMAPHORE_SYNC_BVE (0<<16) /* VECS wait for BCS (VEBSYNC) */ +#define MI_SEMAPHORE_SYNC_VVE (1<<16) /* VECS wait for VCS (VEVSYNC) */ +#define MI_SEMAPHORE_SYNC_RVE (2<<16) /* VECS wait for RCS (VERSYNC) */ +#define MI_SEMAPHORE_SYNC_INVALID (3<<16) +#define MI_SEMAPHORE_SYNC_MASK (3<<16) +#define MI_SET_CONTEXT MI_INSTR(0x18, 0) +#define MI_MM_SPACE_GTT (1<<8) +#define MI_MM_SPACE_PHYSICAL (0<<8) +#define MI_SAVE_EXT_STATE_EN (1<<3) +#define MI_RESTORE_EXT_STATE_EN (1<<2) +#define MI_FORCE_RESTORE (1<<1) +#define MI_RESTORE_INHIBIT (1<<0) +#define HSW_MI_RS_SAVE_STATE_EN (1<<3) +#define HSW_MI_RS_RESTORE_STATE_EN (1<<2) +#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ +#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) +#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ +#define MI_SEMAPHORE_POLL (1 << 15) +#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) +#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) +#define MI_SEMAPHORE_SAD_LT_SDD (2 << 12) +#define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12) +#define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12) +#define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) +#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) +#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) +#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ +#define MI_USE_GGTT (1 << 22) /* g4x+ */ +#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) +/* + * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: + * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw + * simply ignores the register load under certain conditions. + * - One can actually load arbitrary many arbitrary registers: Simply issue x + * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! 
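The MI_LOAD_REGISTER_IMM() definition just below follows directly from the note above: MI_INSTR() places the MI opcode in bits 28:23 of the header DWord and the DWord-length field in the low bits, and since the length field counts total DWords minus two, a packet carrying x address/value pairs (1 header + 2*x payload DWords) encodes 2*x - 1. A minimal user-space sketch of assembling such a packet; emit_lri, buf and the register offsets are invented for the example, only the MI_INSTR/MI_NOOP/MI_LOAD_REGISTER_IMM encodings are copied from this header:

#include <stdint.h>
#include <stdio.h>

#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
#define MI_NOOP                 MI_INSTR(0, 0)
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)

/* Append a NOOP + LRI(n) packet to a DWord buffer standing in for ring space. */
static uint32_t *emit_lri(uint32_t *cs, const uint32_t *regs,
			  const uint32_t *values, unsigned int n)
{
	*cs++ = MI_NOOP;			/* per the note above: NOOP before the LRI */
	*cs++ = MI_LOAD_REGISTER_IMM(n);	/* n <= 16 address/value pairs */
	for (unsigned int i = 0; i < n; i++) {
		*cs++ = regs[i];		/* register offset */
		*cs++ = values[i];		/* value to load */
	}
	return cs;
}

int main(void)
{
	uint32_t buf[64];
	const uint32_t regs[] = { 0x2244, 0x2248 };	/* arbitrary offsets */
	const uint32_t vals[] = { 0x1, 0x2 };
	uint32_t *end = emit_lri(buf, regs, vals, 2);

	printf("%td dwords, LRI header 0x%08x\n", end - buf, buf[1]);
	return 0;
}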
+ */ +#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) +#define MI_LRI_FORCE_POSTED (1<<12) +#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) +#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) +#define MI_SRM_LRM_GLOBAL_GTT (1<<22) +#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */ +#define MI_FLUSH_DW_STORE_INDEX (1<<21) +#define MI_INVALIDATE_TLB (1<<18) +#define MI_FLUSH_DW_OP_STOREDW (1<<14) +#define MI_FLUSH_DW_OP_MASK (3<<14) +#define MI_FLUSH_DW_NOTIFY (1<<8) +#define MI_INVALIDATE_BSD (1<<7) +#define MI_FLUSH_DW_USE_GTT (1<<2) +#define MI_FLUSH_DW_USE_PPGTT (0<<2) +#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1) +#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2) +#define MI_BATCH_BUFFER MI_INSTR(0x30, 1) +#define MI_BATCH_NON_SECURE (1) +/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ +#define MI_BATCH_NON_SECURE_I965 (1<<8) +#define MI_BATCH_PPGTT_HSW (1<<8) +#define MI_BATCH_NON_SECURE_HSW (1<<13) +#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) +#define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ +#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) +#define MI_BATCH_RESOURCE_STREAMER (1<<10) + +/* + * 3D instructions used by the kernel + */ +#define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags)) + +#define GEN9_MEDIA_POOL_STATE ((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4) +#define GEN9_MEDIA_POOL_ENABLE (1 << 31) +#define GFX_OP_RASTER_RULES ((0x3<<29)|(0x7<<24)) +#define GFX_OP_SCISSOR ((0x3<<29)|(0x1c<<24)|(0x10<<19)) +#define SC_UPDATE_SCISSOR (0x1<<1) +#define SC_ENABLE_MASK (0x1<<0) +#define SC_ENABLE (0x1<<0) +#define GFX_OP_LOAD_INDIRECT ((0x3<<29)|(0x1d<<24)|(0x7<<16)) +#define GFX_OP_SCISSOR_INFO ((0x3<<29)|(0x1d<<24)|(0x81<<16)|(0x1)) +#define SCI_YMIN_MASK (0xffff<<16) +#define SCI_XMIN_MASK (0xffff<<0) +#define SCI_YMAX_MASK (0xffff<<16) +#define SCI_XMAX_MASK (0xffff<<0) +#define GFX_OP_SCISSOR_ENABLE ((0x3<<29)|(0x1c<<24)|(0x10<<19)) +#define GFX_OP_SCISSOR_RECT ((0x3<<29)|(0x1d<<24)|(0x81<<16)|1) +#define GFX_OP_COLOR_FACTOR ((0x3<<29)|(0x1d<<24)|(0x1<<16)|0x0) +#define GFX_OP_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) +#define GFX_OP_MAP_INFO ((0x3<<29)|(0x1d<<24)|0x4) +#define GFX_OP_DESTBUFFER_VARS ((0x3<<29)|(0x1d<<24)|(0x85<<16)|0x0) +#define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1) +#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) +#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) + +#define COLOR_BLT_CMD (2<<29 | 0x40<<22 | (5-2)) +#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4) +#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +#define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5) +#define BLT_WRITE_A (2<<20) +#define BLT_WRITE_RGB (1<<20) +#define BLT_WRITE_RGBA (BLT_WRITE_RGB | BLT_WRITE_A) +#define BLT_DEPTH_8 (0<<24) +#define BLT_DEPTH_16_565 (1<<24) +#define BLT_DEPTH_16_1555 (2<<24) +#define BLT_DEPTH_32 (3<<24) +#define BLT_ROP_SRC_COPY (0xcc<<16) +#define BLT_ROP_COLOR_COPY (0xf0<<16) +#define XY_SRC_COPY_BLT_SRC_TILED (1<<15) /* 965+ only */ +#define XY_SRC_COPY_BLT_DST_TILED (1<<11) /* 965+ only */ +#define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2) +#define ASYNC_FLIP (1<<22) +#define DISPLAY_PLANE_A (0<<20) +#define DISPLAY_PLANE_B (1<<20) +#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) +#define PIPE_CONTROL_FLUSH_L3 (1<<27) +#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ +#define PIPE_CONTROL_MMIO_WRITE (1<<23) +#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) +#define 
PIPE_CONTROL_CS_STALL (1<<20) +#define PIPE_CONTROL_TLB_INVALIDATE (1<<18) +#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) +#define PIPE_CONTROL_QW_WRITE (1<<14) +#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) +#define PIPE_CONTROL_DEPTH_STALL (1<<13) +#define PIPE_CONTROL_WRITE_FLUSH (1<<12) +#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ +#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */ +#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ +#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) +#define PIPE_CONTROL_NOTIFY (1<<8) +#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ +#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) +#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) +#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) +#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) +#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) +#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */ + +/* + * Commands used only by the command parser + */ +#define MI_SET_PREDICATE MI_INSTR(0x01, 0) +#define MI_ARB_CHECK MI_INSTR(0x05, 0) +#define MI_RS_CONTROL MI_INSTR(0x06, 0) +#define MI_URB_ATOMIC_ALLOC MI_INSTR(0x09, 0) +#define MI_PREDICATE MI_INSTR(0x0C, 0) +#define MI_RS_CONTEXT MI_INSTR(0x0F, 0) +#define MI_TOPOLOGY_FILTER MI_INSTR(0x0D, 0) +#define MI_LOAD_SCAN_LINES_EXCL MI_INSTR(0x13, 0) +#define MI_URB_CLEAR MI_INSTR(0x19, 0) +#define MI_UPDATE_GTT MI_INSTR(0x23, 0) +#define MI_CLFLUSH MI_INSTR(0x27, 0) +#define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0) +#define MI_REPORT_PERF_COUNT_GGTT (1<<0) +#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0) +#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0) +#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0) +#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0) +#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0) + +#define PIPELINE_SELECT ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16)) +#define GFX_OP_3DSTATE_VF_STATISTICS ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16)) +#define MEDIA_VFE_STATE ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16)) +#define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18) +#define GPGPU_OBJECT ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16)) +#define GPGPU_WALKER ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16)) +#define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x39<<16)) +#define GFX_OP_3DSTATE_DX9_CONSTANTF_PS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x3A<<16)) +#define GFX_OP_3DSTATE_SO_DECL_LIST \ + ((0x3<<29)|(0x3<<27)|(0x1<<24)|(0x17<<16)) + +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x43<<16)) +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x44<<16)) +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x45<<16)) +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x46<<16)) +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16)) + +#define MFX_WAIT ((0x3<<29)|(0x1<<27)|(0x0<<16)) + +#define COLOR_BLT ((0x2<<29)|(0x40<<22)) +#define SRC_COPY_BLT ((0x2<<29)|(0x43<<22)) + +#endif /* _INTEL_GPU_COMMANDS_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c new file mode 100644 index 000000000000..3053a706a561 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -0,0 +1,334 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and 
associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "intel_reset.h" +#include "i915_drv.h" + +struct hangcheck { + u64 acthd; + u32 seqno; + enum intel_engine_hangcheck_action action; + unsigned long action_timestamp; + int deadlock; + struct intel_instdone instdone; + bool wedged:1; + bool stalled:1; +}; + +static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) +{ + u32 tmp = current_instdone | *old_instdone; + bool unchanged; + + unchanged = tmp == *old_instdone; + *old_instdone |= tmp; + + return unchanged; +} + +static bool subunits_stuck(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct intel_instdone instdone; + struct intel_instdone *accu_instdone = &engine->hangcheck.instdone; + bool stuck; + int slice; + int subslice; + + if (engine->id != RCS0) + return true; + + intel_engine_get_instdone(engine, &instdone); + + /* There might be unstable subunit states even when + * actual head is not moving. Filter out the unstable ones by + * accumulating the undone -> done transitions and only + * consider those as progress. + */ + stuck = instdone_unchanged(instdone.instdone, + &accu_instdone->instdone); + stuck &= instdone_unchanged(instdone.slice_common, + &accu_instdone->slice_common); + + for_each_instdone_slice_subslice(dev_priv, slice, subslice) { + stuck &= instdone_unchanged(instdone.sampler[slice][subslice], + &accu_instdone->sampler[slice][subslice]); + stuck &= instdone_unchanged(instdone.row[slice][subslice], + &accu_instdone->row[slice][subslice]); + } + + return stuck; +} + +static enum intel_engine_hangcheck_action +head_stuck(struct intel_engine_cs *engine, u64 acthd) +{ + if (acthd != engine->hangcheck.acthd) { + + /* Clear subunit states on head movement */ + memset(&engine->hangcheck.instdone, 0, + sizeof(engine->hangcheck.instdone)); + + return ENGINE_ACTIVE_HEAD; + } + + if (!subunits_stuck(engine)) + return ENGINE_ACTIVE_SUBUNITS; + + return ENGINE_DEAD; +} + +static enum intel_engine_hangcheck_action +engine_stuck(struct intel_engine_cs *engine, u64 acthd) +{ + struct drm_i915_private *dev_priv = engine->i915; + enum intel_engine_hangcheck_action ha; + u32 tmp; + + ha = head_stuck(engine, acthd); + if (ha != ENGINE_DEAD) + return ha; + + if (IS_GEN(dev_priv, 2)) + return ENGINE_DEAD; + + /* Is the chip hanging on a WAIT_FOR_EVENT? + * If so we can simply poke the RB_WAIT bit + * and break the hang. This should work on + * all but the second generation chipsets. 
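instdone_unchanged() and subunits_stuck() earlier in this file only count a subunit as progressing when a sample sets a bit the accumulated mask has not seen yet; flickering on already-seen bits is ignored. A small stand-alone rendition of that accumulate-and-compare step, with made-up sample values rather than anything read from hardware:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Report "unchanged" (no progress) unless the new sample adds an unseen bit. */
static bool instdone_unchanged(uint32_t current, uint32_t *accumulated)
{
	uint32_t tmp = current | *accumulated;
	bool unchanged = (tmp == *accumulated);

	*accumulated = tmp;
	return unchanged;
}

int main(void)
{
	uint32_t accu = 0;
	const uint32_t samples[] = { 0x3, 0x3, 0x7, 0x5 };

	for (unsigned int i = 0; i < 4; i++)
		printf("sample 0x%x -> %s\n", samples[i],
		       instdone_unchanged(samples[i], &accu) ?
		       "no progress" : "progress");
	return 0;
}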
+ */ + tmp = ENGINE_READ(engine, RING_CTL); + if (tmp & RING_WAIT) { + i915_handle_error(dev_priv, engine->mask, 0, + "stuck wait on %s", engine->name); + ENGINE_WRITE(engine, RING_CTL, tmp); + return ENGINE_WAIT_KICK; + } + + return ENGINE_DEAD; +} + +static void hangcheck_load_sample(struct intel_engine_cs *engine, + struct hangcheck *hc) +{ + hc->acthd = intel_engine_get_active_head(engine); + hc->seqno = intel_engine_get_hangcheck_seqno(engine); +} + +static void hangcheck_store_sample(struct intel_engine_cs *engine, + const struct hangcheck *hc) +{ + engine->hangcheck.acthd = hc->acthd; + engine->hangcheck.last_seqno = hc->seqno; +} + +static enum intel_engine_hangcheck_action +hangcheck_get_action(struct intel_engine_cs *engine, + const struct hangcheck *hc) +{ + if (engine->hangcheck.last_seqno != hc->seqno) + return ENGINE_ACTIVE_SEQNO; + + if (intel_engine_is_idle(engine)) + return ENGINE_IDLE; + + return engine_stuck(engine, hc->acthd); +} + +static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, + struct hangcheck *hc) +{ + unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT; + + hc->action = hangcheck_get_action(engine, hc); + + /* We always increment the progress + * if the engine is busy and still processing + * the same request, so that no single request + * can run indefinitely (such as a chain of + * batches). The only time we do not increment + * the hangcheck score on this ring, if this + * engine is in a legitimate wait for another + * engine. In that case the waiting engine is a + * victim and we want to be sure we catch the + * right culprit. Then every time we do kick + * the ring, make it as a progress as the seqno + * advancement might ensure and if not, it + * will catch the hanging engine. + */ + + switch (hc->action) { + case ENGINE_IDLE: + case ENGINE_ACTIVE_SEQNO: + /* Clear head and subunit states on seqno movement */ + hc->acthd = 0; + + memset(&engine->hangcheck.instdone, 0, + sizeof(engine->hangcheck.instdone)); + + /* Intentional fall through */ + case ENGINE_WAIT_KICK: + case ENGINE_WAIT: + engine->hangcheck.action_timestamp = jiffies; + break; + + case ENGINE_ACTIVE_HEAD: + case ENGINE_ACTIVE_SUBUNITS: + /* + * Seqno stuck with still active engine gets leeway, + * in hopes that it is just a long shader. + */ + timeout = I915_SEQNO_DEAD_TIMEOUT; + break; + + case ENGINE_DEAD: + break; + + default: + MISSING_CASE(hc->action); + } + + hc->stalled = time_after(jiffies, + engine->hangcheck.action_timestamp + timeout); + hc->wedged = time_after(jiffies, + engine->hangcheck.action_timestamp + + I915_ENGINE_WEDGED_TIMEOUT); +} + +static void hangcheck_declare_hang(struct drm_i915_private *i915, + unsigned int hung, + unsigned int stuck) +{ + struct intel_engine_cs *engine; + intel_engine_mask_t tmp; + char msg[80]; + int len; + + /* If some rings hung but others were still busy, only + * blame the hanging rings in the synopsis. + */ + if (stuck != hung) + hung &= ~stuck; + len = scnprintf(msg, sizeof(msg), + "%s on ", stuck == hung ? "no progress" : "hang"); + for_each_engine_masked(engine, i915, hung, tmp) + len += scnprintf(msg + len, sizeof(msg) - len, + "%s, ", engine->name); + msg[len-2] = '\0'; + + return i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg); +} + +/* + * This is called when the chip hasn't reported back with completed + * batchbuffers in a long time. We keep track per ring seqno progress and + * if there are no progress, hangcheck score for that ring is increased. 
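hangcheck_accumulate_sample() above only refreshes action_timestamp while the engine is visibly making progress, so the stalled and wedged verdicts reduce to timestamp comparisons against two thresholds. A sketch of that decision with invented thresholds (DEAD_TIMEOUT/WEDGED_TIMEOUT are example numbers, not the driver's I915_*_TIMEOUT values) and a wrap-safe time_after() in the style of the kernel helper:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "a is after b" comparison, kernel-style. */
static bool time_after(uint32_t a, uint32_t b)
{
	return (int32_t)(b - a) < 0;
}

struct toy_hangcheck {
	uint32_t action_timestamp;	/* last time we saw progress */
};

/* Illustrative thresholds only. */
#define DEAD_TIMEOUT	10
#define WEDGED_TIMEOUT	60

static void classify(const struct toy_hangcheck *hc, uint32_t now,
		     bool *stalled, bool *wedged)
{
	*stalled = time_after(now, hc->action_timestamp + DEAD_TIMEOUT);
	*wedged  = time_after(now, hc->action_timestamp + WEDGED_TIMEOUT);
}

int main(void)
{
	struct toy_hangcheck hc = { .action_timestamp = 100 };
	bool stalled, wedged;

	classify(&hc, 130, &stalled, &wedged);
	printf("stalled=%d wedged=%d\n", stalled, wedged);	/* 1 0 */
	return 0;
}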
+ * Further, acthd is inspected to see if the ring is stuck. On stuck case + * we kick the ring. If we see no progress on three subsequent calls + * we assume chip is wedged and try to fix it by resetting the chip. + */ +static void i915_hangcheck_elapsed(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), + gpu_error.hangcheck_work.work); + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int hung = 0, stuck = 0, wedged = 0; + + if (!i915_modparams.enable_hangcheck) + return; + + if (!READ_ONCE(dev_priv->gt.awake)) + return; + + if (i915_terminally_wedged(dev_priv)) + return; + + /* As enabling the GPU requires fairly extensive mmio access, + * periodically arm the mmio checker to see if we are triggering + * any invalid access. + */ + intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore); + + for_each_engine(engine, dev_priv, id) { + struct hangcheck hc; + + intel_engine_signal_breadcrumbs(engine); + + hangcheck_load_sample(engine, &hc); + hangcheck_accumulate_sample(engine, &hc); + hangcheck_store_sample(engine, &hc); + + if (hc.stalled) { + hung |= engine->mask; + if (hc.action != ENGINE_DEAD) + stuck |= engine->mask; + } + + if (hc.wedged) + wedged |= engine->mask; + } + + if (GEM_SHOW_DEBUG() && (hung | stuck)) { + struct drm_printer p = drm_debug_printer("hangcheck"); + + for_each_engine(engine, dev_priv, id) { + if (intel_engine_is_idle(engine)) + continue; + + intel_engine_dump(engine, &p, "%s\n", engine->name); + } + } + + if (wedged) { + dev_err(dev_priv->drm.dev, + "GPU recovery timed out," + " cancelling all in-flight rendering.\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(dev_priv); + } + + if (hung) + hangcheck_declare_hang(dev_priv, hung, stuck); + + /* Reset timer in case GPU hangs without another request being added */ + i915_queue_hangcheck(dev_priv); +} + +void intel_engine_init_hangcheck(struct intel_engine_cs *engine) +{ + memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); + engine->hangcheck.action_timestamp = jiffies; +} + +void intel_hangcheck_init(struct drm_i915_private *i915) +{ + INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work, + i915_hangcheck_elapsed); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_hangcheck.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c new file mode 100644 index 000000000000..5cadf8f6a23d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -0,0 +1,2908 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Ben Widawsky + * Michel Thierry + * Thomas Daniel + * Oscar Mateo + * + */ + +/** + * DOC: Logical Rings, Logical Ring Contexts and Execlists + * + * Motivation: + * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". + * These expanded contexts enable a number of new abilities, especially + * "Execlists" (also implemented in this file). + * + * One of the main differences with the legacy HW contexts is that logical + * ring contexts incorporate many more things to the context's state, like + * PDPs or ringbuffer control registers: + * + * The reason why PDPs are included in the context is straightforward: as + * PPGTTs (per-process GTTs) are actually per-context, having the PDPs + * contained there mean you don't need to do a ppgtt->switch_mm yourself, + * instead, the GPU will do it for you on the context switch. + * + * But, what about the ringbuffer control registers (head, tail, etc..)? + * shouldn't we just need a set of those per engine command streamer? This is + * where the name "Logical Rings" starts to make sense: by virtualizing the + * rings, the engine cs shifts to a new "ring buffer" with every context + * switch. When you want to submit a workload to the GPU you: A) choose your + * context, B) find its appropriate virtualized ring, C) write commands to it + * and then, finally, D) tell the GPU to switch to that context. + * + * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch + * to a contexts is via a context execution list, ergo "Execlists". + * + * LRC implementation: + * Regarding the creation of contexts, we have: + * + * - One global default context. + * - One local default context for each opened fd. + * - One local extra context for each context create ioctl call. + * + * Now that ringbuffers belong per-context (and not per-engine, like before) + * and that contexts are uniquely tied to a given engine (and not reusable, + * like before) we need: + * + * - One ringbuffer per-engine inside each context. + * - One backing object per-engine inside each context. + * + * The global default context starts its life with these new objects fully + * allocated and populated. The local default context for each opened fd is + * more complex, because we don't know at creation time which engine is going + * to use them. To handle this, we have implemented a deferred creation of LR + * contexts: + * + * The local context starts its life as a hollow or blank holder, that only + * gets populated for a given engine once we receive an execbuffer. If later + * on we receive another execbuffer ioctl for the same context but a different + * engine, we allocate/populate a new ringbuffer and context backing object and + * so on. + * + * Finally, regarding local contexts created using the ioctl call: as they are + * only allowed with the render ring, we can allocate & populate them right + * away (no need to defer anything, at least for now). + * + * Execlists implementation: + * Execlists are the new method by which, on gen8+ hardware, workloads are + * submitted for execution (as opposed to the legacy, ringbuffer-based, method). 
+ * This method works as follows: + * + * When a request is committed, its commands (the BB start and any leading or + * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer + * for the appropriate context. The tail pointer in the hardware context is not + * updated at this time, but instead, kept by the driver in the ringbuffer + * structure. A structure representing this request is added to a request queue + * for the appropriate engine: this structure contains a copy of the context's + * tail after the request was written to the ring buffer and a pointer to the + * context itself. + * + * If the engine's request queue was empty before the request was added, the + * queue is processed immediately. Otherwise the queue will be processed during + * a context switch interrupt. In any case, elements on the queue will get sent + * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a + * globally unique 20-bits submission ID. + * + * When execution of a request completes, the GPU updates the context status + * buffer with a context complete event and generates a context switch interrupt. + * During the interrupt handling, the driver examines the events in the buffer: + * for each context complete event, if the announced ID matches that on the head + * of the request queue, then that request is retired and removed from the queue. + * + * After processing, if any requests were retired and the queue is not empty + * then a new execution list can be submitted. The two requests at the front of + * the queue are next to be submitted but since a context may not occur twice in + * an execution list, if subsequent requests have the same ID as the first then + * the two requests must be combined. This is done simply by discarding requests + * at the head of the queue until either only one requests is left (in which case + * we use a NULL second context) or the first two requests have unique IDs. + * + * By always executing the first two requests in the queue the driver ensures + * that the GPU is kept as busy as possible. In the case where a single context + * completes but a second context is still executing, the request for this second + * context will be at the head of the queue when we remove the first one. This + * request will then be resubmitted along with a new request for a different context, + * which will cause the hardware to continue executing the second request and queue + * the new request (the GPU detects the condition of a context getting preempted + * with the same context and optimizes the context switch flow by not doing + * preemption, but just sampling the new tail pointer). 
+ * + */ +#include + +#include "i915_drv.h" +#include "i915_gem_render_state.h" +#include "i915_vgpu.h" +#include "intel_lrc_reg.h" +#include "intel_mocs.h" +#include "intel_reset.h" +#include "intel_workarounds.h" + +#define RING_EXECLIST_QFULL (1 << 0x2) +#define RING_EXECLIST1_VALID (1 << 0x3) +#define RING_EXECLIST0_VALID (1 << 0x4) +#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE) +#define RING_EXECLIST1_ACTIVE (1 << 0x11) +#define RING_EXECLIST0_ACTIVE (1 << 0x12) + +#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0) +#define GEN8_CTX_STATUS_PREEMPTED (1 << 1) +#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2) +#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3) +#define GEN8_CTX_STATUS_COMPLETE (1 << 4) +#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) + +#define GEN8_CTX_STATUS_COMPLETED_MASK \ + (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) + +/* Typical size of the average request (2 pipecontrols and a MI_BB) */ +#define EXECLISTS_REQUEST_SIZE 64 /* bytes */ +#define WA_TAIL_DWORDS 2 +#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) + +#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE) + +static int execlists_context_deferred_alloc(struct intel_context *ce, + struct intel_engine_cs *engine); +static void execlists_init_reg_state(u32 *reg_state, + struct intel_context *ce, + struct intel_engine_cs *engine, + struct intel_ring *ring); + +static inline struct i915_priolist *to_priolist(struct rb_node *rb) +{ + return rb_entry(rb, struct i915_priolist, node); +} + +static inline int rq_prio(const struct i915_request *rq) +{ + return rq->sched.attr.priority; +} + +static int effective_prio(const struct i915_request *rq) +{ + int prio = rq_prio(rq); + + /* + * On unwinding the active request, we give it a priority bump + * equivalent to a freshly submitted request. This protects it from + * being gazumped again, but it would be preferable if we didn't + * let it be gazumped in the first place! + * + * See __unwind_incomplete_requests() + */ + if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(rq)) { + /* + * After preemption, we insert the active request at the + * end of the new priority level. This means that we will be + * _lower_ priority than the preemptee all things equal (and + * so the preemption is valid), so adjust our comparison + * accordingly. + */ + prio |= ACTIVE_PRIORITY; + prio--; + } + + /* Restrict mere WAIT boosts from triggering preemption */ + return prio | __NO_PREEMPTION; +} + +static int queue_prio(const struct intel_engine_execlists *execlists) +{ + struct i915_priolist *p; + struct rb_node *rb; + + rb = rb_first_cached(&execlists->queue); + if (!rb) + return INT_MIN; + + /* + * As the priolist[] are inverted, with the highest priority in [0], + * we have to flip the index value to become priority. + */ + p = to_priolist(rb); + return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); +} + +static inline bool need_preempt(const struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + int last_prio; + + if (!engine->preempt_context) + return false; + + if (i915_request_completed(rq)) + return false; + + /* + * Check if the current priority hint merits a preemption attempt. + * + * We record the highest value priority we saw during rescheduling + * prior to this dequeue, therefore we know that if it is strictly + * less than the current tail of ESLP[0], we do not need to force + * a preempt-to-idle cycle. + * + * However, the priority hint is a mere hint that we may need to + * preempt. 
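queue_prio() above recovers an effective priority from the first (highest) priolist bucket: the bucket's base priority is shifted into the high bits and the first set bit of ->used, bit 0 being the strongest internal boost, is folded back in via ffs(); the result is what the queue_priority_hint comparison works with. A toy recomputation, where the 2-bit sub-level width is chosen purely for illustration and may differ from the driver's I915_USER_PRIORITY_SHIFT:

#include <stdio.h>
#include <strings.h>	/* ffs() */

#define PRIO_SHIFT 2	/* sub-level field width, example only */

/*
 * "used" is a bitmask of occupied sub-levels within one priority bucket,
 * bit 0 being the highest boost. Recover the effective priority of the
 * best request in the bucket.
 */
static int bucket_prio(int base_priority, unsigned int used)
{
	return ((base_priority + 1) << PRIO_SHIFT) - ffs(used);
}

int main(void)
{
	/* base priority 3, only the highest sub-level occupied */
	printf("%d\n", bucket_prio(3, 0x1));	/* (4 << 2) - 1 = 15 */
	/* same base, only the lowest of four sub-levels occupied */
	printf("%d\n", bucket_prio(3, 0x8));	/* (4 << 2) - 4 = 12 */
	return 0;
}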
If that hint is stale or we may be trying to preempt + * ourselves, ignore the request. + */ + last_prio = effective_prio(rq); + if (!__execlists_need_preempt(engine->execlists.queue_priority_hint, + last_prio)) + return false; + + /* + * Check against the first request in ELSP[1], it will, thanks to the + * power of PI, be the highest priority of that context. + */ + if (!list_is_last(&rq->link, &engine->timeline.requests) && + rq_prio(list_next_entry(rq, link)) > last_prio) + return true; + + /* + * If the inflight context did not trigger the preemption, then maybe + * it was the set of queued requests? Pick the highest priority in + * the queue (the first active priolist) and see if it deserves to be + * running instead of ELSP[0]. + * + * The highest priority request in the queue can not be either + * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same + * context, it's priority would not exceed ELSP[0] aka last_prio. + */ + return queue_prio(&engine->execlists) > last_prio; +} + +__maybe_unused static inline bool +assert_priority_queue(const struct i915_request *prev, + const struct i915_request *next) +{ + const struct intel_engine_execlists *execlists = + &prev->engine->execlists; + + /* + * Without preemption, the prev may refer to the still active element + * which we refuse to let go. + * + * Even with preemption, there are times when we think it is better not + * to preempt and leave an ostensibly lower priority request in flight. + */ + if (port_request(execlists->port) == prev) + return true; + + return rq_prio(prev) >= rq_prio(next); +} + +/* + * The context descriptor encodes various attributes of a context, + * including its GTT address and some flags. Because it's fairly + * expensive to calculate, we'll just do it once and cache the result, + * which remains valid until the context is unpinned. + * + * This is what a descriptor looks like, from LSB to MSB:: + * + * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) + * bits 12-31: LRCA, GTT address of (the HWSP of) this context + * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC) + * bits 53-54: mbz, reserved for use by hardware + * bits 55-63: group ID, currently unused and set to 0 + * + * Starting from Gen11, the upper dword of the descriptor has a new format: + * + * bits 32-36: reserved + * bits 37-47: SW context ID + * bits 48:53: engine instance + * bit 54: mbz, reserved for use by hardware + * bits 55-60: SW counter + * bits 61-63: engine class + * + * engine info, SW context ID and SW counter need to form a unique number + * (Context ID) per lrc. + */ +static u64 +lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) +{ + struct i915_gem_context *ctx = ce->gem_context; + u64 desc; + + BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); + BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH))); + + desc = ctx->desc_template; /* bits 0-11 */ + GEM_BUG_ON(desc & GENMASK_ULL(63, 12)); + + desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; + /* bits 12-31 */ + GEM_BUG_ON(desc & GENMASK_ULL(63, 32)); + + /* + * The following 32bits are copied into the OA reports (dword 2). + * Consider updating oa_get_render_ctx_id in i915_perf.c when changing + * anything below. 
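lrc_descriptor() here packs exactly the layout spelled out in the comment above. A stand-alone sketch of the Gen11 flavour, with the shift values written out literally from that bit description (the driver's GEN11_*_SHIFT macros live in intel_lrc_reg.h and are not reproduced; the values used in main() are arbitrary examples):

#include <stdint.h>
#include <stdio.h>

/* Bit positions transcribed from the descriptor layout documented above. */
#define SW_CTX_ID_SHIFT		37	/* bits 37-47 */
#define ENGINE_INSTANCE_SHIFT	48	/* bits 48-53 */
#define ENGINE_CLASS_SHIFT	61	/* bits 61-63 */

static uint64_t make_desc_gen11(uint64_t flags, uint64_t ggtt_offset,
				uint64_t sw_ctx_id, uint64_t instance,
				uint64_t engine_class)
{
	uint64_t desc = flags & 0xfff;		/* bits 0-11: context flags */

	desc |= ggtt_offset;			/* bits 12-31: page-aligned LRCA */
	desc |= sw_ctx_id << SW_CTX_ID_SHIFT;
	desc |= instance << ENGINE_INSTANCE_SHIFT;
	desc |= engine_class << ENGINE_CLASS_SHIFT;
	return desc;
}

int main(void)
{
	/* arbitrary example: LRCA 0x10000, ctx id 5, instance 0, class 1 */
	printf("desc = 0x%016llx\n",
	       (unsigned long long)make_desc_gen11(0x3, 0x10000, 5, 0, 1));
	return 0;
}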
+ */ + if (INTEL_GEN(engine->i915) >= 11) { + GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); + desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; + /* bits 37-47 */ + + desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; + /* bits 48-53 */ + + /* TODO: decide what to do with SW counter (bits 55-60) */ + + desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; + /* bits 61-63 */ + } else { + GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH)); + desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ + } + + return desc; +} + +static void unwind_wa_tail(struct i915_request *rq) +{ + rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); + assert_ring_tail_valid(rq->ring, rq->tail); +} + +static struct i915_request * +__unwind_incomplete_requests(struct intel_engine_cs *engine) +{ + struct i915_request *rq, *rn, *active = NULL; + struct list_head *uninitialized_var(pl); + int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY; + + lockdep_assert_held(&engine->timeline.lock); + + list_for_each_entry_safe_reverse(rq, rn, + &engine->timeline.requests, + link) { + if (i915_request_completed(rq)) + break; + + __i915_request_unsubmit(rq); + unwind_wa_tail(rq); + + GEM_BUG_ON(rq->hw_context->active); + + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); + if (rq_prio(rq) != prio) { + prio = rq_prio(rq); + pl = i915_sched_lookup_priolist(engine, prio); + } + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + + list_add(&rq->sched.link, pl); + + active = rq; + } + + /* + * The active request is now effectively the start of a new client + * stream, so give it the equivalent small priority bump to prevent + * it being gazumped a second time by another peer. + * + * Note we have to be careful not to apply a priority boost to a request + * still spinning on its semaphores. If the request hasn't started, that + * means it is still waiting for its dependencies to be signaled, and + * if we apply a priority boost to this request, we will boost it past + * its signalers and so break PI. + * + * One consequence of this preemption boost is that we may jump + * over lesser priorities (such as I915_PRIORITY_WAIT), effectively + * making those priorities non-preemptible. They will be moved forward + * in the priority queue, but they will not gain immediate access to + * the GPU. + */ + if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) { + prio |= ACTIVE_PRIORITY; + active->sched.attr.priority = prio; + list_move_tail(&active->sched.link, + i915_sched_lookup_priolist(engine, prio)); + } + + return active; +} + +struct i915_request * +execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) +{ + struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); + + return __unwind_incomplete_requests(engine); +} + +static inline void +execlists_context_status_change(struct i915_request *rq, unsigned long status) +{ + /* + * Only used when GVT-g is enabled now. When GVT-g is disabled, + * The compiler should eliminate this function as dead-code. 
+ */ + if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) + return; + + atomic_notifier_call_chain(&rq->engine->context_status_notifier, + status, rq); +} + +inline void +execlists_user_begin(struct intel_engine_execlists *execlists, + const struct execlist_port *port) +{ + execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER); +} + +inline void +execlists_user_end(struct intel_engine_execlists *execlists) +{ + execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); +} + +static inline void +execlists_context_schedule_in(struct i915_request *rq) +{ + GEM_BUG_ON(rq->hw_context->active); + + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); + intel_engine_context_in(rq->engine); + rq->hw_context->active = rq->engine; +} + +static inline void +execlists_context_schedule_out(struct i915_request *rq, unsigned long status) +{ + rq->hw_context->active = NULL; + intel_engine_context_out(rq->engine); + execlists_context_status_change(rq, status); + trace_i915_request_out(rq); +} + +static u64 execlists_update_context(struct i915_request *rq) +{ + struct intel_context *ce = rq->hw_context; + + ce->lrc_reg_state[CTX_RING_TAIL + 1] = + intel_ring_set_tail(rq->ring, rq->tail); + + /* + * Make sure the context image is complete before we submit it to HW. + * + * Ostensibly, writes (including the WCB) should be flushed prior to + * an uncached write such as our mmio register access, the empirical + * evidence (esp. on Braswell) suggests that the WC write into memory + * may not be visible to the HW prior to the completion of the UC + * register write and that we may begin execution from the context + * before its image is complete leading to invalid PD chasing. + * + * Furthermore, Braswell, at least, wants a full mb to be sure that + * the writes are coherent in memory (visible to the GPU) prior to + * execution, and not just visible to other CPUs (as is the result of + * wmb). + */ + mb(); + return ce->lrc_desc; +} + +static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) +{ + if (execlists->ctrl_reg) { + writel(lower_32_bits(desc), execlists->submit_reg + port * 2); + writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); + } else { + writel(upper_32_bits(desc), execlists->submit_reg); + writel(lower_32_bits(desc), execlists->submit_reg); + } +} + +static void execlists_submit_ports(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *execlists = &engine->execlists; + struct execlist_port *port = execlists->port; + unsigned int n; + + /* + * We can skip acquiring intel_runtime_pm_get() here as it was taken + * on our behalf by the request (see i915_gem_mark_busy()) and it will + * not be relinquished until the device is idle (see + * i915_gem_idle_work_handler()). As a precaution, we make sure + * that all ELSP are drained i.e. we have processed the CSB, + * before allowing ourselves to idle and calling intel_runtime_pm_put(). + */ + GEM_BUG_ON(!engine->i915->gt.awake); + + /* + * ELSQ note: the submit queue is not cleared after being submitted + * to the HW so we need to make sure we always clean it up. This is + * currently ensured by the fact that we always write the same number + * of elsq entries, keep this in mind before changing the loop below. 
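write_desc() above has two flavours: with a control register present (the Gen11 submit queue) each port gets its own lower/upper DWord pair at an indexed offset, while the legacy path pushes upper then lower halves through a single ELSP register. A toy model using an array in place of the mmio window; writel() and the slot layout are stand-ins for the example, not the real accessors or offsets:

#include <stdint.h>
#include <stdio.h>

/* Toy "register file": index 0 plays the legacy ELSP, 1.. the ELSQ slots. */
static uint32_t regs[16];
static unsigned int nwrites;

static void writel(uint32_t val, unsigned int offset)
{
	regs[offset] = val;
	printf("write #%u: reg[%u] = 0x%08x\n", ++nwrites, offset, val);
}

static void write_desc(int has_ctrl_reg, uint64_t desc, unsigned int port)
{
	if (has_ctrl_reg) {
		/* ELSQ: each port has its own pair of dword slots. */
		writel((uint32_t)desc, 1 + port * 2);
		writel((uint32_t)(desc >> 32), 1 + port * 2 + 1);
	} else {
		/* Legacy ELSP: both halves go through one register,
		 * upper dword first, lower dword last. */
		writel((uint32_t)(desc >> 32), 0);
		writel((uint32_t)desc, 0);
	}
}

int main(void)
{
	write_desc(0, 0x1122334455667788ull, 0);	/* legacy path */
	write_desc(1, 0x1122334455667788ull, 1);	/* ELSQ path   */
	return 0;
}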
+ */ + for (n = execlists_num_ports(execlists); n--; ) { + struct i915_request *rq; + unsigned int count; + u64 desc; + + rq = port_unpack(&port[n], &count); + if (rq) { + GEM_BUG_ON(count > !n); + if (!count++) + execlists_context_schedule_in(rq); + port_set(&port[n], port_pack(rq, count)); + desc = execlists_update_context(rq); + GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); + + GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", + engine->name, n, + port[n].context_id, count, + rq->fence.context, rq->fence.seqno, + hwsp_seqno(rq), + rq_prio(rq)); + } else { + GEM_BUG_ON(!n); + desc = 0; + } + + write_desc(execlists, desc, n); + } + + /* we need to manually load the submit queue */ + if (execlists->ctrl_reg) + writel(EL_CTRL_LOAD, execlists->ctrl_reg); + + execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); +} + +static bool ctx_single_port_submission(const struct intel_context *ce) +{ + return (IS_ENABLED(CONFIG_DRM_I915_GVT) && + i915_gem_context_force_single_submission(ce->gem_context)); +} + +static bool can_merge_ctx(const struct intel_context *prev, + const struct intel_context *next) +{ + if (prev != next) + return false; + + if (ctx_single_port_submission(prev)) + return false; + + return true; +} + +static bool can_merge_rq(const struct i915_request *prev, + const struct i915_request *next) +{ + GEM_BUG_ON(!assert_priority_queue(prev, next)); + + if (!can_merge_ctx(prev->hw_context, next->hw_context)) + return false; + + return true; +} + +static void port_assign(struct execlist_port *port, struct i915_request *rq) +{ + GEM_BUG_ON(rq == port_request(port)); + + if (port_isset(port)) + i915_request_put(port_request(port)); + + port_set(port, port_pack(i915_request_get(rq), port_count(port))); +} + +static void inject_preempt_context(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *execlists = &engine->execlists; + struct intel_context *ce = engine->preempt_context; + unsigned int n; + + GEM_BUG_ON(execlists->preempt_complete_status != + upper_32_bits(ce->lrc_desc)); + + /* + * Switch to our empty preempt context so + * the state of the GPU is known (idle). + */ + GEM_TRACE("%s\n", engine->name); + for (n = execlists_num_ports(execlists); --n; ) + write_desc(execlists, 0, n); + + write_desc(execlists, ce->lrc_desc, n); + + /* we need to manually load the submit queue */ + if (execlists->ctrl_reg) + writel(EL_CTRL_LOAD, execlists->ctrl_reg); + + execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); + execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); + + (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); +} + +static void complete_preempt_context(struct intel_engine_execlists *execlists) +{ + GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); + + if (inject_preempt_hang(execlists)) + return; + + execlists_cancel_port_requests(execlists); + __unwind_incomplete_requests(container_of(execlists, + struct intel_engine_cs, + execlists)); +} + +static void execlists_dequeue(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct execlist_port *port = execlists->port; + const struct execlist_port * const last_port = + &execlists->port[execlists->port_mask]; + struct i915_request *last = port_request(port); + struct rb_node *rb; + bool submit = false; + + /* + * Hardware submission is through 2 ports. Conceptually each port + * has a (RING_START, RING_HEAD, RING_TAIL) tuple. 
RING_START is + * static for a context, and unique to each, so we only execute + * requests belonging to a single context from each ring. RING_HEAD + * is maintained by the CS in the context image, it marks the place + * where it got up to last time, and through RING_TAIL we tell the CS + * where we want to execute up to this time. + * + * In this list the requests are in order of execution. Consecutive + * requests from the same context are adjacent in the ringbuffer. We + * can combine these requests into a single RING_TAIL update: + * + * RING_HEAD...req1...req2 + * ^- RING_TAIL + * since to execute req2 the CS must first execute req1. + * + * Our goal then is to point each port to the end of a consecutive + * sequence of requests as being the most optimal (fewest wake ups + * and context switches) submission. + */ + + if (last) { + /* + * Don't resubmit or switch until all outstanding + * preemptions (lite-restore) are seen. Then we + * know the next preemption status we see corresponds + * to this ELSP update. + */ + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_USER)); + GEM_BUG_ON(!port_count(&port[0])); + + /* + * If we write to ELSP a second time before the HW has had + * a chance to respond to the previous write, we can confuse + * the HW and hit "undefined behaviour". After writing to ELSP, + * we must then wait until we see a context-switch event from + * the HW to indicate that it has had a chance to respond. + */ + if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) + return; + + if (need_preempt(engine, last)) { + inject_preempt_context(engine); + return; + } + + /* + * In theory, we could coalesce more requests onto + * the second port (the first port is active, with + * no preemptions pending). However, that means we + * then have to deal with the possible lite-restore + * of the second port (as we submit the ELSP, there + * may be a context-switch) but also we may complete + * the resubmission before the context-switch. Ergo, + * coalescing onto the second port will cause a + * preemption event, but we cannot predict whether + * that will affect port[0] or port[1]. + * + * If the second port is already active, we can wait + * until the next context-switch before contemplating + * new requests. The GPU will be busy and we should be + * able to resubmit the new ELSP before it idles, + * avoiding pipeline bubbles (momentary pauses where + * the driver is unable to keep up the supply of new + * work). However, we have to double check that the + * priorities of the ports haven't been switch. + */ + if (port_count(&port[1])) + return; + + /* + * WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent + * ring:HEAD == rq:TAIL as we resubmit the + * request. See gen8_emit_fini_breadcrumb() for + * where we prepare the padding after the + * end of the request. + */ + last->tail = last->wa_tail; + } + + while ((rb = rb_first_cached(&execlists->queue))) { + struct i915_priolist *p = to_priolist(rb); + struct i915_request *rq, *rn; + int i; + + priolist_for_each_request_consume(rq, rn, p, i) { + /* + * Can we combine this request with the current port? + * It has to be the same context/ringbuffer and not + * have any exceptions (e.g. GVT saying never to + * combine contexts). + * + * If we can combine the requests, we can execute both + * by updating the RING_TAIL to point to the end of the + * second request, and so we never need to tell the + * hardware about the first. 
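The coalescing rule described above comes down to: walk the execution-ordered requests, keep folding requests from the same context into the current port (only the final RING_TAIL matters), open the second port on the first context change, and stop at a third context. A stand-alone sketch with invented toy_request/toy_port types:

#include <stdio.h>

struct toy_request {
	int ctx_id;	/* which context the request belongs to */
	int tail;	/* where its commands end in that ring   */
};

struct toy_port {
	int ctx_id;
	int tail;
	int valid;
};

/* Fill at most two ports from an execution-ordered request array. */
static void dequeue(const struct toy_request *rq, int count,
		    struct toy_port port[2])
{
	int n = -1;

	for (int i = 0; i < count; i++) {
		if (n < 0 || rq[i].ctx_id != port[n].ctx_id) {
			if (n == 1)	/* both ports busy: stop */
				break;
			n++;
			port[n].ctx_id = rq[i].ctx_id;
			port[n].valid = 1;
		}
		/* same context: just advance the tail we will submit */
		port[n].tail = rq[i].tail;
	}
}

int main(void)
{
	const struct toy_request q[] = {
		{ .ctx_id = 1, .tail = 0x100 },
		{ .ctx_id = 1, .tail = 0x180 },	/* merged into port[0] */
		{ .ctx_id = 2, .tail = 0x040 },	/* becomes port[1]     */
		{ .ctx_id = 3, .tail = 0x200 },	/* left on the queue   */
	};
	struct toy_port port[2] = { 0 };

	dequeue(q, 4, port);
	for (int i = 0; i < 2; i++)
		printf("port[%d]: ctx=%d tail=0x%x\n",
		       i, port[i].ctx_id, port[i].tail);
	return 0;
}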
+ */ + if (last && !can_merge_rq(last, rq)) { + /* + * If we are on the second port and cannot + * combine this request with the last, then we + * are done. + */ + if (port == last_port) + goto done; + + /* + * We must not populate both ELSP[] with the + * same LRCA, i.e. we must submit 2 different + * contexts if we submit 2 ELSP. + */ + if (last->hw_context == rq->hw_context) + goto done; + + /* + * If GVT overrides us we only ever submit + * port[0], leaving port[1] empty. Note that we + * also have to be careful that we don't queue + * the same context (even though a different + * request) to the second port. + */ + if (ctx_single_port_submission(last->hw_context) || + ctx_single_port_submission(rq->hw_context)) + goto done; + + + if (submit) + port_assign(port, last); + port++; + + GEM_BUG_ON(port_isset(port)); + } + + list_del_init(&rq->sched.link); + + __i915_request_submit(rq); + trace_i915_request_in(rq, port_index(port, execlists)); + + last = rq; + submit = true; + } + + rb_erase_cached(&p->node, &execlists->queue); + i915_priolist_free(p); + } + +done: + /* + * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. + * + * We choose the priority hint such that if we add a request of greater + * priority than this, we kick the submission tasklet to decide on + * the right order of submitting the requests to hardware. We must + * also be prepared to reorder requests as they are in-flight on the + * HW. We derive the priority hint then as the first "hole" in + * the HW submission ports and if there are no available slots, + * the priority of the lowest executing request, i.e. last. + * + * When we do receive a higher priority request ready to run from the + * user, see queue_request(), the priority hint is bumped to that + * request triggering preemption on the next dequeue (or subsequent + * interrupt for secondary ports). + */ + execlists->queue_priority_hint = queue_prio(execlists); + + if (submit) { + port_assign(port, last); + execlists_submit_ports(engine); + } + + /* We must always keep the beast fed if we have work piled up */ + GEM_BUG_ON(rb_first_cached(&execlists->queue) && + !port_isset(execlists->port)); + + /* Re-evaluate the executing context setup after each preemptive kick */ + if (last) + execlists_user_begin(execlists, execlists->port); + + /* If the engine is now idle, so should be the flag; and vice versa. */ + GEM_BUG_ON(execlists_is_active(&engine->execlists, + EXECLISTS_ACTIVE_USER) == + !port_isset(engine->execlists.port)); +} + +void +execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) +{ + struct execlist_port *port = execlists->port; + unsigned int num_ports = execlists_num_ports(execlists); + + while (num_ports-- && port_isset(port)) { + struct i915_request *rq = port_request(port); + + GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n", + rq->engine->name, + (unsigned int)(port - execlists->port), + rq->fence.context, rq->fence.seqno, + hwsp_seqno(rq)); + + GEM_BUG_ON(!execlists->active); + execlists_context_schedule_out(rq, + i915_request_completed(rq) ? 
+ INTEL_CONTEXT_SCHEDULE_OUT : + INTEL_CONTEXT_SCHEDULE_PREEMPTED); + + i915_request_put(rq); + + memset(port, 0, sizeof(*port)); + port++; + } + + execlists_clear_all_active(execlists); +} + +static inline void +invalidate_csb_entries(const u32 *first, const u32 *last) +{ + clflush((void *)first); + clflush((void *)last); +} + +static inline bool +reset_in_progress(const struct intel_engine_execlists *execlists) +{ + return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); +} + +static void process_csb(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct execlist_port *port = execlists->port; + const u32 * const buf = execlists->csb_status; + const u8 num_entries = execlists->csb_size; + u8 head, tail; + + lockdep_assert_held(&engine->timeline.lock); + + /* + * Note that csb_write, csb_status may be either in HWSP or mmio. + * When reading from the csb_write mmio register, we have to be + * careful to only use the GEN8_CSB_WRITE_PTR portion, which is + * the low 4bits. As it happens we know the next 4bits are always + * zero and so we can simply masked off the low u8 of the register + * and treat it identically to reading from the HWSP (without having + * to use explicit shifting and masking, and probably bifurcating + * the code to handle the legacy mmio read). + */ + head = execlists->csb_head; + tail = READ_ONCE(*execlists->csb_write); + GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail); + if (unlikely(head == tail)) + return; + + /* + * Hopefully paired with a wmb() in HW! + * + * We must complete the read of the write pointer before any reads + * from the CSB, so that we do not see stale values. Without an rmb + * (lfence) the HW may speculatively perform the CSB[] reads *before* + * we perform the READ_ONCE(*csb_write). + */ + rmb(); + + do { + struct i915_request *rq; + unsigned int status; + unsigned int count; + + if (++head == num_entries) + head = 0; + + /* + * We are flying near dragons again. + * + * We hold a reference to the request in execlist_port[] + * but no more than that. We are operating in softirq + * context and so cannot hold any mutex or sleep. That + * prevents us stopping the requests we are processing + * in port[] from being retired simultaneously (the + * breadcrumb will be complete before we see the + * context-switch). As we only hold the reference to the + * request, any pointer chasing underneath the request + * is subject to a potential use-after-free. Thus we + * store all of the bookkeeping within port[] as + * required, and avoid using unguarded pointers beneath + * request itself. The same applies to the atomic + * status notifier. + */ + + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", + engine->name, head, + buf[2 * head + 0], buf[2 * head + 1], + execlists->active); + + status = buf[2 * head]; + if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | + GEN8_CTX_STATUS_PREEMPTED)) + execlists_set_active(execlists, + EXECLISTS_ACTIVE_HWACK); + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_HWACK); + + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) + continue; + + /* We should never get a COMPLETED | IDLE_ACTIVE! 
*/ + GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); + + if (status & GEN8_CTX_STATUS_COMPLETE && + buf[2*head + 1] == execlists->preempt_complete_status) { + GEM_TRACE("%s preempt-idle\n", engine->name); + complete_preempt_context(execlists); + continue; + } + + if (status & GEN8_CTX_STATUS_PREEMPTED && + execlists_is_active(execlists, + EXECLISTS_ACTIVE_PREEMPT)) + continue; + + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_USER)); + + rq = port_unpack(port, &count); + GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", + engine->name, + port->context_id, count, + rq ? rq->fence.context : 0, + rq ? rq->fence.seqno : 0, + rq ? hwsp_seqno(rq) : 0, + rq ? rq_prio(rq) : 0); + + /* Check the context/desc id for this event matches */ + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); + + GEM_BUG_ON(count == 0); + if (--count == 0) { + /* + * On the final event corresponding to the + * submission of this context, we expect either + * an element-switch event or a completion + * event (and on completion, the active-idle + * marker). No more preemptions, lite-restore + * or otherwise. + */ + GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); + GEM_BUG_ON(port_isset(&port[1]) && + !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); + GEM_BUG_ON(!port_isset(&port[1]) && + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); + + /* + * We rely on the hardware being strongly + * ordered, that the breadcrumb write is + * coherent (visible from the CPU) before the + * user interrupt and CSB is processed. + */ + GEM_BUG_ON(!i915_request_completed(rq)); + + execlists_context_schedule_out(rq, + INTEL_CONTEXT_SCHEDULE_OUT); + i915_request_put(rq); + + GEM_TRACE("%s completed ctx=%d\n", + engine->name, port->context_id); + + port = execlists_port_complete(execlists, port); + if (port_isset(port)) + execlists_user_begin(execlists, port); + else + execlists_user_end(execlists); + } else { + port_set(port, port_pack(rq, count)); + } + } while (head != tail); + + execlists->csb_head = head; + + /* + * Gen11 has proven to fail wrt global observation point between + * entry and tail update, failing on the ordering and thus + * we see an old entry in the context status buffer. + * + * Forcibly evict out entries for the next gpu csb update, + * to increase the odds that we get a fresh entries with non + * working hardware. The cost for doing so comes out mostly with + * the wash as hardware, working or not, will need to do the + * invalidation before. + */ + invalidate_csb_entries(&buf[0], &buf[num_entries - 1]); +} + +static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) +{ + lockdep_assert_held(&engine->timeline.lock); + + process_csb(engine); + if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) + execlists_dequeue(engine); +} + +/* + * Check the unread Context Status Buffers and manage the submission of new + * contexts to the ELSP accordingly. 
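process_csb() above is in essence a single-consumer ring walk: the driver keeps its own head, the hardware publishes the write (tail) pointer, and each entry is a (status, context id) DWord pair. A stand-alone sketch of the wrap-and-walk part; the GEN8_CTX_STATUS_COMPLETE value is copied from the defines earlier in this file, the buffer contents are toy data:

#include <stdint.h>
#include <stdio.h>

#define GEN8_CTX_STATUS_COMPLETE (1 << 4)	/* from the defines above */
#define NUM_ENTRIES 6				/* toy CSB size */

/* Two dwords per entry: even index = status, odd index = context id. */
static const uint32_t csb[2 * NUM_ENTRIES] = {
	[2] = GEN8_CTX_STATUS_COMPLETE, [3] = 0x11,
	[4] = GEN8_CTX_STATUS_COMPLETE, [5] = 0x22,
};

static unsigned int process_csb(uint8_t *head, uint8_t tail)
{
	unsigned int completed = 0;

	if (*head == tail)
		return 0;		/* nothing new */

	do {
		if (++*head == NUM_ENTRIES)
			*head = 0;	/* wrap */

		if (csb[2 * *head] & GEN8_CTX_STATUS_COMPLETE) {
			printf("ctx 0x%x completed\n", csb[2 * *head + 1]);
			completed++;
		}
	} while (*head != tail);

	return completed;
}

int main(void)
{
	uint8_t head = 0;	/* last entry we consumed */

	printf("%u completions\n", process_csb(&head, 2));
	return 0;
}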
+ */ +static void execlists_submission_tasklet(unsigned long data) +{ + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + unsigned long flags; + + GEM_TRACE("%s awake?=%d, active=%x\n", + engine->name, + !!engine->i915->gt.awake, + engine->execlists.active); + + spin_lock_irqsave(&engine->timeline.lock, flags); + __execlists_submission_tasklet(engine); + spin_unlock_irqrestore(&engine->timeline.lock, flags); +} + +static void queue_request(struct intel_engine_cs *engine, + struct i915_sched_node *node, + int prio) +{ + list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio)); +} + +static void __submit_queue_imm(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + + if (reset_in_progress(execlists)) + return; /* defer until we restart the engine following reset */ + + if (execlists->tasklet.func == execlists_submission_tasklet) + __execlists_submission_tasklet(engine); + else + tasklet_hi_schedule(&execlists->tasklet); +} + +static void submit_queue(struct intel_engine_cs *engine, int prio) +{ + if (prio > engine->execlists.queue_priority_hint) { + engine->execlists.queue_priority_hint = prio; + __submit_queue_imm(engine); + } +} + +static void execlists_submit_request(struct i915_request *request) +{ + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. */ + spin_lock_irqsave(&engine->timeline.lock, flags); + + queue_request(engine, &request->sched, rq_prio(request)); + + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(list_empty(&request->sched.link)); + + submit_queue(engine, rq_prio(request)); + + spin_unlock_irqrestore(&engine->timeline.lock, flags); +} + +static void __execlists_context_fini(struct intel_context *ce) +{ + intel_ring_put(ce->ring); + + GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); + i915_gem_object_put(ce->state->obj); +} + +static void execlists_context_destroy(struct kref *kref) +{ + struct intel_context *ce = container_of(kref, typeof(*ce), ref); + + GEM_BUG_ON(intel_context_is_pinned(ce)); + + if (ce->state) + __execlists_context_fini(ce); + + intel_context_free(ce); +} + +static int __context_pin(struct i915_vma *vma) +{ + unsigned int flags; + int err; + + flags = PIN_GLOBAL | PIN_HIGH; + flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + return err; + + vma->obj->pin_global++; + vma->obj->mm.dirty = true; + + return 0; +} + +static void __context_unpin(struct i915_vma *vma) +{ + vma->obj->pin_global--; + __i915_vma_unpin(vma); +} + +static void execlists_context_unpin(struct intel_context *ce) +{ + struct intel_engine_cs *engine; + + /* + * The tasklet may still be using a pointer to our state, via an + * old request. However, since we know we only unpin the context + * on retirement of the following request, we know that the last + * request referencing us will have had a completion CS interrupt. + * If we see that it is still active, it means that the tasklet hasn't + * had the chance to run yet; let it run before we teardown the + * reference it may use. 
+ */ + engine = READ_ONCE(ce->active); + if (unlikely(engine)) { + unsigned long flags; + + spin_lock_irqsave(&engine->timeline.lock, flags); + process_csb(engine); + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + GEM_BUG_ON(READ_ONCE(ce->active)); + } + + i915_gem_context_unpin_hw_id(ce->gem_context); + + intel_ring_unpin(ce->ring); + + i915_gem_object_unpin_map(ce->state->obj); + __context_unpin(ce->state); +} + +static void +__execlists_update_reg_state(struct intel_context *ce, + struct intel_engine_cs *engine) +{ + struct intel_ring *ring = ce->ring; + u32 *regs = ce->lrc_reg_state; + + GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); + GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); + + regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma); + regs[CTX_RING_HEAD + 1] = ring->head; + regs[CTX_RING_TAIL + 1] = ring->tail; + + /* RPCS */ + if (engine->class == RENDER_CLASS) + regs[CTX_R_PWR_CLK_STATE + 1] = + intel_sseu_make_rpcs(engine->i915, &ce->sseu); +} + +static int +__execlists_context_pin(struct intel_context *ce, + struct intel_engine_cs *engine) +{ + void *vaddr; + int ret; + + GEM_BUG_ON(!ce->gem_context->ppgtt); + + ret = execlists_context_deferred_alloc(ce, engine); + if (ret) + goto err; + GEM_BUG_ON(!ce->state); + + ret = __context_pin(ce->state); + if (ret) + goto err; + + vaddr = i915_gem_object_pin_map(ce->state->obj, + i915_coherent_map_type(engine->i915) | + I915_MAP_OVERRIDE); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + goto unpin_vma; + } + + ret = intel_ring_pin(ce->ring); + if (ret) + goto unpin_map; + + ret = i915_gem_context_pin_hw_id(ce->gem_context); + if (ret) + goto unpin_ring; + + ce->lrc_desc = lrc_descriptor(ce, engine); + ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; + __execlists_update_reg_state(ce, engine); + + return 0; + +unpin_ring: + intel_ring_unpin(ce->ring); +unpin_map: + i915_gem_object_unpin_map(ce->state->obj); +unpin_vma: + __context_unpin(ce->state); +err: + return ret; +} + +static int execlists_context_pin(struct intel_context *ce) +{ + return __execlists_context_pin(ce, ce->engine); +} + +static void execlists_context_reset(struct intel_context *ce) +{ + /* + * Because we emit WA_TAIL_DWORDS there may be a disparity + * between our bookkeeping in ce->ring->head and ce->ring->tail and + * that stored in context. As we only write new commands from + * ce->ring->tail onwards, everything before that is junk. If the GPU + * starts reading from its RING_HEAD from the context, it may try to + * execute that junk and die. + * + * The contexts that are stilled pinned on resume belong to the + * kernel, and are local to each engine. All other contexts will + * have their head/tail sanitized upon pinning before use, so they + * will never see garbage, + * + * So to avoid that we reset the context images upon resume. For + * simplicity, we just zero everything out. + */ + intel_ring_reset(ce->ring, 0); + __execlists_update_reg_state(ce, ce->engine); +} + +static const struct intel_context_ops execlists_context_ops = { + .pin = execlists_context_pin, + .unpin = execlists_context_unpin, + + .reset = execlists_context_reset, + .destroy = execlists_context_destroy, +}; + +static int gen8_emit_init_breadcrumb(struct i915_request *rq) +{ + u32 *cs; + + GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Check if we have been preempted before we even get started. 
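+ * (The MI_ARB_CHECK emitted below provides that early preemption
+ * point, and the GGTT store of fence.seqno - 1 to the timeline HWSP
+ * is the initial breadcrumb after which i915_request_started()
+ * reports true.)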
+ * + * After this point i915_request_started() reports true, even if + * we get preempted and so are no longer running. + */ + *cs++ = MI_ARB_CHECK; + *cs++ = MI_NOOP; + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = rq->timeline->hwsp_offset; + *cs++ = 0; + *cs++ = rq->fence.seqno - 1; + + intel_ring_advance(rq, cs); + + /* Record the updated position of the request's payload */ + rq->infix = intel_ring_offset(rq, cs); + + return 0; +} + +static int emit_pdps(struct i915_request *rq) +{ + const struct intel_engine_cs * const engine = rq->engine; + struct i915_hw_ppgtt * const ppgtt = rq->gem_context->ppgtt; + int err, i; + u32 *cs; + + GEM_BUG_ON(intel_vgpu_active(rq->i915)); + + /* + * Beware ye of the dragons, this sequence is magic! + * + * Small changes to this sequence can cause anything from + * GPU hangs to forcewake errors and machine lockups! + */ + + /* Flush any residual operations from the context load */ + err = engine->emit_flush(rq, EMIT_FLUSH); + if (err) + return err; + + /* Magic required to prevent forcewake errors! */ + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + return err; + + cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Ensure the LRI have landed before we invalidate & continue */ + *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; + for (i = GEN8_3LVL_PDPES; i--; ) { + const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); + u32 base = engine->mmio_base; + + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); + *cs++ = lower_32_bits(pd_daddr); + } + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + /* Be doubly sure the LRI have landed before proceeding */ + err = engine->emit_flush(rq, EMIT_FLUSH); + if (err) + return err; + + /* Re-invalidate the TLB for luck */ + return engine->emit_flush(rq, EMIT_INVALIDATE); +} + +static int execlists_request_alloc(struct i915_request *request) +{ + int ret; + + GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); + + /* + * Flush enough space to reduce the likelihood of waiting after + * we start building the request - in which case we will just + * have to repeat work. + */ + request->reserved_space += EXECLISTS_REQUEST_SIZE; + + /* + * Note that after this point, we have committed to using + * this request as it is being used to both track the + * state of engine initialisation and liveness of the + * golden renderstate above. Think twice before you try + * to cancel/unwind this request now. + */ + + /* Unconditionally invalidate GPU caches and TLBs. */ + if (i915_vm_is_4lvl(&request->gem_context->ppgtt->vm)) + ret = request->engine->emit_flush(request, EMIT_INVALIDATE); + else + ret = emit_pdps(request); + if (ret) + return ret; + + request->reserved_space -= EXECLISTS_REQUEST_SIZE; + return 0; +} + +/* + * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after + * PIPE_CONTROL instruction. This is required for the flush to happen correctly + * but there is a slight complication as this is applied in WA batch where the + * values are only initialized once so we cannot take register value at the + * beginning and reuse it further; hence we save its value to memory, upload a + * constant value with bit21 set and then we restore it back with the saved value. + * To simplify the WA, a constant value is formed by using the default value + * of this register. 
This shouldn't be a problem because we are only modifying
+ * it for a short period and this batch is non-preemptible. We can of course
+ * use additional instructions that read the actual value of the register
+ * at that time and set our bit of interest but it makes the WA complicated.
+ *
+ * This WA is also required for Gen9 so extracting as a function avoids
+ * code duplication.
+ */
+static u32 *
+gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
+{
+ /* NB no one else is allowed to scribble over scratch + 256! */
+ *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+ *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+ *batch++ = i915_scratch_offset(engine->i915) + 256;
+ *batch++ = 0;
+
+ *batch++ = MI_LOAD_REGISTER_IMM(1);
+ *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+ *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
+
+ batch = gen8_emit_pipe_control(batch,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_DC_FLUSH_ENABLE,
+ 0);
+
+ *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+ *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+ *batch++ = i915_scratch_offset(engine->i915) + 256;
+ *batch++ = 0;
+
+ return batch;
+}
+
+/*
+ * Typically we only have one indirect_ctx and per_ctx batch buffer which are
+ * initialized at the beginning and shared across all contexts but this field
+ * helps us to have multiple batches at different offsets and select them based
+ * on a criterion. At the moment this batch always starts at the beginning of the page
+ * and at this point we don't have multiple wa_ctx batch buffers.
+ *
+ * The number of WAs applied is not known at the beginning; we use this field
+ * to return the number of DWORDS written.
+ *
+ * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
+ * so it adds NOOPs as padding to make it cacheline aligned.
+ * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
+ * make a complete batch buffer.
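+ * (gen8_init_indirectctx_bb() below is one such batch: it only pads
+ * with MI_NOOPs to a cacheline boundary and never emits
+ * MI_BATCH_BUFFER_END itself.)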
+ */ +static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) +{ + /* WaDisableCtxRestoreArbitration:bdw,chv */ + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + + /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */ + if (IS_BROADWELL(engine->i915)) + batch = gen8_emit_flush_coherentl3_wa(engine, batch); + + /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ + /* Actual scratch location is at 128 bytes offset */ + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + i915_scratch_offset(engine->i915) + + 2 * CACHELINE_BYTES); + + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + + /* Pad to end of cacheline */ + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; + + /* + * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because + * execution depends on the length specified in terms of cache lines + * in the register CTX_RCS_INDIRECT_CTX + */ + + return batch; +} + +struct lri { + i915_reg_t reg; + u32 value; +}; + +static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count) +{ + GEM_BUG_ON(!count || count > 63); + + *batch++ = MI_LOAD_REGISTER_IMM(count); + do { + *batch++ = i915_mmio_reg_offset(lri->reg); + *batch++ = lri->value; + } while (lri++, --count); + *batch++ = MI_NOOP; + + return batch; +} + +static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) +{ + static const struct lri lri[] = { + /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ + { + COMMON_SLICE_CHICKEN2, + __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE, + 0), + }, + + /* BSpec: 11391 */ + { + FF_SLICE_CHICKEN, + __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX, + FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX), + }, + + /* BSpec: 11299 */ + { + _3D_CHICKEN3, + __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX, + _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX), + } + }; + + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + + /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ + batch = gen8_emit_flush_coherentl3_wa(engine, batch); + + batch = emit_lri(batch, lri, ARRAY_SIZE(lri)); + + /* WaMediaPoolStateCmdInWABB:bxt,glk */ + if (HAS_POOLED_EU(engine->i915)) { + /* + * EU pool configuration is setup along with golden context + * during context initialization. This value depends on + * device type (2x6 or 3x6) and needs to be updated based + * on which subslice is disabled especially for 2x6 + * devices, however it is safe to load default + * configuration of 3x6 device instead of masking off + * corresponding bits because HW ignores bits of a disabled + * subslice and drops down to appropriate config. Please + * see render_state_setup() in i915_gem_render_state.c for + * possible configurations, to avoid duplication they are + * not shown here again. + */ + *batch++ = GEN9_MEDIA_POOL_STATE; + *batch++ = GEN9_MEDIA_POOL_ENABLE; + *batch++ = 0x00777000; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; + } + + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + + /* Pad to end of cacheline */ + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; + + return batch; +} + +static u32 * +gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) +{ + int i; + + /* + * WaPipeControlBefore3DStateSamplePattern: cnl + * + * Ensure the engine is idle prior to programming a + * 3DSTATE_SAMPLE_PATTERN during a context restore. 
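+ * (Implemented below as a CS_STALL PIPE_CONTROL followed by MI_NOOP
+ * padding out to a full cacheline.)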
+ */ + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_CS_STALL, + 0); + /* + * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for + * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in + * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is + * confusing. Since gen8_emit_pipe_control() already advances the + * batch by 6 dwords, we advance the other 10 here, completing a + * cacheline. It's not clear if the workaround requires this padding + * before other commands, or if it's just the regular padding we would + * already have for the workaround bb, so leave it here for now. + */ + for (i = 0; i < 10; i++) + *batch++ = MI_NOOP; + + /* Pad to end of cacheline */ + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; + + return batch; +} + +#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE) + +static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto err; + + engine->wa_ctx.vma = vma; + return 0; + +err: + i915_gem_object_put(obj); + return err; +} + +static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) +{ + i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); +} + +typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); + +static int intel_init_workaround_bb(struct intel_engine_cs *engine) +{ + struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; + struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx, + &wa_ctx->per_ctx }; + wa_bb_func_t wa_bb_fn[2]; + struct page *page; + void *batch, *batch_ptr; + unsigned int i; + int ret; + + if (GEM_DEBUG_WARN_ON(engine->id != RCS0)) + return -EINVAL; + + switch (INTEL_GEN(engine->i915)) { + case 11: + return 0; + case 10: + wa_bb_fn[0] = gen10_init_indirectctx_bb; + wa_bb_fn[1] = NULL; + break; + case 9: + wa_bb_fn[0] = gen9_init_indirectctx_bb; + wa_bb_fn[1] = NULL; + break; + case 8: + wa_bb_fn[0] = gen8_init_indirectctx_bb; + wa_bb_fn[1] = NULL; + break; + default: + MISSING_CASE(INTEL_GEN(engine->i915)); + return 0; + } + + ret = lrc_setup_wa_ctx(engine); + if (ret) { + DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret); + return ret; + } + + page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0); + batch = batch_ptr = kmap_atomic(page); + + /* + * Emit the two workaround batch buffers, recording the offset from the + * start of the workaround batch buffer object for each and their + * respective sizes. 
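+ * (Both batches share the single CTX_WA_BB_OBJ_SIZE page set up by
+ * lrc_setup_wa_ctx() above, and each offset is expected to stay
+ * cacheline aligned.)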
+ */ + for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { + wa_bb[i]->offset = batch_ptr - batch; + if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, + CACHELINE_BYTES))) { + ret = -EINVAL; + break; + } + if (wa_bb_fn[i]) + batch_ptr = wa_bb_fn[i](engine, batch_ptr); + wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); + } + + BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE); + + kunmap_atomic(batch); + if (ret) + lrc_destroy_wa_ctx(engine); + + return ret; +} + +static void enable_execlists(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ + + if (INTEL_GEN(dev_priv) >= 11) + I915_WRITE(RING_MODE_GEN7(engine), + _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); + else + I915_WRITE(RING_MODE_GEN7(engine), + _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); + + I915_WRITE(RING_MI_MODE(engine->mmio_base), + _MASKED_BIT_DISABLE(STOP_RING)); + + I915_WRITE(RING_HWS_PGA(engine->mmio_base), + i915_ggtt_offset(engine->status_page.vma)); + POSTING_READ(RING_HWS_PGA(engine->mmio_base)); +} + +static bool unexpected_starting_state(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + bool unexpected = false; + + if (I915_READ(RING_MI_MODE(engine->mmio_base)) & STOP_RING) { + DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n"); + unexpected = true; + } + + return unexpected; +} + +static int gen8_init_common_ring(struct intel_engine_cs *engine) +{ + intel_engine_apply_workarounds(engine); + intel_engine_apply_whitelist(engine); + + intel_mocs_init_engine(engine); + + intel_engine_reset_breadcrumbs(engine); + + if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) { + struct drm_printer p = drm_debug_printer(__func__); + + intel_engine_dump(engine, &p, NULL); + } + + enable_execlists(engine); + + return 0; +} + +static void execlists_reset_prepare(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + unsigned long flags; + + GEM_TRACE("%s: depth<-%d\n", engine->name, + atomic_read(&execlists->tasklet.count)); + + /* + * Prevent request submission to the hardware until we have + * completed the reset in i915_gem_reset_finish(). If a request + * is completed by one engine, it may then queue a request + * to a second via its execlists->tasklet *just* as we are + * calling engine->init_hw() and also writing the ELSP. + * Turning off the execlists->tasklet until the reset is over + * prevents the race. + */ + __tasklet_disable_sync_once(&execlists->tasklet); + GEM_BUG_ON(!reset_in_progress(execlists)); + + intel_engine_stop_cs(engine); + + /* And flush any current direct submission. */ + spin_lock_irqsave(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); +} + +static bool lrc_regs_ok(const struct i915_request *rq) +{ + const struct intel_ring *ring = rq->ring; + const u32 *regs = rq->hw_context->lrc_reg_state; + + /* Quick spot check for the common signs of context corruption */ + + if (regs[CTX_RING_BUFFER_CONTROL + 1] != + (RING_CTL_SIZE(ring->size) | RING_VALID)) + return false; + + if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma)) + return false; + + return true; +} + +static void reset_csb_pointers(struct intel_engine_execlists *execlists) +{ + const unsigned int reset_value = execlists->csb_size - 1; + + /* + * After a reset, the HW starts writing into CSB entry [0]. 
We + * therefore have to set our HEAD pointer back one entry so that + * the *first* entry we check is entry 0. To complicate this further, + * as we don't wait for the first interrupt after reset, we have to + * fake the HW write to point back to the last entry so that our + * inline comparison of our cached head position against the last HW + * write works even before the first interrupt. + */ + execlists->csb_head = reset_value; + WRITE_ONCE(*execlists->csb_write, reset_value); + wmb(); /* Make sure this is visible to HW (paranoia?) */ + + invalidate_csb_entries(&execlists->csb_status[0], + &execlists->csb_status[reset_value]); +} + +static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct intel_context *ce; + struct i915_request *rq; + u32 *regs; + + process_csb(engine); /* drain preemption events */ + + /* Following the reset, we need to reload the CSB read/write pointers */ + reset_csb_pointers(&engine->execlists); + + /* + * Save the currently executing context, even if we completed + * its request, it was still running at the time of the + * reset and will have been clobbered. + */ + if (!port_isset(execlists->port)) + goto out_clear; + + ce = port_request(execlists->port)->hw_context; + + /* + * Catch up with any missed context-switch interrupts. + * + * Ideally we would just read the remaining CSB entries now that we + * know the gpu is idle. However, the CSB registers are sometimes^W + * often trashed across a GPU reset! Instead we have to rely on + * guessing the missed context-switch events by looking at what + * requests were completed. + */ + execlists_cancel_port_requests(execlists); + + /* Push back any incomplete requests for replay after the reset. */ + rq = __unwind_incomplete_requests(engine); + if (!rq) + goto out_replay; + + if (rq->hw_context != ce) { /* caught just before a CS event */ + rq = NULL; + goto out_replay; + } + + /* + * If this request hasn't started yet, e.g. it is waiting on a + * semaphore, we need to avoid skipping the request or else we + * break the signaling chain. However, if the context is corrupt + * the request will not restart and we will be stuck with a wedged + * device. It is quite often the case that if we issue a reset + * while the GPU is loading the context image, that the context + * image becomes corrupt. + * + * Otherwise, if we have not started yet, the request should replay + * perfectly and we do not need to flag the result as being erroneous. + */ + if (!i915_request_started(rq) && lrc_regs_ok(rq)) + goto out_replay; + + /* + * If the request was innocent, we leave the request in the ELSP + * and will try to replay it on restarting. The context image may + * have been corrupted by the reset, in which case we may have + * to service a new GPU hang, but more likely we can continue on + * without impact. + * + * If the request was guilty, we presume the context is corrupt + * and have to at least restore the RING register in the context + * image back to the expected values to skip over the guilty request. + */ + i915_reset_request(rq, stalled); + if (!stalled && lrc_regs_ok(rq)) + goto out_replay; + + /* + * We want a simple context + ring to execute the breadcrumb update. + * We cannot rely on the context being intact across the GPU hang, + * so clear it and rebuild just what we need for the breadcrumb. 
+ * All pending requests for this context will be zapped, and any + * future request will be after userspace has had the opportunity + * to recreate its own state. + */ + regs = ce->lrc_reg_state; + if (engine->pinned_default_state) { + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + } + execlists_init_reg_state(regs, ce, engine, ce->ring); + + /* Rerun the request; its payload has been neutered (if guilty). */ +out_replay: + ce->ring->head = + rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail; + intel_ring_update_space(ce->ring); + __execlists_update_reg_state(ce, engine); + +out_clear: + execlists_clear_all_active(execlists); +} + +static void execlists_reset(struct intel_engine_cs *engine, bool stalled) +{ + unsigned long flags; + + GEM_TRACE("%s\n", engine->name); + + spin_lock_irqsave(&engine->timeline.lock, flags); + + __execlists_reset(engine, stalled); + + spin_unlock_irqrestore(&engine->timeline.lock, flags); +} + +static void nop_submission_tasklet(unsigned long data) +{ + /* The driver is wedged; don't process any more events. */ +} + +static void execlists_cancel_requests(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request *rq, *rn; + struct rb_node *rb; + unsigned long flags; + + GEM_TRACE("%s\n", engine->name); + + /* + * Before we call engine->cancel_requests(), we should have exclusive + * access to the submission state. This is arranged for us by the + * caller disabling the interrupt generation, the tasklet and other + * threads that may then access the same state, giving us a free hand + * to reset state. However, we still need to let lockdep be aware that + * we know this state may be accessed in hardirq context, so we + * disable the irq around this manipulation and we want to keep + * the spinlock focused on its duties and not accidentally conflate + * coverage to the submission's irq state. (Similarly, although we + * shouldn't need to disable irq around the manipulation of the + * submission's irq state, we also wish to remind ourselves that + * it is irq state.) + */ + spin_lock_irqsave(&engine->timeline.lock, flags); + + __execlists_reset(engine, true); + + /* Mark all executing requests as skipped. */ + list_for_each_entry(rq, &engine->timeline.requests, link) { + if (!i915_request_signaled(rq)) + dma_fence_set_error(&rq->fence, -EIO); + + i915_request_mark_complete(rq); + } + + /* Flush the queued requests to the timeline list (for retiring). 
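+ * (Each queued request is submitted, has its fence flagged with -EIO
+ * and is marked complete, so that it can still be retired normally.)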
*/ + while ((rb = rb_first_cached(&execlists->queue))) { + struct i915_priolist *p = to_priolist(rb); + int i; + + priolist_for_each_request_consume(rq, rn, p, i) { + list_del_init(&rq->sched.link); + __i915_request_submit(rq); + dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); + } + + rb_erase_cached(&p->node, &execlists->queue); + i915_priolist_free(p); + } + + /* Remaining _unready_ requests will be nop'ed when submitted */ + + execlists->queue_priority_hint = INT_MIN; + execlists->queue = RB_ROOT_CACHED; + GEM_BUG_ON(port_isset(execlists->port)); + + GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); + execlists->tasklet.func = nop_submission_tasklet; + + spin_unlock_irqrestore(&engine->timeline.lock, flags); +} + +static void execlists_reset_finish(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + + /* + * After a GPU reset, we may have requests to replay. Do so now while + * we still have the forcewake to be sure that the GPU is not allowed + * to sleep before we restart and reload a context. + */ + GEM_BUG_ON(!reset_in_progress(execlists)); + if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) + execlists->tasklet.func(execlists->tasklet.data); + + if (__tasklet_enable(&execlists->tasklet)) + /* And kick in case we missed a new request submission. */ + tasklet_hi_schedule(&execlists->tasklet); + GEM_TRACE("%s: depth->%d\n", engine->name, + atomic_read(&execlists->tasklet.count)); +} + +static int gen8_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * WaDisableCtxRestoreArbitration:bdw,chv + * + * We don't need to perform MI_ARB_ENABLE as often as we do (in + * particular all the gen that do not need the w/a at all!), if we + * took care to make sure that on every switch into this context + * (both ordinary and for preemption) that arbitrartion was enabled + * we would be fine. However, for gen8 there is another w/a that + * requires us to not preempt inside GPGPU execution, so we keep + * arbitration disabled for gen8 batches. Arbitration will be + * re-enabled before we close the request + * (engine->emit_fini_breadcrumb). + */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + + /* FIXME(BDW+): Address space and security selectors. */ + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + + intel_ring_advance(rq, cs); + + return 0; +} + +static int gen9_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8)); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + return 0; +} + +static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, + ~(engine->irq_enable_mask | engine->irq_keep_mask)); + ENGINE_POSTING_READ(engine, RING_IMR); +} + +static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); +} + +static int gen8_emit_flush(struct i915_request *request, u32 mode) +{ + u32 cmd, *cs; + + cs = intel_ring_begin(request, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cmd = MI_FLUSH_DW + 1; + + /* We always require a command barrier so that subsequent + * commands, such as breadcrumb interrupts, are strictly ordered + * wrt the contents of the write cache being flushed to memory + * (and thus being coherent from the CPU). + */ + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; + + if (mode & EMIT_INVALIDATE) { + cmd |= MI_INVALIDATE_TLB; + if (request->engine->class == VIDEO_DECODE_CLASS) + cmd |= MI_INVALIDATE_BSD; + } + + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ + intel_ring_advance(request, cs); + + return 0; +} + +static int gen8_emit_flush_render(struct i915_request *request, + u32 mode) +{ + struct intel_engine_cs *engine = request->engine; + u32 scratch_addr = + i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES; + bool vf_flush_wa = false, dc_flush_wa = false; + u32 *cs, flags = 0; + int len; + + flags |= PIPE_CONTROL_CS_STALL; + + if (mode & EMIT_FLUSH) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; + flags |= PIPE_CONTROL_FLUSH_ENABLE; + } + + if (mode & EMIT_INVALIDATE) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + + /* + * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL + * pipe control. + */ + if (IS_GEN(request->i915, 9)) + vf_flush_wa = true; + + /* WaForGAMHang:kbl */ + if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0)) + dc_flush_wa = true; + } + + len = 6; + + if (vf_flush_wa) + len += 6; + + if (dc_flush_wa) + len += 12; + + cs = intel_ring_begin(request, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (vf_flush_wa) + cs = gen8_emit_pipe_control(cs, 0, 0); + + if (dc_flush_wa) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); + + cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + + if (dc_flush_wa) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); + + intel_ring_advance(request, cs); + + return 0; +} + +/* + * Reserve space for 2 NOOPs at the end of each request to be + * used as a workaround for not being allowed to do lite + * restore with HEAD==TAIL (WaIdleLiteRestore). + */ +static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) +{ + /* Ensure there's always at least one preemption point per-request. 
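+ * (The MI_ARB_CHECK written below provides that preemption point as
+ * part of the WaIdleLiteRestore padding described above.)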
*/
+ *cs++ = MI_ARB_CHECK;
+ *cs++ = MI_NOOP;
+ request->wa_tail = intel_ring_offset(request, cs);
+
+ return cs;
+}
+
+static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+{
+ cs = gen8_emit_ggtt_write(cs,
+ request->fence.seqno,
+ request->timeline->hwsp_offset,
+ 0);
+
+ cs = gen8_emit_ggtt_write(cs,
+ intel_engine_next_hangcheck_seqno(request->engine),
+ I915_GEM_HWS_HANGCHECK_ADDR,
+ MI_FLUSH_DW_STORE_INDEX);
+
+
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+ request->tail = intel_ring_offset(request, cs);
+ assert_ring_tail_valid(request->ring, request->tail);
+
+ return gen8_emit_wa_tail(request, cs);
+}
+
+static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
+{
+ cs = gen8_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ request->timeline->hwsp_offset,
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE |
+ PIPE_CONTROL_CS_STALL);
+
+ cs = gen8_emit_ggtt_write_rcs(cs,
+ intel_engine_next_hangcheck_seqno(request->engine),
+ I915_GEM_HWS_HANGCHECK_ADDR,
+ PIPE_CONTROL_STORE_DATA_INDEX);
+
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+ request->tail = intel_ring_offset(request, cs);
+ assert_ring_tail_valid(request->ring, request->tail);
+
+ return gen8_emit_wa_tail(request, cs);
+}
+
+static int gen8_init_rcs_context(struct i915_request *rq)
+{
+ int ret;
+
+ ret = intel_engine_emit_ctx_wa(rq);
+ if (ret)
+ return ret;
+
+ ret = intel_rcs_context_init_mocs(rq);
+ /*
+ * Failing to program the MOCS is non-fatal. The system will not
+ * run at peak performance. So generate an error and carry on.
+ */
+ if (ret)
+ DRM_ERROR("MOCS failed to program: expect performance issues.\n");
+
+ return i915_gem_render_state_emit(rq);
+}
+
+/**
+ * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
+ * @engine: Engine Command Streamer.
+ */
+void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *dev_priv;
+
+ /*
+ * Tasklet cannot be active at this point due to intel_mark_active/idle
+ * so this is just for documentation.
+ */
+ if (WARN_ON(test_bit(TASKLET_STATE_SCHED,
+ &engine->execlists.tasklet.state)))
+ tasklet_kill(&engine->execlists.tasklet);
+
+ dev_priv = engine->i915;
+
+ if (engine->buffer) {
+ WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
+ }
+
+ if (engine->cleanup)
+ engine->cleanup(engine);
+
+ intel_engine_cleanup_common(engine);
+
+ lrc_destroy_wa_ctx(engine);
+
+ engine->i915 = NULL;
+ dev_priv->engine[engine->id] = NULL;
+ kfree(engine);
+}
+
+void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
+{
+ engine->submit_request = execlists_submit_request;
+ engine->cancel_requests = execlists_cancel_requests;
+ engine->schedule = i915_schedule;
+ engine->execlists.tasklet.func = execlists_submission_tasklet;
+
+ engine->reset.prepare = execlists_reset_prepare;
+ engine->reset.reset = execlists_reset;
+ engine->reset.finish = execlists_reset_finish;
+
+ engine->park = NULL;
+ engine->unpark = NULL;
+
+ engine->flags |= I915_ENGINE_SUPPORTS_STATS;
+ if (!intel_vgpu_active(engine->i915))
+ engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
+ if (engine->preempt_context &&
+ HAS_LOGICAL_RING_PREEMPTION(engine->i915))
+ engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+}
+
+static void
+logical_ring_default_vfuncs(struct intel_engine_cs *engine)
+{
+ /* Default vfuncs which can be overridden by each engine.
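+ * (logical_render_ring_init(), for instance, overrides init_context,
+ * emit_flush and emit_fini_breadcrumb with render-specific versions.)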
*/ + engine->init_hw = gen8_init_common_ring; + + engine->reset.prepare = execlists_reset_prepare; + engine->reset.reset = execlists_reset; + engine->reset.finish = execlists_reset_finish; + + engine->cops = &execlists_context_ops; + engine->request_alloc = execlists_request_alloc; + + engine->emit_flush = gen8_emit_flush; + engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; + + engine->set_default_submission = intel_execlists_set_default_submission; + + if (INTEL_GEN(engine->i915) < 11) { + engine->irq_enable = gen8_logical_ring_enable_irq; + engine->irq_disable = gen8_logical_ring_disable_irq; + } else { + /* + * TODO: On Gen11 interrupt masks need to be clear + * to allow C6 entry. Keep interrupts enabled at + * and take the hit of generating extra interrupts + * until a more refined solution exists. + */ + } + if (IS_GEN(engine->i915, 8)) + engine->emit_bb_start = gen8_emit_bb_start; + else + engine->emit_bb_start = gen9_emit_bb_start; +} + +static inline void +logical_ring_default_irqs(struct intel_engine_cs *engine) +{ + unsigned int shift = 0; + + if (INTEL_GEN(engine->i915) < 11) { + const u8 irq_shifts[] = { + [RCS0] = GEN8_RCS_IRQ_SHIFT, + [BCS0] = GEN8_BCS_IRQ_SHIFT, + [VCS0] = GEN8_VCS0_IRQ_SHIFT, + [VCS1] = GEN8_VCS1_IRQ_SHIFT, + [VECS0] = GEN8_VECS_IRQ_SHIFT, + }; + + shift = irq_shifts[engine->id]; + } + + engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; + engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; +} + +static int +logical_ring_setup(struct intel_engine_cs *engine) +{ + int err; + + err = intel_engine_setup_common(engine); + if (err) + return err; + + /* Intentionally left blank. */ + engine->buffer = NULL; + + tasklet_init(&engine->execlists.tasklet, + execlists_submission_tasklet, (unsigned long)engine); + + logical_ring_default_vfuncs(engine); + logical_ring_default_irqs(engine); + + return 0; +} + +static int logical_ring_init(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + struct intel_engine_execlists * const execlists = &engine->execlists; + u32 base = engine->mmio_base; + int ret; + + ret = intel_engine_init_common(engine); + if (ret) + return ret; + + intel_engine_init_workarounds(engine); + + if (HAS_LOGICAL_RING_ELSQ(i915)) { + execlists->submit_reg = i915->uncore.regs + + i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)); + execlists->ctrl_reg = i915->uncore.regs + + i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)); + } else { + execlists->submit_reg = i915->uncore.regs + + i915_mmio_reg_offset(RING_ELSP(base)); + } + + execlists->preempt_complete_status = ~0u; + if (engine->preempt_context) + execlists->preempt_complete_status = + upper_32_bits(engine->preempt_context->lrc_desc); + + execlists->csb_status = + &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; + + execlists->csb_write = + &engine->status_page.addr[intel_hws_csb_write_index(i915)]; + + if (INTEL_GEN(engine->i915) < 11) + execlists->csb_size = GEN8_CSB_ENTRIES; + else + execlists->csb_size = GEN11_CSB_ENTRIES; + + reset_csb_pointers(execlists); + + return 0; +} + +int logical_render_ring_init(struct intel_engine_cs *engine) +{ + int ret; + + ret = logical_ring_setup(engine); + if (ret) + return ret; + + /* Override some for render ring. 
*/ + engine->init_context = gen8_init_rcs_context; + engine->emit_flush = gen8_emit_flush_render; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; + + ret = logical_ring_init(engine); + if (ret) + return ret; + + ret = intel_init_workaround_bb(engine); + if (ret) { + /* + * We continue even if we fail to initialize WA batch + * because we only expect rare glitches but nothing + * critical to prevent us from using GPU + */ + DRM_ERROR("WA batch buffer initialization failed: %d\n", + ret); + } + + intel_engine_init_whitelist(engine); + + return 0; +} + +int logical_xcs_ring_init(struct intel_engine_cs *engine) +{ + int err; + + err = logical_ring_setup(engine); + if (err) + return err; + + return logical_ring_init(engine); +} + +static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) +{ + u32 indirect_ctx_offset; + + switch (INTEL_GEN(engine->i915)) { + default: + MISSING_CASE(INTEL_GEN(engine->i915)); + /* fall through */ + case 11: + indirect_ctx_offset = + GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + break; + case 10: + indirect_ctx_offset = + GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + break; + case 9: + indirect_ctx_offset = + GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + break; + case 8: + indirect_ctx_offset = + GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + break; + } + + return indirect_ctx_offset; +} + +static void execlists_init_reg_state(u32 *regs, + struct intel_context *ce, + struct intel_engine_cs *engine, + struct intel_ring *ring) +{ + struct i915_hw_ppgtt *ppgtt = ce->gem_context->ppgtt; + bool rcs = engine->class == RENDER_CLASS; + u32 base = engine->mmio_base; + + /* A context is actually a big batch buffer with several + * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The + * values we are setting here are only for the first context restore: + * on a subsequent save, the GPU will recreate this batchbuffer with new + * values (including all the missing MI_LOAD_REGISTER_IMM commands that + * we are not initializing here). + */ + regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 
14 : 11) | + MI_LRI_FORCE_POSTED; + + CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base), + _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | + _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH)); + if (INTEL_GEN(engine->i915) < 11) { + regs[CTX_CONTEXT_CONTROL + 1] |= + _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | + CTX_CTRL_RS_CTX_ENABLE); + } + CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); + CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0); + CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0); + CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base), + RING_CTL_SIZE(ring->size) | RING_VALID); + CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); + CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); + CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); + CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); + CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); + CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); + if (rcs) { + struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; + + CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0); + CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET, + RING_INDIRECT_CTX_OFFSET(base), 0); + if (wa_ctx->indirect_ctx.size) { + u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); + + regs[CTX_RCS_INDIRECT_CTX + 1] = + (ggtt_offset + wa_ctx->indirect_ctx.offset) | + (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); + + regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] = + intel_lr_indirect_ctx_offset(engine) << 6; + } + + CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0); + if (wa_ctx->per_ctx.size) { + u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); + + regs[CTX_BB_PER_CTX_PTR + 1] = + (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; + } + } + + regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; + + CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); + /* PDP values well be assigned later if needed */ + CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0); + CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0); + CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0); + CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0); + CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0); + CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0); + CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0); + CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0); + + if (i915_vm_is_4lvl(&ppgtt->vm)) { + /* 64b PPGTT (48bit canonical) + * PDP0_DESCRIPTOR contains the base address to PML4 and + * other PDP Descriptors are ignored. + */ + ASSIGN_CTX_PML4(ppgtt, regs); + } else { + ASSIGN_CTX_PDP(ppgtt, regs, 3); + ASSIGN_CTX_PDP(ppgtt, regs, 2); + ASSIGN_CTX_PDP(ppgtt, regs, 1); + ASSIGN_CTX_PDP(ppgtt, regs, 0); + } + + if (rcs) { + regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); + CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); + + i915_oa_init_reg_state(engine, ce, regs); + } + + regs[CTX_END] = MI_BATCH_BUFFER_END; + if (INTEL_GEN(engine->i915) >= 10) + regs[CTX_END] |= BIT(0); +} + +static int +populate_lr_context(struct intel_context *ce, + struct drm_i915_gem_object *ctx_obj, + struct intel_engine_cs *engine, + struct intel_ring *ring) +{ + void *vaddr; + u32 *regs; + int ret; + + vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + DRM_DEBUG_DRIVER("Could not map object pages! 
(%d)\n", ret); + return ret; + } + + if (engine->default_state) { + /* + * We only want to copy over the template context state; + * skipping over the headers reserved for GuC communication, + * leaving those as zero. + */ + const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE; + void *defaults; + + defaults = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(defaults)) { + ret = PTR_ERR(defaults); + goto err_unpin_ctx; + } + + memcpy(vaddr + start, defaults + start, engine->context_size); + i915_gem_object_unpin_map(engine->default_state); + } + + /* The second page of the context object contains some fields which must + * be set up prior to the first execution. */ + regs = vaddr + LRC_STATE_PN * PAGE_SIZE; + execlists_init_reg_state(regs, ce, engine, ring); + if (!engine->default_state) + regs[CTX_CONTEXT_CONTROL + 1] |= + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + if (ce->gem_context == engine->i915->preempt_context && + INTEL_GEN(engine->i915) < 11) + regs[CTX_CONTEXT_CONTROL + 1] |= + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); + + ret = 0; +err_unpin_ctx: + __i915_gem_object_flush_map(ctx_obj, + LRC_HEADER_PAGES * PAGE_SIZE, + engine->context_size); + i915_gem_object_unpin_map(ctx_obj); + return ret; +} + +static struct i915_timeline *get_timeline(struct i915_gem_context *ctx) +{ + if (ctx->timeline) + return i915_timeline_get(ctx->timeline); + else + return i915_timeline_create(ctx->i915, NULL); +} + +static int execlists_context_deferred_alloc(struct intel_context *ce, + struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *ctx_obj; + struct i915_vma *vma; + u32 context_size; + struct intel_ring *ring; + struct i915_timeline *timeline; + int ret; + + if (ce->state) + return 0; + + context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); + + /* + * Before the actual start of the context image, we insert a few pages + * for our own use and for sharing with the GuC. 
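+ * (LRC_HEADER_PAGES accounts for the GuC shared page; the per-process
+ * HWSP is already included in engine->context_size, see intel_lrc.h.)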
+ */ + context_size += LRC_HEADER_PAGES * PAGE_SIZE; + + ctx_obj = i915_gem_object_create(engine->i915, context_size); + if (IS_ERR(ctx_obj)) + return PTR_ERR(ctx_obj); + + vma = i915_vma_instance(ctx_obj, &engine->i915->ggtt.vm, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto error_deref_obj; + } + + timeline = get_timeline(ce->gem_context); + if (IS_ERR(timeline)) { + ret = PTR_ERR(timeline); + goto error_deref_obj; + } + + ring = intel_engine_create_ring(engine, + timeline, + ce->gem_context->ring_size); + i915_timeline_put(timeline); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); + goto error_deref_obj; + } + + ret = populate_lr_context(ce, ctx_obj, engine, ring); + if (ret) { + DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret); + goto error_ring_free; + } + + ce->ring = ring; + ce->state = vma; + + return 0; + +error_ring_free: + intel_ring_put(ring); +error_deref_obj: + i915_gem_object_put(ctx_obj); + return ret; +} + +void intel_execlists_show_requests(struct intel_engine_cs *engine, + struct drm_printer *m, + void (*show_request)(struct drm_printer *m, + struct i915_request *rq, + const char *prefix), + unsigned int max) +{ + const struct intel_engine_execlists *execlists = &engine->execlists; + struct i915_request *rq, *last; + unsigned long flags; + unsigned int count; + struct rb_node *rb; + + spin_lock_irqsave(&engine->timeline.lock, flags); + + last = NULL; + count = 0; + list_for_each_entry(rq, &engine->timeline.requests, link) { + if (count++ < max - 1) + show_request(m, rq, "\t\tE "); + else + last = rq; + } + if (last) { + if (count > max) { + drm_printf(m, + "\t\t...skipping %d executing requests...\n", + count - max); + } + show_request(m, last, "\t\tE "); + } + + last = NULL; + count = 0; + if (execlists->queue_priority_hint != INT_MIN) + drm_printf(m, "\t\tQueue priority hint: %d\n", + execlists->queue_priority_hint); + for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + int i; + + priolist_for_each_request(rq, p, i) { + if (count++ < max - 1) + show_request(m, rq, "\t\tQ "); + else + last = rq; + } + } + if (last) { + if (count > max) { + drm_printf(m, + "\t\t...skipping %d queued requests...\n", + count - max); + } + show_request(m, last, "\t\tQ "); + } + + spin_unlock_irqrestore(&engine->timeline.lock, flags); +} + +void intel_lr_context_reset(struct intel_engine_cs *engine, + struct intel_context *ce, + u32 head, + bool scrub) +{ + /* + * We want a simple context + ring to execute the breadcrumb update. + * We cannot rely on the context being intact across the GPU hang, + * so clear it and rebuild just what we need for the breadcrumb. + * All pending requests for this context will be zapped, and any + * future request will be after userspace has had the opportunity + * to recreate its own state. + */ + if (scrub) { + u32 *regs = ce->lrc_reg_state; + + if (engine->pinned_default_state) { + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + } + execlists_init_reg_state(regs, ce, engine, ce->ring); + } + + /* Rerun the request; its payload has been neutered (if guilty). 
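+ * (The caller supplies the ring head to restart from; we refresh the
+ * ring space bookkeeping and push the new head into the context image
+ * via __execlists_update_reg_state().)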
*/ + ce->ring->head = head; + intel_ring_update_space(ce->ring); + + __execlists_update_reg_state(ce, engine); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_lrc.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h new file mode 100644 index 000000000000..1a33ec74af8c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -0,0 +1,116 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _INTEL_LRC_H_ +#define _INTEL_LRC_H_ + +#include "intel_engine.h" + +/* Execlists regs */ +#define RING_ELSP(base) _MMIO((base) + 0x230) +#define RING_EXECLIST_STATUS_LO(base) _MMIO((base) + 0x234) +#define RING_EXECLIST_STATUS_HI(base) _MMIO((base) + 0x234 + 4) +#define RING_CONTEXT_CONTROL(base) _MMIO((base) + 0x244) +#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3) +#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) +#define CTX_CTRL_RS_CTX_ENABLE (1 << 1) +#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) +#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) +#define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) +#define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) + +#define EL_CTRL_LOAD (1 << 0) + +/* The docs specify that the write pointer wraps around after 5h, "After status + * is written out to the last available status QW at offset 5h, this pointer + * wraps to 0." + * + * Therefore, one must infer than even though there are 3 bits available, 6 and + * 7 appear to be * reserved. + */ +#define GEN8_CSB_ENTRIES 6 +#define GEN8_CSB_PTR_MASK 0x7 +#define GEN8_CSB_READ_PTR_MASK (GEN8_CSB_PTR_MASK << 8) +#define GEN8_CSB_WRITE_PTR_MASK (GEN8_CSB_PTR_MASK << 0) + +#define GEN11_CSB_ENTRIES 12 +#define GEN11_CSB_PTR_MASK 0xf +#define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8) +#define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0) + +enum { + INTEL_CONTEXT_SCHEDULE_IN = 0, + INTEL_CONTEXT_SCHEDULE_OUT, + INTEL_CONTEXT_SCHEDULE_PREEMPTED, +}; + +/* Logical Rings */ +void intel_logical_ring_cleanup(struct intel_engine_cs *engine); +int logical_render_ring_init(struct intel_engine_cs *engine); +int logical_xcs_ring_init(struct intel_engine_cs *engine); + +/* Logical Ring Contexts */ + +/* + * We allocate a header at the start of the context image for our own + * use, therefore the actual location of the logical state is offset + * from the start of the VMA. 
The layout is + * + * | [guc] | [hwsp] [logical state] | + * |<- our header ->|<- context image ->| + * + */ +/* The first page is used for sharing data with the GuC */ +#define LRC_GUCSHR_PN (0) +#define LRC_GUCSHR_SZ (1) +/* At the start of the context image is its per-process HWS page */ +#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + LRC_GUCSHR_SZ) +#define LRC_PPHWSP_SZ (1) +/* Finally we have the logical state for the context */ +#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ) + +/* + * Currently we include the PPHWSP in __intel_engine_context_size() so + * the size of the header is synonymous with the start of the PPHWSP. + */ +#define LRC_HEADER_PAGES LRC_PPHWSP_PN + +struct drm_printer; + +struct drm_i915_private; + +void intel_execlists_set_default_submission(struct intel_engine_cs *engine); + +void intel_lr_context_reset(struct intel_engine_cs *engine, + struct intel_context *ce, + u32 head, + bool scrub); + +void intel_execlists_show_requests(struct intel_engine_cs *engine, + struct drm_printer *m, + void (*show_request)(struct drm_printer *m, + struct i915_request *rq, + const char *prefix), + unsigned int max); + +#endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h new file mode 100644 index 000000000000..5ef932d810a7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -0,0 +1,68 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef _INTEL_LRC_REG_H_ +#define _INTEL_LRC_REG_H_ + +#include + +/* GEN8+ Reg State Context */ +#define CTX_LRI_HEADER_0 0x01 +#define CTX_CONTEXT_CONTROL 0x02 +#define CTX_RING_HEAD 0x04 +#define CTX_RING_TAIL 0x06 +#define CTX_RING_BUFFER_START 0x08 +#define CTX_RING_BUFFER_CONTROL 0x0a +#define CTX_BB_HEAD_U 0x0c +#define CTX_BB_HEAD_L 0x0e +#define CTX_BB_STATE 0x10 +#define CTX_SECOND_BB_HEAD_U 0x12 +#define CTX_SECOND_BB_HEAD_L 0x14 +#define CTX_SECOND_BB_STATE 0x16 +#define CTX_BB_PER_CTX_PTR 0x18 +#define CTX_RCS_INDIRECT_CTX 0x1a +#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c +#define CTX_LRI_HEADER_1 0x21 +#define CTX_CTX_TIMESTAMP 0x22 +#define CTX_PDP3_UDW 0x24 +#define CTX_PDP3_LDW 0x26 +#define CTX_PDP2_UDW 0x28 +#define CTX_PDP2_LDW 0x2a +#define CTX_PDP1_UDW 0x2c +#define CTX_PDP1_LDW 0x2e +#define CTX_PDP0_UDW 0x30 +#define CTX_PDP0_LDW 0x32 +#define CTX_LRI_HEADER_2 0x41 +#define CTX_R_PWR_CLK_STATE 0x42 +#define CTX_END 0x44 + +#define CTX_REG(reg_state, pos, reg, val) do { \ + u32 *reg_state__ = (reg_state); \ + const u32 pos__ = (pos); \ + (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \ + (reg_state__)[(pos__) + 1] = (val); \ +} while (0) + +#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \ + u32 *reg_state__ = (reg_state); \ + const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \ + (reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \ + (reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \ +} while (0) + +#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \ + u32 *reg_state__ = (reg_state); \ + const u64 addr__ = px_dma(&ppgtt->pml4); \ + (reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \ + (reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \ +} while (0) + +#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 +#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 +#define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19 +#define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x1A + +#endif /* _INTEL_LRC_REG_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c new file mode 100644 index 000000000000..79df66022d3a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -0,0 +1,566 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "i915_drv.h" + +#include "intel_engine.h" +#include "intel_mocs.h" +#include "intel_lrc.h" + +/* structures required */ +struct drm_i915_mocs_entry { + u32 control_value; + u16 l3cc_value; + u16 used; +}; + +struct drm_i915_mocs_table { + unsigned int size; + unsigned int n_entries; + const struct drm_i915_mocs_entry *table; +}; + +/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ +#define _LE_CACHEABILITY(value) ((value) << 0) +#define _LE_TGT_CACHE(value) ((value) << 2) +#define LE_LRUM(value) ((value) << 4) +#define LE_AOM(value) ((value) << 6) +#define LE_RSC(value) ((value) << 7) +#define LE_SCC(value) ((value) << 8) +#define LE_PFM(value) ((value) << 11) +#define LE_SCF(value) ((value) << 14) +#define LE_COS(value) ((value) << 15) +#define LE_SSE(value) ((value) << 17) + +/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ +#define L3_ESC(value) ((value) << 0) +#define L3_SCC(value) ((value) << 1) +#define _L3_CACHEABILITY(value) ((value) << 4) + +/* Helper defines */ +#define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */ +#define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ + +/* (e)LLC caching options */ +#define LE_0_PAGETABLE _LE_CACHEABILITY(0) +#define LE_1_UC _LE_CACHEABILITY(1) +#define LE_2_WT _LE_CACHEABILITY(2) +#define LE_3_WB _LE_CACHEABILITY(3) + +/* Target cache */ +#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0) +#define LE_TC_1_LLC _LE_TGT_CACHE(1) +#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2) +#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3) + +/* L3 caching options */ +#define L3_0_DIRECT _L3_CACHEABILITY(0) +#define L3_1_UC _L3_CACHEABILITY(1) +#define L3_2_RESERVED _L3_CACHEABILITY(2) +#define L3_3_WB _L3_CACHEABILITY(3) + +#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ + [__idx] = { \ + .control_value = __control_value, \ + .l3cc_value = __l3cc_value, \ + .used = 1, \ + } + +/* + * MOCS tables + * + * These are the MOCS tables that are programmed across all the rings. + * The control value is programmed to all the rings that support the + * MOCS registers. While the l3cc_values are only programmed to the + * LNCFCMOCS0 - LNCFCMOCS32 registers. 
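+ * (As the defines above note, the l3cc values are packed two entries
+ * per LNCFCMOCS word, while each control value gets a MOCS register
+ * of its own.)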
+ * + * These tables are intended to be kept reasonably consistent across + * HW platforms, and for ICL+, be identical across OSes. To achieve + * that, for Icelake and above, list of entries is published as part + * of bspec. + * + * Entries not part of the following tables are undefined as far as + * userspace is concerned and shouldn't be relied upon. For the time + * being they will be initialized to PTE. + * + * The last two entries are reserved by the hardware. For ICL+ they + * should be initialized according to bspec and never used, for older + * platforms they should never be written to. + * + * NOTE: These tables are part of bspec and defined as part of hardware + * interface for ICL+. For older platforms, they are part of kernel + * ABI. It is expected that, for specific hardware platform, existing + * entries will remain constant and the table will only be updated by + * adding new entries, filling unused positions. + */ +#define GEN9_MOCS_ENTRIES \ + MOCS_ENTRY(I915_MOCS_UNCACHED, \ + LE_1_UC | LE_TC_2_LLC_ELLC, \ + L3_1_UC), \ + MOCS_ENTRY(I915_MOCS_PTE, \ + LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ + L3_3_WB) + +static const struct drm_i915_mocs_entry skylake_mocs_table[] = { + GEN9_MOCS_ENTRIES, + MOCS_ENTRY(I915_MOCS_CACHED, + LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), + L3_3_WB) +}; + +/* NOTE: the LE_TGT_CACHE is not used on Broxton */ +static const struct drm_i915_mocs_entry broxton_mocs_table[] = { + GEN9_MOCS_ENTRIES, + MOCS_ENTRY(I915_MOCS_CACHED, + LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), + L3_3_WB) +}; + +#define GEN11_MOCS_ENTRIES \ + /* Base - Uncached (Deprecated) */ \ + MOCS_ENTRY(I915_MOCS_UNCACHED, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_1_UC), \ + /* Base - L3 + LeCC:PAT (Deprecated) */ \ + MOCS_ENTRY(I915_MOCS_PTE, \ + LE_0_PAGETABLE | LE_TC_1_LLC, \ + L3_3_WB), \ + /* Base - L3 + LLC */ \ + MOCS_ENTRY(2, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_3_WB), \ + /* Base - Uncached */ \ + MOCS_ENTRY(3, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_1_UC), \ + /* Base - L3 */ \ + MOCS_ENTRY(4, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_3_WB), \ + /* Base - LLC */ \ + MOCS_ENTRY(5, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC), \ + /* Age 0 - LLC */ \ + MOCS_ENTRY(6, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ + L3_1_UC), \ + /* Age 0 - L3 + LLC */ \ + MOCS_ENTRY(7, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ + L3_3_WB), \ + /* Age: Don't Chg. - LLC */ \ + MOCS_ENTRY(8, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ + L3_1_UC), \ + /* Age: Don't Chg. 
- L3 + LLC */ \ + MOCS_ENTRY(9, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ + L3_3_WB), \ + /* No AOM - LLC */ \ + MOCS_ENTRY(10, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM - L3 + LLC */ \ + MOCS_ENTRY(11, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ + L3_3_WB), \ + /* No AOM; Age 0 - LLC */ \ + MOCS_ENTRY(12, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM; Age 0 - L3 + LLC */ \ + MOCS_ENTRY(13, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ + L3_3_WB), \ + /* No AOM; Age:DC - LLC */ \ + MOCS_ENTRY(14, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM; Age:DC - L3 + LLC */ \ + MOCS_ENTRY(15, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ + L3_3_WB), \ + /* Self-Snoop - L3 + LLC */ \ + MOCS_ENTRY(18, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(12.5%) */ \ + MOCS_ENTRY(19, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(25%) */ \ + MOCS_ENTRY(20, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(50%) */ \ + MOCS_ENTRY(21, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(75%) */ \ + MOCS_ENTRY(22, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(87.5%) */ \ + MOCS_ENTRY(23, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \ + L3_3_WB), \ + /* HW Reserved - SW program but never use */ \ + MOCS_ENTRY(62, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC), \ + /* HW Reserved - SW program but never use */ \ + MOCS_ENTRY(63, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC) + +static const struct drm_i915_mocs_entry icelake_mocs_table[] = { + GEN11_MOCS_ENTRIES +}; + +/** + * get_mocs_settings() + * @dev_priv: i915 device. + * @table: Output table that will be made to point at appropriate + * MOCS values for the device. + * + * This function will return the values of the MOCS table that needs to + * be programmed for the platform. It will return the values that need + * to be programmed and if they need to be programmed. + * + * Return: true if there are applicable MOCS settings for the device. 
+ */ +static bool get_mocs_settings(struct drm_i915_private *dev_priv, + struct drm_i915_mocs_table *table) +{ + bool result = false; + + if (INTEL_GEN(dev_priv) >= 11) { + table->size = ARRAY_SIZE(icelake_mocs_table); + table->table = icelake_mocs_table; + table->n_entries = GEN11_NUM_MOCS_ENTRIES; + result = true; + } else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + table->size = ARRAY_SIZE(skylake_mocs_table); + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->table = skylake_mocs_table; + result = true; + } else if (IS_GEN9_LP(dev_priv)) { + table->size = ARRAY_SIZE(broxton_mocs_table); + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->table = broxton_mocs_table; + result = true; + } else { + WARN_ONCE(INTEL_GEN(dev_priv) >= 9, + "Platform that should have a MOCS table does not.\n"); + } + + /* WaDisableSkipCaching:skl,bxt,kbl,glk */ + if (IS_GEN(dev_priv, 9)) { + int i; + + for (i = 0; i < table->size; i++) + if (WARN_ON(table->table[i].l3cc_value & + (L3_ESC(1) | L3_SCC(0x7)))) + return false; + } + + return result; +} + +static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) +{ + switch (engine_id) { + case RCS0: + return GEN9_GFX_MOCS(index); + case VCS0: + return GEN9_MFX0_MOCS(index); + case BCS0: + return GEN9_BLT_MOCS(index); + case VECS0: + return GEN9_VEBOX_MOCS(index); + case VCS1: + return GEN9_MFX1_MOCS(index); + case VCS2: + return GEN11_MFX2_MOCS(index); + default: + MISSING_CASE(engine_id); + return INVALID_MMIO_REG; + } +} + +/* + * Get control_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. + */ +static u32 get_entry_control(const struct drm_i915_mocs_table *table, + unsigned int index) +{ + if (table->table[index].used) + return table->table[index].control_value; + + return table->table[I915_MOCS_PTE].control_value; +} + +/** + * intel_mocs_init_engine() - emit the mocs control table + * @engine: The engine for whom to emit the registers. + * + * This function simply emits a MI_LOAD_REGISTER_IMM command for the + * given table starting at the given address. + */ +void intel_mocs_init_engine(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_mocs_table table; + unsigned int index; + u32 unused_value; + + if (!get_mocs_settings(dev_priv, &table)) + return; + + /* Set unused values to PTE */ + unused_value = table.table[I915_MOCS_PTE].control_value; + + for (index = 0; index < table.size; index++) { + u32 value = get_entry_control(&table, index); + + I915_WRITE(mocs_register(engine->id, index), value); + } + + /* All remaining entries are also unused */ + for (; index < table.n_entries; index++) + I915_WRITE(mocs_register(engine->id, index), unused_value); +} + +/** + * emit_mocs_control_table() - emit the mocs control table + * @rq: Request to set up the MOCS table for. + * @table: The values to program into the control regs. + * + * This function simply emits a MI_LOAD_REGISTER_IMM command for the + * given table starting at the given address. + * + * Return: 0 on success, otherwise the error status. 
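The net effect of get_entry_control() plus the two loops in intel_mocs_init_engine() is that all n_entries registers are written, with indices missing from the table falling back to the I915_MOCS_PTE value. A stand-alone sketch of that selection rule (the struct, index values and table size are hypothetical; the control words follow the GEN9 encodings above):

#include <stdio.h>

struct mocs_entry {
        unsigned int control_value;
        int used;
};

#define MOCS_PTE 1      /* index of the I915_MOCS_PTE entry in the table */

/* Mirrors get_entry_control(): unused/undefined slots read as the PTE entry. */
static unsigned int entry_control(const struct mocs_entry *table,
                                  unsigned int size, unsigned int index)
{
        if (index < size && table[index].used)
                return table[index].control_value;

        return table[MOCS_PTE].control_value;
}

int main(void)
{
        const struct mocs_entry table[] = {
                { 0x00000009, 1 },      /* UNCACHED: LE_1_UC | LE_TC_2_LLC_ELLC */
                { 0x00000038, 1 },      /* PTE: LE_0_PAGETABLE | LE_TC_2 | LE_LRUM(3) */
                { 0x0000003b, 1 },      /* CACHED: LE_3_WB | LE_TC_2 | LE_LRUM(3) */
        };
        unsigned int n_entries = 6, i;

        for (i = 0; i < n_entries; i++)
                printf("MOCS[%u] = 0x%08x\n", i, entry_control(table, 3, i));
        return 0;
}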
+ */ +static int emit_mocs_control_table(struct i915_request *rq, + const struct drm_i915_mocs_table *table) +{ + enum intel_engine_id engine = rq->engine->id; + unsigned int index; + u32 unused_value; + u32 *cs; + + if (GEM_WARN_ON(table->size > table->n_entries)) + return -ENODEV; + + /* Set unused values to PTE */ + unused_value = table->table[I915_MOCS_PTE].control_value; + + cs = intel_ring_begin(rq, 2 + 2 * table->n_entries); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries); + + for (index = 0; index < table->size; index++) { + u32 value = get_entry_control(table, index); + + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); + *cs++ = value; + } + + /* All remaining entries are also unused */ + for (; index < table->n_entries; index++) { + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); + *cs++ = unused_value; + } + + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +/* + * Get l3cc_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. + */ +static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, + unsigned int index) +{ + if (table->table[index].used) + return table->table[index].l3cc_value; + + return table->table[I915_MOCS_PTE].l3cc_value; +} + +static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, + u16 low, + u16 high) +{ + return low | high << 16; +} + +/** + * emit_mocs_l3cc_table() - emit the mocs control table + * @rq: Request to set up the MOCS table for. + * @table: The values to program into the control regs. + * + * This function simply emits a MI_LOAD_REGISTER_IMM command for the + * given table starting at the given address. This register set is + * programmed in pairs. + * + * Return: 0 on success, otherwise the error status. + */ +static int emit_mocs_l3cc_table(struct i915_request *rq, + const struct drm_i915_mocs_table *table) +{ + u16 unused_value; + unsigned int i; + u32 *cs; + + if (GEM_WARN_ON(table->size > table->n_entries)) + return -ENODEV; + + /* Set unused values to PTE */ + unused_value = table->table[I915_MOCS_PTE].l3cc_value; + + cs = intel_ring_begin(rq, 2 + table->n_entries); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2); + + for (i = 0; i < table->size / 2; i++) { + u16 low = get_entry_l3cc(table, 2 * i); + u16 high = get_entry_l3cc(table, 2 * i + 1); + + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, low, high); + } + + /* Odd table size - 1 left over */ + if (table->size & 0x01) { + u16 low = get_entry_l3cc(table, 2 * i); + + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, low, unused_value); + i++; + } + + /* All remaining entries are also unused */ + for (; i < table->n_entries / 2; i++) { + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, unused_value, unused_value); + } + + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +/** + * intel_mocs_init_l3cc_table() - program the mocs control table + * @dev_priv: i915 device private + * + * This function simply programs the mocs registers for the given table + * starting at the given address. This register set is programmed in pairs. + * + * These registers may get programmed more than once, it is simpler to + * re-program 32 registers than maintain the state of when they were programmed. + * We are always reprogramming with the same values and this only on context + * start. 
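Each GEN9_LNCFCMOCS register holds two 16-bit entries, so emit_mocs_l3cc_table() walks the table in pairs and pads an odd-sized table with the unused (PTE) value before filling the remainder, exactly as l3cc_combine() suggests. A small stand-alone sketch of the pairing (the l3cc values are examples only):

#include <stdio.h>

static unsigned int l3cc_combine(unsigned short low, unsigned short high)
{
        return low | (unsigned int)high << 16;
}

int main(void)
{
        const unsigned short l3cc[] = { 0x0010, 0x0030, 0x0030 };
        const unsigned short unused = 0x0030;   /* the I915_MOCS_PTE value */
        unsigned int n = sizeof(l3cc) / sizeof(l3cc[0]);
        unsigned int i;

        for (i = 0; i < n / 2; i++)
                printf("LNCFCMOCS%u = 0x%08x\n", i,
                       l3cc_combine(l3cc[2 * i], l3cc[2 * i + 1]));

        if (n & 1)      /* odd count: pad the high half with the unused value */
                printf("LNCFCMOCS%u = 0x%08x\n", i,
                       l3cc_combine(l3cc[2 * i], unused));
        return 0;
}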
+ * + * Return: Nothing. + */ +void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) +{ + struct drm_i915_mocs_table table; + unsigned int i; + u16 unused_value; + + if (!get_mocs_settings(dev_priv, &table)) + return; + + /* Set unused values to PTE */ + unused_value = table.table[I915_MOCS_PTE].l3cc_value; + + for (i = 0; i < table.size / 2; i++) { + u16 low = get_entry_l3cc(&table, 2 * i); + u16 high = get_entry_l3cc(&table, 2 * i + 1); + + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, low, high)); + } + + /* Odd table size - 1 left over */ + if (table.size & 0x01) { + u16 low = get_entry_l3cc(&table, 2 * i); + + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, low, unused_value)); + i++; + } + + /* All remaining entries are also unused */ + for (; i < table.n_entries / 2; i++) + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, unused_value, unused_value)); +} + +/** + * intel_rcs_context_init_mocs() - program the MOCS register. + * @rq: Request to set up the MOCS tables for. + * + * This function will emit a batch buffer with the values required for + * programming the MOCS register values for all the currently supported + * rings. + * + * These registers are partially stored in the RCS context, so they are + * emitted at the same time so that when a context is created these registers + * are set up. These registers have to be emitted into the start of the + * context as setting the ELSP will re-init some of these registers back + * to the hw values. + * + * Return: 0 on success, otherwise the error status. + */ +int intel_rcs_context_init_mocs(struct i915_request *rq) +{ + struct drm_i915_mocs_table t; + int ret; + + if (get_mocs_settings(rq->i915, &t)) { + /* Program the RCS control registers */ + ret = emit_mocs_control_table(rq, &t); + if (ret) + return ret; + + /* Now program the l3cc registers */ + ret = emit_mocs_l3cc_table(rq, &t); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h new file mode 100644 index 000000000000..0913704a1af2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef INTEL_MOCS_H +#define INTEL_MOCS_H + +/** + * DOC: Memory Objects Control State (MOCS) + * + * Motivation: + * In previous Gens the MOCS settings was a value that was set by user land as + * part of the batch. In Gen9 this has changed to be a single table (per ring) + * that all batches now reference by index instead of programming the MOCS + * directly. + * + * The one wrinkle in this is that only PART of the MOCS tables are included + * in context (The GFX_MOCS_0 - GFX_MOCS_64 and the LNCFCMOCS0 - LNCFCMOCS32 + * registers). The rest are not (the settings for the other rings). + * + * This table needs to be set at system start-up because the way the table + * interacts with the contexts and the GmmLib interface. + * + * + * Implementation: + * + * The tables (one per supported platform) are defined in intel_mocs.c + * and are programmed in the first batch after the context is loaded + * (with the hardware workarounds). This will then let the usual + * context handling keep the MOCS in step. + */ + +struct drm_i915_private; +struct i915_request; +struct intel_engine_cs; + +int intel_rcs_context_init_mocs(struct i915_request *rq); +void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv); +void intel_mocs_init_engine(struct intel_engine_cs *engine); + +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c new file mode 100644 index 000000000000..7db498567843 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -0,0 +1,1471 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2008-2018 Intel Corporation + */ + +#include +#include + +#include "i915_drv.h" +#include "i915_gpu_error.h" +#include "intel_reset.h" + +#include "intel_guc.h" + +#define RESET_MAX_RETRIES 3 + +/* XXX How to handle concurrent GGTT updates using tiling registers? 
*/ +#define RESET_UNDER_STOP_MACHINE 0 + +static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set) +{ + intel_uncore_rmw(uncore, reg, 0, set); +} + +static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) +{ + intel_uncore_rmw(uncore, reg, clr, 0); +} + +static void rmw_set_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 set) +{ + intel_uncore_rmw_fw(uncore, reg, 0, set); +} + +static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) +{ + intel_uncore_rmw_fw(uncore, reg, clr, 0); +} + +static void engine_skip_context(struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + struct i915_gem_context *hung_ctx = rq->gem_context; + + lockdep_assert_held(&engine->timeline.lock); + + if (!i915_request_is_active(rq)) + return; + + list_for_each_entry_continue(rq, &engine->timeline.requests, link) + if (rq->gem_context == hung_ctx) + i915_request_skip(rq, -EIO); +} + +static void client_mark_guilty(struct drm_i915_file_private *file_priv, + const struct i915_gem_context *ctx) +{ + unsigned int score; + unsigned long prev_hang; + + if (i915_gem_context_is_banned(ctx)) + score = I915_CLIENT_SCORE_CONTEXT_BAN; + else + score = 0; + + prev_hang = xchg(&file_priv->hang_timestamp, jiffies); + if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) + score += I915_CLIENT_SCORE_HANG_FAST; + + if (score) { + atomic_add(score, &file_priv->ban_score); + + DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", + ctx->name, score, + atomic_read(&file_priv->ban_score)); + } +} + +static bool context_mark_guilty(struct i915_gem_context *ctx) +{ + unsigned long prev_hang; + bool banned; + int i; + + atomic_inc(&ctx->guilty_count); + + /* Cool contexts are too cool to be banned! (Used for reset testing.) */ + if (!i915_gem_context_is_bannable(ctx)) + return false; + + /* Record the timestamp for the last N hangs */ + prev_hang = ctx->hang_timestamp[0]; + for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp) - 1; i++) + ctx->hang_timestamp[i] = ctx->hang_timestamp[i + 1]; + ctx->hang_timestamp[i] = jiffies; + + /* If we have hung N+1 times in rapid succession, we ban the context! */ + banned = !i915_gem_context_is_recoverable(ctx); + if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES)) + banned = true; + if (banned) { + DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n", + ctx->name, atomic_read(&ctx->guilty_count)); + i915_gem_context_set_banned(ctx); + } + + if (!IS_ERR_OR_NULL(ctx->file_priv)) + client_mark_guilty(ctx->file_priv, ctx); + + return banned; +} + +static void context_mark_innocent(struct i915_gem_context *ctx) +{ + atomic_inc(&ctx->active_count); +} + +void i915_reset_request(struct i915_request *rq, bool guilty) +{ + GEM_TRACE("%s rq=%llx:%lld, guilty? 
%s\n", + rq->engine->name, + rq->fence.context, + rq->fence.seqno, + yesno(guilty)); + + lockdep_assert_held(&rq->engine->timeline.lock); + GEM_BUG_ON(i915_request_completed(rq)); + + if (guilty) { + i915_request_skip(rq, -EIO); + if (context_mark_guilty(rq->gem_context)) + engine_skip_context(rq); + } else { + dma_fence_set_error(&rq->fence, -EAGAIN); + context_mark_innocent(rq->gem_context); + } +} + +static void gen3_stop_engine(struct intel_engine_cs *engine) +{ + struct intel_uncore *uncore = engine->uncore; + const u32 base = engine->mmio_base; + + GEM_TRACE("%s\n", engine->name); + + if (intel_engine_stop_cs(engine)) + GEM_TRACE("%s: timed out on STOP_RING\n", engine->name); + + intel_uncore_write_fw(uncore, + RING_HEAD(base), + intel_uncore_read_fw(uncore, RING_TAIL(base))); + intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */ + + intel_uncore_write_fw(uncore, RING_HEAD(base), 0); + intel_uncore_write_fw(uncore, RING_TAIL(base), 0); + intel_uncore_posting_read_fw(uncore, RING_TAIL(base)); + + /* The ring must be empty before it is disabled */ + intel_uncore_write_fw(uncore, RING_CTL(base), 0); + + /* Check acts as a post */ + if (intel_uncore_read_fw(uncore, RING_HEAD(base))) + GEM_TRACE("%s: ring head [%x] not parked\n", + engine->name, + intel_uncore_read_fw(uncore, RING_HEAD(base))); +} + +static void i915_stop_engines(struct drm_i915_private *i915, + intel_engine_mask_t engine_mask) +{ + struct intel_engine_cs *engine; + intel_engine_mask_t tmp; + + if (INTEL_GEN(i915) < 3) + return; + + for_each_engine_masked(engine, i915, engine_mask, tmp) + gen3_stop_engine(engine); +} + +static bool i915_in_reset(struct pci_dev *pdev) +{ + u8 gdrst; + + pci_read_config_byte(pdev, I915_GDRST, &gdrst); + return gdrst & GRDOM_RESET_STATUS; +} + +static int i915_do_reset(struct drm_i915_private *i915, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + struct pci_dev *pdev = i915->drm.pdev; + int err; + + /* Assert reset for at least 20 usec, and wait for acknowledgement. */ + pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); + udelay(50); + err = wait_for_atomic(i915_in_reset(pdev), 50); + + /* Clear the reset request. 
*/ + pci_write_config_byte(pdev, I915_GDRST, 0); + udelay(50); + if (!err) + err = wait_for_atomic(!i915_in_reset(pdev), 50); + + return err; +} + +static bool g4x_reset_complete(struct pci_dev *pdev) +{ + u8 gdrst; + + pci_read_config_byte(pdev, I915_GDRST, &gdrst); + return (gdrst & GRDOM_RESET_ENABLE) == 0; +} + +static int g33_do_reset(struct drm_i915_private *i915, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + struct pci_dev *pdev = i915->drm.pdev; + + pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); + return wait_for_atomic(g4x_reset_complete(pdev), 50); +} + +static int g4x_do_reset(struct drm_i915_private *i915, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + struct pci_dev *pdev = i915->drm.pdev; + struct intel_uncore *uncore = &i915->uncore; + int ret; + + /* WaVcpClkGateDisableForMediaReset:ctg,elk */ + rmw_set_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE); + intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D); + + pci_write_config_byte(pdev, I915_GDRST, + GRDOM_MEDIA | GRDOM_RESET_ENABLE); + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); + if (ret) { + DRM_DEBUG_DRIVER("Wait for media reset failed\n"); + goto out; + } + + pci_write_config_byte(pdev, I915_GDRST, + GRDOM_RENDER | GRDOM_RESET_ENABLE); + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); + if (ret) { + DRM_DEBUG_DRIVER("Wait for render reset failed\n"); + goto out; + } + +out: + pci_write_config_byte(pdev, I915_GDRST, 0); + + rmw_clear_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE); + intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D); + + return ret; +} + +static int ironlake_do_reset(struct drm_i915_private *i915, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + struct intel_uncore *uncore = &i915->uncore; + int ret; + + intel_uncore_write_fw(uncore, ILK_GDSR, + ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); + ret = __intel_wait_for_register_fw(uncore, ILK_GDSR, + ILK_GRDOM_RESET_ENABLE, 0, + 5000, 0, + NULL); + if (ret) { + DRM_DEBUG_DRIVER("Wait for render reset failed\n"); + goto out; + } + + intel_uncore_write_fw(uncore, ILK_GDSR, + ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); + ret = __intel_wait_for_register_fw(uncore, ILK_GDSR, + ILK_GRDOM_RESET_ENABLE, 0, + 5000, 0, + NULL); + if (ret) { + DRM_DEBUG_DRIVER("Wait for media reset failed\n"); + goto out; + } + +out: + intel_uncore_write_fw(uncore, ILK_GDSR, 0); + intel_uncore_posting_read_fw(uncore, ILK_GDSR); + return ret; +} + +/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */ +static int gen6_hw_domain_reset(struct drm_i915_private *i915, + u32 hw_domain_mask) +{ + struct intel_uncore *uncore = &i915->uncore; + int err; + + /* + * GEN6_GDRST is not in the gt power well, no need to check + * for fifo space for the write or forcewake the chip for + * the read + */ + intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); + + /* Wait for the device to ack the reset requests */ + err = __intel_wait_for_register_fw(uncore, + GEN6_GDRST, hw_domain_mask, 0, + 500, 0, + NULL); + if (err) + DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n", + hw_domain_mask); + + return err; +} + +static int gen6_reset_engines(struct drm_i915_private *i915, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + struct intel_engine_cs *engine; + const u32 hw_engine_mask[] = { + [RCS0] = GEN6_GRDOM_RENDER, + [BCS0] = GEN6_GRDOM_BLT, + [VCS0] = GEN6_GRDOM_MEDIA, + [VCS1] = GEN8_GRDOM_MEDIA2, + [VECS0] = GEN6_GRDOM_VECS, + }; + u32 hw_mask; + + if (engine_mask == 
ALL_ENGINES) { + hw_mask = GEN6_GRDOM_FULL; + } else { + intel_engine_mask_t tmp; + + hw_mask = 0; + for_each_engine_masked(engine, i915, engine_mask, tmp) { + GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); + hw_mask |= hw_engine_mask[engine->id]; + } + } + + return gen6_hw_domain_reset(i915, hw_mask); +} + +static u32 gen11_lock_sfc(struct intel_engine_cs *engine) +{ + struct intel_uncore *uncore = engine->uncore; + u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access; + i915_reg_t sfc_forced_lock, sfc_forced_lock_ack; + u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit; + i915_reg_t sfc_usage; + u32 sfc_usage_bit; + u32 sfc_reset_bit; + + switch (engine->class) { + case VIDEO_DECODE_CLASS: + if ((BIT(engine->instance) & vdbox_sfc_access) == 0) + return 0; + + sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine); + sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; + + sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine); + sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT; + + sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine); + sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT; + sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance); + break; + + case VIDEO_ENHANCEMENT_CLASS: + sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine); + sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; + + sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine); + sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT; + + sfc_usage = GEN11_VECS_SFC_USAGE(engine); + sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT; + sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance); + break; + + default: + return 0; + } + + /* + * Tell the engine that a software reset is going to happen. The engine + * will then try to force lock the SFC (if currently locked, it will + * remain so until we tell the engine it is safe to unlock; if currently + * unlocked, it will ignore this and all new lock requests). If SFC + * ends up being locked to the engine we want to reset, we have to reset + * it as well (we will unlock it once the reset sequence is completed). 
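The hw_mask accumulation in gen6_reset_engines() above (and in its gen11 counterpart) is a per-engine table lookup ORed together, with ALL_ENGINES short-circuiting to the full-chip domain. Sketched stand-alone with made-up bit values:

#include <stdio.h>

#define ALL_ENGINES     (~0u)
#define GRDOM_FULL      (1u << 0)       /* placeholder domain bit */

int main(void)
{
        /* Hypothetical per-engine reset-domain bits, indexed by engine id. */
        const unsigned int hw_engine_mask[] = { 1u << 1, 1u << 2, 1u << 3 };
        unsigned int engine_mask = (1u << 0) | (1u << 2);       /* engines 0 and 2 */
        unsigned int hw_mask = 0;
        unsigned int id;

        if (engine_mask == ALL_ENGINES) {
                hw_mask = GRDOM_FULL;
        } else {
                for (id = 0; id < 3; id++)
                        if (engine_mask & (1u << id))
                                hw_mask |= hw_engine_mask[id];
        }

        printf("hw_mask = 0x%x\n", hw_mask);    /* 0xa */
        return 0;
}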
+ */ + rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit); + + if (__intel_wait_for_register_fw(uncore, + sfc_forced_lock_ack, + sfc_forced_lock_ack_bit, + sfc_forced_lock_ack_bit, + 1000, 0, NULL)) { + DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); + return 0; + } + + if (intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit) + return sfc_reset_bit; + + return 0; +} + +static void gen11_unlock_sfc(struct intel_engine_cs *engine) +{ + struct intel_uncore *uncore = engine->uncore; + u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access; + i915_reg_t sfc_forced_lock; + u32 sfc_forced_lock_bit; + + switch (engine->class) { + case VIDEO_DECODE_CLASS: + if ((BIT(engine->instance) & vdbox_sfc_access) == 0) + return; + + sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine); + sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; + break; + + case VIDEO_ENHANCEMENT_CLASS: + sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine); + sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; + break; + + default: + return; + } + + rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit); +} + +static int gen11_reset_engines(struct drm_i915_private *i915, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + const u32 hw_engine_mask[] = { + [RCS0] = GEN11_GRDOM_RENDER, + [BCS0] = GEN11_GRDOM_BLT, + [VCS0] = GEN11_GRDOM_MEDIA, + [VCS1] = GEN11_GRDOM_MEDIA2, + [VCS2] = GEN11_GRDOM_MEDIA3, + [VCS3] = GEN11_GRDOM_MEDIA4, + [VECS0] = GEN11_GRDOM_VECS, + [VECS1] = GEN11_GRDOM_VECS2, + }; + struct intel_engine_cs *engine; + intel_engine_mask_t tmp; + u32 hw_mask; + int ret; + + if (engine_mask == ALL_ENGINES) { + hw_mask = GEN11_GRDOM_FULL; + } else { + hw_mask = 0; + for_each_engine_masked(engine, i915, engine_mask, tmp) { + GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); + hw_mask |= hw_engine_mask[engine->id]; + hw_mask |= gen11_lock_sfc(engine); + } + } + + ret = gen6_hw_domain_reset(i915, hw_mask); + + if (engine_mask != ALL_ENGINES) + for_each_engine_masked(engine, i915, engine_mask, tmp) + gen11_unlock_sfc(engine); + + return ret; +} + +static int gen8_engine_reset_prepare(struct intel_engine_cs *engine) +{ + struct intel_uncore *uncore = engine->uncore; + const i915_reg_t reg = RING_RESET_CTL(engine->mmio_base); + u32 request, mask, ack; + int ret; + + ack = intel_uncore_read_fw(uncore, reg); + if (ack & RESET_CTL_CAT_ERROR) { + /* + * For catastrophic errors, ready-for-reset sequence + * needs to be bypassed: HAS#396813 + */ + request = RESET_CTL_CAT_ERROR; + mask = RESET_CTL_CAT_ERROR; + + /* Catastrophic errors need to be cleared by HW */ + ack = 0; + } else if (!(ack & RESET_CTL_READY_TO_RESET)) { + request = RESET_CTL_REQUEST_RESET; + mask = RESET_CTL_READY_TO_RESET; + ack = RESET_CTL_READY_TO_RESET; + } else { + return 0; + } + + intel_uncore_write_fw(uncore, reg, _MASKED_BIT_ENABLE(request)); + ret = __intel_wait_for_register_fw(uncore, reg, mask, ack, + 700, 0, NULL); + if (ret) + DRM_ERROR("%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n", + engine->name, request, + intel_uncore_read_fw(uncore, reg)); + + return ret; +} + +static void gen8_engine_reset_cancel(struct intel_engine_cs *engine) +{ + intel_uncore_write_fw(engine->uncore, + RING_RESET_CTL(engine->mmio_base), + _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET)); +} + +static int gen8_reset_engines(struct drm_i915_private *i915, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + struct intel_engine_cs *engine; + const bool reset_non_ready = retry >= 1; + 
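gen8_engine_reset_prepare() above is a request/ack handshake: set the request bit in RING_RESET_CTL, then poll until the selected bits equal the expected ack or the 700us budget expires. A minimal sketch of that pattern against a fake register (bit values and the ack behaviour are made up):

#include <stdio.h>

#define RESET_CTL_REQUEST_RESET         (1u << 0)
#define RESET_CTL_READY_TO_RESET        (1u << 1)

static unsigned int fake_reset_ctl;     /* stands in for the MMIO register */

static unsigned int read_reset_ctl(void)
{
        /* Pretend the engine acks the request on the first read. */
        if (fake_reset_ctl & RESET_CTL_REQUEST_RESET)
                fake_reset_ctl |= RESET_CTL_READY_TO_RESET;
        return fake_reset_ctl;
}

int main(void)
{
        const unsigned int mask = RESET_CTL_READY_TO_RESET;
        const unsigned int ack = RESET_CTL_READY_TO_RESET;
        int budget = 700;       /* poll budget, think microseconds */

        fake_reset_ctl |= RESET_CTL_REQUEST_RESET;      /* the write_fw() step */

        while ((read_reset_ctl() & mask) != ack && budget--)
                ;       /* a real driver would relax/delay here */

        printf(budget > 0 ? "ready to reset\n" : "request timed out\n");
        return 0;
}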
intel_engine_mask_t tmp; + int ret; + + for_each_engine_masked(engine, i915, engine_mask, tmp) { + ret = gen8_engine_reset_prepare(engine); + if (ret && !reset_non_ready) + goto skip_reset; + + /* + * If this is not the first failed attempt to prepare, + * we decide to proceed anyway. + * + * By doing so we risk context corruption and with + * some gens (kbl), possible system hang if reset + * happens during active bb execution. + * + * We rather take context corruption instead of + * failed reset with a wedged driver/gpu. And + * active bb execution case should be covered by + * i915_stop_engines we have before the reset. + */ + } + + if (INTEL_GEN(i915) >= 11) + ret = gen11_reset_engines(i915, engine_mask, retry); + else + ret = gen6_reset_engines(i915, engine_mask, retry); + +skip_reset: + for_each_engine_masked(engine, i915, engine_mask, tmp) + gen8_engine_reset_cancel(engine); + + return ret; +} + +typedef int (*reset_func)(struct drm_i915_private *, + intel_engine_mask_t engine_mask, + unsigned int retry); + +static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) >= 8) + return gen8_reset_engines; + else if (INTEL_GEN(i915) >= 6) + return gen6_reset_engines; + else if (INTEL_GEN(i915) >= 5) + return ironlake_do_reset; + else if (IS_G4X(i915)) + return g4x_do_reset; + else if (IS_G33(i915) || IS_PINEVIEW(i915)) + return g33_do_reset; + else if (INTEL_GEN(i915) >= 3) + return i915_do_reset; + else + return NULL; +} + +int intel_gpu_reset(struct drm_i915_private *i915, + intel_engine_mask_t engine_mask) +{ + const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1; + reset_func reset; + int ret = -ETIMEDOUT; + int retry; + + reset = intel_get_gpu_reset(i915); + if (!reset) + return -ENODEV; + + /* + * If the power well sleeps during the reset, the reset + * request may be dropped and never completes (causing -EIO). + */ + intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); + for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) { + /* + * We stop engines, otherwise we might get failed reset and a + * dead gpu (on elk). Also as modern gpu as kbl can suffer + * from system hang if batchbuffer is progressing when + * the reset is issued, regardless of READY_TO_RESET ack. + * Thus assume it is best to stop engines on all gens + * where we have a gpu reset. + * + * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) + * + * WaMediaResetMainRingCleanup:ctg,elk (presumably) + * + * FIXME: Wa for more modern gens needs to be validated + */ + if (retry) + i915_stop_engines(i915, engine_mask); + + GEM_TRACE("engine_mask=%x\n", engine_mask); + preempt_disable(); + ret = reset(i915, engine_mask, retry); + preempt_enable(); + } + intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + + return ret; +} + +bool intel_has_gpu_reset(struct drm_i915_private *i915) +{ + if (!i915_modparams.reset) + return NULL; + + return intel_get_gpu_reset(i915); +} + +bool intel_has_reset_engine(struct drm_i915_private *i915) +{ + return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2; +} + +int intel_reset_guc(struct drm_i915_private *i915) +{ + u32 guc_domain = + INTEL_GEN(i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; + int ret; + + GEM_BUG_ON(!HAS_GUC(i915)); + + intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); + ret = gen6_hw_domain_reset(i915, guc_domain); + intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + + return ret; +} + +/* + * Ensure irq handler finishes, and not run again. 
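intel_get_gpu_reset() above is a first-match ladder over the device generation that hands back a reset backend, or NULL when the platform has no reset at all (which intel_gpu_reset() turns into -ENODEV). Sketched stand-alone with stub backends, only two rungs shown:

#include <stdio.h>

typedef int (*reset_func)(unsigned int engine_mask);

static int gen8_reset(unsigned int mask) { return printf("gen8 reset %x\n", mask); }
static int gen6_reset(unsigned int mask) { return printf("gen6 reset %x\n", mask); }

/* Newest platforms first, first match wins, NULL if reset is unsupported. */
static reset_func get_gpu_reset(int gen)
{
        if (gen >= 8)
                return gen8_reset;
        else if (gen >= 6)
                return gen6_reset;
        else
                return NULL;
}

int main(void)
{
        const int gens[] = { 11, 7, 2 };
        unsigned int i;

        for (i = 0; i < 3; i++) {
                reset_func reset = get_gpu_reset(gens[i]);

                if (!reset)
                        printf("gen%d: no gpu reset (-ENODEV)\n", gens[i]);
                else
                        reset(~0u);
        }
        return 0;
}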
+ * Also return the active request so that we only search for it once. + */ +static void reset_prepare_engine(struct intel_engine_cs *engine) +{ + /* + * During the reset sequence, we must prevent the engine from + * entering RC6. As the context state is undefined until we restart + * the engine, if it does enter RC6 during the reset, the state + * written to the powercontext is undefined and so we may lose + * GPU state upon resume, i.e. fail to restart after a reset. + */ + intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); + engine->reset.prepare(engine); +} + +static void revoke_mmaps(struct drm_i915_private *i915) +{ + int i; + + for (i = 0; i < i915->num_fence_regs; i++) { + struct drm_vma_offset_node *node; + struct i915_vma *vma; + u64 vma_offset; + + vma = READ_ONCE(i915->fence_regs[i].vma); + if (!vma) + continue; + + if (!i915_vma_has_userfault(vma)) + continue; + + GEM_BUG_ON(vma->fence != &i915->fence_regs[i]); + node = &vma->obj->base.vma_node; + vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; + unmap_mapping_range(i915->drm.anon_inode->i_mapping, + drm_vma_node_offset_addr(node) + vma_offset, + vma->size, + 1); + } +} + +static void reset_prepare(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + reset_prepare_engine(engine); + + intel_uc_reset_prepare(i915); +} + +static void gt_revoke(struct drm_i915_private *i915) +{ + revoke_mmaps(i915); +} + +static int gt_reset(struct drm_i915_private *i915, + intel_engine_mask_t stalled_mask) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + + /* + * Everything depends on having the GTT running, so we need to start + * there. + */ + err = i915_ggtt_enable_hw(i915); + if (err) + return err; + + for_each_engine(engine, i915, id) + intel_engine_reset(engine, stalled_mask & engine->mask); + + i915_gem_restore_fences(i915); + + return err; +} + +static void reset_finish_engine(struct intel_engine_cs *engine) +{ + engine->reset.finish(engine); + intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); +} + +struct i915_gpu_restart { + struct work_struct work; + struct drm_i915_private *i915; +}; + +static void restart_work(struct work_struct *work) +{ + struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work); + struct drm_i915_private *i915 = arg->i915; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(i915); + mutex_lock(&i915->drm.struct_mutex); + WRITE_ONCE(i915->gpu_error.restart, NULL); + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + /* + * Ostensibily, we always want a context loaded for powersaving, + * so if the engine is idle after the reset, send a request + * to load our scratch kernel_context. 
+ */ + if (!intel_engine_is_idle(engine)) + continue; + + rq = i915_request_alloc(engine, i915->kernel_context); + if (!IS_ERR(rq)) + i915_request_add(rq); + } + + mutex_unlock(&i915->drm.struct_mutex); + intel_runtime_pm_put(i915, wakeref); + + kfree(arg); +} + +static void reset_finish(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + reset_finish_engine(engine); + intel_engine_signal_breadcrumbs(engine); + } +} + +static void reset_restart(struct drm_i915_private *i915) +{ + struct i915_gpu_restart *arg; + + /* + * Following the reset, ensure that we always reload context for + * powersaving, and to correct engine->last_retired_context. Since + * this requires us to submit a request, queue a worker to do that + * task for us to evade any locking here. + */ + if (READ_ONCE(i915->gpu_error.restart)) + return; + + arg = kmalloc(sizeof(*arg), GFP_KERNEL); + if (arg) { + arg->i915 = i915; + INIT_WORK(&arg->work, restart_work); + + WRITE_ONCE(i915->gpu_error.restart, arg); + queue_work(i915->wq, &arg->work); + } +} + +static void nop_submit_request(struct i915_request *request) +{ + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + + GEM_TRACE("%s fence %llx:%lld -> -EIO\n", + engine->name, request->fence.context, request->fence.seqno); + dma_fence_set_error(&request->fence, -EIO); + + spin_lock_irqsave(&engine->timeline.lock, flags); + __i915_request_submit(request); + i915_request_mark_complete(request); + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + intel_engine_queue_breadcrumbs(engine); +} + +static void __i915_gem_set_wedged(struct drm_i915_private *i915) +{ + struct i915_gpu_error *error = &i915->gpu_error; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + if (test_bit(I915_WEDGED, &error->flags)) + return; + + if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) { + struct drm_printer p = drm_debug_printer(__func__); + + for_each_engine(engine, i915, id) + intel_engine_dump(engine, &p, "%s\n", engine->name); + } + + GEM_TRACE("start\n"); + + /* + * First, stop submission to hw, but do not yet complete requests by + * rolling the global seqno forward (since this would complete requests + * for which we haven't set the fence error to EIO yet). + */ + reset_prepare(i915); + + /* Even if the GPU reset fails, it should still stop the engines */ + if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) + intel_gpu_reset(i915, ALL_ENGINES); + + for_each_engine(engine, i915, id) { + engine->submit_request = nop_submit_request; + engine->schedule = NULL; + } + i915->caps.scheduler = 0; + + /* + * Make sure no request can slip through without getting completed by + * either this call here to intel_engine_write_global_seqno, or the one + * in nop_submit_request. 
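Wedging, as done by __i915_gem_set_wedged() above, boils down to a function-pointer swap: every engine's submit hook is replaced by nop_submit_request(), which errors the fence and completes the request on the spot. A tiny sketch of that pattern with hypothetical types:

#include <stdio.h>

struct request {
        int error;
        int completed;
};

struct engine {
        void (*submit_request)(struct request *rq);
};

static void real_submit(struct request *rq)
{
        rq->completed = 0;      /* hardware would complete it later */
}

/* Installed on wedging: fail and complete the request immediately. */
static void nop_submit(struct request *rq)
{
        rq->error = -5;         /* -EIO */
        rq->completed = 1;
}

int main(void)
{
        struct engine e = { .submit_request = real_submit };
        struct request rq = { 0, 0 };

        e.submit_request = nop_submit;  /* the set-wedged swap */
        e.submit_request(&rq);

        printf("error=%d completed=%d\n", rq.error, rq.completed);
        return 0;
}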
+ */ + synchronize_rcu_expedited(); + + /* Mark all executing requests as skipped */ + for_each_engine(engine, i915, id) + engine->cancel_requests(engine); + + reset_finish(i915); + + smp_mb__before_atomic(); + set_bit(I915_WEDGED, &error->flags); + + GEM_TRACE("end\n"); +} + +void i915_gem_set_wedged(struct drm_i915_private *i915) +{ + struct i915_gpu_error *error = &i915->gpu_error; + intel_wakeref_t wakeref; + + mutex_lock(&error->wedge_mutex); + with_intel_runtime_pm(i915, wakeref) + __i915_gem_set_wedged(i915); + mutex_unlock(&error->wedge_mutex); +} + +static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) +{ + struct i915_gpu_error *error = &i915->gpu_error; + struct i915_timeline *tl; + + if (!test_bit(I915_WEDGED, &error->flags)) + return true; + + if (!i915->gt.scratch) /* Never full initialised, recovery impossible */ + return false; + + GEM_TRACE("start\n"); + + /* + * Before unwedging, make sure that all pending operations + * are flushed and errored out - we may have requests waiting upon + * third party fences. We marked all inflight requests as EIO, and + * every execbuf since returned EIO, for consistency we want all + * the currently pending requests to also be marked as EIO, which + * is done inside our nop_submit_request - and so we must wait. + * + * No more can be submitted until we reset the wedged bit. + */ + mutex_lock(&i915->gt.timelines.mutex); + list_for_each_entry(tl, &i915->gt.timelines.active_list, link) { + struct i915_request *rq; + + rq = i915_active_request_get_unlocked(&tl->last_request); + if (!rq) + continue; + + /* + * All internal dependencies (i915_requests) will have + * been flushed by the set-wedge, but we may be stuck waiting + * for external fences. These should all be capped to 10s + * (I915_FENCE_TIMEOUT) so this wait should not be unbounded + * in the worst case. + */ + dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + mutex_unlock(&i915->gt.timelines.mutex); + + intel_engines_sanitize(i915, false); + + /* + * Undo nop_submit_request. We prevent all new i915 requests from + * being queued (by disallowing execbuf whilst wedged) so having + * waited for all active requests above, we know the system is idle + * and do not have to worry about a thread being inside + * engine->submit_request() as we swap over. So unlike installing + * the nop_submit_request on reset, we can do this from normal + * context and do not require stop_machine(). 
+ */ + intel_engines_reset_default_submission(i915); + + GEM_TRACE("end\n"); + + smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ + clear_bit(I915_WEDGED, &i915->gpu_error.flags); + + return true; +} + +bool i915_gem_unset_wedged(struct drm_i915_private *i915) +{ + struct i915_gpu_error *error = &i915->gpu_error; + bool result; + + mutex_lock(&error->wedge_mutex); + result = __i915_gem_unset_wedged(i915); + mutex_unlock(&error->wedge_mutex); + + return result; +} + +static int do_reset(struct drm_i915_private *i915, + intel_engine_mask_t stalled_mask) +{ + int err, i; + + gt_revoke(i915); + + err = intel_gpu_reset(i915, ALL_ENGINES); + for (i = 0; err && i < RESET_MAX_RETRIES; i++) { + msleep(10 * (i + 1)); + err = intel_gpu_reset(i915, ALL_ENGINES); + } + if (err) + return err; + + return gt_reset(i915, stalled_mask); +} + +/** + * i915_reset - reset chip after a hang + * @i915: #drm_i915_private to reset + * @stalled_mask: mask of the stalled engines with the guilty requests + * @reason: user error message for why we are resetting + * + * Reset the chip. Useful if a hang is detected. Marks the device as wedged + * on failure. + * + * Procedure is fairly simple: + * - reset the chip using the reset reg + * - re-init context state + * - re-init hardware status page + * - re-init ring buffer + * - re-init interrupt state + * - re-init display + */ +void i915_reset(struct drm_i915_private *i915, + intel_engine_mask_t stalled_mask, + const char *reason) +{ + struct i915_gpu_error *error = &i915->gpu_error; + int ret; + + GEM_TRACE("flags=%lx\n", error->flags); + + might_sleep(); + assert_rpm_wakelock_held(i915); + GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); + + /* Clear any previous failed attempts at recovery. Time to try again. */ + if (!__i915_gem_unset_wedged(i915)) + return; + + if (reason) + dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); + error->reset_count++; + + reset_prepare(i915); + + if (!intel_has_gpu_reset(i915)) { + if (i915_modparams.reset) + dev_err(i915->drm.dev, "GPU reset not supported\n"); + else + DRM_DEBUG_DRIVER("GPU reset disabled\n"); + goto error; + } + + if (INTEL_INFO(i915)->gpu_reset_clobbers_display) + intel_runtime_pm_disable_interrupts(i915); + + if (do_reset(i915, stalled_mask)) { + dev_err(i915->drm.dev, "Failed to reset chip\n"); + goto taint; + } + + if (INTEL_INFO(i915)->gpu_reset_clobbers_display) + intel_runtime_pm_enable_interrupts(i915); + + intel_overlay_reset(i915); + + /* + * Next we need to restore the context, but we don't use those + * yet either... + * + * Ring buffer needs to be re-initialized in the KMS case, or if X + * was running at the time of the reset (i.e. we weren't VT + * switched away). + */ + ret = i915_gem_init_hw(i915); + if (ret) { + DRM_ERROR("Failed to initialise HW following reset (%d)\n", + ret); + goto error; + } + + i915_queue_hangcheck(i915); + +finish: + reset_finish(i915); + if (!__i915_wedged(error)) + reset_restart(i915); + return; + +taint: + /* + * History tells us that if we cannot reset the GPU now, we + * never will. This then impacts everything that is run + * subsequently. On failing the reset, we mark the driver + * as wedged, preventing further execution on the GPU. + * We also want to go one step further and add a taint to the + * kernel so that any subsequent faults can be traced back to + * this failure. This is important for CI, where if the + * GPU/driver fails we would like to reboot and restart testing + * rather than continue on into oblivion. 
For everyone else, + * the system should still plod along, but they have been warned! + */ + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); +error: + __i915_gem_set_wedged(i915); + goto finish; +} + +static inline int intel_gt_reset_engine(struct drm_i915_private *i915, + struct intel_engine_cs *engine) +{ + return intel_gpu_reset(i915, engine->mask); +} + +/** + * i915_reset_engine - reset GPU engine to recover from a hang + * @engine: engine to reset + * @msg: reason for GPU reset; or NULL for no dev_notice() + * + * Reset a specific GPU engine. Useful if a hang is detected. + * Returns zero on successful reset or otherwise an error code. + * + * Procedure is: + * - identifies the request that caused the hang and it is dropped + * - reset engine (which will force the engine to idle) + * - re-init/configure engine + */ +int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) +{ + struct i915_gpu_error *error = &engine->i915->gpu_error; + int ret; + + GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); + GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); + + reset_prepare_engine(engine); + + if (msg) + dev_notice(engine->i915->drm.dev, + "Resetting %s for %s\n", engine->name, msg); + error->reset_engine_count[engine->id]++; + + if (!engine->i915->guc.execbuf_client) + ret = intel_gt_reset_engine(engine->i915, engine); + else + ret = intel_guc_reset_engine(&engine->i915->guc, engine); + if (ret) { + /* If we fail here, we expect to fallback to a global reset */ + DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", + engine->i915->guc.execbuf_client ? "GuC " : "", + engine->name, ret); + goto out; + } + + /* + * The request that caused the hang is stuck on elsp, we know the + * active request and can drop it, adjust head to skip the offending + * request to resume executing remaining requests in the queue. + */ + intel_engine_reset(engine, true); + + /* + * The engine and its registers (and workarounds in case of render) + * have been reset to their default values. Follow the init_ring + * process to program RING_MODE, HWSP and re-enable submission. 
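do_reset() above retries the full-chip reset with a linearly growing back-off (10ms, 20ms, 30ms) before giving up and wedging. The retry shape, sketched stand-alone with a stubbed reset that fails twice:

#include <stdio.h>

#define RESET_MAX_RETRIES 3

static int attempts;

static int try_gpu_reset(void)
{
        return ++attempts < 3 ? -1 : 0;         /* fail the first two tries */
}

int main(void)
{
        int err, i;

        err = try_gpu_reset();
        for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
                printf("reset failed, backing off %d ms\n", 10 * (i + 1));
                /* msleep(10 * (i + 1)) in the driver */
                err = try_gpu_reset();
        }

        if (err)
                printf("giving up\n");
        else
                printf("reset succeeded after %d attempts\n", attempts);
        return 0;
}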
+ */ + ret = engine->init_hw(engine); + if (ret) + goto out; + +out: + intel_engine_cancel_stop_cs(engine); + reset_finish_engine(engine); + return ret; +} + +static void i915_reset_device(struct drm_i915_private *i915, + u32 engine_mask, + const char *reason) +{ + struct i915_gpu_error *error = &i915->gpu_error; + struct kobject *kobj = &i915->drm.primary->kdev->kobj; + char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; + char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; + char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; + struct i915_wedge_me w; + + kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); + + DRM_DEBUG_DRIVER("resetting chip\n"); + kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); + + /* Use a watchdog to ensure that our reset completes */ + i915_wedge_on_timeout(&w, i915, 5 * HZ) { + intel_prepare_reset(i915); + + /* Flush everyone using a resource about to be clobbered */ + synchronize_srcu_expedited(&error->reset_backoff_srcu); + + mutex_lock(&error->wedge_mutex); + i915_reset(i915, engine_mask, reason); + mutex_unlock(&error->wedge_mutex); + + intel_finish_reset(i915); + } + + if (!test_bit(I915_WEDGED, &error->flags)) + kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); +} + +static void clear_register(struct intel_uncore *uncore, i915_reg_t reg) +{ + intel_uncore_rmw(uncore, reg, 0, 0); +} + +void i915_clear_error_registers(struct drm_i915_private *i915) +{ + struct intel_uncore *uncore = &i915->uncore; + u32 eir; + + if (!IS_GEN(i915, 2)) + clear_register(uncore, PGTBL_ER); + + if (INTEL_GEN(i915) < 4) + clear_register(uncore, IPEIR(RENDER_RING_BASE)); + else + clear_register(uncore, IPEIR_I965); + + clear_register(uncore, EIR); + eir = intel_uncore_read(uncore, EIR); + if (eir) { + /* + * some errors might have become stuck, + * mask them. + */ + DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); + rmw_set(uncore, EMR, eir); + intel_uncore_write(uncore, GEN2_IIR, + I915_MASTER_ERROR_INTERRUPT); + } + + if (INTEL_GEN(i915) >= 8) { + rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID); + intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG); + } else if (INTEL_GEN(i915) >= 6) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + rmw_clear(uncore, + RING_FAULT_REG(engine), RING_FAULT_VALID); + intel_uncore_posting_read(uncore, + RING_FAULT_REG(engine)); + } + } +} + +/** + * i915_handle_error - handle a gpu error + * @i915: i915 device private + * @engine_mask: mask representing engines that are hung + * @flags: control flags + * @fmt: Error message format string + * + * Do some basic checking of register state at error time and + * dump it to the syslog. Also call i915_capture_error_state() to make + * sure we get a record and make it available in debugfs. Fire a uevent + * so userspace knows something bad happened (should trigger collection + * of a ring dump etc.). + */ +void i915_handle_error(struct drm_i915_private *i915, + intel_engine_mask_t engine_mask, + unsigned long flags, + const char *fmt, ...) 
+{ + struct i915_gpu_error *error = &i915->gpu_error; + struct intel_engine_cs *engine; + intel_wakeref_t wakeref; + intel_engine_mask_t tmp; + char error_msg[80]; + char *msg = NULL; + + if (fmt) { + va_list args; + + va_start(args, fmt); + vscnprintf(error_msg, sizeof(error_msg), fmt, args); + va_end(args); + + msg = error_msg; + } + + /* + * In most cases it's guaranteed that we get here with an RPM + * reference held, for example because there is a pending GPU + * request that won't finish until the reset is done. This + * isn't the case at least when we get here by doing a + * simulated reset via debugfs, so get an RPM reference. + */ + wakeref = intel_runtime_pm_get(i915); + + engine_mask &= INTEL_INFO(i915)->engine_mask; + + if (flags & I915_ERROR_CAPTURE) { + i915_capture_error_state(i915, engine_mask, msg); + i915_clear_error_registers(i915); + } + + /* + * Try engine reset when available. We fall back to full reset if + * single reset fails. + */ + if (intel_has_reset_engine(i915) && !__i915_wedged(error)) { + for_each_engine_masked(engine, i915, engine_mask, tmp) { + BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); + if (test_and_set_bit(I915_RESET_ENGINE + engine->id, + &error->flags)) + continue; + + if (i915_reset_engine(engine, msg) == 0) + engine_mask &= ~engine->mask; + + clear_bit(I915_RESET_ENGINE + engine->id, + &error->flags); + wake_up_bit(&error->flags, + I915_RESET_ENGINE + engine->id); + } + } + + if (!engine_mask) + goto out; + + /* Full reset needs the mutex, stop any other user trying to do so. */ + if (test_and_set_bit(I915_RESET_BACKOFF, &error->flags)) { + wait_event(error->reset_queue, + !test_bit(I915_RESET_BACKOFF, &error->flags)); + goto out; /* piggy-back on the other reset */ + } + + /* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */ + synchronize_rcu_expedited(); + + /* Prevent any other reset-engine attempt. */ + for_each_engine(engine, i915, tmp) { + while (test_and_set_bit(I915_RESET_ENGINE + engine->id, + &error->flags)) + wait_on_bit(&error->flags, + I915_RESET_ENGINE + engine->id, + TASK_UNINTERRUPTIBLE); + } + + i915_reset_device(i915, engine_mask, msg); + + for_each_engine(engine, i915, tmp) { + clear_bit(I915_RESET_ENGINE + engine->id, + &error->flags); + } + + clear_bit(I915_RESET_BACKOFF, &error->flags); + wake_up_all(&error->reset_queue); + +out: + intel_runtime_pm_put(i915, wakeref); +} + +int i915_reset_trylock(struct drm_i915_private *i915) +{ + struct i915_gpu_error *error = &i915->gpu_error; + int srcu; + + might_lock(&error->reset_backoff_srcu); + might_sleep(); + + rcu_read_lock(); + while (test_bit(I915_RESET_BACKOFF, &error->flags)) { + rcu_read_unlock(); + + if (wait_event_interruptible(error->reset_queue, + !test_bit(I915_RESET_BACKOFF, + &error->flags))) + return -EINTR; + + rcu_read_lock(); + } + srcu = srcu_read_lock(&error->reset_backoff_srcu); + rcu_read_unlock(); + + return srcu; +} + +void i915_reset_unlock(struct drm_i915_private *i915, int tag) +__releases(&i915->gpu_error.reset_backoff_srcu) +{ + struct i915_gpu_error *error = &i915->gpu_error; + + srcu_read_unlock(&error->reset_backoff_srcu, tag); +} + +int i915_terminally_wedged(struct drm_i915_private *i915) +{ + struct i915_gpu_error *error = &i915->gpu_error; + + might_sleep(); + + if (!__i915_wedged(error)) + return 0; + + /* Reset still in progress? Maybe we will recover? */ + if (!test_bit(I915_RESET_BACKOFF, &error->flags)) + return -EIO; + + /* XXX intel_reset_finish() still takes struct_mutex!!! 
*/ + if (mutex_is_locked(&i915->drm.struct_mutex)) + return -EAGAIN; + + if (wait_event_interruptible(error->reset_queue, + !test_bit(I915_RESET_BACKOFF, + &error->flags))) + return -EINTR; + + return __i915_wedged(error) ? -EIO : 0; +} + +bool i915_reset_flush(struct drm_i915_private *i915) +{ + int err; + + cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + + flush_workqueue(i915->wq); + GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart)); + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED | + I915_WAIT_FOR_IDLE_BOOST, + MAX_SCHEDULE_TIMEOUT); + mutex_unlock(&i915->drm.struct_mutex); + + return !err; +} + +static void i915_wedge_me(struct work_struct *work) +{ + struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); + + dev_err(w->i915->drm.dev, + "%s timed out, cancelling all in-flight rendering.\n", + w->name); + i915_gem_set_wedged(w->i915); +} + +void __i915_init_wedge(struct i915_wedge_me *w, + struct drm_i915_private *i915, + long timeout, + const char *name) +{ + w->i915 = i915; + w->name = name; + + INIT_DELAYED_WORK_ONSTACK(&w->work, i915_wedge_me); + schedule_delayed_work(&w->work, timeout); +} + +void __i915_fini_wedge(struct i915_wedge_me *w) +{ + cancel_delayed_work_sync(&w->work); + destroy_delayed_work_on_stack(&w->work); + w->i915 = NULL; +} diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h new file mode 100644 index 000000000000..8e662bb43a9b --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -0,0 +1,69 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2008-2018 Intel Corporation + */ + +#ifndef I915_RESET_H +#define I915_RESET_H + +#include +#include +#include + +#include "gt/intel_engine_types.h" + +struct drm_i915_private; +struct i915_request; +struct intel_engine_cs; +struct intel_guc; + +__printf(4, 5) +void i915_handle_error(struct drm_i915_private *i915, + intel_engine_mask_t engine_mask, + unsigned long flags, + const char *fmt, ...); +#define I915_ERROR_CAPTURE BIT(0) + +void i915_clear_error_registers(struct drm_i915_private *i915); + +void i915_reset(struct drm_i915_private *i915, + intel_engine_mask_t stalled_mask, + const char *reason); +int i915_reset_engine(struct intel_engine_cs *engine, + const char *reason); + +void i915_reset_request(struct i915_request *rq, bool guilty); +bool i915_reset_flush(struct drm_i915_private *i915); + +int __must_check i915_reset_trylock(struct drm_i915_private *i915); +void i915_reset_unlock(struct drm_i915_private *i915, int tag); + +int i915_terminally_wedged(struct drm_i915_private *i915); + +bool intel_has_gpu_reset(struct drm_i915_private *i915); +bool intel_has_reset_engine(struct drm_i915_private *i915); + +int intel_gpu_reset(struct drm_i915_private *i915, + intel_engine_mask_t engine_mask); + +int intel_reset_guc(struct drm_i915_private *i915); + +struct i915_wedge_me { + struct delayed_work work; + struct drm_i915_private *i915; + const char *name; +}; + +void __i915_init_wedge(struct i915_wedge_me *w, + struct drm_i915_private *i915, + long timeout, + const char *name); +void __i915_fini_wedge(struct i915_wedge_me *w); + +#define i915_wedge_on_timeout(W, DEV, TIMEOUT) \ + for (__i915_init_wedge((W), (DEV), (TIMEOUT), __func__); \ + (W)->i915; \ + __i915_fini_wedge((W))) + +#endif /* I915_RESET_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c new file mode 100644 index 000000000000..ac84a383748e --- /dev/null +++ 
b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -0,0 +1,2338 @@ +/* + * Copyright © 2008-2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * Zou Nan hai + * Xiang Hai hao + * + */ + +#include + +#include + +#include "i915_drv.h" +#include "i915_gem_render_state.h" +#include "i915_trace.h" +#include "intel_reset.h" +#include "intel_workarounds.h" + +/* Rough estimate of the typical request size, performing a flush, + * set-context and then emitting the batch. + */ +#define LEGACY_REQUEST_SIZE 200 + +unsigned int intel_ring_update_space(struct intel_ring *ring) +{ + unsigned int space; + + space = __intel_ring_space(ring->head, ring->emit, ring->size); + + ring->space = space; + return space; +} + +static int +gen2_render_ring_flush(struct i915_request *rq, u32 mode) +{ + unsigned int num_store_dw; + u32 cmd, *cs; + + cmd = MI_FLUSH; + num_store_dw = 0; + if (mode & EMIT_INVALIDATE) + cmd |= MI_READ_FLUSH; + if (mode & EMIT_FLUSH) + num_store_dw = 4; + + cs = intel_ring_begin(rq, 2 + 3 * num_store_dw); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = cmd; + while (num_store_dw--) { + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cs++ = i915_scratch_offset(rq->i915); + *cs++ = 0; + } + *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH; + + intel_ring_advance(rq, cs); + + return 0; +} + +static int +gen4_render_ring_flush(struct i915_request *rq, u32 mode) +{ + u32 cmd, *cs; + int i; + + /* + * read/write caches: + * + * I915_GEM_DOMAIN_RENDER is always invalidated, but is + * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is + * also flushed at 2d versus 3d pipeline switches. + * + * read-only caches: + * + * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if + * MI_READ_FLUSH is set, and is always flushed on 965. + * + * I915_GEM_DOMAIN_COMMAND may not exist? + * + * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is + * invalidated when MI_EXE_FLUSH is set. + * + * I915_GEM_DOMAIN_VERTEX, which exists on 965, is + * invalidated with every MI_FLUSH. + * + * TLBs: + * + * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND + * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and + * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER + * are flushed at any MI_FLUSH. 
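The flush emitters in this file all follow the same discipline: size the packet up front, reserve exactly that many dwords, write them, then advance. As a rough stand-alone illustration of that shape (the toy_ring type and its helpers are invented for this sketch and are not the driver's API; the opcode values are arbitrary stand-ins, not real MI_* encodings):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_RING_DWORDS 64

struct toy_ring {
	uint32_t dwords[TOY_RING_DWORDS];
	unsigned int emit;	/* index of the next free dword */
	unsigned int reserved;	/* dwords handed out by _begin() */
};

/* Reserve room for @count dwords, as intel_ring_begin() does. */
static uint32_t *toy_ring_begin(struct toy_ring *ring, unsigned int count)
{
	if (ring->emit + count > TOY_RING_DWORDS)
		return NULL;	/* the real ring wraps or waits instead */

	ring->reserved = count;
	return &ring->dwords[ring->emit];
}

/* Commit what was written since _begin(), as intel_ring_advance() does. */
static void toy_ring_advance(struct toy_ring *ring, const uint32_t *cs)
{
	unsigned int used = cs - &ring->dwords[ring->emit];

	assert(used == ring->reserved);	/* over/under-emission is a bug */
	ring->emit += used;
	ring->reserved = 0;
}

int main(void)
{
	struct toy_ring ring;
	unsigned int num_store_dw = 4;	/* the EMIT_FLUSH case above */
	uint32_t *cs;

	memset(&ring, 0, sizeof(ring));

	/* Two fixed dwords plus three per store, matching the sizing above. */
	cs = toy_ring_begin(&ring, 2 + 3 * num_store_dw);
	if (!cs)
		return 1;

	*cs++ = 0x1;			/* stand-in for MI_FLUSH */
	while (num_store_dw--) {
		*cs++ = 0x2;		/* stand-in for MI_STORE_DWORD_IMM */
		*cs++ = 0x0;		/* address */
		*cs++ = 0x0;		/* value */
	}
	*cs++ = 0x3;			/* stand-in for the trailing flush */

	toy_ring_advance(&ring, cs);
	printf("emitted %u dwords\n", ring.emit);
	return 0;
}

The assert in toy_ring_advance() plays the role of the driver's sizing checks: the number of dwords written must match the number reserved, which is why every conditional emission has to be counted before intel_ring_begin() is called.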
+ */ + + cmd = MI_FLUSH; + if (mode & EMIT_INVALIDATE) { + cmd |= MI_EXE_FLUSH; + if (IS_G4X(rq->i915) || IS_GEN(rq->i915, 5)) + cmd |= MI_INVALIDATE_ISP; + } + + i = 2; + if (mode & EMIT_INVALIDATE) + i += 20; + + cs = intel_ring_begin(rq, i); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = cmd; + + /* + * A random delay to let the CS invalidate take effect? Without this + * delay, the GPU relocation path fails as the CS does not see + * the updated contents. Just as important, if we apply the flushes + * to the EMIT_FLUSH branch (i.e. immediately after the relocation + * write and before the invalidate on the next batch), the relocations + * still fail. This implies that is a delay following invalidation + * that is required to reset the caches as opposed to a delay to + * ensure the memory is written. + */ + if (mode & EMIT_INVALIDATE) { + *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; + *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + + for (i = 0; i < 12; i++) + *cs++ = MI_FLUSH; + + *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; + *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + } + + *cs++ = cmd; + + intel_ring_advance(rq, cs); + + return 0; +} + +/* + * Emits a PIPE_CONTROL with a non-zero post-sync operation, for + * implementing two workarounds on gen6. From section 1.4.7.1 + * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: + * + * [DevSNB-C+{W/A}] Before any depth stall flush (including those + * produced by non-pipelined state commands), software needs to first + * send a PIPE_CONTROL with no bits set except Post-Sync Operation != + * 0. + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable + * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. + * + * And the workaround for these two requires this workaround first: + * + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * BEFORE the pipe-control with a post-sync op and no write-cache + * flushes. + * + * And this last workaround is tricky because of the requirements on + * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM + * volume 2 part 1: + * + * "1 of the following must also be set: + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * - Notify Enable ([8] of DW1)" + * + * The cache flushes require the workaround flush that triggered this + * one, so we can't use it. Depth stall would trigger the same. + * Post-sync nonzero is what triggered this second workaround, so we + * can't use that one either. Notify enable is IRQs, which aren't + * really our business. That leaves only stall at scoreboard. 
+ */ +static int +gen6_emit_post_sync_nonzero_flush(struct i915_request *rq) +{ + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; + u32 *cs; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; /* low dword */ + *cs++ = 0; /* high dword */ + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_QW_WRITE; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +static int +gen6_render_ring_flush(struct i915_request *rq, u32 mode) +{ + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; + u32 *cs, flags = 0; + int ret; + + /* Force SNB workarounds for PIPE_CONTROL flushes */ + ret = gen6_emit_post_sync_nonzero_flush(rq); + if (ret) + return ret; + + /* Just flush everything. Experiments have shown that reducing the + * number of bits based on the write domains has little performance + * impact. + */ + if (mode & EMIT_FLUSH) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + /* + * Ensure that any following seqno writes only happen + * when the render cache is indeed flushed. + */ + flags |= PIPE_CONTROL_CS_STALL; + } + if (mode & EMIT_INVALIDATE) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + /* + * TLB invalidate requires a post-sync write. 
+ */ + flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + intel_ring_advance(rq, cs); + + return 0; +} + +static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + /* First we do the gen6_emit_post_sync_nonzero_flush w/a */ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = 0; + *cs++ = 0; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_QW_WRITE; + *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + + /* Finally we can flush and with it emit the breadcrumb */ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_CS_STALL); + *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_STORE_DATA_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} + +static int +gen7_render_ring_cs_stall_wa(struct i915_request *rq) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = 0; + *cs++ = 0; + intel_ring_advance(rq, cs); + + return 0; +} + +static int +gen7_render_ring_flush(struct i915_request *rq, u32 mode) +{ + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; + u32 *cs, flags = 0; + + /* + * Ensure that any following seqno writes only happen when the render + * cache is indeed flushed. + * + * Workaround: 4th PIPE_CONTROL command (except the ones with only + * read-cache invalidate bits set) must have the CS_STALL bit set. We + * don't try to be clever and just set it unconditionally. + */ + flags |= PIPE_CONTROL_CS_STALL; + + /* Just flush everything. Experiments have shown that reducing the + * number of bits based on the write domains has little performance + * impact. + */ + if (mode & EMIT_FLUSH) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; + flags |= PIPE_CONTROL_FLUSH_ENABLE; + } + if (mode & EMIT_INVALIDATE) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; + /* + * TLB invalidate requires a post-sync write. + */ + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + + flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; + + /* Workaround: we must issue a pipe_control with CS-stall bit + * set before a pipe_control command that has the state cache + * invalidate bit set. 
*/ + gen7_render_ring_cs_stall_wa(rq); + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr; + *cs++ = 0; + intel_ring_advance(rq, cs); + + return 0; +} + +static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL); + *cs++ = rq->timeline->hwsp_offset; + *cs++ = rq->fence.seqno; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = (PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_STORE_DATA_INDEX | + PIPE_CONTROL_GLOBAL_GTT_IVB); + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} + +static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} + +#define GEN7_XCS_WA 32 +static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + int i; + + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + + for (i = 0; i < GEN7_XCS_WA; i++) { + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + } + + *cs++ = MI_FLUSH_DW; + *cs++ = 0; + *cs++ = 0; + + *cs++ = MI_USER_INTERRUPT; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} +#undef GEN7_XCS_WA + +static void set_hwstam(struct intel_engine_cs *engine, u32 mask) +{ + /* + * Keep the render interrupt unmasked as this papers over + * lost interrupts following a reset. 
+ */ + if (engine->class == RENDER_CLASS) { + if (INTEL_GEN(engine->i915) >= 6) + mask &= ~BIT(0); + else + mask &= ~I915_USER_INTERRUPT; + } + + intel_engine_set_hwsp_writemask(engine, mask); +} + +static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys) +{ + struct drm_i915_private *dev_priv = engine->i915; + u32 addr; + + addr = lower_32_bits(phys); + if (INTEL_GEN(dev_priv) >= 4) + addr |= (phys >> 28) & 0xf0; + + I915_WRITE(HWS_PGA, addr); +} + +static struct page *status_page(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj = engine->status_page.vma->obj; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + return sg_page(obj->mm.pages->sgl); +} + +static void ring_setup_phys_status_page(struct intel_engine_cs *engine) +{ + set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine)))); + set_hwstam(engine, ~0u); +} + +static void set_hwsp(struct intel_engine_cs *engine, u32 offset) +{ + struct drm_i915_private *dev_priv = engine->i915; + i915_reg_t hwsp; + + /* + * The ring status page addresses are no longer next to the rest of + * the ring registers as of gen7. + */ + if (IS_GEN(dev_priv, 7)) { + switch (engine->id) { + /* + * No more rings exist on Gen7. Default case is only to shut up + * gcc switch check warning. + */ + default: + GEM_BUG_ON(engine->id); + /* fallthrough */ + case RCS0: + hwsp = RENDER_HWS_PGA_GEN7; + break; + case BCS0: + hwsp = BLT_HWS_PGA_GEN7; + break; + case VCS0: + hwsp = BSD_HWS_PGA_GEN7; + break; + case VECS0: + hwsp = VEBOX_HWS_PGA_GEN7; + break; + } + } else if (IS_GEN(dev_priv, 6)) { + hwsp = RING_HWS_PGA_GEN6(engine->mmio_base); + } else { + hwsp = RING_HWS_PGA(engine->mmio_base); + } + + I915_WRITE(hwsp, offset); + POSTING_READ(hwsp); +} + +static void flush_cs_tlb(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + if (!IS_GEN_RANGE(dev_priv, 6, 7)) + return; + + /* ring should be idle before issuing a sync flush*/ + WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); + + ENGINE_WRITE(engine, RING_INSTPM, + _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | + INSTPM_SYNC_FLUSH)); + if (intel_wait_for_register(engine->uncore, + RING_INSTPM(engine->mmio_base), + INSTPM_SYNC_FLUSH, 0, + 1000)) + DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n", + engine->name); +} + +static void ring_setup_status_page(struct intel_engine_cs *engine) +{ + set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma)); + set_hwstam(engine, ~0u); + + flush_cs_tlb(engine); +} + +static bool stop_ring(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + if (INTEL_GEN(dev_priv) > 2) { + ENGINE_WRITE(engine, + RING_MI_MODE, _MASKED_BIT_ENABLE(STOP_RING)); + if (intel_wait_for_register(engine->uncore, + RING_MI_MODE(engine->mmio_base), + MODE_IDLE, + MODE_IDLE, + 1000)) { + DRM_ERROR("%s : timed out trying to stop ring\n", + engine->name); + + /* + * Sometimes we observe that the idle flag is not + * set even though the ring is empty. So double + * check before giving up. 
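stop_ring() and flush_cs_tlb() above both lean on the same masked poll-with-timeout idiom via intel_wait_for_register(). A minimal stand-alone sketch of that idiom, where toy_read_reg(), the bit position and the millisecond clock are placeholders for the uncore accessors and the driver's wait helpers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint32_t toy_read_reg(void)
{
	/* Pretend the idle bit (bit 9, chosen arbitrarily) eventually sets. */
	static int calls;

	return ++calls > 3 ? 1u << 9 : 0;
}

static uint64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

/* Return 0 once (reg & mask) == value, or -1 after @timeout_ms. */
static int toy_wait_for_register(uint32_t mask, uint32_t value,
				 unsigned int timeout_ms)
{
	uint64_t deadline = now_ms() + timeout_ms;

	do {
		if ((toy_read_reg() & mask) == value)
			return 0;
	} while (now_ms() < deadline);

	return -1;
}

int main(void)
{
	const uint32_t MODE_IDLE_BIT = 1u << 9;	/* demo value only */

	if (toy_wait_for_register(MODE_IDLE_BIT, MODE_IDLE_BIT, 1000))
		fprintf(stderr, "timed out waiting for ring to idle\n");
	else
		printf("ring reported idle\n");
	return 0;
}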
+ */ + if (ENGINE_READ(engine, RING_HEAD) != + ENGINE_READ(engine, RING_TAIL)) + return false; + } + } + + ENGINE_WRITE(engine, RING_HEAD, ENGINE_READ(engine, RING_TAIL)); + + ENGINE_WRITE(engine, RING_HEAD, 0); + ENGINE_WRITE(engine, RING_TAIL, 0); + + /* The ring must be empty before it is disabled */ + ENGINE_WRITE(engine, RING_CTL, 0); + + return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0; +} + +static int init_ring_common(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct intel_ring *ring = engine->buffer; + int ret = 0; + + intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); + + if (!stop_ring(engine)) { + /* G45 ring initialization often fails to reset head to zero */ + DRM_DEBUG_DRIVER("%s head not reset to zero " + "ctl %08x head %08x tail %08x start %08x\n", + engine->name, + ENGINE_READ(engine, RING_CTL), + ENGINE_READ(engine, RING_HEAD), + ENGINE_READ(engine, RING_TAIL), + ENGINE_READ(engine, RING_START)); + + if (!stop_ring(engine)) { + DRM_ERROR("failed to set %s head to zero " + "ctl %08x head %08x tail %08x start %08x\n", + engine->name, + ENGINE_READ(engine, RING_CTL), + ENGINE_READ(engine, RING_HEAD), + ENGINE_READ(engine, RING_TAIL), + ENGINE_READ(engine, RING_START)); + ret = -EIO; + goto out; + } + } + + if (HWS_NEEDS_PHYSICAL(dev_priv)) + ring_setup_phys_status_page(engine); + else + ring_setup_status_page(engine); + + intel_engine_reset_breadcrumbs(engine); + + /* Enforce ordering by reading HEAD register back */ + ENGINE_READ(engine, RING_HEAD); + + /* Initialize the ring. This must happen _after_ we've cleared the ring + * registers with the above sequence (the readback of the HEAD registers + * also enforces ordering), otherwise the hw might lose the new ring + * register values. */ + ENGINE_WRITE(engine, RING_START, i915_ggtt_offset(ring->vma)); + + /* WaClearRingBufHeadRegAtInit:ctg,elk */ + if (ENGINE_READ(engine, RING_HEAD)) + DRM_DEBUG_DRIVER("%s initialization failed [head=%08x], fudging\n", + engine->name, ENGINE_READ(engine, RING_HEAD)); + + /* Check that the ring offsets point within the ring! */ + GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); + GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); + intel_ring_update_space(ring); + + /* First wake the ring up to an empty/idle ring */ + ENGINE_WRITE(engine, RING_HEAD, ring->head); + ENGINE_WRITE(engine, RING_TAIL, ring->head); + ENGINE_POSTING_READ(engine, RING_TAIL); + + ENGINE_WRITE(engine, RING_CTL, RING_CTL_SIZE(ring->size) | RING_VALID); + + /* If the head is still not zero, the ring is dead */ + if (intel_wait_for_register(engine->uncore, + RING_CTL(engine->mmio_base), + RING_VALID, RING_VALID, + 50)) { + DRM_ERROR("%s initialization failed " + "ctl %08x (valid? 
%d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", + engine->name, + ENGINE_READ(engine, RING_CTL), + ENGINE_READ(engine, RING_CTL) & RING_VALID, + ENGINE_READ(engine, RING_HEAD), ring->head, + ENGINE_READ(engine, RING_TAIL), ring->tail, + ENGINE_READ(engine, RING_START), + i915_ggtt_offset(ring->vma)); + ret = -EIO; + goto out; + } + + if (INTEL_GEN(dev_priv) > 2) + ENGINE_WRITE(engine, + RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); + + /* Now awake, let it get started */ + if (ring->tail != ring->head) { + ENGINE_WRITE(engine, RING_TAIL, ring->tail); + ENGINE_POSTING_READ(engine, RING_TAIL); + } + + /* Papering over lost _interrupts_ immediately following the restart */ + intel_engine_queue_breadcrumbs(engine); +out: + intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); + + return ret; +} + +static void reset_prepare(struct intel_engine_cs *engine) +{ + intel_engine_stop_cs(engine); +} + +static void reset_ring(struct intel_engine_cs *engine, bool stalled) +{ + struct i915_timeline *tl = &engine->timeline; + struct i915_request *pos, *rq; + unsigned long flags; + u32 head; + + rq = NULL; + spin_lock_irqsave(&tl->lock, flags); + list_for_each_entry(pos, &tl->requests, link) { + if (!i915_request_completed(pos)) { + rq = pos; + break; + } + } + + /* + * The guilty request will get skipped on a hung engine. + * + * Users of client default contexts do not rely on logical + * state preserved between batches so it is safe to execute + * queued requests following the hang. Non default contexts + * rely on preserved state, so skipping a batch loses the + * evolution of the state and it needs to be considered corrupted. + * Executing more queued batches on top of corrupted state is + * risky. But we take the risk by trying to advance through + * the queued requests in order to make the client behaviour + * more predictable around resets, by not throwing away random + * amount of batches it has prepared for execution. Sophisticated + * clients can use gem_reset_stats_ioctl and dma fence status + * (exported via sync_file info ioctl on explicit fences) to observe + * when it loses the context state and should rebuild accordingly. + * + * The context ban, and ultimately the client ban, mechanism are safety + * valves if client submission ends up resulting in nothing more than + * subsequent hangs. + */ + + if (rq) { + /* + * Try to restore the logical GPU state to match the + * continuation of the request queue. If we skip the + * context/PD restore, then the next request may try to execute + * assuming that its context is valid and loaded on the GPU and + * so may try to access invalid memory, prompting repeated GPU + * hangs. + * + * If the request was guilty, we still restore the logical + * state in case the next request requires it (e.g. the + * aliasing ppgtt), but skip over the hung batch. + * + * If the request was innocent, we try to replay the request + * with the restored context. 
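reset_ring() above boils down to one decision: resume from the head of the first request that has not completed, or from the current tail if nothing is pending. A simplified stand-alone model, where struct toy_request and the fixed array stand in for the timeline's request list:

#include <stdbool.h>
#include <stdio.h>

struct toy_request {
	unsigned int head;	/* ring offset where the request starts */
	bool completed;
};

static unsigned int
pick_resume_head(const struct toy_request *reqs, int count,
		 unsigned int tail, unsigned int ring_size)
{
	unsigned int head = tail;	/* nothing pending: resume at tail */
	int i;

	for (i = 0; i < count; i++) {
		if (!reqs[i].completed) {
			/* replay from the first incomplete request */
			head = reqs[i].head;
			break;
		}
	}

	return head & (ring_size - 1);	/* the intel_ring_wrap() step */
}

int main(void)
{
	const struct toy_request reqs[] = {
		{ .head = 0x040, .completed = true },
		{ .head = 0x2c0, .completed = false },	/* hung here */
		{ .head = 0x400, .completed = false },
	};

	printf("resume head = 0x%x\n",
	       pick_resume_head(reqs, 3, 0x500, 0x1000));
	return 0;
}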
+ */ + i915_reset_request(rq, stalled); + + GEM_BUG_ON(rq->ring != engine->buffer); + head = rq->head; + } else { + head = engine->buffer->tail; + } + engine->buffer->head = intel_ring_wrap(engine->buffer, head); + + spin_unlock_irqrestore(&tl->lock, flags); +} + +static void reset_finish(struct intel_engine_cs *engine) +{ +} + +static int intel_rcs_ctx_init(struct i915_request *rq) +{ + int ret; + + ret = intel_engine_emit_ctx_wa(rq); + if (ret != 0) + return ret; + + ret = i915_gem_render_state_emit(rq); + if (ret) + return ret; + + return 0; +} + +static int init_render_ring(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ + if (IS_GEN_RANGE(dev_priv, 4, 6)) + I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); + + /* We need to disable the AsyncFlip performance optimisations in order + * to use MI_WAIT_FOR_EVENT within the CS. It should already be + * programmed to '1' on all products. + * + * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv + */ + if (IS_GEN_RANGE(dev_priv, 6, 7)) + I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); + + /* Required for the hardware to program scanline values for waiting */ + /* WaEnableFlushTlbInvalidationMode:snb */ + if (IS_GEN(dev_priv, 6)) + I915_WRITE(GFX_MODE, + _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT)); + + /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ + if (IS_GEN(dev_priv, 7)) + I915_WRITE(GFX_MODE_GEN7, + _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) | + _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); + + if (IS_GEN(dev_priv, 6)) { + /* From the Sandybridge PRM, volume 1 part 3, page 24: + * "If this bit is set, STCunit will have LRA as replacement + * policy. [...] This bit must be reset. LRA replacement + * policy is not supported." + */ + I915_WRITE(CACHE_MODE_0, + _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); + } + + if (IS_GEN_RANGE(dev_priv, 6, 7)) + I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); + + return init_ring_common(engine); +} + +static void cancel_requests(struct intel_engine_cs *engine) +{ + struct i915_request *request; + unsigned long flags; + + spin_lock_irqsave(&engine->timeline.lock, flags); + + /* Mark all submitted requests as skipped. */ + list_for_each_entry(request, &engine->timeline.requests, link) { + if (!i915_request_signaled(request)) + dma_fence_set_error(&request->fence, -EIO); + + i915_request_mark_complete(request); + } + + /* Remaining _unready_ requests will be nop'ed when submitted */ + + spin_unlock_irqrestore(&engine->timeline.lock, flags); +} + +static void i9xx_submit_request(struct i915_request *request) +{ + i915_request_submit(request); + + ENGINE_WRITE(request->engine, RING_TAIL, + intel_ring_set_tail(request->ring, request->tail)); +} + +static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH; + + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + + *cs++ = MI_USER_INTERRUPT; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} + +#define GEN5_WA_STORES 8 /* must be at least 1! 
*/ +static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + int i; + + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH; + + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + + BUILD_BUG_ON(GEN5_WA_STORES < 1); + for (i = 0; i < GEN5_WA_STORES; i++) { + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + } + + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} +#undef GEN5_WA_STORES + +static void +gen5_irq_enable(struct intel_engine_cs *engine) +{ + gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask); +} + +static void +gen5_irq_disable(struct intel_engine_cs *engine) +{ + gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask); +} + +static void +i9xx_irq_enable(struct intel_engine_cs *engine) +{ + engine->i915->irq_mask &= ~engine->irq_enable_mask; + intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); + intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR); +} + +static void +i9xx_irq_disable(struct intel_engine_cs *engine) +{ + engine->i915->irq_mask |= engine->irq_enable_mask; + intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); +} + +static void +i8xx_irq_enable(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + dev_priv->irq_mask &= ~engine->irq_enable_mask; + I915_WRITE16(GEN2_IMR, dev_priv->irq_mask); + POSTING_READ16(RING_IMR(engine->mmio_base)); +} + +static void +i8xx_irq_disable(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + dev_priv->irq_mask |= engine->irq_enable_mask; + I915_WRITE16(GEN2_IMR, dev_priv->irq_mask); +} + +static int +bsd_ring_flush(struct i915_request *rq, u32 mode) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + return 0; +} + +static void +gen6_irq_enable(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, + ~(engine->irq_enable_mask | engine->irq_keep_mask)); + + /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ + ENGINE_POSTING_READ(engine, RING_IMR); + + gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask); +} + +static void +gen6_irq_disable(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); + gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask); +} + +static void +hsw_vebox_irq_enable(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask); + + /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ + ENGINE_POSTING_READ(engine, RING_IMR); + + gen6_unmask_pm_irq(engine->i915, engine->irq_enable_mask); +} + +static void +hsw_vebox_irq_disable(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, ~0); + gen6_mask_pm_irq(engine->i915, engine->irq_enable_mask); +} + +static int +i965_emit_bb_start(struct i915_request *rq, + u64 offset, u32 length, + unsigned int dispatch_flags) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & + I915_DISPATCH_SECURE ? 
0 : MI_BATCH_NON_SECURE_I965); + *cs++ = offset; + intel_ring_advance(rq, cs); + + return 0; +} + +/* Just userspace ABI convention to limit the wa batch bo to a resonable size */ +#define I830_BATCH_LIMIT SZ_256K +#define I830_TLB_ENTRIES (2) +#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) +static int +i830_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags) +{ + u32 *cs, cs_offset = i915_scratch_offset(rq->i915); + + GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Evict the invalid PTE TLBs */ + *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096; + *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */ + *cs++ = cs_offset; + *cs++ = 0xdeadbeef; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { + if (len > I830_BATCH_LIMIT) + return -ENOSPC; + + cs = intel_ring_begin(rq, 6 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Blit the batch (which has now all relocs applied) to the + * stable batch scratch bo area (so that the CS never + * stumbles over its tlb invalidation bug) ... + */ + *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; + *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; + *cs++ = cs_offset; + *cs++ = 4096; + *cs++ = offset; + + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + /* ... and execute it. */ + offset = cs_offset; + } + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : + MI_BATCH_NON_SECURE); + intel_ring_advance(rq, cs); + + return 0; +} + +static int +i915_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : + MI_BATCH_NON_SECURE); + intel_ring_advance(rq, cs); + + return 0; +} + +int intel_ring_pin(struct intel_ring *ring) +{ + struct i915_vma *vma = ring->vma; + enum i915_map_type map = i915_coherent_map_type(vma->vm->i915); + unsigned int flags; + void *addr; + int ret; + + GEM_BUG_ON(ring->vaddr); + + ret = i915_timeline_pin(ring->timeline); + if (ret) + return ret; + + flags = PIN_GLOBAL; + + /* Ring wraparound at offset 0 sometimes hangs. No idea why. 
*/ + flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); + + if (vma->obj->stolen) + flags |= PIN_MAPPABLE; + else + flags |= PIN_HIGH; + + ret = i915_vma_pin(vma, 0, 0, flags); + if (unlikely(ret)) + goto unpin_timeline; + + if (i915_vma_is_map_and_fenceable(vma)) + addr = (void __force *)i915_vma_pin_iomap(vma); + else + addr = i915_gem_object_pin_map(vma->obj, map); + if (IS_ERR(addr)) { + ret = PTR_ERR(addr); + goto unpin_ring; + } + + vma->obj->pin_global++; + + ring->vaddr = addr; + return 0; + +unpin_ring: + i915_vma_unpin(vma); +unpin_timeline: + i915_timeline_unpin(ring->timeline); + return ret; +} + +void intel_ring_reset(struct intel_ring *ring, u32 tail) +{ + GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); + + ring->tail = tail; + ring->head = tail; + ring->emit = tail; + intel_ring_update_space(ring); +} + +void intel_ring_unpin(struct intel_ring *ring) +{ + GEM_BUG_ON(!ring->vma); + GEM_BUG_ON(!ring->vaddr); + + /* Discard any unused bytes beyond that submitted to hw. */ + intel_ring_reset(ring, ring->tail); + + if (i915_vma_is_map_and_fenceable(ring->vma)) + i915_vma_unpin_iomap(ring->vma); + else + i915_gem_object_unpin_map(ring->vma->obj); + ring->vaddr = NULL; + + ring->vma->obj->pin_global--; + i915_vma_unpin(ring->vma); + + i915_timeline_unpin(ring->timeline); +} + +static struct i915_vma * +intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) +{ + struct i915_address_space *vm = &dev_priv->ggtt.vm; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create_stolen(dev_priv, size); + if (!obj) + obj = i915_gem_object_create_internal(dev_priv, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + /* + * Mark ring buffers as read-only from GPU side (so no stray overwrites) + * if supported by the platform's GGTT. + */ + if (vm->has_read_only) + i915_gem_object_set_readonly(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return vma; +} + +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, + struct i915_timeline *timeline, + int size) +{ + struct intel_ring *ring; + struct i915_vma *vma; + + GEM_BUG_ON(!is_power_of_2(size)); + GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); + GEM_BUG_ON(timeline == &engine->timeline); + lockdep_assert_held(&engine->i915->drm.struct_mutex); + + ring = kzalloc(sizeof(*ring), GFP_KERNEL); + if (!ring) + return ERR_PTR(-ENOMEM); + + kref_init(&ring->ref); + INIT_LIST_HEAD(&ring->request_list); + ring->timeline = i915_timeline_get(timeline); + + ring->size = size; + /* Workaround an erratum on the i830 which causes a hang if + * the TAIL pointer points to within the last 2 cachelines + * of the buffer. 
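The space bookkeeping behind intel_ring_update_space() and intel_ring_reset() above reduces to modular arithmetic on a power-of-two ring, with a little slack kept so head and tail never collide. A toy version follows; the guard size and sample values are chosen for illustration and the driver's exact slack may differ:

#include <assert.h>
#include <stdio.h>

#define TOY_CACHELINE 64u

static unsigned int toy_ring_space(unsigned int head, unsigned int emit,
				   unsigned int size)
{
	/* power of two, as the GEM_BUG_ON()s above insist */
	assert((size & (size - 1)) == 0);

	return (head - emit - TOY_CACHELINE) & (size - 1);
}

int main(void)
{
	unsigned int size = 32 * 4096;	/* 32 pages, as in intel_init_ring_buffer() */

	/* Empty ring: almost the whole buffer is available. */
	printf("empty      : %u bytes\n", toy_ring_space(0, 0, size));

	/* Writer nearly caught up with the reader. */
	printf("nearly full: %u bytes\n", toy_ring_space(0x100, 0x80, size));

	/* Writer wrapped to the start while the reader lags near the end. */
	printf("wrapped    : %u bytes\n", toy_ring_space(0x1f000, 0x200, size));

	/* i830-style trim: the tail is never allowed into the last two
	 * cachelines, as in intel_engine_create_ring() above.
	 */
	printf("effective  : %u bytes\n", size - 2 * TOY_CACHELINE);
	return 0;
}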
+ */ + ring->effective_size = size; + if (IS_I830(engine->i915) || IS_I845G(engine->i915)) + ring->effective_size -= 2 * CACHELINE_BYTES; + + intel_ring_update_space(ring); + + vma = intel_ring_create_vma(engine->i915, size); + if (IS_ERR(vma)) { + kfree(ring); + return ERR_CAST(vma); + } + ring->vma = vma; + + return ring; +} + +void intel_ring_free(struct kref *ref) +{ + struct intel_ring *ring = container_of(ref, typeof(*ring), ref); + struct drm_i915_gem_object *obj = ring->vma->obj; + + i915_vma_close(ring->vma); + __i915_gem_object_release_unless_active(obj); + + i915_timeline_put(ring->timeline); + kfree(ring); +} + +static void __ring_context_fini(struct intel_context *ce) +{ + GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); + i915_gem_object_put(ce->state->obj); +} + +static void ring_context_destroy(struct kref *ref) +{ + struct intel_context *ce = container_of(ref, typeof(*ce), ref); + + GEM_BUG_ON(intel_context_is_pinned(ce)); + + if (ce->state) + __ring_context_fini(ce); + + intel_context_free(ce); +} + +static int __context_pin_ppgtt(struct i915_gem_context *ctx) +{ + struct i915_hw_ppgtt *ppgtt; + int err = 0; + + ppgtt = ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt; + if (ppgtt) + err = gen6_ppgtt_pin(ppgtt); + + return err; +} + +static void __context_unpin_ppgtt(struct i915_gem_context *ctx) +{ + struct i915_hw_ppgtt *ppgtt; + + ppgtt = ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt; + if (ppgtt) + gen6_ppgtt_unpin(ppgtt); +} + +static int __context_pin(struct intel_context *ce) +{ + struct i915_vma *vma; + int err; + + vma = ce->state; + if (!vma) + return 0; + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + return err; + + /* + * And mark is as a globally pinned object to let the shrinker know + * it cannot reclaim the object until we release it. + */ + vma->obj->pin_global++; + vma->obj->mm.dirty = true; + + return 0; +} + +static void __context_unpin(struct intel_context *ce) +{ + struct i915_vma *vma; + + vma = ce->state; + if (!vma) + return; + + vma->obj->pin_global--; + i915_vma_unpin(vma); +} + +static void ring_context_unpin(struct intel_context *ce) +{ + __context_unpin_ppgtt(ce->gem_context); + __context_unpin(ce); +} + +static struct i915_vma * +alloc_context_vma(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create(i915, engine->context_size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + /* + * Try to make the context utilize L3 as well as LLC. + * + * On VLV we don't have L3 controls in the PTEs so we + * shouldn't touch the cache level, especially as that + * would make the object snooped which might have a + * negative performance impact. + * + * Snooping is required on non-llc platforms in execlist + * mode, but since all GGTT accesses use PAT entry 0 we + * get snooping anyway regardless of cache_level. + * + * This is only applicable for Ivy Bridge devices since + * later platforms don't have L3 control bits in the PTE. 
+ */ + if (IS_IVYBRIDGE(i915)) + i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC); + + if (engine->default_state) { + void *defaults, *vaddr; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + + defaults = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(defaults)) { + err = PTR_ERR(defaults); + goto err_map; + } + + memcpy(vaddr, defaults, engine->context_size); + i915_gem_object_unpin_map(engine->default_state); + + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + } + + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + return vma; + +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int ring_context_pin(struct intel_context *ce) +{ + struct intel_engine_cs *engine = ce->engine; + int err; + + /* One ringbuffer to rule them all */ + GEM_BUG_ON(!engine->buffer); + ce->ring = engine->buffer; + + if (!ce->state && engine->context_size) { + struct i915_vma *vma; + + vma = alloc_context_vma(engine); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + ce->state = vma; + } + + err = __context_pin(ce); + if (err) + return err; + + err = __context_pin_ppgtt(ce->gem_context); + if (err) + goto err_unpin; + + return 0; + +err_unpin: + __context_unpin(ce); + return err; +} + +static void ring_context_reset(struct intel_context *ce) +{ + intel_ring_reset(ce->ring, 0); +} + +static const struct intel_context_ops ring_context_ops = { + .pin = ring_context_pin, + .unpin = ring_context_unpin, + + .reset = ring_context_reset, + .destroy = ring_context_destroy, +}; + +static int intel_init_ring_buffer(struct intel_engine_cs *engine) +{ + struct i915_timeline *timeline; + struct intel_ring *ring; + int err; + + err = intel_engine_setup_common(engine); + if (err) + return err; + + timeline = i915_timeline_create(engine->i915, engine->status_page.vma); + if (IS_ERR(timeline)) { + err = PTR_ERR(timeline); + goto err; + } + GEM_BUG_ON(timeline->has_initial_breadcrumb); + + ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); + i915_timeline_put(timeline); + if (IS_ERR(ring)) { + err = PTR_ERR(ring); + goto err; + } + + err = intel_ring_pin(ring); + if (err) + goto err_ring; + + GEM_BUG_ON(engine->buffer); + engine->buffer = ring; + + err = intel_engine_init_common(engine); + if (err) + goto err_unpin; + + GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma); + + return 0; + +err_unpin: + intel_ring_unpin(ring); +err_ring: + intel_ring_put(ring); +err: + intel_engine_cleanup_common(engine); + return err; +} + +void intel_engine_cleanup(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + WARN_ON(INTEL_GEN(dev_priv) > 2 && + (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); + + intel_ring_unpin(engine->buffer); + intel_ring_put(engine->buffer); + + if (engine->cleanup) + engine->cleanup(engine); + + intel_engine_cleanup_common(engine); + + dev_priv->engine[engine->id] = NULL; + kfree(engine); +} + +static int load_pd_dir(struct i915_request *rq, + const struct i915_hw_ppgtt *ppgtt) +{ + const struct intel_engine_cs * const engine = rq->engine; + u32 *cs; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)); + *cs++ = PP_DIR_DCLV_2G; + + *cs++ = 
MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); + *cs++ = ppgtt->pd.base.ggtt_offset << 10; + + intel_ring_advance(rq, cs); + + return 0; +} + +static int flush_pd_dir(struct i915_request *rq) +{ + const struct intel_engine_cs * const engine = rq->engine; + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Stall until the page table load is complete */ + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); + *cs++ = i915_scratch_offset(rq->i915); + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + return 0; +} + +static inline int mi_set_context(struct i915_request *rq, u32 flags) +{ + struct drm_i915_private *i915 = rq->i915; + struct intel_engine_cs *engine = rq->engine; + enum intel_engine_id id; + const int num_engines = + IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0; + bool force_restore = false; + int len; + u32 *cs; + + flags |= MI_MM_SPACE_GTT; + if (IS_HASWELL(i915)) + /* These flags are for resource streamer on HSW+ */ + flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN; + else + flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN; + + len = 4; + if (IS_GEN(i915, 7)) + len += 2 + (num_engines ? 4 * num_engines + 6 : 0); + if (flags & MI_FORCE_RESTORE) { + GEM_BUG_ON(flags & MI_RESTORE_INHIBIT); + flags &= ~MI_FORCE_RESTORE; + force_restore = true; + len += 2; + } + + cs = intel_ring_begin(rq, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ + if (IS_GEN(i915, 7)) { + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + if (num_engines) { + struct intel_engine_cs *signaller; + + *cs++ = MI_LOAD_REGISTER_IMM(num_engines); + for_each_engine(signaller, i915, id) { + if (signaller == engine) + continue; + + *cs++ = i915_mmio_reg_offset( + RING_PSMI_CTL(signaller->mmio_base)); + *cs++ = _MASKED_BIT_ENABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); + } + } + } + + if (force_restore) { + /* + * The HW doesn't handle being told to restore the current + * context very well. Quite often it likes goes to go off and + * sulk, especially when it is meant to be reloading PP_DIR. + * A very simple fix to force the reload is to simply switch + * away from the current context and back again. + * + * Note that the kernel_context will contain random state + * following the INHIBIT_RESTORE. We accept this since we + * never use the kernel_context state; it is merely a + * placeholder we use to flush other contexts. + */ + *cs++ = MI_SET_CONTEXT; + *cs++ = i915_ggtt_offset(engine->kernel_context->state) | + MI_MM_SPACE_GTT | + MI_RESTORE_INHIBIT; + } + + *cs++ = MI_NOOP; + *cs++ = MI_SET_CONTEXT; + *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags; + /* + * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP + * WaMiSetContext_Hang:snb,ivb,vlv + */ + *cs++ = MI_NOOP; + + if (IS_GEN(i915, 7)) { + if (num_engines) { + struct intel_engine_cs *signaller; + i915_reg_t last_reg = {}; /* keep gcc quiet */ + + *cs++ = MI_LOAD_REGISTER_IMM(num_engines); + for_each_engine(signaller, i915, id) { + if (signaller == engine) + continue; + + last_reg = RING_PSMI_CTL(signaller->mmio_base); + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = _MASKED_BIT_DISABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); + } + + /* Insert a delay before the next switch! 
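The length passed to intel_ring_begin() by mi_set_context() above has to account for every conditional emission: the gen7 arbitration toggles, the per-engine PSMI writes plus the SRM delay, and the extra MI_SET_CONTEXT used to force a reload. A small model of that budget (the helper name and the example configuration are invented; only the arithmetic mirrors the function above):

#include <stdbool.h>
#include <stdio.h>

static unsigned int
set_context_dwords(bool is_gen7, unsigned int num_other_engines,
		   bool force_restore)
{
	unsigned int len = 4;	/* MI_NOOP, MI_SET_CONTEXT, descriptor, MI_NOOP */

	if (is_gen7)
		/* MI_ARB_ON_OFF pair, plus two LRI headers, two dwords per
		 * other engine for each LRI, and the four-dword SRM delay.
		 */
		len += 2 + (num_other_engines ? 4 * num_other_engines + 6 : 0);

	if (force_restore)
		len += 2;	/* extra MI_SET_CONTEXT + descriptor */

	return len;
}

int main(void)
{
	/* e.g. a Haswell GT1 part with three non-render engines visible */
	printf("dwords = %u\n", set_context_dwords(true, 3, false));
	return 0;
}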
*/ + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = i915_scratch_offset(rq->i915); + *cs++ = MI_NOOP; + } + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + } + + intel_ring_advance(rq, cs); + + return 0; +} + +static int remap_l3(struct i915_request *rq, int slice) +{ + u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; + int i; + + if (!remap_info) + return 0; + + cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Note: We do not worry about the concurrent register cacheline hang + * here because no other code should access these registers other than + * at initialization time. + */ + *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); + for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { + *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); + *cs++ = remap_info[i]; + } + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +static int switch_context(struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + struct i915_gem_context *ctx = rq->gem_context; + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; + unsigned int unwind_mm = 0; + u32 hw_flags = 0; + int ret, i; + + lockdep_assert_held(&rq->i915->drm.struct_mutex); + GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); + + if (ppgtt) { + int loops; + + /* + * Baytail takes a little more convincing that it really needs + * to reload the PD between contexts. It is not just a little + * longer, as adding more stalls after the load_pd_dir (i.e. + * adding a long loop around flush_pd_dir) is not as effective + * as reloading the PD umpteen times. 32 is derived from + * experimentation (gem_exec_parallel/fds) and has no good + * explanation. + */ + loops = 1; + if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915)) + loops = 32; + + do { + ret = load_pd_dir(rq, ppgtt); + if (ret) + goto err; + } while (--loops); + + if (ppgtt->pd_dirty_engines & engine->mask) { + unwind_mm = engine->mask; + ppgtt->pd_dirty_engines &= ~unwind_mm; + hw_flags = MI_FORCE_RESTORE; + } + } + + if (rq->hw_context->state) { + GEM_BUG_ON(engine->id != RCS0); + + /* + * The kernel context(s) is treated as pure scratch and is not + * expected to retain any state (as we sacrifice it during + * suspend and on resume it may be corrupted). This is ok, + * as nothing actually executes using the kernel context; it + * is purely used for flushing user contexts. + */ + if (i915_gem_context_is_kernel(ctx)) + hw_flags = MI_RESTORE_INHIBIT; + + ret = mi_set_context(rq, hw_flags); + if (ret) + goto err_mm; + } + + if (ppgtt) { + ret = engine->emit_flush(rq, EMIT_INVALIDATE); + if (ret) + goto err_mm; + + ret = flush_pd_dir(rq); + if (ret) + goto err_mm; + + /* + * Not only do we need a full barrier (post-sync write) after + * invalidating the TLBs, but we need to wait a little bit + * longer. Whether this is merely delaying us, or the + * subsequent flush is a key part of serialising with the + * post-sync op, this extra pass appears vital before a + * mm switch! 
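switch_context() above also shows a small unwind pattern worth calling out: the engine's bit in pd_dirty_engines is cleared optimistically before the switch is emitted, and put back if a later emission fails so the PD reload is retried on the next request. A stand-alone sketch of that pattern, with toy structures in place of the driver's ppgtt and request types:

#include <stdio.h>

struct toy_ppgtt {
	unsigned int pd_dirty_engines;	/* engines still needing a PD reload */
};

static int emit_switch(struct toy_ppgtt *ppgtt, unsigned int engine_mask,
		       int (*emit)(void))
{
	unsigned int unwind_mm = 0;
	int ret;

	if (ppgtt->pd_dirty_engines & engine_mask) {
		unwind_mm = engine_mask;
		ppgtt->pd_dirty_engines &= ~unwind_mm;	/* assume the reload lands */
	}

	ret = emit();	/* context switch + flushes */
	if (ret)
		ppgtt->pd_dirty_engines |= unwind_mm;	/* it did not: mark dirty again */

	return ret;
}

static int emit_fails(void)
{
	return -1;	/* pretend the ring emission failed */
}

int main(void)
{
	struct toy_ppgtt ppgtt = { .pd_dirty_engines = 0x1 };

	emit_switch(&ppgtt, 0x1, emit_fails);
	printf("dirty mask after failed switch: %#x\n", ppgtt.pd_dirty_engines);
	return 0;
}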
+ */ + ret = engine->emit_flush(rq, EMIT_INVALIDATE); + if (ret) + goto err_mm; + + ret = engine->emit_flush(rq, EMIT_FLUSH); + if (ret) + goto err_mm; + } + + if (ctx->remap_slice) { + for (i = 0; i < MAX_L3_SLICES; i++) { + if (!(ctx->remap_slice & BIT(i))) + continue; + + ret = remap_l3(rq, i); + if (ret) + goto err_mm; + } + + ctx->remap_slice = 0; + } + + return 0; + +err_mm: + if (unwind_mm) + ppgtt->pd_dirty_engines |= unwind_mm; +err: + return ret; +} + +static int ring_request_alloc(struct i915_request *request) +{ + int ret; + + GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); + GEM_BUG_ON(request->timeline->has_initial_breadcrumb); + + /* + * Flush enough space to reduce the likelihood of waiting after + * we start building the request - in which case we will just + * have to repeat work. + */ + request->reserved_space += LEGACY_REQUEST_SIZE; + + ret = switch_context(request); + if (ret) + return ret; + + /* Unconditionally invalidate GPU caches and TLBs. */ + ret = request->engine->emit_flush(request, EMIT_INVALIDATE); + if (ret) + return ret; + + request->reserved_space -= LEGACY_REQUEST_SIZE; + return 0; +} + +static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) +{ + struct i915_request *target; + long timeout; + + lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex); + + if (intel_ring_update_space(ring) >= bytes) + return 0; + + GEM_BUG_ON(list_empty(&ring->request_list)); + list_for_each_entry(target, &ring->request_list, ring_link) { + /* Would completion of this request free enough space? */ + if (bytes <= __intel_ring_space(target->postfix, + ring->emit, ring->size)) + break; + } + + if (WARN_ON(&target->ring_link == &ring->request_list)) + return -ENOSPC; + + timeout = i915_request_wait(target, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) + return timeout; + + i915_request_retire_upto(target); + + intel_ring_update_space(ring); + GEM_BUG_ON(ring->space < bytes); + return 0; +} + +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) +{ + struct intel_ring *ring = rq->ring; + const unsigned int remain_usable = ring->effective_size - ring->emit; + const unsigned int bytes = num_dwords * sizeof(u32); + unsigned int need_wrap = 0; + unsigned int total_bytes; + u32 *cs; + + /* Packets must be qword aligned. */ + GEM_BUG_ON(num_dwords & 1); + + total_bytes = bytes + rq->reserved_space; + GEM_BUG_ON(total_bytes > ring->effective_size); + + if (unlikely(total_bytes > remain_usable)) { + const int remain_actual = ring->size - ring->emit; + + if (bytes > remain_usable) { + /* + * Not enough space for the basic request. So need to + * flush out the remainder and then wait for + * base + reserved. + */ + total_bytes += remain_actual; + need_wrap = remain_actual | 1; + } else { + /* + * The base request will fit but the reserved space + * falls off the end. So we don't need an immediate + * wrap and only need to effectively wait for the + * reserved size from the start of ringbuffer. + */ + total_bytes = rq->reserved_space + remain_actual; + } + } + + if (unlikely(total_bytes > ring->space)) { + int ret; + + /* + * Space is reserved in the ringbuffer for finalising the + * request, as that cannot be allowed to fail. During request + * finalisation, reserved_space is set to 0 to stop the + * overallocation and the assumption is that then we never need + * to wait (which has the risk of failing with EINTR). + * + * See also i915_request_alloc() and i915_request_add(). 
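The sizing decision intel_ring_begin() makes above can be separated out: if the request does not fit in the bytes left before the end of the effective ring, the remainder of the real ring is padded with MI_NOOPs and emission restarts at offset zero; if only the reservation spills past the end, no wrap is needed but the wait target changes. The names below are invented for the sketch; only the arithmetic mirrors the function above:

#include <stdbool.h>
#include <stdio.h>

struct begin_plan {
	unsigned int total_bytes;	/* bytes that must be available */
	unsigned int pad_bytes;		/* filler emitted before wrapping */
	bool wrap;
};

static struct begin_plan
plan_begin(unsigned int size, unsigned int effective_size,
	   unsigned int emit, unsigned int bytes, unsigned int reserved)
{
	const unsigned int remain_usable = effective_size - emit;
	const unsigned int remain_actual = size - emit;
	struct begin_plan p = {
		.total_bytes = bytes + reserved,
		.pad_bytes = 0,
		.wrap = false,
	};

	if (p.total_bytes > remain_usable) {
		if (bytes > remain_usable) {
			/* The packet itself does not fit: pad to the end
			 * of the real ring and wrap to offset zero.
			 */
			p.total_bytes += remain_actual;
			p.pad_bytes = remain_actual;
			p.wrap = true;
		} else {
			/* The packet fits, only the reservation spills past
			 * the end: wait for the reservation plus the tail.
			 */
			p.total_bytes = reserved + remain_actual;
		}
	}

	return p;
}

int main(void)
{
	/* 4 KiB ring, no i830 shrinkage, 200 reserved bytes as a stand-in
	 * for LEGACY_REQUEST_SIZE; the emit offset and packet size are
	 * picked to hit the wrap branch.
	 */
	struct begin_plan p = plan_begin(4096, 4096, 4000, 128, 200);

	printf("total=%u pad=%u wrap=%d\n", p.total_bytes, p.pad_bytes, p.wrap);
	return 0;
}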
+ */ + GEM_BUG_ON(!rq->reserved_space); + + ret = wait_for_space(ring, total_bytes); + if (unlikely(ret)) + return ERR_PTR(ret); + } + + if (unlikely(need_wrap)) { + need_wrap &= ~1; + GEM_BUG_ON(need_wrap > ring->space); + GEM_BUG_ON(ring->emit + need_wrap > ring->size); + GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64))); + + /* Fill the tail with MI_NOOP */ + memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64)); + ring->space -= need_wrap; + ring->emit = 0; + } + + GEM_BUG_ON(ring->emit > ring->size - bytes); + GEM_BUG_ON(ring->space < bytes); + cs = ring->vaddr + ring->emit; + GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs))); + ring->emit += bytes; + ring->space -= bytes; + + return cs; +} + +/* Align the ring tail to a cacheline boundary */ +int intel_ring_cacheline_align(struct i915_request *rq) +{ + int num_dwords; + void *cs; + + num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); + if (num_dwords == 0) + return 0; + + num_dwords = CACHELINE_DWORDS - num_dwords; + GEM_BUG_ON(num_dwords & 1); + + cs = intel_ring_begin(rq, num_dwords); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); + intel_ring_advance(rq, cs); + + GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); + return 0; +} + +static void gen6_bsd_submit_request(struct i915_request *request) +{ + struct intel_uncore *uncore = request->engine->uncore; + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + /* Every tail move must follow the sequence below */ + + /* Disable notification that the ring is IDLE. The GT + * will then assume that it is busy and bring it out of rc6. + */ + intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL, + _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); + + /* Clear the context id. Here be magic! */ + intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0); + + /* Wait for the ring not to be idle, i.e. for it to wake up. */ + if (__intel_wait_for_register_fw(uncore, + GEN6_BSD_SLEEP_PSMI_CONTROL, + GEN6_BSD_SLEEP_INDICATOR, + 0, + 1000, 0, NULL)) + DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); + + /* Now that the ring is fully powered up, update the tail */ + i9xx_submit_request(request); + + /* Let the ring send IDLE messages to the GT again, + * and so let it sleep to conserve power when idle. + */ + intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL, + _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); +} + +static int mi_flush_dw(struct i915_request *rq, u32 flags) +{ + u32 cmd, *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cmd = MI_FLUSH_DW; + + /* + * We always require a command barrier so that subsequent + * commands, such as breadcrumb interrupts, are strictly ordered + * wrt the contents of the write cache being flushed to memory + * (and thus being coherent from the CPU). + */ + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; + + /* + * Bspec vol 1c.3 - blitter engine command streamer: + * "If ENABLED, all TLBs will be invalidated once the flush + * operation is complete. This bit is only valid when the + * Post-Sync Operation field is a value of 1h or 3h." + */ + cmd |= flags; + + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + return 0; +} + +static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags) +{ + return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? 
invflags : 0); +} + +static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode) +{ + return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD); +} + +static int +hsw_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW); + /* bit0-7 is the length on GEN6+ */ + *cs++ = offset; + intel_ring_advance(rq, cs); + + return 0; +} + +static int +gen6_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_NON_SECURE_I965); + /* bit0-7 is the length on GEN6+ */ + *cs++ = offset; + intel_ring_advance(rq, cs); + + return 0; +} + +/* Blitter support (SandyBridge+) */ + +static int gen6_ring_flush(struct i915_request *rq, u32 mode) +{ + return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB); +} + +static void intel_ring_init_irq(struct drm_i915_private *dev_priv, + struct intel_engine_cs *engine) +{ + if (INTEL_GEN(dev_priv) >= 6) { + engine->irq_enable = gen6_irq_enable; + engine->irq_disable = gen6_irq_disable; + } else if (INTEL_GEN(dev_priv) >= 5) { + engine->irq_enable = gen5_irq_enable; + engine->irq_disable = gen5_irq_disable; + } else if (INTEL_GEN(dev_priv) >= 3) { + engine->irq_enable = i9xx_irq_enable; + engine->irq_disable = i9xx_irq_disable; + } else { + engine->irq_enable = i8xx_irq_enable; + engine->irq_disable = i8xx_irq_disable; + } +} + +static void i9xx_set_default_submission(struct intel_engine_cs *engine) +{ + engine->submit_request = i9xx_submit_request; + engine->cancel_requests = cancel_requests; + + engine->park = NULL; + engine->unpark = NULL; +} + +static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) +{ + i9xx_set_default_submission(engine); + engine->submit_request = gen6_bsd_submit_request; +} + +static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, + struct intel_engine_cs *engine) +{ + /* gen8+ are only supported with execlists */ + GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8); + + intel_ring_init_irq(dev_priv, engine); + + engine->init_hw = init_ring_common; + engine->reset.prepare = reset_prepare; + engine->reset.reset = reset_ring; + engine->reset.finish = reset_finish; + + engine->cops = &ring_context_ops; + engine->request_alloc = ring_request_alloc; + + /* + * Using a global execution timeline; the previous final breadcrumb is + * equivalent to our next initial bread so we can elide + * engine->emit_init_breadcrumb(). 
+ */ + engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb; + if (IS_GEN(dev_priv, 5)) + engine->emit_fini_breadcrumb = gen5_emit_breadcrumb; + + engine->set_default_submission = i9xx_set_default_submission; + + if (INTEL_GEN(dev_priv) >= 6) + engine->emit_bb_start = gen6_emit_bb_start; + else if (INTEL_GEN(dev_priv) >= 4) + engine->emit_bb_start = i965_emit_bb_start; + else if (IS_I830(dev_priv) || IS_I845G(dev_priv)) + engine->emit_bb_start = i830_emit_bb_start; + else + engine->emit_bb_start = i915_emit_bb_start; +} + +int intel_init_render_ring_buffer(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + intel_ring_default_vfuncs(dev_priv, engine); + + if (HAS_L3_DPF(dev_priv)) + engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; + + engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; + + if (INTEL_GEN(dev_priv) >= 7) { + engine->init_context = intel_rcs_ctx_init; + engine->emit_flush = gen7_render_ring_flush; + engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb; + } else if (IS_GEN(dev_priv, 6)) { + engine->init_context = intel_rcs_ctx_init; + engine->emit_flush = gen6_render_ring_flush; + engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb; + } else if (IS_GEN(dev_priv, 5)) { + engine->emit_flush = gen4_render_ring_flush; + } else { + if (INTEL_GEN(dev_priv) < 4) + engine->emit_flush = gen2_render_ring_flush; + else + engine->emit_flush = gen4_render_ring_flush; + engine->irq_enable_mask = I915_USER_INTERRUPT; + } + + if (IS_HASWELL(dev_priv)) + engine->emit_bb_start = hsw_emit_bb_start; + + engine->init_hw = init_render_ring; + + ret = intel_init_ring_buffer(engine); + if (ret) + return ret; + + return 0; +} + +int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + intel_ring_default_vfuncs(dev_priv, engine); + + if (INTEL_GEN(dev_priv) >= 6) { + /* gen6 bsd needs a special wa for tail updates */ + if (IS_GEN(dev_priv, 6)) + engine->set_default_submission = gen6_bsd_set_default_submission; + engine->emit_flush = gen6_bsd_ring_flush; + engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; + + if (IS_GEN(dev_priv, 6)) + engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; + else + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; + } else { + engine->emit_flush = bsd_ring_flush; + if (IS_GEN(dev_priv, 5)) + engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; + else + engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; + } + + return intel_init_ring_buffer(engine); +} + +int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + GEM_BUG_ON(INTEL_GEN(dev_priv) < 6); + + intel_ring_default_vfuncs(dev_priv, engine); + + engine->emit_flush = gen6_ring_flush; + engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; + + if (IS_GEN(dev_priv, 6)) + engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; + else + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; + + return intel_init_ring_buffer(engine); +} + +int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + GEM_BUG_ON(INTEL_GEN(dev_priv) < 7); + + intel_ring_default_vfuncs(dev_priv, engine); + + engine->emit_flush = gen6_ring_flush; + engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; + engine->irq_enable = hsw_vebox_irq_enable; + engine->irq_disable = hsw_vebox_irq_disable; + + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; + + return 
intel_init_ring_buffer(engine);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
new file mode 100644
index 000000000000..7f448f3bea0b
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -0,0 +1,142 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_lrc_reg.h"
+#include "intel_sseu.h"
+
+u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
+ const struct intel_sseu *req_sseu)
+{
+ const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
+ bool subslice_pg = sseu->has_subslice_pg;
+ struct intel_sseu ctx_sseu;
+ u8 slices, subslices;
+ u32 rpcs = 0;
+
+ /*
+ * No explicit RPCS request is needed to ensure full
+ * slice/subslice/EU enablement prior to Gen9.
+ */
+ if (INTEL_GEN(i915) < 9)
+ return 0;
+
+ /*
+ * If i915/perf is active, we want a stable powergating configuration
+ * on the system.
+ *
+ * We could choose full enablement, but on ICL we know there are use
+ * cases which disable slices for functional reasons, apart from
+ * performance ones. So in this case we select a known stable subset.
+ */
+ if (!i915->perf.oa.exclusive_stream) {
+ ctx_sseu = *req_sseu;
+ } else {
+ ctx_sseu = intel_sseu_from_device_info(sseu);
+
+ if (IS_GEN(i915, 11)) {
+ /*
+ * We only need the subslice count so it doesn't matter
+ * which ones we select - just keep the lowest half of
+ * all available subslices per slice enabled.
+ */
+ ctx_sseu.subslice_mask =
+ ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
+ ctx_sseu.slice_mask = 0x1;
+ }
+ }
+
+ slices = hweight8(ctx_sseu.slice_mask);
+ subslices = hweight8(ctx_sseu.subslice_mask);
+
+ /*
+ * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
+ * wide and Icelake has up to eight subslices, special programming is
+ * needed in order to correctly enable all subslices.
+ *
+ * According to documentation software must consider the configuration
+ * as 2x4x8 and hardware will translate this to 1x8x8.
+ *
+ * Furthermore, even though SScount is three bits, the maximum documented
+ * value for it is four. From this some rules/restrictions follow:
+ *
+ * 1.
+ * If the enabled subslice count is greater than four, two whole slices
+ * must be enabled instead.
+ *
+ * 2.
+ * When more than one slice is enabled, hardware ignores the subslice
+ * count altogether.
+ *
+ * From these restrictions it follows that it is not possible to enable
+ * a subslice count between the SScount maximum of four and the maximum
+ * number available on a particular SKU. Either all subslices are
+ * enabled, or a count between one and four on the first
+ * slice.
+ */
+ if (IS_GEN(i915, 11) &&
+ slices == 1 &&
+ subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
+ GEM_BUG_ON(subslices & 1);
+
+ subslice_pg = false;
+ slices *= 2;
+ }
+
+ /*
+ * Starting in Gen9, render power gating can leave
+ * slice/subslice/EU in a partially enabled state. We
+ * must make an explicit request through RPCS for full
+ * enablement.
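+ * For example (assuming slice, subslice and EU power gating are all
+ * available): a Gen11 request for one slice, four subslices and 8..8 EUs
+ * per subslice is packed below as GEN11_RPCS_S_CNT = 1, GEN8_RPCS_SS_CNT = 4
+ * and GEN8_RPCS_EU_MIN/MAX = 8, each with its matching *_ENABLE bit in
+ * addition to GEN8_RPCS_ENABLE.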
+ */ + if (sseu->has_slice_pg) { + u32 mask, val = slices; + + if (INTEL_GEN(i915) >= 11) { + mask = GEN11_RPCS_S_CNT_MASK; + val <<= GEN11_RPCS_S_CNT_SHIFT; + } else { + mask = GEN8_RPCS_S_CNT_MASK; + val <<= GEN8_RPCS_S_CNT_SHIFT; + } + + GEM_BUG_ON(val & ~mask); + val &= mask; + + rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val; + } + + if (subslice_pg) { + u32 val = subslices; + + val <<= GEN8_RPCS_SS_CNT_SHIFT; + + GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK); + val &= GEN8_RPCS_SS_CNT_MASK; + + rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val; + } + + if (sseu->has_eu_pg) { + u32 val; + + val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; + GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); + val &= GEN8_RPCS_EU_MIN_MASK; + + rpcs |= val; + + val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; + GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); + val &= GEN8_RPCS_EU_MAX_MASK; + + rpcs |= val; + + rpcs |= GEN8_RPCS_ENABLE; + } + + return rpcs; +} diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h new file mode 100644 index 000000000000..73bc824094e8 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_SSEU_H__ +#define __INTEL_SSEU_H__ + +#include + +struct drm_i915_private; + +#define GEN_MAX_SLICES (6) /* CNL upper bound */ +#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ + +struct sseu_dev_info { + u8 slice_mask; + u8 subslice_mask[GEN_MAX_SLICES]; + u16 eu_total; + u8 eu_per_subslice; + u8 min_eu_in_pool; + /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ + u8 subslice_7eu[3]; + u8 has_slice_pg:1; + u8 has_subslice_pg:1; + u8 has_eu_pg:1; + + /* Topology fields */ + u8 max_slices; + u8 max_subslices; + u8 max_eus_per_subslice; + + /* We don't have more than 8 eus per subslice at the moment and as we + * store eus enabled using bits, no need to multiply by eus per + * subslice. + */ + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; +}; + +/* + * Powergating configuration for a particular (context,engine). + */ +struct intel_sseu { + u8 slice_mask; + u8 subslice_mask; + u8 min_eus_per_subslice; + u8 max_eus_per_subslice; +}; + +static inline struct intel_sseu +intel_sseu_from_device_info(const struct sseu_dev_info *sseu) +{ + struct intel_sseu value = { + .slice_mask = sseu->slice_mask, + .subslice_mask = sseu->subslice_mask[0], + .min_eus_per_subslice = sseu->max_eus_per_subslice, + .max_eus_per_subslice = sseu->max_eus_per_subslice, + }; + + return value; +} + +u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, + const struct intel_sseu *req_sseu); + +#endif /* __INTEL_SSEU_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c new file mode 100644 index 000000000000..f46ed0e2f07c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -0,0 +1,1402 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_workarounds.h" + +/** + * DOC: Hardware workarounds + * + * This file is intended as a central place to implement most [1]_ of the + * required workarounds for hardware to work as originally intended. They fall + * in five basic categories depending on how/when they are applied: + * + * - Workarounds that touch registers that are saved/restored to/from the HW + * context image. 
The list is emitted (via Load Register Immediate commands)
+ * every time a new context is created.
+ * - GT workarounds. The list of these WAs is applied whenever these registers
+ * revert to default values (on GPU reset, suspend/resume [2]_, etc.).
+ * - Display workarounds. The list is applied during display clock-gating
+ * initialization.
+ * - Workarounds that whitelist a privileged register, so that UMDs can manage
+ * them directly. This is just a special case of an MMIO workaround (as we
+ * write the list of these to-be-whitelisted registers to some special HW
+ * registers).
+ * - Workaround batchbuffers, which get executed automatically by the hardware
+ * on every HW context restore.
+ *
+ * .. [1] Please notice that there are other WAs that, due to their nature,
+ * cannot be applied from a central place. Those are peppered around the rest
+ * of the code, as needed.
+ *
+ * .. [2] Technically, some registers are power-context saved & restored, so they
+ * survive a suspend/resume. In practice, writing them again is not too
+ * costly and simplifies things. We can revisit this in the future.
+ *
+ * Layout
+ * ''''''
+ *
+ * Keep things in this file ordered by WA type, as per the above (context, GT,
+ * display, register whitelist, batchbuffer). Then, inside each type, keep the
+ * following order:
+ *
+ * - Infrastructure functions and macros
+ * - WAs per platform in standard gen/chrono order
+ * - Public functions to init or apply the given workaround type.
+ */
+
+static void wa_init_start(struct i915_wa_list *wal, const char *name)
+{
+ wal->name = name;
+}
+
+#define WA_LIST_CHUNK (1 << 4)
+
+static void wa_init_finish(struct i915_wa_list *wal)
+{
+ /* Trim unused entries. */
+ if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
+ struct i915_wa *list = kmemdup(wal->list,
+ wal->count * sizeof(*list),
+ GFP_KERNEL);
+
+ if (list) {
+ kfree(wal->list);
+ wal->list = list;
+ }
+ }
+
+ if (!wal->count)
+ return;
+
+ DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
+ wal->wa_count, wal->name);
+}
+
+static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
+{
+ unsigned int addr = i915_mmio_reg_offset(wa->reg);
+ unsigned int start = 0, end = wal->count;
+ const unsigned int grow = WA_LIST_CHUNK;
+ struct i915_wa *wa_;
+
+ GEM_BUG_ON(!is_power_of_2(grow));
+
+ if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. 
*/ + struct i915_wa *list; + + list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa), + GFP_KERNEL); + if (!list) { + DRM_ERROR("No space for workaround init!\n"); + return; + } + + if (wal->list) + memcpy(list, wal->list, sizeof(*wa) * wal->count); + + wal->list = list; + } + + while (start < end) { + unsigned int mid = start + (end - start) / 2; + + if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) { + start = mid + 1; + } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) { + end = mid; + } else { + wa_ = &wal->list[mid]; + + if ((wa->mask & ~wa_->mask) == 0) { + DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n", + i915_mmio_reg_offset(wa_->reg), + wa_->mask, wa_->val); + + wa_->val &= ~wa->mask; + } + + wal->wa_count++; + wa_->val |= wa->val; + wa_->mask |= wa->mask; + wa_->read |= wa->read; + return; + } + } + + wal->wa_count++; + wa_ = &wal->list[wal->count++]; + *wa_ = *wa; + + while (wa_-- > wal->list) { + GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) == + i915_mmio_reg_offset(wa_[1].reg)); + if (i915_mmio_reg_offset(wa_[1].reg) > + i915_mmio_reg_offset(wa_[0].reg)) + break; + + swap(wa_[1], wa_[0]); + } +} + +static void +wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, + u32 val) +{ + struct i915_wa wa = { + .reg = reg, + .mask = mask, + .val = val, + .read = mask, + }; + + _wa_add(wal, &wa); +} + +static void +wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); +} + +static void +wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_write_masked_or(wal, reg, ~0, val); +} + +static void +wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_write_masked_or(wal, reg, val, val); +} + +static void +ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) +{ + struct i915_wa wa = { + .reg = reg, + .mask = mask, + .val = val, + /* Bonkers HW, skip verifying */ + }; + + _wa_add(wal, &wa); +} + +#define WA_SET_BIT_MASKED(addr, mask) \ + wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) + +#define WA_CLR_BIT_MASKED(addr, mask) \ + wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask)) + +#define WA_SET_FIELD_MASKED(addr, mask, value) \ + wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value))) + +static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine) +{ + struct i915_wa_list *wal = &engine->ctx_wa_list; + + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + + /* WaDisableAsyncFlipPerfMode:bdw,chv */ + WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); + + /* WaDisablePartialInstShootdown:bdw,chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Use Force Non-Coherent whenever executing a 3D context. This is a + * workaround for for a possible hang in the unlikely event a TLB + * invalidation occurs during a PSD flush. + */ + /* WaForceEnableNonCoherent:bdw,chv */ + /* WaHdcDisableFetchWhenMasked:bdw,chv */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_DONOT_FETCH_MEM_WHEN_MASKED | + HDC_FORCE_NON_COHERENT); + + /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: + * "The Hierarchical Z RAW Stall Optimization allows non-overlapping + * polygons in the same 8x4 pixel/sample area to be processed without + * stalling waiting for the earlier ones to write to Hierarchical Z + * buffer." + * + * This optimization is off by default for BDW and CHV; turn it on. 
+ */ + WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + + /* Wa4x4STCOptimizationDisable:bdw,chv */ + WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); +} + +static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *wal = &engine->ctx_wa_list; + + gen8_ctx_workarounds_init(engine); + + /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* WaDisableDopClockGating:bdw + * + * Also see the related UCGTCL1 write in broadwell_init_clock_gating() + * to disable EUTC clock gating. + */ + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); + + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + WA_SET_BIT_MASKED(HDC_CHICKEN0, + /* WaForceContextSaveRestoreNonCoherent:bdw */ + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ + (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); +} + +static void chv_ctx_workarounds_init(struct intel_engine_cs *engine) +{ + struct i915_wa_list *wal = &engine->ctx_wa_list; + + gen8_ctx_workarounds_init(engine); + + /* WaDisableThreadStallDopClockGating:chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* Improve HiZ throughput on CHV. */ + WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); +} + +static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *wal = &engine->ctx_wa_list; + + if (HAS_LLC(i915)) { + /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl + * + * Must match Display Engine. See + * WaCompressedResourceDisplayNewHashMode. 
+ */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN9_PBE_COMPRESSED_HASH_SELECTION); + WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); + } + + /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ + /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + FLOW_CONTROL_ENABLE | + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ + if (!IS_COFFEELAKE(i915)) + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); + + /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ + /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ + WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_YV12_BUGFIX | + GEN9_ENABLE_GPGPU_PREEMPTION); + + /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ + /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ + WA_SET_BIT_MASKED(CACHE_MODE_1, + GEN8_4x4_STC_OPTIMIZATION_DISABLE | + GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); + + /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ + WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_CCS_TLB_PREFETCH_ENABLE); + + /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); + + /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are + * both tied to WaForceContextSaveRestoreNonCoherent + * in some hsds for skl. We keep the tie for all gen9. The + * documentation is a bit hazy and so we want to get common behaviour, + * even though there is no clear evidence we would need both on kbl/bxt. + * This area has been source of system hangs so we play it safe + * and mimic the skl regardless of what bspec says. + * + * Use Force Non-Coherent whenever executing a 3D context. This + * is a workaround for a possible hang in the unlikely event + * a TLB invalidation occurs during a PSD flush. + */ + + /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT); + + /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ + if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + + /* + * Supporting preemption with fine-granularity requires changes in the + * batch buffer programming. Since we can't break old userspace, we + * need to set our default preemption level to safe value. Userspace is + * still able to use more fine-grained preemption levels, since in + * WaEnablePreemptionGranularityControlByUMD we're whitelisting the + * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are + * not real HW workarounds, but merely a way to start using preemption + * while maintaining old contract with userspace. 
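+ * The two preemption writes below therefore pick the coarser defaults
+ * (command-level preemption for GPGPU), while GEN8_CS_CHICKEN1 itself
+ * remains writable by userspace through the whitelist so finer levels
+ * can still be requested per context.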
+ */ + + /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ + WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); + + /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + + /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ + if (IS_GEN9_LP(i915)) + WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); +} + +static void skl_tune_iz_hashing(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *wal = &engine->ctx_wa_list; + u8 vals[3] = { 0, 0, 0 }; + unsigned int i; + + for (i = 0; i < 3; i++) { + u8 ss; + + /* + * Only consider slices where one, and only one, subslice has 7 + * EUs + */ + if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i])) + continue; + + /* + * subslice_7eu[i] != 0 (because of the check above) and + * ss_max == 4 (maximum number of subslices possible per slice) + * + * -> 0 <= ss <= 3; + */ + ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1; + vals[i] = 3 - ss; + } + + if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) + return; + + /* Tune IZ hashing. See intel_device_info_runtime_init() */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN9_IZ_HASHING_MASK(2) | + GEN9_IZ_HASHING_MASK(1) | + GEN9_IZ_HASHING_MASK(0), + GEN9_IZ_HASHING(2, vals[2]) | + GEN9_IZ_HASHING(1, vals[1]) | + GEN9_IZ_HASHING(0, vals[0])); +} + +static void skl_ctx_workarounds_init(struct intel_engine_cs *engine) +{ + gen9_ctx_workarounds_init(engine); + skl_tune_iz_hashing(engine); +} + +static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine) +{ + struct i915_wa_list *wal = &engine->ctx_wa_list; + + gen9_ctx_workarounds_init(engine); + + /* WaDisableThreadStallDopClockGating:bxt */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); + + /* WaToEnableHwFixForPushConstHWBug:bxt */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); +} + +static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *wal = &engine->ctx_wa_list; + + gen9_ctx_workarounds_init(engine); + + /* WaToEnableHwFixForPushConstHWBug:kbl */ + if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableSbeCacheDispatchPortSharing:kbl */ + WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); +} + +static void glk_ctx_workarounds_init(struct intel_engine_cs *engine) +{ + struct i915_wa_list *wal = &engine->ctx_wa_list; + + gen9_ctx_workarounds_init(engine); + + /* WaToEnableHwFixForPushConstHWBug:glk */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); +} + +static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine) +{ + struct i915_wa_list *wal = &engine->ctx_wa_list; + + gen9_ctx_workarounds_init(engine); + + /* WaToEnableHwFixForPushConstHWBug:cfl */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableSbeCacheDispatchPortSharing:cfl */ + WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); +} + +static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *wal = &engine->ctx_wa_list; + + /* WaForceContextSaveRestoreNonCoherent:cnl */ + 
WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); + + /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */ + if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0)) + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5); + + /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */ + if (IS_CNL_REVID(i915, 0, CNL_REVID_B0)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE); + + /* WaPushConstantDereferenceHoldDisable:cnl */ + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); + + /* FtrEnableFastAnisoL1BankingFix:cnl */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); + + /* WaDisable3DMidCmdPreemption:cnl */ + WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); + + /* WaDisableGPGPUMidCmdPreemption:cnl */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + + /* WaDisableEarlyEOT:cnl */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); +} + +static void icl_ctx_workarounds_init(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *wal = &engine->ctx_wa_list; + + /* Wa_1604370585:icl (pre-prod) + * Formerly known as WaPushConstantDereferenceHoldDisable + */ + if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + PUSH_CONSTANT_DEREF_DISABLE); + + /* WaForceEnableNonCoherent:icl + * This is not the same workaround as in early Gen9 platforms, where + * lacking this could cause system hangs, but coherency performance + * overhead is high and only a few compute workloads really need it + * (the register is whitelisted in hardware now, so UMDs can opt in + * for coherency if they have a good reason). 
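+ * In other words, ICL_HDC_MODE below defaults to forced non-coherent
+ * purely as a performance choice; contexts that need coherency are
+ * expected to enable it themselves via the whitelisted register.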
+ */ + WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); + + /* Wa_2006611047:icl (pre-prod) + * Formerly known as WaDisableImprovedTdlClkGating + */ + if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + GEN11_TDL_CLOCK_GATING_FIX_DISABLE); + + /* WaEnableStateCacheRedirectToCS:icl */ + WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1, + GEN11_STATE_CACHE_REDIRECT_TO_CS); + + /* Wa_2006665173:icl (pre-prod) */ + if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) + WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, + GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); + + /* WaEnableFloatBlendOptimization:icl */ + wa_write_masked_or(wal, + GEN10_CACHE_MODE_SS, + 0, /* write-only, so skip validation */ + _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); + + /* WaDisableGPGPUMidThreadPreemption:icl */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); +} + +void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *wal = &engine->ctx_wa_list; + + wa_init_start(wal, "context"); + + if (IS_GEN(i915, 11)) + icl_ctx_workarounds_init(engine); + else if (IS_CANNONLAKE(i915)) + cnl_ctx_workarounds_init(engine); + else if (IS_COFFEELAKE(i915)) + cfl_ctx_workarounds_init(engine); + else if (IS_GEMINILAKE(i915)) + glk_ctx_workarounds_init(engine); + else if (IS_KABYLAKE(i915)) + kbl_ctx_workarounds_init(engine); + else if (IS_BROXTON(i915)) + bxt_ctx_workarounds_init(engine); + else if (IS_SKYLAKE(i915)) + skl_ctx_workarounds_init(engine); + else if (IS_CHERRYVIEW(i915)) + chv_ctx_workarounds_init(engine); + else if (IS_BROADWELL(i915)) + bdw_ctx_workarounds_init(engine); + else if (INTEL_GEN(i915) < 8) + return; + else + MISSING_CASE(INTEL_GEN(i915)); + + wa_init_finish(wal); +} + +int intel_engine_emit_ctx_wa(struct i915_request *rq) +{ + struct i915_wa_list *wal = &rq->engine->ctx_wa_list; + struct i915_wa *wa; + unsigned int i; + u32 *cs; + int ret; + + if (wal->count == 0) + return 0; + + ret = rq->engine->emit_flush(rq, EMIT_BARRIER); + if (ret) + return ret; + + cs = intel_ring_begin(rq, (wal->count * 2 + 2)); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(wal->count); + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + *cs++ = i915_mmio_reg_offset(wa->reg); + *cs++ = wa->val; + } + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + ret = rq->engine->emit_flush(rq, EMIT_BARRIER); + if (ret) + return ret; + + return 0; +} + +static void +gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* WaDisableKillLogic:bxt,skl,kbl */ + if (!IS_COFFEELAKE(i915)) + wa_write_or(wal, + GAM_ECOCHK, + ECOCHK_DIS_TLB); + + if (HAS_LLC(i915)) { + /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl + * + * Must match Display Engine. See + * WaCompressedResourceDisplayNewHashMode. 
+ */ + wa_write_or(wal, + MMCD_MISC_CTRL, + MMCD_PCLA | MMCD_HOTSPOT_EN); + } + + /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */ + wa_write_or(wal, + GAM_ECOCHK, + BDW_DISABLE_HDC_INVALIDATION); +} + +static void +skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + gen9_gt_workarounds_init(i915, wal); + + /* WaDisableGafsUnitClkGating:skl */ + wa_write_or(wal, + GEN7_UCGCTL4, + GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaInPlaceDecompressionHang:skl */ + if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER)) + wa_write_or(wal, + GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); +} + +static void +bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + gen9_gt_workarounds_init(i915, wal); + + /* WaInPlaceDecompressionHang:bxt */ + wa_write_or(wal, + GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); +} + +static void +kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + gen9_gt_workarounds_init(i915, wal); + + /* WaDisableDynamicCreditSharing:kbl */ + if (IS_KBL_REVID(i915, 0, KBL_REVID_B0)) + wa_write_or(wal, + GAMT_CHKN_BIT_REG, + GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); + + /* WaDisableGafsUnitClkGating:kbl */ + wa_write_or(wal, + GEN7_UCGCTL4, + GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaInPlaceDecompressionHang:kbl */ + wa_write_or(wal, + GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); +} + +static void +glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + gen9_gt_workarounds_init(i915, wal); +} + +static void +cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + gen9_gt_workarounds_init(i915, wal); + + /* WaDisableGafsUnitClkGating:cfl */ + wa_write_or(wal, + GEN7_UCGCTL4, + GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaInPlaceDecompressionHang:cfl */ + wa_write_or(wal, + GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); +} + +static void +wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; + u32 mcr_slice_subslice_mask; + + /* + * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl + * L3Banks could be fused off in single slice scenario. If that is + * the case, we might need to program MCR select to a valid L3Bank + * by default, to make sure we correctly read certain registers + * later on (in the range 0xB100 - 0xB3FF). + * This might be incompatible with + * WaProgramMgsrForCorrectSliceSpecificMmioReads. + * Fortunately, this should not happen in production hardware, so + * we only assert that this is the case (instead of implementing + * something more complex that requires checking the range of every + * MMIO read). 
+ */ + if (INTEL_GEN(i915) >= 10 && + is_power_of_2(sseu->slice_mask)) { + /* + * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches + * enabled subslice, no need to redirect MCR packet + */ + u32 slice = fls(sseu->slice_mask); + u32 fuse3 = + intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3); + u8 ss_mask = sseu->subslice_mask[slice]; + + u8 enabled_mask = (ss_mask | ss_mask >> + GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK; + u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK; + + /* + * Production silicon should have matched L3Bank and + * subslice enabled + */ + WARN_ON((enabled_mask & disabled_mask) != enabled_mask); + } + + if (INTEL_GEN(i915) >= 11) + mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK | + GEN11_MCR_SUBSLICE_MASK; + else + mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK | + GEN8_MCR_SUBSLICE_MASK; + /* + * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl + * Before any MMIO read into slice/subslice specific registers, MCR + * packet control register needs to be programmed to point to any + * enabled s/ss pair. Otherwise, incorrect values will be returned. + * This means each subsequent MMIO read will be forwarded to an + * specific s/ss combination, but this is OK since these registers + * are consistent across s/ss in almost all cases. In the rare + * occasions, such as INSTDONE, where this value is dependent + * on s/ss combo, the read should be done with read_subslice_reg. + */ + wa_write_masked_or(wal, + GEN8_MCR_SELECTOR, + mcr_slice_subslice_mask, + intel_calculate_mcr_s_ss_select(i915)); +} + +static void +cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + wa_init_mcr(i915, wal); + + /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */ + if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0)) + wa_write_or(wal, + GAMT_CHKN_BIT_REG, + GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT); + + /* WaInPlaceDecompressionHang:cnl */ + wa_write_or(wal, + GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); +} + +static void +icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + wa_init_mcr(i915, wal); + + /* WaInPlaceDecompressionHang:icl */ + wa_write_or(wal, + GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaModifyGamTlbPartitioning:icl */ + wa_write_masked_or(wal, + GEN11_GACB_PERF_CTRL, + GEN11_HASH_CTRL_MASK, + GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4); + + /* Wa_1405766107:icl + * Formerly known as WaCL2SFHalfMaxAlloc + */ + wa_write_or(wal, + GEN11_LSN_UNSLCVC, + GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC | + GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC); + + /* Wa_220166154:icl + * Formerly known as WaDisCtxReload + */ + wa_write_or(wal, + GEN8_GAMW_ECO_DEV_RW_IA, + GAMW_ECO_DEV_CTX_RELOAD_DISABLE); + + /* Wa_1405779004:icl (pre-prod) */ + if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) + wa_write_or(wal, + SLICE_UNIT_LEVEL_CLKGATE, + MSCUNIT_CLKGATE_DIS); + + /* Wa_1406680159:icl */ + wa_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE, + GWUNIT_CLKGATE_DIS); + + /* Wa_1406838659:icl (pre-prod) */ + if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) + wa_write_or(wal, + INF_UNIT_LEVEL_CLKGATE, + CGPSF_CLKGATE_DIS); + + /* Wa_1406463099:icl + * Formerly known as WaGamTlbPendError + */ + wa_write_or(wal, + GAMT_CHKN_BIT_REG, + GAMT_CHKN_DISABLE_L3_COH_PIPE); +} + +static void +gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + if (IS_GEN(i915, 11)) + icl_gt_workarounds_init(i915, wal); + else if (IS_CANNONLAKE(i915)) + cnl_gt_workarounds_init(i915, wal); 
+ else if (IS_COFFEELAKE(i915)) + cfl_gt_workarounds_init(i915, wal); + else if (IS_GEMINILAKE(i915)) + glk_gt_workarounds_init(i915, wal); + else if (IS_KABYLAKE(i915)) + kbl_gt_workarounds_init(i915, wal); + else if (IS_BROXTON(i915)) + bxt_gt_workarounds_init(i915, wal); + else if (IS_SKYLAKE(i915)) + skl_gt_workarounds_init(i915, wal); + else if (INTEL_GEN(i915) <= 8) + return; + else + MISSING_CASE(INTEL_GEN(i915)); +} + +void intel_gt_init_workarounds(struct drm_i915_private *i915) +{ + struct i915_wa_list *wal = &i915->gt_wa_list; + + wa_init_start(wal, "GT"); + gt_init_workarounds(i915, wal); + wa_init_finish(wal); +} + +static enum forcewake_domains +wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) +{ + enum forcewake_domains fw = 0; + struct i915_wa *wa; + unsigned int i; + + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + fw |= intel_uncore_forcewake_for_reg(uncore, + wa->reg, + FW_REG_READ | + FW_REG_WRITE); + + return fw; +} + +static bool +wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) +{ + if ((cur ^ wa->val) & wa->read) { + DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", + name, from, i915_mmio_reg_offset(wa->reg), + cur, cur & wa->read, + wa->val, wa->mask); + + return false; + } + + return true; +} + +static void +wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) +{ + enum forcewake_domains fw; + unsigned long flags; + struct i915_wa *wa; + unsigned int i; + + if (!wal->count) + return; + + fw = wal_get_fw_for_rmw(uncore, wal); + + spin_lock_irqsave(&uncore->lock, flags); + intel_uncore_forcewake_get__locked(uncore, fw); + + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + wa_verify(wa, + intel_uncore_read_fw(uncore, wa->reg), + wal->name, "application"); + } + + intel_uncore_forcewake_put__locked(uncore, fw); + spin_unlock_irqrestore(&uncore->lock, flags); +} + +void intel_gt_apply_workarounds(struct drm_i915_private *i915) +{ + wa_list_apply(&i915->uncore, &i915->gt_wa_list); +} + +static bool wa_list_verify(struct intel_uncore *uncore, + const struct i915_wa_list *wal, + const char *from) +{ + struct i915_wa *wa; + unsigned int i; + bool ok = true; + + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + ok &= wa_verify(wa, + intel_uncore_read(uncore, wa->reg), + wal->name, from); + + return ok; +} + +bool intel_gt_verify_workarounds(struct drm_i915_private *i915, + const char *from) +{ + return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from); +} + +static void +whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg) +{ + struct i915_wa wa = { + .reg = reg + }; + + if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS)) + return; + + _wa_add(wal, &wa); +} + +static void gen9_whitelist_build(struct i915_wa_list *w) +{ + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ + whitelist_reg(w, GEN9_CTX_PREEMPT_REG); + + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ + whitelist_reg(w, GEN8_CS_CHICKEN1); + + /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ + whitelist_reg(w, GEN8_HDC_CHICKEN1); +} + +static void skl_whitelist_build(struct i915_wa_list *w) +{ + gen9_whitelist_build(w); + + /* WaDisableLSQCROPERFforOCL:skl */ + whitelist_reg(w, GEN8_L3SQCREG4); +} + +static void bxt_whitelist_build(struct i915_wa_list *w) +{ + gen9_whitelist_build(w); +} + +static void 
kbl_whitelist_build(struct i915_wa_list *w) +{ + gen9_whitelist_build(w); + + /* WaDisableLSQCROPERFforOCL:kbl */ + whitelist_reg(w, GEN8_L3SQCREG4); +} + +static void glk_whitelist_build(struct i915_wa_list *w) +{ + gen9_whitelist_build(w); + + /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */ + whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); +} + +static void cfl_whitelist_build(struct i915_wa_list *w) +{ + gen9_whitelist_build(w); +} + +static void cnl_whitelist_build(struct i915_wa_list *w) +{ + /* WaEnablePreemptionGranularityControlByUMD:cnl */ + whitelist_reg(w, GEN8_CS_CHICKEN1); +} + +static void icl_whitelist_build(struct i915_wa_list *w) +{ + /* WaAllowUMDToModifyHalfSliceChicken7:icl */ + whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7); + + /* WaAllowUMDToModifySamplerMode:icl */ + whitelist_reg(w, GEN10_SAMPLER_MODE); +} + +void intel_engine_init_whitelist(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *w = &engine->whitelist; + + GEM_BUG_ON(engine->id != RCS0); + + wa_init_start(w, "whitelist"); + + if (IS_GEN(i915, 11)) + icl_whitelist_build(w); + else if (IS_CANNONLAKE(i915)) + cnl_whitelist_build(w); + else if (IS_COFFEELAKE(i915)) + cfl_whitelist_build(w); + else if (IS_GEMINILAKE(i915)) + glk_whitelist_build(w); + else if (IS_KABYLAKE(i915)) + kbl_whitelist_build(w); + else if (IS_BROXTON(i915)) + bxt_whitelist_build(w); + else if (IS_SKYLAKE(i915)) + skl_whitelist_build(w); + else if (INTEL_GEN(i915) <= 8) + return; + else + MISSING_CASE(INTEL_GEN(i915)); + + wa_init_finish(w); +} + +void intel_engine_apply_whitelist(struct intel_engine_cs *engine) +{ + const struct i915_wa_list *wal = &engine->whitelist; + struct intel_uncore *uncore = engine->uncore; + const u32 base = engine->mmio_base; + struct i915_wa *wa; + unsigned int i; + + if (!wal->count) + return; + + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + intel_uncore_write(uncore, + RING_FORCE_TO_NONPRIV(base, i), + i915_mmio_reg_offset(wa->reg)); + + /* And clear the rest just in case of garbage */ + for (; i < RING_MAX_NONPRIV_SLOTS; i++) + intel_uncore_write(uncore, + RING_FORCE_TO_NONPRIV(base, i), + i915_mmio_reg_offset(RING_NOPID(base))); +} + +static void +rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + struct drm_i915_private *i915 = engine->i915; + + if (IS_GEN(i915, 11)) { + /* This is not an Wa. 
Enable for better image quality */ + wa_masked_en(wal, + _3D_CHICKEN3, + _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); + + /* WaPipelineFlushCoherentLines:icl */ + ignore_wa_write_or(wal, + GEN8_L3SQCREG4, + GEN8_LQSC_FLUSH_COHERENT_LINES, + GEN8_LQSC_FLUSH_COHERENT_LINES); + + /* + * Wa_1405543622:icl + * Formerly known as WaGAPZPriorityScheme + */ + wa_write_or(wal, + GEN8_GARBCNTL, + GEN11_ARBITRATION_PRIO_ORDER_MASK); + + /* + * Wa_1604223664:icl + * Formerly known as WaL3BankAddressHashing + */ + wa_write_masked_or(wal, + GEN8_GARBCNTL, + GEN11_HASH_CTRL_EXCL_MASK, + GEN11_HASH_CTRL_EXCL_BIT0); + wa_write_masked_or(wal, + GEN11_GLBLINVL, + GEN11_BANK_HASH_ADDR_EXCL_MASK, + GEN11_BANK_HASH_ADDR_EXCL_BIT0); + + /* + * Wa_1405733216:icl + * Formerly known as WaDisableCleanEvicts + */ + ignore_wa_write_or(wal, + GEN8_L3SQCREG4, + GEN11_LQSC_CLEAN_EVICT_DISABLE, + GEN11_LQSC_CLEAN_EVICT_DISABLE); + + /* WaForwardProgressSoftReset:icl */ + wa_write_or(wal, + GEN10_SCRATCH_LNCF2, + PMFLUSHDONE_LNICRSDROP | + PMFLUSH_GAPL3UNBLOCK | + PMFLUSHDONE_LNEBLK); + + /* Wa_1406609255:icl (pre-prod) */ + if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) + wa_write_or(wal, + GEN7_SARCHKMD, + GEN7_DISABLE_DEMAND_PREFETCH | + GEN7_DISABLE_SAMPLER_PREFETCH); + } + + if (IS_GEN_RANGE(i915, 9, 11)) { + /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */ + wa_masked_en(wal, + GEN7_FF_SLICE_CS_CHICKEN1, + GEN9_FFSC_PERCTX_PREEMPT_CTRL); + } + + if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) { + /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */ + wa_write_or(wal, + GEN8_GARBCNTL, + GEN9_GAPS_TSV_CREDIT_DISABLE); + } + + if (IS_BROXTON(i915)) { + /* WaDisablePooledEuLoadBalancingFix:bxt */ + wa_masked_en(wal, + FF_SLICE_CS_CHICKEN2, + GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); + } + + if (IS_GEN(i915, 9)) { + /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */ + wa_masked_en(wal, + GEN9_CSFE_CHICKEN1_RCS, + GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE); + + /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ + wa_write_or(wal, + BDW_SCRATCH1, + GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); + + /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ + if (IS_GEN9_LP(i915)) + wa_write_masked_or(wal, + GEN8_L3SQCREG1, + L3_PRIO_CREDITS_MASK, + L3_GENERAL_PRIO_CREDITS(62) | + L3_HIGH_PRIO_CREDITS(2)); + + /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ + wa_write_or(wal, + GEN8_L3SQCREG4, + GEN8_LQSC_FLUSH_COHERENT_LINES); + } +} + +static void +xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + struct drm_i915_private *i915 = engine->i915; + + /* WaKBLVECSSemaphoreWaitPoll:kbl */ + if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) { + wa_write(wal, + RING_SEMA_WAIT_POLL(engine->mmio_base), + 1); + } +} + +static void +engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8)) + return; + + if (engine->id == RCS0) + rcs_engine_wa_init(engine, wal); + else + xcs_engine_wa_init(engine, wal); +} + +void intel_engine_init_workarounds(struct intel_engine_cs *engine) +{ + struct i915_wa_list *wal = &engine->wa_list; + + if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8)) + return; + + wa_init_start(wal, engine->name); + engine_init_workarounds(engine, wal); + wa_init_finish(wal); +} + +void intel_engine_apply_workarounds(struct intel_engine_cs *engine) +{ + wa_list_apply(engine->uncore, &engine->wa_list); +} + +static struct i915_vma * +create_scratch(struct i915_address_space *vm, 
int count) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int size; + int err; + + size = round_up(count * sizeof(u32), PAGE_SIZE); + obj = i915_gem_object_create_internal(vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, + i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER); + if (err) + goto err_obj; + + return vma; + +err_obj: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int +wa_list_srm(struct i915_request *rq, + const struct i915_wa_list *wal, + struct i915_vma *vma) +{ + const struct i915_wa *wa; + unsigned int i; + u32 srm, *cs; + + srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + if (INTEL_GEN(rq->i915) >= 8) + srm++; + + cs = intel_ring_begin(rq, 4 * wal->count); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + *cs++ = srm; + *cs++ = i915_mmio_reg_offset(wa->reg); + *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; + *cs++ = 0; + } + intel_ring_advance(rq, cs); + + return 0; +} + +static int engine_wa_list_verify(struct intel_engine_cs *engine, + const struct i915_wa_list * const wal, + const char *from) +{ + const struct i915_wa *wa; + struct i915_request *rq; + struct i915_vma *vma; + unsigned int i; + u32 *results; + int err; + + if (!wal->count) + return 0; + + vma = create_scratch(&engine->i915->ggtt.vm, wal->count); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + rq = i915_request_alloc(engine, engine->kernel_context->gem_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_vma; + } + + err = wa_list_srm(rq, wal, vma); + if (err) + goto err_vma; + + i915_request_add(rq); + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { + err = -ETIME; + goto err_vma; + } + + results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(results)) { + err = PTR_ERR(results); + goto err_vma; + } + + err = 0; + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + if (!wa_verify(wa, results[i], wal->name, from)) + err = -ENXIO; + + i915_gem_object_unpin_map(vma->obj); + +err_vma: + i915_vma_unpin(vma); + i915_vma_put(vma); + return err; +} + +int intel_engine_verify_workarounds(struct intel_engine_cs *engine, + const char *from) +{ + return engine_wa_list_verify(engine, &engine->wa_list, from); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_workarounds.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h new file mode 100644 index 000000000000..3761a6ee58bb --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h @@ -0,0 +1,40 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef _INTEL_WORKAROUNDS_H_ +#define _INTEL_WORKAROUNDS_H_ + +#include + +#include "intel_workarounds_types.h" + +struct drm_i915_private; +struct i915_request; +struct intel_engine_cs; + +static inline void intel_wa_list_free(struct i915_wa_list *wal) +{ + kfree(wal->list); + memset(wal, 0, sizeof(*wal)); +} + +void intel_engine_init_ctx_wa(struct intel_engine_cs *engine); +int intel_engine_emit_ctx_wa(struct i915_request *rq); + +void intel_gt_init_workarounds(struct drm_i915_private *i915); +void intel_gt_apply_workarounds(struct drm_i915_private *i915); +bool intel_gt_verify_workarounds(struct drm_i915_private *i915, + const char *from); + +void 
intel_engine_init_whitelist(struct intel_engine_cs *engine); +void intel_engine_apply_whitelist(struct intel_engine_cs *engine); + +void intel_engine_init_workarounds(struct intel_engine_cs *engine); +void intel_engine_apply_workarounds(struct intel_engine_cs *engine); +int intel_engine_verify_workarounds(struct intel_engine_cs *engine, + const char *from); + +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h new file mode 100644 index 000000000000..42ac1fb99572 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef __INTEL_WORKAROUNDS_TYPES_H__ +#define __INTEL_WORKAROUNDS_TYPES_H__ + +#include + +#include "i915_reg.h" + +struct i915_wa { + i915_reg_t reg; + u32 mask; + u32 val; + u32 read; +}; + +struct i915_wa_list { + const char *name; + struct i915_wa *list; + unsigned int count; + unsigned int wa_count; +}; + +#endif /* __INTEL_WORKAROUNDS_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c new file mode 100644 index 000000000000..414afd2f27fe --- /dev/null +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -0,0 +1,325 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "i915_drv.h" +#include "intel_context.h" + +#include "mock_engine.h" +#include "selftests/mock_request.h" + +struct mock_ring { + struct intel_ring base; + struct i915_timeline timeline; +}; + +static void mock_timeline_pin(struct i915_timeline *tl) +{ + tl->pin_count++; +} + +static void mock_timeline_unpin(struct i915_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + tl->pin_count--; +} + +static struct intel_ring *mock_ring(struct intel_engine_cs *engine) +{ + const unsigned long sz = PAGE_SIZE / 2; + struct mock_ring *ring; + + ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); + if (!ring) + return NULL; + + if (i915_timeline_init(engine->i915, &ring->timeline, NULL)) { + kfree(ring); + return NULL; + } + + kref_init(&ring->base.ref); + ring->base.size = sz; + ring->base.effective_size = sz; + ring->base.vaddr = (void *)(ring + 1); + ring->base.timeline = &ring->timeline; + + INIT_LIST_HEAD(&ring->base.request_list); + intel_ring_update_space(&ring->base); + + return &ring->base; +} + +static void mock_ring_free(struct intel_ring *base) +{ + struct mock_ring *ring = container_of(base, typeof(*ring), base); + + i915_timeline_fini(&ring->timeline); + kfree(ring); +} + +static struct i915_request *first_request(struct mock_engine *engine) +{ + return list_first_entry_or_null(&engine->hw_queue, + struct i915_request, + mock.link); +} + +static void advance(struct i915_request *request) +{ + list_del_init(&request->mock.link); + i915_request_mark_complete(request); + GEM_BUG_ON(!i915_request_completed(request)); + + intel_engine_queue_breadcrumbs(request->engine); +} + +static void hw_delay_complete(struct timer_list *t) +{ + struct mock_engine *engine = from_timer(engine, t, hw_delay); + struct i915_request *request; + unsigned long flags; + + spin_lock_irqsave(&engine->hw_lock, flags); + + /* Timer fired, first request is complete */ + request = first_request(engine); + if (request) + advance(request); + + /* + * Also immediately signal any subsequent 0-delay requests, but + * requeue the timer for the next delayed request. 
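+ * (Only the request at the head of hw_queue ever has the timer armed
+ * for it; mock_submit_request() only starts the timer when a new
+ * request lands at the head with a non-zero delay.)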
+ */ + while ((request = first_request(engine))) { + if (request->mock.delay) { + mod_timer(&engine->hw_delay, + jiffies + request->mock.delay); + break; + } + + advance(request); + } + + spin_unlock_irqrestore(&engine->hw_lock, flags); +} + +static void mock_context_unpin(struct intel_context *ce) +{ + mock_timeline_unpin(ce->ring->timeline); +} + +static void mock_context_destroy(struct kref *ref) +{ + struct intel_context *ce = container_of(ref, typeof(*ce), ref); + + GEM_BUG_ON(intel_context_is_pinned(ce)); + + if (ce->ring) + mock_ring_free(ce->ring); + + intel_context_free(ce); +} + +static int mock_context_pin(struct intel_context *ce) +{ + if (!ce->ring) { + ce->ring = mock_ring(ce->engine); + if (!ce->ring) + return -ENOMEM; + } + + mock_timeline_pin(ce->ring->timeline); + return 0; +} + +static const struct intel_context_ops mock_context_ops = { + .pin = mock_context_pin, + .unpin = mock_context_unpin, + + .destroy = mock_context_destroy, +}; + +static int mock_request_alloc(struct i915_request *request) +{ + INIT_LIST_HEAD(&request->mock.link); + request->mock.delay = 0; + + return 0; +} + +static int mock_emit_flush(struct i915_request *request, + unsigned int flags) +{ + return 0; +} + +static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs) +{ + return cs; +} + +static void mock_submit_request(struct i915_request *request) +{ + struct mock_engine *engine = + container_of(request->engine, typeof(*engine), base); + unsigned long flags; + + i915_request_submit(request); + + spin_lock_irqsave(&engine->hw_lock, flags); + list_add_tail(&request->mock.link, &engine->hw_queue); + if (list_is_first(&request->mock.link, &engine->hw_queue)) { + if (request->mock.delay) + mod_timer(&engine->hw_delay, + jiffies + request->mock.delay); + else + advance(request); + } + spin_unlock_irqrestore(&engine->hw_lock, flags); +} + +static void mock_reset_prepare(struct intel_engine_cs *engine) +{ +} + +static void mock_reset(struct intel_engine_cs *engine, bool stalled) +{ + GEM_BUG_ON(stalled); +} + +static void mock_reset_finish(struct intel_engine_cs *engine) +{ +} + +static void mock_cancel_requests(struct intel_engine_cs *engine) +{ + struct i915_request *request; + unsigned long flags; + + spin_lock_irqsave(&engine->timeline.lock, flags); + + /* Mark all submitted requests as skipped. 
*/ + list_for_each_entry(request, &engine->timeline.requests, sched.link) { + if (!i915_request_signaled(request)) + dma_fence_set_error(&request->fence, -EIO); + + i915_request_mark_complete(request); + } + + spin_unlock_irqrestore(&engine->timeline.lock, flags); +} + +struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, + const char *name, + int id) +{ + struct mock_engine *engine; + + GEM_BUG_ON(id >= I915_NUM_ENGINES); + + engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); + if (!engine) + return NULL; + + /* minimal engine setup for requests */ + engine->base.i915 = i915; + snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); + engine->base.id = id; + engine->base.mask = BIT(id); + engine->base.status_page.addr = (void *)(engine + 1); + + engine->base.cops = &mock_context_ops; + engine->base.request_alloc = mock_request_alloc; + engine->base.emit_flush = mock_emit_flush; + engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb; + engine->base.submit_request = mock_submit_request; + + engine->base.reset.prepare = mock_reset_prepare; + engine->base.reset.reset = mock_reset; + engine->base.reset.finish = mock_reset_finish; + engine->base.cancel_requests = mock_cancel_requests; + + if (i915_timeline_init(i915, &engine->base.timeline, NULL)) + goto err_free; + i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE); + + intel_engine_init_breadcrumbs(&engine->base); + + /* fake hw queue */ + spin_lock_init(&engine->hw_lock); + timer_setup(&engine->hw_delay, hw_delay_complete, 0); + INIT_LIST_HEAD(&engine->hw_queue); + + engine->base.kernel_context = + intel_context_pin(i915->kernel_context, &engine->base); + if (IS_ERR(engine->base.kernel_context)) + goto err_breadcrumbs; + + return &engine->base; + +err_breadcrumbs: + intel_engine_fini_breadcrumbs(&engine->base); + i915_timeline_fini(&engine->base.timeline); +err_free: + kfree(engine); + return NULL; +} + +void mock_engine_flush(struct intel_engine_cs *engine) +{ + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + struct i915_request *request, *rn; + + del_timer_sync(&mock->hw_delay); + + spin_lock_irq(&mock->hw_lock); + list_for_each_entry_safe(request, rn, &mock->hw_queue, mock.link) + advance(request); + spin_unlock_irq(&mock->hw_lock); +} + +void mock_engine_reset(struct intel_engine_cs *engine) +{ +} + +void mock_engine_free(struct intel_engine_cs *engine) +{ + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + struct intel_context *ce; + + GEM_BUG_ON(timer_pending(&mock->hw_delay)); + + ce = fetch_and_zero(&engine->last_retired_context); + if (ce) + intel_context_unpin(ce); + + intel_context_unpin(engine->kernel_context); + + intel_engine_fini_breadcrumbs(engine); + i915_timeline_fini(&engine->timeline); + + kfree(engine); +} diff --git a/drivers/gpu/drm/i915/gt/mock_engine.h b/drivers/gpu/drm/i915/gt/mock_engine.h new file mode 100644 index 000000000000..44b35a85e9d1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/mock_engine.h @@ -0,0 +1,49 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + 
* + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_ENGINE_H__ +#define __MOCK_ENGINE_H__ + +#include +#include +#include + +#include "gt/intel_engine.h" + +struct mock_engine { + struct intel_engine_cs base; + + spinlock_t hw_lock; + struct list_head hw_queue; + struct timer_list hw_delay; +}; + +struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, + const char *name, + int id); +void mock_engine_flush(struct intel_engine_cs *engine); +void mock_engine_reset(struct intel_engine_cs *engine); +void mock_engine_free(struct intel_engine_cs *engine); + +#endif /* !__MOCK_ENGINE_H__ */ diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c new file mode 100644 index 000000000000..cfaa6b296835 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_selftest.h" + +static int intel_mmio_bases_check(void *arg) +{ + int i, j; + + for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { + const struct engine_info *info = &intel_engines[i]; + char name[INTEL_ENGINE_CS_MAX_NAME]; + u8 prev = U8_MAX; + + __sprint_engine_name(name, info); + + for (j = 0; j < MAX_MMIO_BASES; j++) { + u8 gen = info->mmio_bases[j].gen; + u32 base = info->mmio_bases[j].base; + + if (gen >= prev) { + pr_err("%s: %s: mmio base for gen %x " + "is before the one for gen %x\n", + __func__, name, prev, gen); + return -EINVAL; + } + + if (gen == 0) + break; + + if (!base) { + pr_err("%s: %s: invalid mmio base (%x) " + "for gen %x at entry %u\n", + __func__, name, base, gen, j); + return -EINVAL; + } + + prev = gen; + } + + pr_info("%s: min gen supported for %s = %d\n", + __func__, name, prev); + } + + return 0; +} + +int intel_engine_cs_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(intel_mmio_bases_check), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c new file mode 100644 index 000000000000..acd33aa46068 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -0,0 +1,1919 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "i915_selftest.h" +#include "selftests/i915_random.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_reset.h" +#include "selftests/igt_wedge_me.h" + +#include "selftests/mock_context.h" +#include "selftests/mock_drm.h" + +#define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ + +struct hang { + struct drm_i915_private *i915; + struct drm_i915_gem_object *hws; + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + u32 *seqno; + u32 *batch; +}; + +static int hang_init(struct hang *h, struct drm_i915_private *i915) +{ + void *vaddr; + int err; + + memset(h, 0, sizeof(*h)); + h->i915 = i915; + + h->ctx = kernel_context(i915); + if (IS_ERR(h->ctx)) + return PTR_ERR(h->ctx); + + GEM_BUG_ON(i915_gem_context_is_bannable(h->ctx)); + + h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(h->hws)) { + err = PTR_ERR(h->hws); + goto err_ctx; + } + + h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(h->obj)) { + err = PTR_ERR(h->obj); + goto err_hws; + } + + i915_gem_object_set_cache_coherency(h->hws, I915_CACHE_LLC); + vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + h->seqno = memset(vaddr, 0xff, PAGE_SIZE); + + vaddr = i915_gem_object_pin_map(h->obj, + i915_coherent_map_type(i915)); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_unpin_hws; + } + h->batch = vaddr; + + return 0; + +err_unpin_hws: + i915_gem_object_unpin_map(h->hws); +err_obj: + i915_gem_object_put(h->obj); +err_hws: + i915_gem_object_put(h->hws); +err_ctx: + kernel_context_close(h->ctx); + return err; +} + +static u64 hws_address(const struct i915_vma *hws, + const struct i915_request *rq) +{ + return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context); +} + +static int move_to_active(struct i915_vma *vma, + struct i915_request *rq, + unsigned int flags) +{ + int err; + + err = i915_vma_move_to_active(vma, rq, flags); + if (err) + return err; + + if (!i915_gem_object_has_active_reference(vma->obj)) { + i915_gem_object_get(vma->obj); + i915_gem_object_set_active_reference(vma->obj); + } + + return 0; +} + +static struct i915_request * +hang_create_request(struct hang *h, struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = h->i915; + struct i915_address_space *vm = + h->ctx->ppgtt ? 
&h->ctx->ppgtt->vm : &i915->ggtt.vm; + struct i915_request *rq = NULL; + struct i915_vma *hws, *vma; + unsigned int flags; + u32 *batch; + int err; + + if (i915_gem_object_is_active(h->obj)) { + struct drm_i915_gem_object *obj; + void *vaddr; + + obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vaddr = i915_gem_object_pin_map(obj, + i915_coherent_map_type(h->i915)); + if (IS_ERR(vaddr)) { + i915_gem_object_put(obj); + return ERR_CAST(vaddr); + } + + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); + + h->obj = obj; + h->batch = vaddr; + } + + vma = i915_vma_instance(h->obj, vm, NULL); + if (IS_ERR(vma)) + return ERR_CAST(vma); + + hws = i915_vma_instance(h->hws, vm, NULL); + if (IS_ERR(hws)) + return ERR_CAST(hws); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return ERR_PTR(err); + + err = i915_vma_pin(hws, 0, 0, PIN_USER); + if (err) + goto unpin_vma; + + rq = i915_request_alloc(engine, h->ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto unpin_hws; + } + + err = move_to_active(vma, rq, 0); + if (err) + goto cancel_rq; + + err = move_to_active(hws, rq, 0); + if (err) + goto cancel_rq; + + batch = h->batch; + if (INTEL_GEN(i915) >= 8) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = upper_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; + *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *batch++ = lower_32_bits(vma->node.start); + *batch++ = upper_32_bits(vma->node.start); + } else if (INTEL_GEN(i915) >= 6) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; + *batch++ = MI_BATCH_BUFFER_START | 1 << 8; + *batch++ = lower_32_bits(vma->node.start); + } else if (INTEL_GEN(i915) >= 4) { + *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *batch++ = 0; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; + *batch++ = MI_BATCH_BUFFER_START | 2 << 6; + *batch++ = lower_32_bits(vma->node.start); + } else { + *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; + *batch++ = MI_BATCH_BUFFER_START | 2 << 6; + *batch++ = lower_32_bits(vma->node.start); + } + *batch++ = MI_BATCH_BUFFER_END; /* not reached */ + i915_gem_chipset_flush(h->i915); + + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto cancel_rq; + } + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); + +cancel_rq: + if (err) { + i915_request_skip(rq, err); + i915_request_add(rq); + } +unpin_hws: + i915_vma_unpin(hws); +unpin_vma: + i915_vma_unpin(vma); + return err ? 
ERR_PTR(err) : rq; +} + +static u32 hws_seqno(const struct hang *h, const struct i915_request *rq) +{ + return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); +} + +static void hang_fini(struct hang *h) +{ + *h->batch = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(h->i915); + + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); + + i915_gem_object_unpin_map(h->hws); + i915_gem_object_put(h->hws); + + kernel_context_close(h->ctx); + + igt_flush_test(h->i915, I915_WAIT_LOCKED); +} + +static bool wait_until_running(struct hang *h, struct i915_request *rq) +{ + return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 10) && + wait_for(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 1000)); +} + +static int igt_hang_sanitycheck(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_request *rq; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct hang h; + int err; + + /* Basic check that we can execute our hanging batch */ + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + for_each_engine(engine, i915, id) { + struct igt_wedge_me w; + long timeout; + + if (!intel_engine_can_store_dword(engine)) + continue; + + rq = hang_create_request(&h, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + pr_err("Failed to create request for %s, err=%d\n", + engine->name, err); + goto fini; + } + + i915_request_get(rq); + + *h.batch = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + + i915_request_add(rq); + + timeout = 0; + igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/) + timeout = i915_request_wait(rq, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (i915_reset_failed(i915)) + timeout = -EIO; + + i915_request_put(rq); + + if (timeout < 0) { + err = timeout; + pr_err("Wait for request failed on %s, err=%d\n", + engine->name, err); + goto fini; + } + } + +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_global_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + unsigned int reset_count; + int err = 0; + + /* Check that we can issue a global GPU reset */ + + igt_global_reset_lock(i915); + + reset_count = i915_reset_count(&i915->gpu_error); + + i915_reset(i915, ALL_ENGINES, NULL); + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + err = -EINVAL; + } + + igt_global_reset_unlock(i915); + + if (i915_reset_failed(i915)) + err = -EIO; + + return err; +} + +static int igt_wedged_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + intel_wakeref_t wakeref; + + /* Check that we can recover a wedged device with a GPU reset */ + + igt_global_reset_lock(i915); + wakeref = intel_runtime_pm_get(i915); + + i915_gem_set_wedged(i915); + + GEM_BUG_ON(!i915_reset_failed(i915)); + i915_reset(i915, ALL_ENGINES, NULL); + + intel_runtime_pm_put(i915, wakeref); + igt_global_reset_unlock(i915); + + return i915_reset_failed(i915) ? 
-EIO : 0; +} + +static bool wait_for_idle(struct intel_engine_cs *engine) +{ + return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0; +} + +static int igt_reset_nop(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + unsigned int reset_count, count; + enum intel_engine_id id; + intel_wakeref_t wakeref; + struct drm_file *file; + IGT_TIMEOUT(end_time); + int err = 0; + + /* Check that we can reset during non-user portions of requests */ + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + ctx = live_context(i915, file); + mutex_unlock(&i915->drm.struct_mutex); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + + i915_gem_context_clear_bannable(ctx); + wakeref = intel_runtime_pm_get(i915); + reset_count = i915_reset_count(&i915->gpu_error); + count = 0; + do { + mutex_lock(&i915->drm.struct_mutex); + for_each_engine(engine, i915, id) { + int i; + + for (i = 0; i < 16; i++) { + struct i915_request *rq; + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_add(rq); + } + } + mutex_unlock(&i915->drm.struct_mutex); + + igt_global_reset_lock(i915); + i915_reset(i915, ALL_ENGINES, NULL); + igt_global_reset_unlock(i915); + if (i915_reset_failed(i915)) { + err = -EIO; + break; + } + + if (i915_reset_count(&i915->gpu_error) != + reset_count + ++count) { + pr_err("Full GPU reset not recorded!\n"); + err = -EINVAL; + break; + } + + if (!i915_reset_flush(i915)) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("%s failed to idle after reset\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -EIO; + break; + } + + err = igt_flush_test(i915, 0); + if (err) + break; + } while (time_before(jiffies, end_time)); + pr_info("%s: %d resets\n", __func__, count); + + mutex_lock(&i915->drm.struct_mutex); + err = igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + + intel_runtime_pm_put(i915, wakeref); + +out: + mock_file_free(i915, file); + if (i915_reset_failed(i915)) + err = -EIO; + return err; +} + +static int igt_reset_nop_engine(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + enum intel_engine_id id; + intel_wakeref_t wakeref; + struct drm_file *file; + int err = 0; + + /* Check that we can engine-reset during non-user portions */ + + if (!intel_has_reset_engine(i915)) + return 0; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + ctx = live_context(i915, file); + mutex_unlock(&i915->drm.struct_mutex); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + + i915_gem_context_clear_bannable(ctx); + wakeref = intel_runtime_pm_get(i915); + for_each_engine(engine, i915, id) { + unsigned int reset_count, reset_engine_count; + unsigned int count; + IGT_TIMEOUT(end_time); + + reset_count = i915_reset_count(&i915->gpu_error); + reset_engine_count = i915_reset_engine_count(&i915->gpu_error, + engine); + count = 0; + + set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + do { + int i; + + if (!wait_for_idle(engine)) { + pr_err("%s failed to idle before reset\n", + engine->name); + err = -EIO; + break; + } + + mutex_lock(&i915->drm.struct_mutex); + for (i = 0; i < 16; i++) { + struct i915_request *rq; + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = 
PTR_ERR(rq); + break; + } + + i915_request_add(rq); + } + mutex_unlock(&i915->drm.struct_mutex); + + err = i915_reset_engine(engine, NULL); + if (err) { + pr_err("i915_reset_engine failed\n"); + break; + } + + if (i915_reset_count(&i915->gpu_error) != reset_count) { + pr_err("Full GPU reset recorded! (engine reset expected)\n"); + err = -EINVAL; + break; + } + + if (i915_reset_engine_count(&i915->gpu_error, engine) != + reset_engine_count + ++count) { + pr_err("%s engine reset not recorded!\n", + engine->name); + err = -EINVAL; + break; + } + + if (!i915_reset_flush(i915)) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("%s failed to idle after reset\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -EIO; + break; + } + } while (time_before(jiffies, end_time)); + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + pr_info("%s(%s): %d resets\n", __func__, engine->name, count); + + if (err) + break; + + err = igt_flush_test(i915, 0); + if (err) + break; + } + + mutex_lock(&i915->drm.struct_mutex); + err = igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + + intel_runtime_pm_put(i915, wakeref); +out: + mock_file_free(i915, file); + if (i915_reset_failed(i915)) + err = -EIO; + return err; +} + +static int __igt_reset_engine(struct drm_i915_private *i915, bool active) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct hang h; + int err = 0; + + /* Check that we can issue an engine reset on an idle engine (no-op) */ + + if (!intel_has_reset_engine(i915)) + return 0; + + if (active) { + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + mutex_unlock(&i915->drm.struct_mutex); + if (err) + return err; + } + + for_each_engine(engine, i915, id) { + unsigned int reset_count, reset_engine_count; + IGT_TIMEOUT(end_time); + + if (active && !intel_engine_can_store_dword(engine)) + continue; + + if (!wait_for_idle(engine)) { + pr_err("%s failed to idle before reset\n", + engine->name); + err = -EIO; + break; + } + + reset_count = i915_reset_count(&i915->gpu_error); + reset_engine_count = i915_reset_engine_count(&i915->gpu_error, + engine); + + set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + do { + if (active) { + struct i915_request *rq; + + mutex_lock(&i915->drm.struct_mutex); + rq = hang_create_request(&h, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + mutex_unlock(&i915->drm.struct_mutex); + break; + } + + i915_request_get(rq); + i915_request_add(rq); + mutex_unlock(&i915->drm.struct_mutex); + + if (!wait_until_running(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %llx, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + i915_request_put(rq); + err = -EIO; + break; + } + + i915_request_put(rq); + } + + err = i915_reset_engine(engine, NULL); + if (err) { + pr_err("i915_reset_engine failed\n"); + break; + } + + if (i915_reset_count(&i915->gpu_error) != reset_count) { + pr_err("Full GPU reset recorded! 
(engine reset expected)\n"); + err = -EINVAL; + break; + } + + if (i915_reset_engine_count(&i915->gpu_error, engine) != + ++reset_engine_count) { + pr_err("%s engine reset not recorded!\n", + engine->name); + err = -EINVAL; + break; + } + + if (!i915_reset_flush(i915)) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("%s failed to idle after reset\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -EIO; + break; + } + } while (time_before(jiffies, end_time)); + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + + if (err) + break; + + err = igt_flush_test(i915, 0); + if (err) + break; + } + + if (i915_reset_failed(i915)) + err = -EIO; + + if (active) { + mutex_lock(&i915->drm.struct_mutex); + hang_fini(&h); + mutex_unlock(&i915->drm.struct_mutex); + } + + return err; +} + +static int igt_reset_idle_engine(void *arg) +{ + return __igt_reset_engine(arg, false); +} + +static int igt_reset_active_engine(void *arg) +{ + return __igt_reset_engine(arg, true); +} + +struct active_engine { + struct task_struct *task; + struct intel_engine_cs *engine; + unsigned long resets; + unsigned int flags; +}; + +#define TEST_ACTIVE BIT(0) +#define TEST_OTHERS BIT(1) +#define TEST_SELF BIT(2) +#define TEST_PRIORITY BIT(3) + +static int active_request_put(struct i915_request *rq) +{ + int err = 0; + + if (!rq) + return 0; + + if (i915_request_wait(rq, 0, 5 * HZ) < 0) { + GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n", + rq->engine->name, + rq->fence.context, + rq->fence.seqno); + GEM_TRACE_DUMP(); + + i915_gem_set_wedged(rq->i915); + err = -EIO; + } + + i915_request_put(rq); + + return err; +} + +static int active_engine(void *data) +{ + I915_RND_STATE(prng); + struct active_engine *arg = data; + struct intel_engine_cs *engine = arg->engine; + struct i915_request *rq[8] = {}; + struct i915_gem_context *ctx[ARRAY_SIZE(rq)]; + struct drm_file *file; + unsigned long count = 0; + int err = 0; + + file = mock_file(engine->i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + for (count = 0; count < ARRAY_SIZE(ctx); count++) { + mutex_lock(&engine->i915->drm.struct_mutex); + ctx[count] = live_context(engine->i915, file); + mutex_unlock(&engine->i915->drm.struct_mutex); + if (IS_ERR(ctx[count])) { + err = PTR_ERR(ctx[count]); + while (--count) + i915_gem_context_put(ctx[count]); + goto err_file; + } + } + + while (!kthread_should_stop()) { + unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1); + struct i915_request *old = rq[idx]; + struct i915_request *new; + + mutex_lock(&engine->i915->drm.struct_mutex); + new = i915_request_alloc(engine, ctx[idx]); + if (IS_ERR(new)) { + mutex_unlock(&engine->i915->drm.struct_mutex); + err = PTR_ERR(new); + break; + } + + if (arg->flags & TEST_PRIORITY) + ctx[idx]->sched.priority = + i915_prandom_u32_max_state(512, &prng); + + rq[idx] = i915_request_get(new); + i915_request_add(new); + mutex_unlock(&engine->i915->drm.struct_mutex); + + err = active_request_put(old); + if (err) + break; + + cond_resched(); + } + + for (count = 0; count < ARRAY_SIZE(rq); count++) { + int err__ = active_request_put(rq[count]); + + /* Keep the first error */ + if (!err) + err = err__; + } + +err_file: + mock_file_free(engine->i915, file); + return err; +} + +static int __igt_reset_engines(struct drm_i915_private *i915, + const char *test_name, + unsigned int flags) +{ + struct intel_engine_cs *engine, *other; + enum intel_engine_id id, tmp; + struct hang h; + int err = 0; + + /* Check that issuing a reset on 
one engine does not interfere + * with any other engine. + */ + + if (!intel_has_reset_engine(i915)) + return 0; + + if (flags & TEST_ACTIVE) { + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + mutex_unlock(&i915->drm.struct_mutex); + if (err) + return err; + + if (flags & TEST_PRIORITY) + h.ctx->sched.priority = 1024; + } + + for_each_engine(engine, i915, id) { + struct active_engine threads[I915_NUM_ENGINES] = {}; + unsigned long global = i915_reset_count(&i915->gpu_error); + unsigned long count = 0, reported; + IGT_TIMEOUT(end_time); + + if (flags & TEST_ACTIVE && + !intel_engine_can_store_dword(engine)) + continue; + + if (!wait_for_idle(engine)) { + pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n", + engine->name, test_name); + err = -EIO; + break; + } + + memset(threads, 0, sizeof(threads)); + for_each_engine(other, i915, tmp) { + struct task_struct *tsk; + + threads[tmp].resets = + i915_reset_engine_count(&i915->gpu_error, + other); + + if (!(flags & TEST_OTHERS)) + continue; + + if (other == engine && !(flags & TEST_SELF)) + continue; + + threads[tmp].engine = other; + threads[tmp].flags = flags; + + tsk = kthread_run(active_engine, &threads[tmp], + "igt/%s", other->name); + if (IS_ERR(tsk)) { + err = PTR_ERR(tsk); + goto unwind; + } + + threads[tmp].task = tsk; + get_task_struct(tsk); + } + + set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + do { + struct i915_request *rq = NULL; + + if (flags & TEST_ACTIVE) { + mutex_lock(&i915->drm.struct_mutex); + rq = hang_create_request(&h, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + mutex_unlock(&i915->drm.struct_mutex); + break; + } + + i915_request_get(rq); + i915_request_add(rq); + mutex_unlock(&i915->drm.struct_mutex); + + if (!wait_until_running(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %llx, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + i915_request_put(rq); + err = -EIO; + break; + } + } + + err = i915_reset_engine(engine, NULL); + if (err) { + pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", + engine->name, test_name, err); + break; + } + + count++; + + if (rq) { + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("i915_reset_engine(%s:%s):" + " failed to complete request after reset\n", + engine->name, test_name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + i915_request_put(rq); + + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + break; + } + + i915_request_put(rq); + } + + if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("i915_reset_engine(%s:%s):" + " failed to idle after reset\n", + engine->name, test_name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -EIO; + break; + } + } while (time_before(jiffies, end_time)); + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + pr_info("i915_reset_engine(%s:%s): %lu resets\n", + engine->name, test_name, count); + + reported = i915_reset_engine_count(&i915->gpu_error, engine); + reported -= threads[engine->id].resets; + if (reported != count) { + pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", + engine->name, test_name, count, reported); + if (!err) + err = -EINVAL; + } + +unwind: + for_each_engine(other, i915, tmp) { + int ret; + + if (!threads[tmp].task) + continue; + + ret 
= kthread_stop(threads[tmp].task); + if (ret) { + pr_err("kthread for other engine %s failed, err=%d\n", + other->name, ret); + if (!err) + err = ret; + } + put_task_struct(threads[tmp].task); + + if (other != engine && + threads[tmp].resets != + i915_reset_engine_count(&i915->gpu_error, other)) { + pr_err("Innocent engine %s was reset (count=%ld)\n", + other->name, + i915_reset_engine_count(&i915->gpu_error, + other) - + threads[tmp].resets); + if (!err) + err = -EINVAL; + } + } + + if (global != i915_reset_count(&i915->gpu_error)) { + pr_err("Global reset (count=%ld)!\n", + i915_reset_count(&i915->gpu_error) - global); + if (!err) + err = -EINVAL; + } + + if (err) + break; + + err = igt_flush_test(i915, 0); + if (err) + break; + } + + if (i915_reset_failed(i915)) + err = -EIO; + + if (flags & TEST_ACTIVE) { + mutex_lock(&i915->drm.struct_mutex); + hang_fini(&h); + mutex_unlock(&i915->drm.struct_mutex); + } + + return err; +} + +static int igt_reset_engines(void *arg) +{ + static const struct { + const char *name; + unsigned int flags; + } phases[] = { + { "idle", 0 }, + { "active", TEST_ACTIVE }, + { "others-idle", TEST_OTHERS }, + { "others-active", TEST_OTHERS | TEST_ACTIVE }, + { + "others-priority", + TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY + }, + { + "self-priority", + TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF, + }, + { } + }; + struct drm_i915_private *i915 = arg; + typeof(*phases) *p; + int err; + + for (p = phases; p->name; p++) { + if (p->flags & TEST_PRIORITY) { + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + continue; + } + + err = __igt_reset_engines(arg, p->name, p->flags); + if (err) + return err; + } + + return 0; +} + +static u32 fake_hangcheck(struct drm_i915_private *i915, + intel_engine_mask_t mask) +{ + u32 count = i915_reset_count(&i915->gpu_error); + + i915_reset(i915, mask, NULL); + + return count; +} + +static int igt_reset_wait(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_request *rq; + unsigned int reset_count; + struct hang h; + long timeout; + int err; + + if (!intel_engine_can_store_dword(i915->engine[RCS0])) + return 0; + + /* Check that we detect a stuck waiter and issue a reset */ + + igt_global_reset_lock(i915); + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + rq = hang_create_request(&h, i915->engine[RCS0]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto fini; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (!wait_until_running(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %llx, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); + + i915_gem_set_wedged(i915); + + err = -EIO; + goto out_rq; + } + + reset_count = fake_hangcheck(i915, ALL_ENGINES); + + timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); + if (timeout < 0) { + pr_err("i915_request_wait failed on a stuck request: err=%ld\n", + timeout); + err = timeout; + goto out_rq; + } + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + err = -EINVAL; + goto out_rq; + } + +out_rq: + i915_request_put(rq); +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + igt_global_reset_unlock(i915); + + if (i915_reset_failed(i915)) + return -EIO; + + return err; +} + +struct evict_vma { + struct completion completion; + struct i915_vma *vma; +}; + +static int evict_vma(void *data) +{ + struct 
evict_vma *arg = data; + struct i915_address_space *vm = arg->vma->vm; + struct drm_i915_private *i915 = vm->i915; + struct drm_mm_node evict = arg->vma->node; + int err; + + complete(&arg->completion); + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_evict_for_node(vm, &evict, 0); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +static int evict_fence(void *data) +{ + struct evict_vma *arg = data; + struct drm_i915_private *i915 = arg->vma->vm->i915; + int err; + + complete(&arg->completion); + + mutex_lock(&i915->drm.struct_mutex); + + /* Mark the fence register as dirty to force the mmio update. */ + err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512); + if (err) { + pr_err("Invalid Y-tiling settings; err:%d\n", err); + goto out_unlock; + } + + err = i915_vma_pin_fence(arg->vma); + if (err) { + pr_err("Unable to pin Y-tiled fence; err:%d\n", err); + goto out_unlock; + } + + i915_vma_unpin_fence(arg->vma); + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +static int __igt_reset_evict_vma(struct drm_i915_private *i915, + struct i915_address_space *vm, + int (*fn)(void *), + unsigned int flags) +{ + struct drm_i915_gem_object *obj; + struct task_struct *tsk = NULL; + struct i915_request *rq; + struct evict_vma arg; + struct hang h; + int err; + + if (!intel_engine_can_store_dword(i915->engine[RCS0])) + return 0; + + /* Check that we can recover an unbind stuck on a hanging request */ + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + obj = i915_gem_object_create_internal(i915, SZ_1M); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto fini; + } + + if (flags & EXEC_OBJECT_NEEDS_FENCE) { + err = i915_gem_object_set_tiling(obj, I915_TILING_X, 512); + if (err) { + pr_err("Invalid X-tiling settings; err:%d\n", err); + goto out_obj; + } + } + + arg.vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(arg.vma)) { + err = PTR_ERR(arg.vma); + goto out_obj; + } + + rq = hang_create_request(&h, i915->engine[RCS0]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_obj; + } + + err = i915_vma_pin(arg.vma, 0, 0, + i915_vma_is_ggtt(arg.vma) ? 
+ PIN_GLOBAL | PIN_MAPPABLE : + PIN_USER); + if (err) { + i915_request_add(rq); + goto out_obj; + } + + if (flags & EXEC_OBJECT_NEEDS_FENCE) { + err = i915_vma_pin_fence(arg.vma); + if (err) { + pr_err("Unable to pin X-tiled fence; err:%d\n", err); + i915_vma_unpin(arg.vma); + i915_request_add(rq); + goto out_obj; + } + } + + err = i915_vma_move_to_active(arg.vma, rq, flags); + + if (flags & EXEC_OBJECT_NEEDS_FENCE) + i915_vma_unpin_fence(arg.vma); + i915_vma_unpin(arg.vma); + + i915_request_get(rq); + i915_request_add(rq); + if (err) + goto out_rq; + + mutex_unlock(&i915->drm.struct_mutex); + + if (!wait_until_running(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %llx, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); + + i915_gem_set_wedged(i915); + goto out_reset; + } + + init_completion(&arg.completion); + + tsk = kthread_run(fn, &arg, "igt/evict_vma"); + if (IS_ERR(tsk)) { + err = PTR_ERR(tsk); + tsk = NULL; + goto out_reset; + } + get_task_struct(tsk); + + wait_for_completion(&arg.completion); + + if (wait_for(!list_empty(&rq->fence.cb_list), 10)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("igt/evict_vma kthread did not wait\n"); + intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); + + i915_gem_set_wedged(i915); + goto out_reset; + } + +out_reset: + igt_global_reset_lock(i915); + fake_hangcheck(rq->i915, rq->engine->mask); + igt_global_reset_unlock(i915); + + if (tsk) { + struct igt_wedge_me w; + + /* The reset, even indirectly, should take less than 10ms. */ + igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/) + err = kthread_stop(tsk); + + put_task_struct(tsk); + } + + mutex_lock(&i915->drm.struct_mutex); +out_rq: + i915_request_put(rq); +out_obj: + i915_gem_object_put(obj); +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_reset_failed(i915)) + return -EIO; + + return err; +} + +static int igt_reset_evict_ggtt(void *arg) +{ + struct drm_i915_private *i915 = arg; + + return __igt_reset_evict_vma(i915, &i915->ggtt.vm, + evict_vma, EXEC_OBJECT_WRITE); +} + +static int igt_reset_evict_ppgtt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx; + struct drm_file *file; + int err; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + ctx = live_context(i915, file); + mutex_unlock(&i915->drm.struct_mutex); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + + err = 0; + if (ctx->ppgtt) /* aliasing == global gtt locking, covered above */ + err = __igt_reset_evict_vma(i915, &ctx->ppgtt->vm, + evict_vma, EXEC_OBJECT_WRITE); + +out: + mock_file_free(i915, file); + return err; +} + +static int igt_reset_evict_fence(void *arg) +{ + struct drm_i915_private *i915 = arg; + + return __igt_reset_evict_vma(i915, &i915->ggtt.vm, + evict_fence, EXEC_OBJECT_NEEDS_FENCE); +} + +static int wait_for_others(struct drm_i915_private *i915, + struct intel_engine_cs *exclude) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + if (engine == exclude) + continue; + + if (!wait_for_idle(engine)) + return -EIO; + } + + return 0; +} + +static int igt_reset_queue(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct hang h; + int err; + + /* Check that we replay pending requests 
following a hang */ + + igt_global_reset_lock(i915); + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + for_each_engine(engine, i915, id) { + struct i915_request *prev; + IGT_TIMEOUT(end_time); + unsigned int count; + + if (!intel_engine_can_store_dword(engine)) + continue; + + prev = hang_create_request(&h, engine); + if (IS_ERR(prev)) { + err = PTR_ERR(prev); + goto fini; + } + + i915_request_get(prev); + i915_request_add(prev); + + count = 0; + do { + struct i915_request *rq; + unsigned int reset_count; + + rq = hang_create_request(&h, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto fini; + } + + i915_request_get(rq); + i915_request_add(rq); + + /* + * XXX We don't handle resetting the kernel context + * very well. If we trigger a device reset twice in + * quick succession while the kernel context is + * executing, we may end up skipping the breadcrumb. + * This is really only a problem for the selftest as + * normally there is a large interlude between resets + * (hangcheck), or we focus on resetting just one + * engine and so avoid repeatedly resetting innocents. + */ + err = wait_for_others(i915, engine); + if (err) { + pr_err("%s(%s): Failed to idle other inactive engines after device reset\n", + __func__, engine->name); + i915_request_put(rq); + i915_request_put(prev); + + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + goto fini; + } + + if (!wait_until_running(&h, prev)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s(%s): Failed to start request %llx, at %x\n", + __func__, engine->name, + prev->fence.seqno, hws_seqno(&h, prev)); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + i915_request_put(rq); + i915_request_put(prev); + + i915_gem_set_wedged(i915); + + err = -EIO; + goto fini; + } + + reset_count = fake_hangcheck(i915, BIT(id)); + + if (prev->fence.error != -EIO) { + pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", + prev->fence.error); + i915_request_put(rq); + i915_request_put(prev); + err = -EINVAL; + goto fini; + } + + if (rq->fence.error) { + pr_err("Fence error status not zero [%d] after unrelated reset\n", + rq->fence.error); + i915_request_put(rq); + i915_request_put(prev); + err = -EINVAL; + goto fini; + } + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + i915_request_put(rq); + i915_request_put(prev); + err = -EINVAL; + goto fini; + } + + i915_request_put(prev); + prev = rq; + count++; + } while (time_before(jiffies, end_time)); + pr_info("%s: Completed %d resets\n", engine->name, count); + + *h.batch = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + + i915_request_put(prev); + + err = igt_flush_test(i915, I915_WAIT_LOCKED); + if (err) + break; + } + +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + igt_global_reset_unlock(i915); + + if (i915_reset_failed(i915)) + return -EIO; + + return err; +} + +static int igt_handle_error(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine = i915->engine[RCS0]; + struct hang h; + struct i915_request *rq; + struct i915_gpu_state *error; + int err; + + /* Check that we can issue a global GPU and engine reset */ + + if (!intel_has_reset_engine(i915)) + return 0; + + if (!engine || !intel_engine_can_store_dword(engine)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + + err = hang_init(&h, i915); + if (err) + goto err_unlock; + + rq = hang_create_request(&h, engine); + if (IS_ERR(rq)) { + 
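+		/* could not build the hanging request; unwind via hang_fini() at err_fini */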
err = PTR_ERR(rq); + goto err_fini; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (!wait_until_running(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %llx, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); + + i915_gem_set_wedged(i915); + + err = -EIO; + goto err_request; + } + + mutex_unlock(&i915->drm.struct_mutex); + + /* Temporarily disable error capture */ + error = xchg(&i915->gpu_error.first_error, (void *)-1); + + i915_handle_error(i915, engine->mask, 0, NULL); + + xchg(&i915->gpu_error.first_error, error); + + mutex_lock(&i915->drm.struct_mutex); + + if (rq->fence.error != -EIO) { + pr_err("Guilty request not identified!\n"); + err = -EINVAL; + goto err_request; + } + +err_request: + i915_request_put(rq); +err_fini: + hang_fini(&h); +err_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static void __preempt_begin(void) +{ + preempt_disable(); +} + +static void __preempt_end(void) +{ + preempt_enable(); +} + +static void __softirq_begin(void) +{ + local_bh_disable(); +} + +static void __softirq_end(void) +{ + local_bh_enable(); +} + +static void __hardirq_begin(void) +{ + local_irq_disable(); +} + +static void __hardirq_end(void) +{ + local_irq_enable(); +} + +struct atomic_section { + const char *name; + void (*critical_section_begin)(void); + void (*critical_section_end)(void); +}; + +static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, + const struct atomic_section *p, + const char *mode) +{ + struct tasklet_struct * const t = &engine->execlists.tasklet; + int err; + + GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", + engine->name, mode, p->name); + + tasklet_disable_nosync(t); + p->critical_section_begin(); + + err = i915_reset_engine(engine, NULL); + + p->critical_section_end(); + tasklet_enable(t); + + if (err) + pr_err("i915_reset_engine(%s:%s) failed under %s\n", + engine->name, mode, p->name); + + return err; +} + +static int igt_atomic_reset_engine(struct intel_engine_cs *engine, + const struct atomic_section *p) +{ + struct drm_i915_private *i915 = engine->i915; + struct i915_request *rq; + struct hang h; + int err; + + err = __igt_atomic_reset_engine(engine, p, "idle"); + if (err) + return err; + + err = hang_init(&h, i915); + if (err) + return err; + + rq = hang_create_request(&h, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (wait_until_running(&h, rq)) { + err = __igt_atomic_reset_engine(engine, p, "active"); + } else { + pr_err("%s(%s): Failed to start request %llx, at %x\n", + __func__, engine->name, + rq->fence.seqno, hws_seqno(&h, rq)); + i915_gem_set_wedged(i915); + err = -EIO; + } + + if (err == 0) { + struct igt_wedge_me w; + + igt_wedge_on_timeout(&w, i915, HZ / 20 /* 50ms timeout*/) + i915_request_wait(rq, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (i915_reset_failed(i915)) + err = -EIO; + } + + i915_request_put(rq); +out: + hang_fini(&h); + return err; +} + +static void force_reset(struct drm_i915_private *i915) +{ + i915_gem_set_wedged(i915); + i915_reset(i915, 0, NULL); +} + +static int igt_atomic_reset(void *arg) +{ + static const struct atomic_section phases[] = { + { "preempt", __preempt_begin, __preempt_end }, + { "softirq", __softirq_begin, __softirq_end }, + { "hardirq", __hardirq_begin, __hardirq_end }, + { } + }; + struct drm_i915_private *i915 = arg; + intel_wakeref_t wakeref; + 
int err = 0; + + /* Check that the resets are usable from atomic context */ + + igt_global_reset_lock(i915); + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + /* Flush any requests before we get started and check basics */ + force_reset(i915); + if (i915_reset_failed(i915)) + goto unlock; + + if (intel_has_gpu_reset(i915)) { + const typeof(*phases) *p; + + for (p = phases; p->name; p++) { + GEM_TRACE("intel_gpu_reset under %s\n", p->name); + + p->critical_section_begin(); + err = intel_gpu_reset(i915, ALL_ENGINES); + p->critical_section_end(); + + if (err) { + pr_err("intel_gpu_reset failed under %s\n", + p->name); + goto out; + } + } + + force_reset(i915); + } + + if (USES_GUC_SUBMISSION(i915)) + goto unlock; + + if (intel_has_reset_engine(i915)) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + const typeof(*phases) *p; + + for (p = phases; p->name; p++) { + err = igt_atomic_reset_engine(engine, p); + if (err) + goto out; + } + } + } + +out: + /* As we poke around the guts, do a full reset before continuing. */ + force_reset(i915); + +unlock: + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + igt_global_reset_unlock(i915); + + return err; +} + +int intel_hangcheck_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_global_reset), /* attempt to recover GPU first */ + SUBTEST(igt_wedged_reset), + SUBTEST(igt_hang_sanitycheck), + SUBTEST(igt_reset_nop), + SUBTEST(igt_reset_nop_engine), + SUBTEST(igt_reset_idle_engine), + SUBTEST(igt_reset_active_engine), + SUBTEST(igt_reset_engines), + SUBTEST(igt_reset_queue), + SUBTEST(igt_reset_wait), + SUBTEST(igt_reset_evict_ggtt), + SUBTEST(igt_reset_evict_ppgtt), + SUBTEST(igt_reset_evict_fence), + SUBTEST(igt_handle_error), + SUBTEST(igt_atomic_reset), + }; + intel_wakeref_t wakeref; + bool saved_hangcheck; + int err; + + if (!intel_has_gpu_reset(i915)) + return 0; + + if (i915_terminally_wedged(i915)) + return -EIO; /* we're long past hope of a successful reset */ + + wakeref = intel_runtime_pm_get(i915); + saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); + drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */ + + err = i915_subtests(tests, i915); + + mutex_lock(&i915->drm.struct_mutex); + igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + + i915_modparams.enable_hangcheck = saved_hangcheck; + intel_runtime_pm_put(i915, wakeref); + + return err; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c new file mode 100644 index 000000000000..cd0551f97c2f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -0,0 +1,1324 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include + +#include "gt/intel_reset.h" +#include "i915_selftest.h" +#include "selftests/i915_random.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_live_test.h" +#include "selftests/igt_spinner.h" +#include "selftests/mock_context.h" + +static int live_sanitycheck(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + enum intel_engine_id id; + struct igt_spinner spin; + intel_wakeref_t wakeref; + int err = -ENOMEM; + + if (!HAS_LOGICAL_RING_CONTEXTS(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if 
(igt_spinner_init(&spin, i915)) + goto err_unlock; + + ctx = kernel_context(i915); + if (!ctx) + goto err_spin; + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin, rq)) { + GEM_TRACE("spinner failed to start\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx; + } + + igt_spinner_end(&spin); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + err = -EIO; + goto err_ctx; + } + } + + err = 0; +err_ctx: + kernel_context_close(ctx); +err_spin: + igt_spinner_fini(&spin); +err_unlock: + igt_flush_test(i915, I915_WAIT_LOCKED); + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_busywait_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct intel_engine_cs *engine; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + u32 *map; + + /* + * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can + * preempt the busywaits used to synchronise between rings. + */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + ctx_hi = kernel_context(i915); + if (!ctx_hi) + goto err_unlock; + ctx_hi->sched.priority = INT_MAX; + + ctx_lo = kernel_context(i915); + if (!ctx_lo) + goto err_ctx_hi; + ctx_lo->sched.priority = INT_MIN; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_ctx_lo; + } + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto err_obj; + } + + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_map; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_map; + + for_each_engine(engine, i915, id) { + struct i915_request *lo, *hi; + struct igt_live_test t; + u32 *cs; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + err = -EIO; + goto err_vma; + } + + /* + * We create two requests. The low priority request + * busywaits on a semaphore (inside the ringbuffer where + * is should be preemptible) and the high priority requests + * uses a MI_STORE_DWORD_IMM to update the semaphore value + * allowing the first request to complete. If preemption + * fails, we hang instead. + */ + + lo = i915_request_alloc(engine, ctx_lo); + if (IS_ERR(lo)) { + err = PTR_ERR(lo); + goto err_vma; + } + + cs = intel_ring_begin(lo, 8); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(lo); + goto err_vma; + } + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(vma); + *cs++ = 0; + *cs++ = 1; + + /* XXX Do we need a flush + invalidate here? 
*/ + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = i915_ggtt_offset(vma); + *cs++ = 0; + + intel_ring_advance(lo, cs); + i915_request_add(lo); + + if (wait_for(READ_ONCE(*map), 10)) { + err = -ETIMEDOUT; + goto err_vma; + } + + /* Low priority request should be busywaiting now */ + if (i915_request_wait(lo, I915_WAIT_LOCKED, 1) != -ETIME) { + pr_err("%s: Busywaiting request did not!\n", + engine->name); + err = -EIO; + goto err_vma; + } + + hi = i915_request_alloc(engine, ctx_hi); + if (IS_ERR(hi)) { + err = PTR_ERR(hi); + goto err_vma; + } + + cs = intel_ring_begin(hi, 4); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(hi); + goto err_vma; + } + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(vma); + *cs++ = 0; + *cs++ = 0; + + intel_ring_advance(hi, cs); + i915_request_add(hi); + + if (i915_request_wait(lo, I915_WAIT_LOCKED, HZ / 5) < 0) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to preempt semaphore busywait!\n", + engine->name); + + intel_engine_dump(engine, &p, "%s\n", engine->name); + GEM_TRACE_DUMP(); + + i915_gem_set_wedged(i915); + err = -EIO; + goto err_vma; + } + GEM_BUG_ON(READ_ONCE(*map)); + + if (igt_live_test_end(&t)) { + err = -EIO; + goto err_vma; + } + } + + err = 0; +err_vma: + i915_vma_unpin(vma); +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct igt_spinner spin_hi, spin_lo; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) + pr_err("Logical preemption supported, but not exposed\n"); + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if (igt_spinner_init(&spin_hi, i915)) + goto err_unlock; + + if (igt_spinner_init(&spin_lo, i915)) + goto err_spin_hi; + + ctx_hi = kernel_context(i915); + if (!ctx_hi) + goto err_spin_lo; + ctx_hi->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); + + ctx_lo = kernel_context(i915); + if (!ctx_lo) + goto err_ctx_hi; + ctx_lo->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); + + for_each_engine(engine, i915, id) { + struct igt_live_test t; + struct i915_request *rq; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + err = -EIO; + goto err_ctx_lo; + } + + rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin_lo, rq)) { + GEM_TRACE("lo spinner failed to start\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx_lo; + } + + rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + igt_spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin_hi, rq)) { + 
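+		/*
+		 * The high priority spinner never started while the low
+		 * priority spinner was still running: preemption did not
+		 * happen, so wedge the device and abort the test.
+		 */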
GEM_TRACE("hi spinner failed to start\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx_lo; + } + + igt_spinner_end(&spin_hi); + igt_spinner_end(&spin_lo); + + if (igt_live_test_end(&t)) { + err = -EIO; + goto err_ctx_lo; + } + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + igt_spinner_fini(&spin_lo); +err_spin_hi: + igt_spinner_fini(&spin_hi); +err_unlock: + igt_flush_test(i915, I915_WAIT_LOCKED); + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_late_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct igt_spinner spin_hi, spin_lo; + struct intel_engine_cs *engine; + struct i915_sched_attr attr = {}; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if (igt_spinner_init(&spin_hi, i915)) + goto err_unlock; + + if (igt_spinner_init(&spin_lo, i915)) + goto err_spin_hi; + + ctx_hi = kernel_context(i915); + if (!ctx_hi) + goto err_spin_lo; + + ctx_lo = kernel_context(i915); + if (!ctx_lo) + goto err_ctx_hi; + + for_each_engine(engine, i915, id) { + struct igt_live_test t; + struct i915_request *rq; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + err = -EIO; + goto err_ctx_lo; + } + + rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin_lo, rq)) { + pr_err("First context failed to start\n"); + goto err_wedged; + } + + rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine, + MI_NOOP); + if (IS_ERR(rq)) { + igt_spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (igt_wait_for_spinner(&spin_hi, rq)) { + pr_err("Second context overtook first?\n"); + goto err_wedged; + } + + attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); + engine->schedule(rq, &attr); + + if (!igt_wait_for_spinner(&spin_hi, rq)) { + pr_err("High priority context failed to preempt the low priority context\n"); + GEM_TRACE_DUMP(); + goto err_wedged; + } + + igt_spinner_end(&spin_hi); + igt_spinner_end(&spin_lo); + + if (igt_live_test_end(&t)) { + err = -EIO; + goto err_ctx_lo; + } + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + igt_spinner_fini(&spin_lo); +err_spin_hi: + igt_spinner_fini(&spin_hi); +err_unlock: + igt_flush_test(i915, I915_WAIT_LOCKED); + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; + +err_wedged: + igt_spinner_end(&spin_hi); + igt_spinner_end(&spin_lo); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx_lo; +} + +struct preempt_client { + struct igt_spinner spin; + struct i915_gem_context *ctx; +}; + +static int preempt_client_init(struct drm_i915_private *i915, + struct preempt_client *c) +{ + c->ctx = kernel_context(i915); + if (!c->ctx) + return -ENOMEM; + + if (igt_spinner_init(&c->spin, i915)) + goto err_ctx; + + return 0; + +err_ctx: + kernel_context_close(c->ctx); + return -ENOMEM; +} + +static void preempt_client_fini(struct preempt_client *c) +{ + igt_spinner_fini(&c->spin); + kernel_context_close(c->ctx); +} + 
+static int live_suppress_self_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) + }; + struct preempt_client a, b; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + + /* + * Verify that if a preemption request does not cause a change in + * the current execution order, the preempt-to-idle injection is + * skipped and that we do not accidentally apply it after the CS + * completion event. + */ + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + if (USES_GUC_SUBMISSION(i915)) + return 0; /* presume black blox */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if (preempt_client_init(i915, &a)) + goto err_unlock; + if (preempt_client_init(i915, &b)) + goto err_client_a; + + for_each_engine(engine, i915, id) { + struct i915_request *rq_a, *rq_b; + int depth; + + if (!intel_engine_has_preemption(engine)) + continue; + + engine->execlists.preempt_hang.count = 0; + + rq_a = igt_spinner_create_request(&a.spin, + a.ctx, engine, + MI_NOOP); + if (IS_ERR(rq_a)) { + err = PTR_ERR(rq_a); + goto err_client_b; + } + + i915_request_add(rq_a); + if (!igt_wait_for_spinner(&a.spin, rq_a)) { + pr_err("First client failed to start\n"); + goto err_wedged; + } + + for (depth = 0; depth < 8; depth++) { + rq_b = igt_spinner_create_request(&b.spin, + b.ctx, engine, + MI_NOOP); + if (IS_ERR(rq_b)) { + err = PTR_ERR(rq_b); + goto err_client_b; + } + i915_request_add(rq_b); + + GEM_BUG_ON(i915_request_completed(rq_a)); + engine->schedule(rq_a, &attr); + igt_spinner_end(&a.spin); + + if (!igt_wait_for_spinner(&b.spin, rq_b)) { + pr_err("Second client failed to start\n"); + goto err_wedged; + } + + swap(a, b); + rq_a = rq_b; + } + igt_spinner_end(&a.spin); + + if (engine->execlists.preempt_hang.count) { + pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n", + engine->execlists.preempt_hang.count, + depth); + err = -EINVAL; + goto err_client_b; + } + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&b); +err_client_a: + preempt_client_fini(&a); +err_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; + +err_wedged: + igt_spinner_end(&b.spin); + igt_spinner_end(&a.spin); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_client_b; +} + +static int __i915_sw_fence_call +dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + return NOTIFY_DONE; +} + +static struct i915_request *dummy_request(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = kzalloc(sizeof(*rq), GFP_KERNEL); + if (!rq) + return NULL; + + INIT_LIST_HEAD(&rq->active_list); + rq->engine = engine; + + i915_sched_node_init(&rq->sched); + + /* mark this request as permanently incomplete */ + rq->fence.seqno = 1; + BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */ + rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1; + GEM_BUG_ON(i915_request_completed(rq)); + + i915_sw_fence_init(&rq->submit, dummy_notify); + i915_sw_fence_commit(&rq->submit); + + return rq; +} + +static void dummy_request_free(struct i915_request *dummy) +{ + i915_request_mark_complete(dummy); + i915_sched_node_fini(&dummy->sched); + i915_sw_fence_fini(&dummy->submit); + + dma_fence_free(&dummy->fence); +} + +static int 
live_suppress_wait_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct preempt_client client[4]; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + int i; + + /* + * Waiters are given a little priority nudge, but not enough + * to actually cause any preemption. Double check that we do + * not needlessly generate preempt-to-idle cycles. + */ + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if (preempt_client_init(i915, &client[0])) /* ELSP[0] */ + goto err_unlock; + if (preempt_client_init(i915, &client[1])) /* ELSP[1] */ + goto err_client_0; + if (preempt_client_init(i915, &client[2])) /* head of queue */ + goto err_client_1; + if (preempt_client_init(i915, &client[3])) /* bystander */ + goto err_client_2; + + for_each_engine(engine, i915, id) { + int depth; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (!engine->emit_init_breadcrumb) + continue; + + for (depth = 0; depth < ARRAY_SIZE(client); depth++) { + struct i915_request *rq[ARRAY_SIZE(client)]; + struct i915_request *dummy; + + engine->execlists.preempt_hang.count = 0; + + dummy = dummy_request(engine); + if (!dummy) + goto err_client_3; + + for (i = 0; i < ARRAY_SIZE(client); i++) { + rq[i] = igt_spinner_create_request(&client[i].spin, + client[i].ctx, engine, + MI_NOOP); + if (IS_ERR(rq[i])) { + err = PTR_ERR(rq[i]); + goto err_wedged; + } + + /* Disable NEWCLIENT promotion */ + __i915_active_request_set(&rq[i]->timeline->last_request, + dummy); + i915_request_add(rq[i]); + } + + dummy_request_free(dummy); + + GEM_BUG_ON(i915_request_completed(rq[0])); + if (!igt_wait_for_spinner(&client[0].spin, rq[0])) { + pr_err("%s: First client failed to start\n", + engine->name); + goto err_wedged; + } + GEM_BUG_ON(!i915_request_started(rq[0])); + + if (i915_request_wait(rq[depth], + I915_WAIT_LOCKED | + I915_WAIT_PRIORITY, + 1) != -ETIME) { + pr_err("%s: Waiter depth:%d completed!\n", + engine->name, depth); + goto err_wedged; + } + + for (i = 0; i < ARRAY_SIZE(client); i++) + igt_spinner_end(&client[i].spin); + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + goto err_wedged; + + if (engine->execlists.preempt_hang.count) { + pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n", + engine->name, + engine->execlists.preempt_hang.count, + depth); + err = -EINVAL; + goto err_client_3; + } + } + } + + err = 0; +err_client_3: + preempt_client_fini(&client[3]); +err_client_2: + preempt_client_fini(&client[2]); +err_client_1: + preempt_client_fini(&client[1]); +err_client_0: + preempt_client_fini(&client[0]); +err_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; + +err_wedged: + for (i = 0; i < ARRAY_SIZE(client); i++) + igt_spinner_end(&client[i].spin); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_client_3; +} + +static int live_chain_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct preempt_client hi, lo; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + + /* + * Build a chain AB...BA between two contexts (A, B) and request + * preemption of the last request. It should then complete before + * the previously submitted spinner in B. 
+ */ + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if (preempt_client_init(i915, &hi)) + goto err_unlock; + + if (preempt_client_init(i915, &lo)) + goto err_client_hi; + + for_each_engine(engine, i915, id) { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), + }; + struct igt_live_test t; + struct i915_request *rq; + int ring_size, count, i; + + if (!intel_engine_has_preemption(engine)) + continue; + + rq = igt_spinner_create_request(&lo.spin, + lo.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + + ring_size = rq->wa_tail - rq->head; + if (ring_size < 0) + ring_size += rq->ring->size; + ring_size = rq->ring->size / ring_size; + pr_debug("%s(%s): Using maximum of %d requests\n", + __func__, engine->name, ring_size); + + igt_spinner_end(&lo.spin); + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 2) < 0) { + pr_err("Timed out waiting to flush %s\n", engine->name); + goto err_wedged; + } + + if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + err = -EIO; + goto err_wedged; + } + + for_each_prime_number_from(count, 1, ring_size) { + rq = igt_spinner_create_request(&hi.spin, + hi.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + if (!igt_wait_for_spinner(&hi.spin, rq)) + goto err_wedged; + + rq = igt_spinner_create_request(&lo.spin, + lo.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + + for (i = 0; i < count; i++) { + rq = i915_request_alloc(engine, lo.ctx); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + } + + rq = i915_request_alloc(engine, hi.ctx); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + engine->schedule(rq, &attr); + + igt_spinner_end(&hi.spin); + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("Failed to preempt over chain of %d\n", + count); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + goto err_wedged; + } + igt_spinner_end(&lo.spin); + + rq = i915_request_alloc(engine, lo.ctx); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("Failed to flush low priority chain of %d requests\n", + count); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + goto err_wedged; + } + } + + if (igt_live_test_end(&t)) { + err = -EIO; + goto err_wedged; + } + } + + err = 0; +err_client_lo: + preempt_client_fini(&lo); +err_client_hi: + preempt_client_fini(&hi); +err_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; + +err_wedged: + igt_spinner_end(&hi.spin); + igt_spinner_end(&lo.spin); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_client_lo; +} + +static int live_preempt_hang(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct igt_spinner spin_hi, spin_lo; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + if (!intel_has_reset_engine(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if (igt_spinner_init(&spin_hi, i915)) + 
goto err_unlock; + + if (igt_spinner_init(&spin_lo, i915)) + goto err_spin_hi; + + ctx_hi = kernel_context(i915); + if (!ctx_hi) + goto err_spin_lo; + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; + + ctx_lo = kernel_context(i915); + if (!ctx_lo) + goto err_ctx_hi; + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + if (!intel_engine_has_preemption(engine)) + continue; + + rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin_lo, rq)) { + GEM_TRACE("lo spinner failed to start\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx_lo; + } + + rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + igt_spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + init_completion(&engine->execlists.preempt_hang.completion); + engine->execlists.preempt_hang.inject_hang = true; + + i915_request_add(rq); + + if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion, + HZ / 10)) { + pr_err("Preemption did not occur within timeout!"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx_lo; + } + + set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + i915_reset_engine(engine, NULL); + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + + engine->execlists.preempt_hang.inject_hang = false; + + if (!igt_wait_for_spinner(&spin_hi, rq)) { + GEM_TRACE("hi spinner failed to start\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx_lo; + } + + igt_spinner_end(&spin_hi); + igt_spinner_end(&spin_lo); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + err = -EIO; + goto err_ctx_lo; + } + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + igt_spinner_fini(&spin_lo); +err_spin_hi: + igt_spinner_fini(&spin_hi); +err_unlock: + igt_flush_test(i915, I915_WAIT_LOCKED); + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int random_range(struct rnd_state *rnd, int min, int max) +{ + return i915_prandom_u32_max_state(max - min, rnd) + min; +} + +static int random_priority(struct rnd_state *rnd) +{ + return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); +} + +struct preempt_smoke { + struct drm_i915_private *i915; + struct i915_gem_context **contexts; + struct intel_engine_cs *engine; + struct drm_i915_gem_object *batch; + unsigned int ncontext; + struct rnd_state prng; + unsigned long count; +}; + +static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) +{ + return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, + &smoke->prng)]; +} + +static int smoke_submit(struct preempt_smoke *smoke, + struct i915_gem_context *ctx, int prio, + struct drm_i915_gem_object *batch) +{ + struct i915_request *rq; + struct i915_vma *vma = NULL; + int err = 0; + + if (batch) { + vma = i915_vma_instance(batch, &ctx->ppgtt->vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + } + + ctx->sched.priority = prio; + + rq = i915_request_alloc(smoke->engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto unpin; + } + + if (vma) { + err = rq->engine->emit_bb_start(rq, + vma->node.start, + PAGE_SIZE, 0); + if 
(!err) + err = i915_vma_move_to_active(vma, rq, 0); + } + + i915_request_add(rq); + +unpin: + if (vma) + i915_vma_unpin(vma); + + return err; +} + +static int smoke_crescendo_thread(void *arg) +{ + struct preempt_smoke *smoke = arg; + IGT_TIMEOUT(end_time); + unsigned long count; + + count = 0; + do { + struct i915_gem_context *ctx = smoke_context(smoke); + int err; + + mutex_lock(&smoke->i915->drm.struct_mutex); + err = smoke_submit(smoke, + ctx, count % I915_PRIORITY_MAX, + smoke->batch); + mutex_unlock(&smoke->i915->drm.struct_mutex); + if (err) + return err; + + count++; + } while (!__igt_timeout(end_time, NULL)); + + smoke->count = count; + return 0; +} + +static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) +#define BATCH BIT(0) +{ + struct task_struct *tsk[I915_NUM_ENGINES] = {}; + struct preempt_smoke arg[I915_NUM_ENGINES]; + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned long count; + int err = 0; + + mutex_unlock(&smoke->i915->drm.struct_mutex); + + for_each_engine(engine, smoke->i915, id) { + arg[id] = *smoke; + arg[id].engine = engine; + if (!(flags & BATCH)) + arg[id].batch = NULL; + arg[id].count = 0; + + tsk[id] = kthread_run(smoke_crescendo_thread, &arg, + "igt/smoke:%d", id); + if (IS_ERR(tsk[id])) { + err = PTR_ERR(tsk[id]); + break; + } + get_task_struct(tsk[id]); + } + + count = 0; + for_each_engine(engine, smoke->i915, id) { + int status; + + if (IS_ERR_OR_NULL(tsk[id])) + continue; + + status = kthread_stop(tsk[id]); + if (status && !err) + err = status; + + count += arg[id].count; + + put_task_struct(tsk[id]); + } + + mutex_lock(&smoke->i915->drm.struct_mutex); + + pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", + count, flags, + RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); + return 0; +} + +static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) +{ + enum intel_engine_id id; + IGT_TIMEOUT(end_time); + unsigned long count; + + count = 0; + do { + for_each_engine(smoke->engine, smoke->i915, id) { + struct i915_gem_context *ctx = smoke_context(smoke); + int err; + + err = smoke_submit(smoke, + ctx, random_priority(&smoke->prng), + flags & BATCH ? 
smoke->batch : NULL); + if (err) + return err; + + count++; + } + } while (!__igt_timeout(end_time, NULL)); + + pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", + count, flags, + RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); + return 0; +} + +static int live_preempt_smoke(void *arg) +{ + struct preempt_smoke smoke = { + .i915 = arg, + .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), + .ncontext = 1024, + }; + const unsigned int phase[] = { 0, BATCH }; + intel_wakeref_t wakeref; + struct igt_live_test t; + int err = -ENOMEM; + u32 *cs; + int n; + + if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915)) + return 0; + + smoke.contexts = kmalloc_array(smoke.ncontext, + sizeof(*smoke.contexts), + GFP_KERNEL); + if (!smoke.contexts) + return -ENOMEM; + + mutex_lock(&smoke.i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(smoke.i915); + + smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE); + if (IS_ERR(smoke.batch)) { + err = PTR_ERR(smoke.batch); + goto err_unlock; + } + + cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_batch; + } + for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) + cs[n] = MI_ARB_CHECK; + cs[n] = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(smoke.batch); + i915_gem_object_unpin_map(smoke.batch); + + if (igt_live_test_begin(&t, smoke.i915, __func__, "all")) { + err = -EIO; + goto err_batch; + } + + for (n = 0; n < smoke.ncontext; n++) { + smoke.contexts[n] = kernel_context(smoke.i915); + if (!smoke.contexts[n]) + goto err_ctx; + } + + for (n = 0; n < ARRAY_SIZE(phase); n++) { + err = smoke_crescendo(&smoke, phase[n]); + if (err) + goto err_ctx; + + err = smoke_random(&smoke, phase[n]); + if (err) + goto err_ctx; + } + +err_ctx: + if (igt_live_test_end(&t)) + err = -EIO; + + for (n = 0; n < smoke.ncontext; n++) { + if (!smoke.contexts[n]) + break; + kernel_context_close(smoke.contexts[n]); + } + +err_batch: + i915_gem_object_put(smoke.batch); +err_unlock: + intel_runtime_pm_put(smoke.i915, wakeref); + mutex_unlock(&smoke.i915->drm.struct_mutex); + kfree(smoke.contexts); + + return err; +} + +int intel_execlists_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_sanitycheck), + SUBTEST(live_busywait_preempt), + SUBTEST(live_preempt), + SUBTEST(live_late_preempt), + SUBTEST(live_suppress_self_preempt), + SUBTEST(live_suppress_wait_preempt), + SUBTEST(live_chain_preempt), + SUBTEST(live_preempt_hang), + SUBTEST(live_preempt_smoke), + }; + + if (!HAS_EXECLISTS(i915)) + return 0; + + if (i915_terminally_wedged(i915)) + return 0; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c new file mode 100644 index 000000000000..e61e47421ed2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -0,0 +1,1172 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "i915_selftest.h" +#include "intel_reset.h" + +#include "selftests/igt_flush_test.h" +#include "selftests/igt_reset.h" +#include "selftests/igt_spinner.h" +#include "selftests/igt_wedge_me.h" +#include "selftests/mock_context.h" +#include "selftests/mock_drm.h" + +static const struct wo_register { + enum intel_platform platform; + u32 reg; +} wo_registers[] = { + { INTEL_GEMINILAKE, 0x731c } +}; + +#define REF_NAME_MAX (INTEL_ENGINE_CS_MAX_NAME + 4) +struct wa_lists { + struct 
i915_wa_list gt_wa_list; + struct { + char name[REF_NAME_MAX]; + struct i915_wa_list wa_list; + } engine[I915_NUM_ENGINES]; +}; + +static void +reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + memset(lists, 0, sizeof(*lists)); + + wa_init_start(&lists->gt_wa_list, "GT_REF"); + gt_init_workarounds(i915, &lists->gt_wa_list); + wa_init_finish(&lists->gt_wa_list); + + for_each_engine(engine, i915, id) { + struct i915_wa_list *wal = &lists->engine[id].wa_list; + char *name = lists->engine[id].name; + + snprintf(name, REF_NAME_MAX, "%s_REF", engine->name); + + wa_init_start(wal, name); + engine_init_workarounds(engine, wal); + wa_init_finish(wal); + } +} + +static void +reference_lists_fini(struct drm_i915_private *i915, struct wa_lists *lists) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + intel_wa_list_free(&lists->engine[id].wa_list); + + intel_wa_list_free(&lists->gt_wa_list); +} + +static struct drm_i915_gem_object * +read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + const u32 base = engine->mmio_base; + struct drm_i915_gem_object *result; + intel_wakeref_t wakeref; + struct i915_request *rq; + struct i915_vma *vma; + u32 srm, *cs; + int err; + int i; + + result = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(result)) + return result; + + i915_gem_object_set_cache_coherency(result, I915_CACHE_LLC); + + cs = i915_gem_object_pin_map(result, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_obj; + } + memset(cs, 0xc5, PAGE_SIZE); + i915_gem_object_flush_map(result); + i915_gem_object_unpin_map(result); + + vma = i915_vma_instance(result, &engine->i915->ggtt.vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_obj; + + rq = ERR_PTR(-ENODEV); + with_intel_runtime_pm(engine->i915, wakeref) + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_pin; + } + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto err_req; + + srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + if (INTEL_GEN(ctx->i915) >= 8) + srm++; + + cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_req; + } + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + *cs++ = srm; + *cs++ = i915_mmio_reg_offset(RING_FORCE_TO_NONPRIV(base, i)); + *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; + *cs++ = 0; + } + intel_ring_advance(rq, cs); + + i915_gem_object_get(result); + i915_gem_object_set_active_reference(result); + + i915_request_add(rq); + i915_vma_unpin(vma); + + return result; + +err_req: + i915_request_add(rq); +err_pin: + i915_vma_unpin(vma); +err_obj: + i915_gem_object_put(result); + return ERR_PTR(err); +} + +static u32 +get_whitelist_reg(const struct intel_engine_cs *engine, unsigned int i) +{ + i915_reg_t reg = i < engine->whitelist.count ? 
+ engine->whitelist.list[i].reg : + RING_NOPID(engine->mmio_base); + + return i915_mmio_reg_offset(reg); +} + +static void +print_results(const struct intel_engine_cs *engine, const u32 *results) +{ + unsigned int i; + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + u32 expected = get_whitelist_reg(engine, i); + u32 actual = results[i]; + + pr_info("RING_NONPRIV[%d]: expected 0x%08x, found 0x%08x\n", + i, expected, actual); + } +} + +static int check_whitelist(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *results; + struct igt_wedge_me wedge; + u32 *vaddr; + int err; + int i; + + results = read_nonprivs(ctx, engine); + if (IS_ERR(results)) + return PTR_ERR(results); + + err = 0; + igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */ + err = i915_gem_object_set_to_cpu_domain(results, false); + if (i915_terminally_wedged(ctx->i915)) + err = -EIO; + if (err) + goto out_put; + + vaddr = i915_gem_object_pin_map(results, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_put; + } + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + u32 expected = get_whitelist_reg(engine, i); + u32 actual = vaddr[i]; + + if (expected != actual) { + print_results(engine, vaddr); + pr_err("Invalid RING_NONPRIV[%d], expected 0x%08x, found 0x%08x\n", + i, expected, actual); + + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(results); +out_put: + i915_gem_object_put(results); + return err; +} + +static int do_device_reset(struct intel_engine_cs *engine) +{ + i915_reset(engine->i915, engine->mask, "live_workarounds"); + return 0; +} + +static int do_engine_reset(struct intel_engine_cs *engine) +{ + return i915_reset_engine(engine, "live_workarounds"); +} + +static int +switch_to_scratch_context(struct intel_engine_cs *engine, + struct igt_spinner *spin) +{ + struct i915_gem_context *ctx; + struct i915_request *rq; + intel_wakeref_t wakeref; + int err = 0; + + ctx = kernel_context(engine->i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + GEM_BUG_ON(i915_gem_context_is_bannable(ctx)); + + rq = ERR_PTR(-ENODEV); + with_intel_runtime_pm(engine->i915, wakeref) + rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP); + + kernel_context_close(ctx); + + if (IS_ERR(rq)) { + spin = NULL; + err = PTR_ERR(rq); + goto err; + } + + i915_request_add(rq); + + if (spin && !igt_wait_for_spinner(spin, rq)) { + pr_err("Spinner failed to start\n"); + err = -ETIMEDOUT; + } + +err: + if (err && spin) + igt_spinner_end(spin); + + return err; +} + +static int check_whitelist_across_reset(struct intel_engine_cs *engine, + int (*reset)(struct intel_engine_cs *), + const char *name) +{ + struct drm_i915_private *i915 = engine->i915; + struct i915_gem_context *ctx; + struct igt_spinner spin; + intel_wakeref_t wakeref; + int err; + + pr_info("Checking %d whitelisted registers (RING_NONPRIV) [%s]\n", + engine->whitelist.count, name); + + err = igt_spinner_init(&spin, i915); + if (err) + return err; + + ctx = kernel_context(i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + err = check_whitelist(ctx, engine); + if (err) { + pr_err("Invalid whitelist *before* %s reset!\n", name); + goto out; + } + + err = switch_to_scratch_context(engine, &spin); + if (err) + goto out; + + with_intel_runtime_pm(i915, wakeref) + err = reset(engine); + + igt_spinner_end(&spin); + igt_spinner_fini(&spin); + + if (err) { + pr_err("%s reset failed\n", name); + goto out; + } + + err = check_whitelist(ctx, engine); + if (err) { + pr_err("Whitelist not 
preserved in context across %s reset!\n", + name); + goto out; + } + + kernel_context_close(ctx); + + ctx = kernel_context(i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + err = check_whitelist(ctx, engine); + if (err) { + pr_err("Invalid whitelist *after* %s reset in fresh context!\n", + name); + goto out; + } + +out: + kernel_context_close(ctx); + return err; +} + +static struct i915_vma *create_batch(struct i915_gem_context *ctx) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(ctx->i915, 16 * PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err_obj; + + err = i915_gem_object_set_to_wc_domain(obj, true); + if (err) + goto err_obj; + + return vma; + +err_obj: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static u32 reg_write(u32 old, u32 new, u32 rsvd) +{ + if (rsvd == 0x0000ffff) { + old &= ~(new >> 16); + old |= new & (new >> 16); + } else { + old &= ~rsvd; + old |= new & rsvd; + } + + return old; +} + +static bool wo_register(struct intel_engine_cs *engine, u32 reg) +{ + enum intel_platform platform = INTEL_INFO(engine->i915)->platform; + int i; + + for (i = 0; i < ARRAY_SIZE(wo_registers); i++) { + if (wo_registers[i].platform == platform && + wo_registers[i].reg == reg) + return true; + } + + return false; +} + +static int check_dirty_whitelist(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + const u32 values[] = { + 0x00000000, + 0x01010101, + 0x10100101, + 0x03030303, + 0x30300303, + 0x05050505, + 0x50500505, + 0x0f0f0f0f, + 0xf00ff00f, + 0x10101010, + 0xf0f01010, + 0x30303030, + 0xa0a03030, + 0x50505050, + 0xc0c05050, + 0xf0f0f0f0, + 0x11111111, + 0x33333333, + 0x55555555, + 0x0000ffff, + 0x00ff00ff, + 0xff0000ff, + 0xffff00ff, + 0xffffffff, + }; + struct i915_vma *scratch; + struct i915_vma *batch; + int err = 0, i, v; + u32 *cs, *results; + + scratch = create_scratch(&ctx->ppgtt->vm, 2 * ARRAY_SIZE(values) + 1); + if (IS_ERR(scratch)) + return PTR_ERR(scratch); + + batch = create_batch(ctx); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_scratch; + } + + for (i = 0; i < engine->whitelist.count; i++) { + u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg); + u64 addr = scratch->node.start; + struct i915_request *rq; + u32 srm, lrm, rsvd; + u32 expect; + int idx; + + if (wo_register(engine, reg)) + continue; + + srm = MI_STORE_REGISTER_MEM; + lrm = MI_LOAD_REGISTER_MEM; + if (INTEL_GEN(ctx->i915) >= 8) + lrm++, srm++; + + pr_debug("%s: Writing garbage to %x\n", + engine->name, reg); + + cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out_batch; + } + + /* SRM original */ + *cs++ = srm; + *cs++ = reg; + *cs++ = lower_32_bits(addr); + *cs++ = upper_32_bits(addr); + + idx = 1; + for (v = 0; v < ARRAY_SIZE(values); v++) { + /* LRI garbage */ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = reg; + *cs++ = values[v]; + + /* SRM result */ + *cs++ = srm; + *cs++ = reg; + *cs++ = lower_32_bits(addr + sizeof(u32) * idx); + *cs++ = upper_32_bits(addr + sizeof(u32) * idx); + idx++; + } + for (v = 0; v < ARRAY_SIZE(values); v++) { + /* LRI garbage */ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = reg; + *cs++ = ~values[v]; + + /* SRM result */ + *cs++ = srm; + *cs++ = reg; + *cs++ = lower_32_bits(addr + sizeof(u32) * idx); + *cs++ = 
upper_32_bits(addr + sizeof(u32) * idx); + idx++; + } + GEM_BUG_ON(idx * sizeof(u32) > scratch->size); + + /* LRM original -- don't leave garbage in the context! */ + *cs++ = lrm; + *cs++ = reg; + *cs++ = lower_32_bits(addr); + *cs++ = upper_32_bits(addr); + + *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(batch->obj); + i915_gem_object_unpin_map(batch->obj); + i915_gem_chipset_flush(ctx->i915); + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_batch; + } + + if (engine->emit_init_breadcrumb) { /* Be nice if we hang */ + err = engine->emit_init_breadcrumb(rq); + if (err) + goto err_request; + } + + err = engine->emit_bb_start(rq, + batch->node.start, PAGE_SIZE, + 0); + if (err) + goto err_request; + +err_request: + i915_request_add(rq); + if (err) + goto out_batch; + + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { + pr_err("%s: Futzing %x timedout; cancelling test\n", + engine->name, reg); + i915_gem_set_wedged(ctx->i915); + err = -EIO; + goto out_batch; + } + + results = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(results)) { + err = PTR_ERR(results); + goto out_batch; + } + + GEM_BUG_ON(values[ARRAY_SIZE(values) - 1] != 0xffffffff); + rsvd = results[ARRAY_SIZE(values)]; /* detect write masking */ + if (!rsvd) { + pr_err("%s: Unable to write to whitelisted register %x\n", + engine->name, reg); + err = -EINVAL; + goto out_unpin; + } + + expect = results[0]; + idx = 1; + for (v = 0; v < ARRAY_SIZE(values); v++) { + expect = reg_write(expect, values[v], rsvd); + if (results[idx] != expect) + err++; + idx++; + } + for (v = 0; v < ARRAY_SIZE(values); v++) { + expect = reg_write(expect, ~values[v], rsvd); + if (results[idx] != expect) + err++; + idx++; + } + if (err) { + pr_err("%s: %d mismatch between values written to whitelisted register [%x], and values read back!\n", + engine->name, err, reg); + + pr_info("%s: Whitelisted register: %x, original value %08x, rsvd %08x\n", + engine->name, reg, results[0], rsvd); + + expect = results[0]; + idx = 1; + for (v = 0; v < ARRAY_SIZE(values); v++) { + u32 w = values[v]; + + expect = reg_write(expect, w, rsvd); + pr_info("Wrote %08x, read %08x, expect %08x\n", + w, results[idx], expect); + idx++; + } + for (v = 0; v < ARRAY_SIZE(values); v++) { + u32 w = ~values[v]; + + expect = reg_write(expect, w, rsvd); + pr_info("Wrote %08x, read %08x, expect %08x\n", + w, results[idx], expect); + idx++; + } + + err = -EINVAL; + } +out_unpin: + i915_gem_object_unpin_map(scratch->obj); + if (err) + break; + } + + if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED)) + err = -EIO; +out_batch: + i915_vma_unpin_and_release(&batch, 0); +out_scratch: + i915_vma_unpin_and_release(&scratch, 0); + return err; +} + +static int live_dirty_whitelist(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + enum intel_engine_id id; + intel_wakeref_t wakeref; + struct drm_file *file; + int err = 0; + + /* Can the user write to the whitelisted registers? 
*/ + + if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */ + return 0; + + wakeref = intel_runtime_pm_get(i915); + + mutex_unlock(&i915->drm.struct_mutex); + file = mock_file(i915); + mutex_lock(&i915->drm.struct_mutex); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto out_rpm; + } + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_file; + } + + for_each_engine(engine, i915, id) { + if (engine->whitelist.count == 0) + continue; + + err = check_dirty_whitelist(ctx, engine); + if (err) + goto out_file; + } + +out_file: + mutex_unlock(&i915->drm.struct_mutex); + mock_file_free(i915, file); + mutex_lock(&i915->drm.struct_mutex); +out_rpm: + intel_runtime_pm_put(i915, wakeref); + return err; +} + +static int live_reset_whitelist(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine = i915->engine[RCS0]; + int err = 0; + + /* If we reset the gpu, we should not lose the RING_NONPRIV */ + + if (!engine || engine->whitelist.count == 0) + return 0; + + igt_global_reset_lock(i915); + + if (intel_has_reset_engine(i915)) { + err = check_whitelist_across_reset(engine, + do_engine_reset, + "engine"); + if (err) + goto out; + } + + if (intel_has_gpu_reset(i915)) { + err = check_whitelist_across_reset(engine, + do_device_reset, + "device"); + if (err) + goto out; + } + +out: + igt_global_reset_unlock(i915); + return err; +} + +static int read_whitelisted_registers(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct i915_vma *results) +{ + intel_wakeref_t wakeref; + struct i915_request *rq; + int i, err = 0; + u32 srm, *cs; + + rq = ERR_PTR(-ENODEV); + with_intel_runtime_pm(engine->i915, wakeref) + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + srm = MI_STORE_REGISTER_MEM; + if (INTEL_GEN(ctx->i915) >= 8) + srm++; + + cs = intel_ring_begin(rq, 4 * engine->whitelist.count); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_req; + } + + for (i = 0; i < engine->whitelist.count; i++) { + u64 offset = results->node.start + sizeof(u32) * i; + + *cs++ = srm; + *cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + } + intel_ring_advance(rq, cs); + +err_req: + i915_request_add(rq); + + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) + err = -EIO; + + return err; +} + +static int scrub_whitelisted_registers(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + intel_wakeref_t wakeref; + struct i915_request *rq; + struct i915_vma *batch; + int i, err = 0; + u32 *cs; + + batch = create_batch(ctx); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_batch; + } + + *cs++ = MI_LOAD_REGISTER_IMM(engine->whitelist.count); + for (i = 0; i < engine->whitelist.count; i++) { + *cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg); + *cs++ = 0xffffffff; + } + *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(batch->obj); + i915_gem_chipset_flush(ctx->i915); + + rq = ERR_PTR(-ENODEV); + with_intel_runtime_pm(engine->i915, wakeref) + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + if (engine->emit_init_breadcrumb) { /* Be nice if we hang */ + err = engine->emit_init_breadcrumb(rq); + if (err) + goto err_request; + } + + /* Perform the writes from an unprivileged "user" batch */ + err = engine->emit_bb_start(rq, 
batch->node.start, 0, 0); + +err_request: + i915_request_add(rq); + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) + err = -EIO; + +err_unpin: + i915_gem_object_unpin_map(batch->obj); +err_batch: + i915_vma_unpin_and_release(&batch, 0); + return err; +} + +struct regmask { + i915_reg_t reg; + unsigned long gen_mask; +}; + +static bool find_reg(struct drm_i915_private *i915, + i915_reg_t reg, + const struct regmask *tbl, + unsigned long count) +{ + u32 offset = i915_mmio_reg_offset(reg); + + while (count--) { + if (INTEL_INFO(i915)->gen_mask & tbl->gen_mask && + i915_mmio_reg_offset(tbl->reg) == offset) + return true; + tbl++; + } + + return false; +} + +static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg) +{ + /* Alas, we must pardon some whitelists. Mistakes already made */ + static const struct regmask pardon[] = { + { GEN9_CTX_PREEMPT_REG, INTEL_GEN_MASK(9, 9) }, + { GEN8_L3SQCREG4, INTEL_GEN_MASK(9, 9) }, + }; + + return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon)); +} + +static bool result_eq(struct intel_engine_cs *engine, + u32 a, u32 b, i915_reg_t reg) +{ + if (a != b && !pardon_reg(engine->i915, reg)) { + pr_err("Whitelisted register 0x%4x not context saved: A=%08x, B=%08x\n", + i915_mmio_reg_offset(reg), a, b); + return false; + } + + return true; +} + +static bool writeonly_reg(struct drm_i915_private *i915, i915_reg_t reg) +{ + /* Some registers do not seem to behave and our writes unreadable */ + static const struct regmask wo[] = { + { GEN9_SLICE_COMMON_ECO_CHICKEN1, INTEL_GEN_MASK(9, 9) }, + }; + + return find_reg(i915, reg, wo, ARRAY_SIZE(wo)); +} + +static bool result_neq(struct intel_engine_cs *engine, + u32 a, u32 b, i915_reg_t reg) +{ + if (a == b && !writeonly_reg(engine->i915, reg)) { + pr_err("Whitelist register 0x%4x:%08x was unwritable\n", + i915_mmio_reg_offset(reg), a); + return false; + } + + return true; +} + +static int +check_whitelisted_registers(struct intel_engine_cs *engine, + struct i915_vma *A, + struct i915_vma *B, + bool (*fn)(struct intel_engine_cs *engine, + u32 a, u32 b, + i915_reg_t reg)) +{ + u32 *a, *b; + int i, err; + + a = i915_gem_object_pin_map(A->obj, I915_MAP_WB); + if (IS_ERR(a)) + return PTR_ERR(a); + + b = i915_gem_object_pin_map(B->obj, I915_MAP_WB); + if (IS_ERR(b)) { + err = PTR_ERR(b); + goto err_a; + } + + err = 0; + for (i = 0; i < engine->whitelist.count; i++) { + if (!fn(engine, a[i], b[i], engine->whitelist.list[i].reg)) + err = -EINVAL; + } + + i915_gem_object_unpin_map(B->obj); +err_a: + i915_gem_object_unpin_map(A->obj); + return err; +} + +static int live_isolated_whitelist(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct { + struct i915_gem_context *ctx; + struct i915_vma *scratch[2]; + } client[2] = {}; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int i, err = 0; + + /* + * Check that a write into a whitelist register works, but + * invisible to a second context. 
+ */ + + if (!intel_engines_has_context_isolation(i915)) + return 0; + + if (!i915->kernel_context->ppgtt) + return 0; + + for (i = 0; i < ARRAY_SIZE(client); i++) { + struct i915_gem_context *c; + + c = kernel_context(i915); + if (IS_ERR(c)) { + err = PTR_ERR(c); + goto err; + } + + client[i].scratch[0] = create_scratch(&c->ppgtt->vm, 1024); + if (IS_ERR(client[i].scratch[0])) { + err = PTR_ERR(client[i].scratch[0]); + kernel_context_close(c); + goto err; + } + + client[i].scratch[1] = create_scratch(&c->ppgtt->vm, 1024); + if (IS_ERR(client[i].scratch[1])) { + err = PTR_ERR(client[i].scratch[1]); + i915_vma_unpin_and_release(&client[i].scratch[0], 0); + kernel_context_close(c); + goto err; + } + + client[i].ctx = c; + } + + for_each_engine(engine, i915, id) { + if (!engine->whitelist.count) + continue; + + /* Read default values */ + err = read_whitelisted_registers(client[0].ctx, engine, + client[0].scratch[0]); + if (err) + goto err; + + /* Try to overwrite registers (should only affect ctx0) */ + err = scrub_whitelisted_registers(client[0].ctx, engine); + if (err) + goto err; + + /* Read values from ctx1, we expect these to be defaults */ + err = read_whitelisted_registers(client[1].ctx, engine, + client[1].scratch[0]); + if (err) + goto err; + + /* Verify that both reads return the same default values */ + err = check_whitelisted_registers(engine, + client[0].scratch[0], + client[1].scratch[0], + result_eq); + if (err) + goto err; + + /* Read back the updated values in ctx0 */ + err = read_whitelisted_registers(client[0].ctx, engine, + client[0].scratch[1]); + if (err) + goto err; + + /* User should be granted privilege to overwrite regs */ + err = check_whitelisted_registers(engine, + client[0].scratch[0], + client[0].scratch[1], + result_neq); + if (err) + goto err; + } + +err: + for (i = 0; i < ARRAY_SIZE(client); i++) { + if (!client[i].ctx) + break; + + i915_vma_unpin_and_release(&client[i].scratch[1], 0); + i915_vma_unpin_and_release(&client[i].scratch[0], 0); + kernel_context_close(client[i].ctx); + } + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + return err; +} + +static bool verify_gt_engine_wa(struct drm_i915_private *i915, + struct wa_lists *lists, const char *str) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + bool ok = true; + + ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str); + + for_each_engine(engine, i915, id) { + ok &= engine_wa_list_verify(engine, + &lists->engine[id].wa_list, + str) == 0; + } + + return ok; +} + +static int +live_gpu_reset_gt_engine_workarounds(void *arg) +{ + struct drm_i915_private *i915 = arg; + intel_wakeref_t wakeref; + struct wa_lists lists; + bool ok; + + if (!intel_has_gpu_reset(i915)) + return 0; + + pr_info("Verifying after GPU reset...\n"); + + igt_global_reset_lock(i915); + wakeref = intel_runtime_pm_get(i915); + + reference_lists_init(i915, &lists); + + ok = verify_gt_engine_wa(i915, &lists, "before reset"); + if (!ok) + goto out; + + i915_reset(i915, ALL_ENGINES, "live_workarounds"); + + ok = verify_gt_engine_wa(i915, &lists, "after reset"); + +out: + reference_lists_fini(i915, &lists); + intel_runtime_pm_put(i915, wakeref); + igt_global_reset_unlock(i915); + + return ok ? 
0 : -ESRCH; +} + +static int +live_engine_reset_gt_engine_workarounds(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + struct igt_spinner spin; + enum intel_engine_id id; + struct i915_request *rq; + intel_wakeref_t wakeref; + struct wa_lists lists; + int ret = 0; + + if (!intel_has_reset_engine(i915)) + return 0; + + ctx = kernel_context(i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + igt_global_reset_lock(i915); + wakeref = intel_runtime_pm_get(i915); + + reference_lists_init(i915, &lists); + + for_each_engine(engine, i915, id) { + bool ok; + + pr_info("Verifying after %s reset...\n", engine->name); + + ok = verify_gt_engine_wa(i915, &lists, "before reset"); + if (!ok) { + ret = -ESRCH; + goto err; + } + + i915_reset_engine(engine, "live_workarounds"); + + ok = verify_gt_engine_wa(i915, &lists, "after idle reset"); + if (!ok) { + ret = -ESRCH; + goto err; + } + + ret = igt_spinner_init(&spin, i915); + if (ret) + goto err; + + rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + igt_spinner_fini(&spin); + goto err; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("Spinner failed to start\n"); + igt_spinner_fini(&spin); + ret = -ETIMEDOUT; + goto err; + } + + i915_reset_engine(engine, "live_workarounds"); + + igt_spinner_end(&spin); + igt_spinner_fini(&spin); + + ok = verify_gt_engine_wa(i915, &lists, "after busy reset"); + if (!ok) { + ret = -ESRCH; + goto err; + } + } + +err: + reference_lists_fini(i915, &lists); + intel_runtime_pm_put(i915, wakeref); + igt_global_reset_unlock(i915); + kernel_context_close(ctx); + + igt_flush_test(i915, I915_WAIT_LOCKED); + + return ret; +} + +int intel_workarounds_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_dirty_whitelist), + SUBTEST(live_reset_whitelist), + SUBTEST(live_isolated_whitelist), + SUBTEST(live_gpu_reset_gt_engine_workarounds), + SUBTEST(live_engine_reset_gt_engine_workarounds), + }; + int err; + + if (i915_terminally_wedged(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 503d548a55f7..e9fadcb4d592 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -25,8 +25,9 @@ * */ +#include "gt/intel_engine.h" + #include "i915_drv.h" -#include "intel_ringbuffer.h" /** * DOC: batch buffer command parser diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e0bfdf31032c..b3fbd9e361ae 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -32,7 +32,8 @@ #include #include -#include "i915_reset.h" +#include "gt/intel_reset.h" + #include "intel_dp.h" #include "intel_drv.h" #include "intel_fbc.h" diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6354c68c94b3..ac416d2c02ca 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -47,10 +47,12 @@ #include #include +#include "gt/intel_workarounds.h" +#include "gt/intel_reset.h" + #include "i915_drv.h" #include "i915_pmu.h" #include "i915_query.h" -#include "i915_reset.h" #include "i915_trace.h" #include "i915_vgpu.h" #include "intel_audio.h" @@ -62,7 +64,6 @@ #include "intel_pm.h" #include 
"intel_sprite.h" #include "intel_uc.h" -#include "intel_workarounds.h" static struct drm_driver driver; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e6f9a5ddac3d..d37832ffb471 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -62,18 +62,19 @@ #include "i915_reg.h" #include "i915_utils.h" +#include "gt/intel_lrc.h" +#include "gt/intel_engine.h" +#include "gt/intel_workarounds.h" + #include "intel_bios.h" #include "intel_device_info.h" #include "intel_display.h" #include "intel_dpll_mgr.h" #include "intel_frontbuffer.h" -#include "intel_lrc.h" #include "intel_opregion.h" -#include "intel_ringbuffer.h" #include "intel_uc.h" #include "intel_uncore.h" #include "intel_wopcm.h" -#include "intel_workarounds.h" #include "i915_gem.h" #include "i915_gem_context.h" diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a2bf94c3cfca..21adeb340357 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -39,19 +39,20 @@ #include #include +#include "gt/intel_mocs.h" +#include "gt/intel_reset.h" +#include "gt/intel_workarounds.h" + #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gemfs.h" #include "i915_globals.h" -#include "i915_reset.h" #include "i915_trace.h" #include "i915_vgpu.h" #include "intel_drv.h" #include "intel_frontbuffer.h" -#include "intel_mocs.h" #include "intel_pm.h" -#include "intel_workarounds.h" static void i915_gem_flush_free_objects(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index c02a30612df9..37dff694456c 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -86,13 +86,16 @@ */ #include + #include + +#include "gt/intel_lrc_reg.h" +#include "gt/intel_workarounds.h" + #include "i915_drv.h" #include "i915_globals.h" #include "i915_trace.h" #include "i915_user_extensions.h" -#include "intel_lrc_reg.h" -#include "intel_workarounds.h" #define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1 << 1) #define I915_CONTEXT_PARAM_VM 0x9 diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 23dcb01bfd82..cec278ab04e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -27,9 +27,10 @@ #include "i915_gem_context_types.h" +#include "gt/intel_context.h" + #include "i915_gem.h" #include "i915_scheduler.h" -#include "intel_context.h" #include "intel_device_info.h" struct drm_device; diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h index e2ec58b10fb2..d282a6ab3b9f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h @@ -17,8 +17,9 @@ #include #include +#include "gt/intel_context_types.h" + #include "i915_scheduler.h" -#include "intel_context_types.h" struct pid; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3557233de0f5..8f5db787b7f2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -37,7 +37,6 @@ #include "i915_drv.h" #include "i915_vgpu.h" -#include "i915_reset.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index b51e779732c3..f85b75db1f98 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ 
b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -38,8 +38,8 @@ #include #include +#include "gt/intel_reset.h" #include "i915_request.h" -#include "i915_reset.h" #include "i915_selftest.h" #include "i915_timeline.h" diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 5dc761e85d9d..b419d0f59275 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -13,8 +13,9 @@ #include +#include "gt/intel_engine.h" + #include "intel_device_info.h" -#include "intel_ringbuffer.h" #include "intel_uc_fw.h" #include "i915_gem.h" diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 56da457bed21..a87f790335c1 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -195,6 +195,8 @@ #include #include +#include "gt/intel_lrc_reg.h" + #include "i915_drv.h" #include "i915_oa_hsw.h" #include "i915_oa_bdw.h" @@ -210,7 +212,6 @@ #include "i915_oa_cflgt3.h" #include "i915_oa_cnl.h" #include "i915_oa_icl.h" -#include "intel_lrc_reg.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 46a52da3db29..35e502481f29 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -6,8 +6,10 @@ #include #include + +#include "gt/intel_engine.h" + #include "i915_pmu.h" -#include "intel_ringbuffer.h" #include "i915_drv.h" /* Frequency for the sampling timer for events which need it. */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index b1f00b59bb95..64ca8b3ea12f 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -32,7 +32,6 @@ #include "i915_active.h" #include "i915_drv.h" #include "i915_globals.h" -#include "i915_reset.h" #include "intel_pm.h" struct execute_cb { diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c deleted file mode 100644 index 1092d16c289c..000000000000 --- a/drivers/gpu/drm/i915/i915_reset.c +++ /dev/null @@ -1,1471 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2008-2018 Intel Corporation - */ - -#include -#include - -#include "i915_drv.h" -#include "i915_gpu_error.h" -#include "i915_reset.h" - -#include "intel_guc.h" - -#define RESET_MAX_RETRIES 3 - -/* XXX How to handle concurrent GGTT updates using tiling registers? 
*/ -#define RESET_UNDER_STOP_MACHINE 0 - -static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set) -{ - intel_uncore_rmw(uncore, reg, 0, set); -} - -static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) -{ - intel_uncore_rmw(uncore, reg, clr, 0); -} - -static void rmw_set_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 set) -{ - intel_uncore_rmw_fw(uncore, reg, 0, set); -} - -static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) -{ - intel_uncore_rmw_fw(uncore, reg, clr, 0); -} - -static void engine_skip_context(struct i915_request *rq) -{ - struct intel_engine_cs *engine = rq->engine; - struct i915_gem_context *hung_ctx = rq->gem_context; - - lockdep_assert_held(&engine->timeline.lock); - - if (!i915_request_is_active(rq)) - return; - - list_for_each_entry_continue(rq, &engine->timeline.requests, link) - if (rq->gem_context == hung_ctx) - i915_request_skip(rq, -EIO); -} - -static void client_mark_guilty(struct drm_i915_file_private *file_priv, - const struct i915_gem_context *ctx) -{ - unsigned int score; - unsigned long prev_hang; - - if (i915_gem_context_is_banned(ctx)) - score = I915_CLIENT_SCORE_CONTEXT_BAN; - else - score = 0; - - prev_hang = xchg(&file_priv->hang_timestamp, jiffies); - if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) - score += I915_CLIENT_SCORE_HANG_FAST; - - if (score) { - atomic_add(score, &file_priv->ban_score); - - DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", - ctx->name, score, - atomic_read(&file_priv->ban_score)); - } -} - -static bool context_mark_guilty(struct i915_gem_context *ctx) -{ - unsigned long prev_hang; - bool banned; - int i; - - atomic_inc(&ctx->guilty_count); - - /* Cool contexts are too cool to be banned! (Used for reset testing.) */ - if (!i915_gem_context_is_bannable(ctx)) - return false; - - /* Record the timestamp for the last N hangs */ - prev_hang = ctx->hang_timestamp[0]; - for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp) - 1; i++) - ctx->hang_timestamp[i] = ctx->hang_timestamp[i + 1]; - ctx->hang_timestamp[i] = jiffies; - - /* If we have hung N+1 times in rapid succession, we ban the context! */ - banned = !i915_gem_context_is_recoverable(ctx); - if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES)) - banned = true; - if (banned) { - DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n", - ctx->name, atomic_read(&ctx->guilty_count)); - i915_gem_context_set_banned(ctx); - } - - if (!IS_ERR_OR_NULL(ctx->file_priv)) - client_mark_guilty(ctx->file_priv, ctx); - - return banned; -} - -static void context_mark_innocent(struct i915_gem_context *ctx) -{ - atomic_inc(&ctx->active_count); -} - -void i915_reset_request(struct i915_request *rq, bool guilty) -{ - GEM_TRACE("%s rq=%llx:%lld, guilty? 
%s\n", - rq->engine->name, - rq->fence.context, - rq->fence.seqno, - yesno(guilty)); - - lockdep_assert_held(&rq->engine->timeline.lock); - GEM_BUG_ON(i915_request_completed(rq)); - - if (guilty) { - i915_request_skip(rq, -EIO); - if (context_mark_guilty(rq->gem_context)) - engine_skip_context(rq); - } else { - dma_fence_set_error(&rq->fence, -EAGAIN); - context_mark_innocent(rq->gem_context); - } -} - -static void gen3_stop_engine(struct intel_engine_cs *engine) -{ - struct intel_uncore *uncore = engine->uncore; - const u32 base = engine->mmio_base; - - GEM_TRACE("%s\n", engine->name); - - if (intel_engine_stop_cs(engine)) - GEM_TRACE("%s: timed out on STOP_RING\n", engine->name); - - intel_uncore_write_fw(uncore, - RING_HEAD(base), - intel_uncore_read_fw(uncore, RING_TAIL(base))); - intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */ - - intel_uncore_write_fw(uncore, RING_HEAD(base), 0); - intel_uncore_write_fw(uncore, RING_TAIL(base), 0); - intel_uncore_posting_read_fw(uncore, RING_TAIL(base)); - - /* The ring must be empty before it is disabled */ - intel_uncore_write_fw(uncore, RING_CTL(base), 0); - - /* Check acts as a post */ - if (intel_uncore_read_fw(uncore, RING_HEAD(base))) - GEM_TRACE("%s: ring head [%x] not parked\n", - engine->name, - intel_uncore_read_fw(uncore, RING_HEAD(base))); -} - -static void i915_stop_engines(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - - if (INTEL_GEN(i915) < 3) - return; - - for_each_engine_masked(engine, i915, engine_mask, tmp) - gen3_stop_engine(engine); -} - -static bool i915_in_reset(struct pci_dev *pdev) -{ - u8 gdrst; - - pci_read_config_byte(pdev, I915_GDRST, &gdrst); - return gdrst & GRDOM_RESET_STATUS; -} - -static int i915_do_reset(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned int retry) -{ - struct pci_dev *pdev = i915->drm.pdev; - int err; - - /* Assert reset for at least 20 usec, and wait for acknowledgement. */ - pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - udelay(50); - err = wait_for_atomic(i915_in_reset(pdev), 50); - - /* Clear the reset request. 
*/ - pci_write_config_byte(pdev, I915_GDRST, 0); - udelay(50); - if (!err) - err = wait_for_atomic(!i915_in_reset(pdev), 50); - - return err; -} - -static bool g4x_reset_complete(struct pci_dev *pdev) -{ - u8 gdrst; - - pci_read_config_byte(pdev, I915_GDRST, &gdrst); - return (gdrst & GRDOM_RESET_ENABLE) == 0; -} - -static int g33_do_reset(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned int retry) -{ - struct pci_dev *pdev = i915->drm.pdev; - - pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - return wait_for_atomic(g4x_reset_complete(pdev), 50); -} - -static int g4x_do_reset(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned int retry) -{ - struct pci_dev *pdev = i915->drm.pdev; - struct intel_uncore *uncore = &i915->uncore; - int ret; - - /* WaVcpClkGateDisableForMediaReset:ctg,elk */ - rmw_set_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE); - intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D); - - pci_write_config_byte(pdev, I915_GDRST, - GRDOM_MEDIA | GRDOM_RESET_ENABLE); - ret = wait_for_atomic(g4x_reset_complete(pdev), 50); - if (ret) { - DRM_DEBUG_DRIVER("Wait for media reset failed\n"); - goto out; - } - - pci_write_config_byte(pdev, I915_GDRST, - GRDOM_RENDER | GRDOM_RESET_ENABLE); - ret = wait_for_atomic(g4x_reset_complete(pdev), 50); - if (ret) { - DRM_DEBUG_DRIVER("Wait for render reset failed\n"); - goto out; - } - -out: - pci_write_config_byte(pdev, I915_GDRST, 0); - - rmw_clear_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE); - intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D); - - return ret; -} - -static int ironlake_do_reset(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned int retry) -{ - struct intel_uncore *uncore = &i915->uncore; - int ret; - - intel_uncore_write_fw(uncore, ILK_GDSR, - ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); - ret = __intel_wait_for_register_fw(uncore, ILK_GDSR, - ILK_GRDOM_RESET_ENABLE, 0, - 5000, 0, - NULL); - if (ret) { - DRM_DEBUG_DRIVER("Wait for render reset failed\n"); - goto out; - } - - intel_uncore_write_fw(uncore, ILK_GDSR, - ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); - ret = __intel_wait_for_register_fw(uncore, ILK_GDSR, - ILK_GRDOM_RESET_ENABLE, 0, - 5000, 0, - NULL); - if (ret) { - DRM_DEBUG_DRIVER("Wait for media reset failed\n"); - goto out; - } - -out: - intel_uncore_write_fw(uncore, ILK_GDSR, 0); - intel_uncore_posting_read_fw(uncore, ILK_GDSR); - return ret; -} - -/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */ -static int gen6_hw_domain_reset(struct drm_i915_private *i915, - u32 hw_domain_mask) -{ - struct intel_uncore *uncore = &i915->uncore; - int err; - - /* - * GEN6_GDRST is not in the gt power well, no need to check - * for fifo space for the write or forcewake the chip for - * the read - */ - intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); - - /* Wait for the device to ack the reset requests */ - err = __intel_wait_for_register_fw(uncore, - GEN6_GDRST, hw_domain_mask, 0, - 500, 0, - NULL); - if (err) - DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n", - hw_domain_mask); - - return err; -} - -static int gen6_reset_engines(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned int retry) -{ - struct intel_engine_cs *engine; - const u32 hw_engine_mask[] = { - [RCS0] = GEN6_GRDOM_RENDER, - [BCS0] = GEN6_GRDOM_BLT, - [VCS0] = GEN6_GRDOM_MEDIA, - [VCS1] = GEN8_GRDOM_MEDIA2, - [VECS0] = GEN6_GRDOM_VECS, - }; - u32 hw_mask; - - if (engine_mask == 
ALL_ENGINES) { - hw_mask = GEN6_GRDOM_FULL; - } else { - intel_engine_mask_t tmp; - - hw_mask = 0; - for_each_engine_masked(engine, i915, engine_mask, tmp) { - GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); - hw_mask |= hw_engine_mask[engine->id]; - } - } - - return gen6_hw_domain_reset(i915, hw_mask); -} - -static u32 gen11_lock_sfc(struct intel_engine_cs *engine) -{ - struct intel_uncore *uncore = engine->uncore; - u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access; - i915_reg_t sfc_forced_lock, sfc_forced_lock_ack; - u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit; - i915_reg_t sfc_usage; - u32 sfc_usage_bit; - u32 sfc_reset_bit; - - switch (engine->class) { - case VIDEO_DECODE_CLASS: - if ((BIT(engine->instance) & vdbox_sfc_access) == 0) - return 0; - - sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; - - sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine); - sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT; - - sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine); - sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT; - sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance); - break; - - case VIDEO_ENHANCEMENT_CLASS: - sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; - - sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine); - sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT; - - sfc_usage = GEN11_VECS_SFC_USAGE(engine); - sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT; - sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance); - break; - - default: - return 0; - } - - /* - * Tell the engine that a software reset is going to happen. The engine - * will then try to force lock the SFC (if currently locked, it will - * remain so until we tell the engine it is safe to unlock; if currently - * unlocked, it will ignore this and all new lock requests). If SFC - * ends up being locked to the engine we want to reset, we have to reset - * it as well (we will unlock it once the reset sequence is completed). 
- */ - rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit); - - if (__intel_wait_for_register_fw(uncore, - sfc_forced_lock_ack, - sfc_forced_lock_ack_bit, - sfc_forced_lock_ack_bit, - 1000, 0, NULL)) { - DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); - return 0; - } - - if (intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit) - return sfc_reset_bit; - - return 0; -} - -static void gen11_unlock_sfc(struct intel_engine_cs *engine) -{ - struct intel_uncore *uncore = engine->uncore; - u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access; - i915_reg_t sfc_forced_lock; - u32 sfc_forced_lock_bit; - - switch (engine->class) { - case VIDEO_DECODE_CLASS: - if ((BIT(engine->instance) & vdbox_sfc_access) == 0) - return; - - sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; - break; - - case VIDEO_ENHANCEMENT_CLASS: - sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; - break; - - default: - return; - } - - rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit); -} - -static int gen11_reset_engines(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned int retry) -{ - const u32 hw_engine_mask[] = { - [RCS0] = GEN11_GRDOM_RENDER, - [BCS0] = GEN11_GRDOM_BLT, - [VCS0] = GEN11_GRDOM_MEDIA, - [VCS1] = GEN11_GRDOM_MEDIA2, - [VCS2] = GEN11_GRDOM_MEDIA3, - [VCS3] = GEN11_GRDOM_MEDIA4, - [VECS0] = GEN11_GRDOM_VECS, - [VECS1] = GEN11_GRDOM_VECS2, - }; - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - u32 hw_mask; - int ret; - - if (engine_mask == ALL_ENGINES) { - hw_mask = GEN11_GRDOM_FULL; - } else { - hw_mask = 0; - for_each_engine_masked(engine, i915, engine_mask, tmp) { - GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); - hw_mask |= hw_engine_mask[engine->id]; - hw_mask |= gen11_lock_sfc(engine); - } - } - - ret = gen6_hw_domain_reset(i915, hw_mask); - - if (engine_mask != ALL_ENGINES) - for_each_engine_masked(engine, i915, engine_mask, tmp) - gen11_unlock_sfc(engine); - - return ret; -} - -static int gen8_engine_reset_prepare(struct intel_engine_cs *engine) -{ - struct intel_uncore *uncore = engine->uncore; - const i915_reg_t reg = RING_RESET_CTL(engine->mmio_base); - u32 request, mask, ack; - int ret; - - ack = intel_uncore_read_fw(uncore, reg); - if (ack & RESET_CTL_CAT_ERROR) { - /* - * For catastrophic errors, ready-for-reset sequence - * needs to be bypassed: HAS#396813 - */ - request = RESET_CTL_CAT_ERROR; - mask = RESET_CTL_CAT_ERROR; - - /* Catastrophic errors need to be cleared by HW */ - ack = 0; - } else if (!(ack & RESET_CTL_READY_TO_RESET)) { - request = RESET_CTL_REQUEST_RESET; - mask = RESET_CTL_READY_TO_RESET; - ack = RESET_CTL_READY_TO_RESET; - } else { - return 0; - } - - intel_uncore_write_fw(uncore, reg, _MASKED_BIT_ENABLE(request)); - ret = __intel_wait_for_register_fw(uncore, reg, mask, ack, - 700, 0, NULL); - if (ret) - DRM_ERROR("%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n", - engine->name, request, - intel_uncore_read_fw(uncore, reg)); - - return ret; -} - -static void gen8_engine_reset_cancel(struct intel_engine_cs *engine) -{ - intel_uncore_write_fw(engine->uncore, - RING_RESET_CTL(engine->mmio_base), - _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET)); -} - -static int gen8_reset_engines(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned int retry) -{ - struct intel_engine_cs *engine; - const bool reset_non_ready = retry >= 1; - 
intel_engine_mask_t tmp; - int ret; - - for_each_engine_masked(engine, i915, engine_mask, tmp) { - ret = gen8_engine_reset_prepare(engine); - if (ret && !reset_non_ready) - goto skip_reset; - - /* - * If this is not the first failed attempt to prepare, - * we decide to proceed anyway. - * - * By doing so we risk context corruption and with - * some gens (kbl), possible system hang if reset - * happens during active bb execution. - * - * We rather take context corruption instead of - * failed reset with a wedged driver/gpu. And - * active bb execution case should be covered by - * i915_stop_engines we have before the reset. - */ - } - - if (INTEL_GEN(i915) >= 11) - ret = gen11_reset_engines(i915, engine_mask, retry); - else - ret = gen6_reset_engines(i915, engine_mask, retry); - -skip_reset: - for_each_engine_masked(engine, i915, engine_mask, tmp) - gen8_engine_reset_cancel(engine); - - return ret; -} - -typedef int (*reset_func)(struct drm_i915_private *, - intel_engine_mask_t engine_mask, - unsigned int retry); - -static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) -{ - if (INTEL_GEN(i915) >= 8) - return gen8_reset_engines; - else if (INTEL_GEN(i915) >= 6) - return gen6_reset_engines; - else if (INTEL_GEN(i915) >= 5) - return ironlake_do_reset; - else if (IS_G4X(i915)) - return g4x_do_reset; - else if (IS_G33(i915) || IS_PINEVIEW(i915)) - return g33_do_reset; - else if (INTEL_GEN(i915) >= 3) - return i915_do_reset; - else - return NULL; -} - -int intel_gpu_reset(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask) -{ - const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1; - reset_func reset; - int ret = -ETIMEDOUT; - int retry; - - reset = intel_get_gpu_reset(i915); - if (!reset) - return -ENODEV; - - /* - * If the power well sleeps during the reset, the reset - * request may be dropped and never completes (causing -EIO). - */ - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) { - /* - * We stop engines, otherwise we might get failed reset and a - * dead gpu (on elk). Also as modern gpu as kbl can suffer - * from system hang if batchbuffer is progressing when - * the reset is issued, regardless of READY_TO_RESET ack. - * Thus assume it is best to stop engines on all gens - * where we have a gpu reset. - * - * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) - * - * WaMediaResetMainRingCleanup:ctg,elk (presumably) - * - * FIXME: Wa for more modern gens needs to be validated - */ - if (retry) - i915_stop_engines(i915, engine_mask); - - GEM_TRACE("engine_mask=%x\n", engine_mask); - preempt_disable(); - ret = reset(i915, engine_mask, retry); - preempt_enable(); - } - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - - return ret; -} - -bool intel_has_gpu_reset(struct drm_i915_private *i915) -{ - if (!i915_modparams.reset) - return NULL; - - return intel_get_gpu_reset(i915); -} - -bool intel_has_reset_engine(struct drm_i915_private *i915) -{ - return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2; -} - -int intel_reset_guc(struct drm_i915_private *i915) -{ - u32 guc_domain = - INTEL_GEN(i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; - int ret; - - GEM_BUG_ON(!HAS_GUC(i915)); - - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - ret = gen6_hw_domain_reset(i915, guc_domain); - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - - return ret; -} - -/* - * Ensure irq handler finishes, and not run again. 
- * Also return the active request so that we only search for it once. - */ -static void reset_prepare_engine(struct intel_engine_cs *engine) -{ - /* - * During the reset sequence, we must prevent the engine from - * entering RC6. As the context state is undefined until we restart - * the engine, if it does enter RC6 during the reset, the state - * written to the powercontext is undefined and so we may lose - * GPU state upon resume, i.e. fail to restart after a reset. - */ - intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); - engine->reset.prepare(engine); -} - -static void revoke_mmaps(struct drm_i915_private *i915) -{ - int i; - - for (i = 0; i < i915->num_fence_regs; i++) { - struct drm_vma_offset_node *node; - struct i915_vma *vma; - u64 vma_offset; - - vma = READ_ONCE(i915->fence_regs[i].vma); - if (!vma) - continue; - - if (!i915_vma_has_userfault(vma)) - continue; - - GEM_BUG_ON(vma->fence != &i915->fence_regs[i]); - node = &vma->obj->base.vma_node; - vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; - unmap_mapping_range(i915->drm.anon_inode->i_mapping, - drm_vma_node_offset_addr(node) + vma_offset, - vma->size, - 1); - } -} - -static void reset_prepare(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) - reset_prepare_engine(engine); - - intel_uc_reset_prepare(i915); -} - -static void gt_revoke(struct drm_i915_private *i915) -{ - revoke_mmaps(i915); -} - -static int gt_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err; - - /* - * Everything depends on having the GTT running, so we need to start - * there. - */ - err = i915_ggtt_enable_hw(i915); - if (err) - return err; - - for_each_engine(engine, i915, id) - intel_engine_reset(engine, stalled_mask & engine->mask); - - i915_gem_restore_fences(i915); - - return err; -} - -static void reset_finish_engine(struct intel_engine_cs *engine) -{ - engine->reset.finish(engine); - intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); -} - -struct i915_gpu_restart { - struct work_struct work; - struct drm_i915_private *i915; -}; - -static void restart_work(struct work_struct *work) -{ - struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work); - struct drm_i915_private *i915 = arg->i915; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - - wakeref = intel_runtime_pm_get(i915); - mutex_lock(&i915->drm.struct_mutex); - WRITE_ONCE(i915->gpu_error.restart, NULL); - - for_each_engine(engine, i915, id) { - struct i915_request *rq; - - /* - * Ostensibily, we always want a context loaded for powersaving, - * so if the engine is idle after the reset, send a request - * to load our scratch kernel_context. 
- */ - if (!intel_engine_is_idle(engine)) - continue; - - rq = i915_request_alloc(engine, i915->kernel_context); - if (!IS_ERR(rq)) - i915_request_add(rq); - } - - mutex_unlock(&i915->drm.struct_mutex); - intel_runtime_pm_put(i915, wakeref); - - kfree(arg); -} - -static void reset_finish(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - reset_finish_engine(engine); - intel_engine_signal_breadcrumbs(engine); - } -} - -static void reset_restart(struct drm_i915_private *i915) -{ - struct i915_gpu_restart *arg; - - /* - * Following the reset, ensure that we always reload context for - * powersaving, and to correct engine->last_retired_context. Since - * this requires us to submit a request, queue a worker to do that - * task for us to evade any locking here. - */ - if (READ_ONCE(i915->gpu_error.restart)) - return; - - arg = kmalloc(sizeof(*arg), GFP_KERNEL); - if (arg) { - arg->i915 = i915; - INIT_WORK(&arg->work, restart_work); - - WRITE_ONCE(i915->gpu_error.restart, arg); - queue_work(i915->wq, &arg->work); - } -} - -static void nop_submit_request(struct i915_request *request) -{ - struct intel_engine_cs *engine = request->engine; - unsigned long flags; - - GEM_TRACE("%s fence %llx:%lld -> -EIO\n", - engine->name, request->fence.context, request->fence.seqno); - dma_fence_set_error(&request->fence, -EIO); - - spin_lock_irqsave(&engine->timeline.lock, flags); - __i915_request_submit(request); - i915_request_mark_complete(request); - spin_unlock_irqrestore(&engine->timeline.lock, flags); - - intel_engine_queue_breadcrumbs(engine); -} - -static void __i915_gem_set_wedged(struct drm_i915_private *i915) -{ - struct i915_gpu_error *error = &i915->gpu_error; - struct intel_engine_cs *engine; - enum intel_engine_id id; - - if (test_bit(I915_WEDGED, &error->flags)) - return; - - if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) { - struct drm_printer p = drm_debug_printer(__func__); - - for_each_engine(engine, i915, id) - intel_engine_dump(engine, &p, "%s\n", engine->name); - } - - GEM_TRACE("start\n"); - - /* - * First, stop submission to hw, but do not yet complete requests by - * rolling the global seqno forward (since this would complete requests - * for which we haven't set the fence error to EIO yet). - */ - reset_prepare(i915); - - /* Even if the GPU reset fails, it should still stop the engines */ - if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) - intel_gpu_reset(i915, ALL_ENGINES); - - for_each_engine(engine, i915, id) { - engine->submit_request = nop_submit_request; - engine->schedule = NULL; - } - i915->caps.scheduler = 0; - - /* - * Make sure no request can slip through without getting completed by - * either this call here to intel_engine_write_global_seqno, or the one - * in nop_submit_request. 
- */ - synchronize_rcu_expedited(); - - /* Mark all executing requests as skipped */ - for_each_engine(engine, i915, id) - engine->cancel_requests(engine); - - reset_finish(i915); - - smp_mb__before_atomic(); - set_bit(I915_WEDGED, &error->flags); - - GEM_TRACE("end\n"); -} - -void i915_gem_set_wedged(struct drm_i915_private *i915) -{ - struct i915_gpu_error *error = &i915->gpu_error; - intel_wakeref_t wakeref; - - mutex_lock(&error->wedge_mutex); - with_intel_runtime_pm(i915, wakeref) - __i915_gem_set_wedged(i915); - mutex_unlock(&error->wedge_mutex); -} - -static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) -{ - struct i915_gpu_error *error = &i915->gpu_error; - struct i915_timeline *tl; - - if (!test_bit(I915_WEDGED, &error->flags)) - return true; - - if (!i915->gt.scratch) /* Never full initialised, recovery impossible */ - return false; - - GEM_TRACE("start\n"); - - /* - * Before unwedging, make sure that all pending operations - * are flushed and errored out - we may have requests waiting upon - * third party fences. We marked all inflight requests as EIO, and - * every execbuf since returned EIO, for consistency we want all - * the currently pending requests to also be marked as EIO, which - * is done inside our nop_submit_request - and so we must wait. - * - * No more can be submitted until we reset the wedged bit. - */ - mutex_lock(&i915->gt.timelines.mutex); - list_for_each_entry(tl, &i915->gt.timelines.active_list, link) { - struct i915_request *rq; - - rq = i915_active_request_get_unlocked(&tl->last_request); - if (!rq) - continue; - - /* - * All internal dependencies (i915_requests) will have - * been flushed by the set-wedge, but we may be stuck waiting - * for external fences. These should all be capped to 10s - * (I915_FENCE_TIMEOUT) so this wait should not be unbounded - * in the worst case. - */ - dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); - } - mutex_unlock(&i915->gt.timelines.mutex); - - intel_engines_sanitize(i915, false); - - /* - * Undo nop_submit_request. We prevent all new i915 requests from - * being queued (by disallowing execbuf whilst wedged) so having - * waited for all active requests above, we know the system is idle - * and do not have to worry about a thread being inside - * engine->submit_request() as we swap over. So unlike installing - * the nop_submit_request on reset, we can do this from normal - * context and do not require stop_machine(). 
- */ - intel_engines_reset_default_submission(i915); - - GEM_TRACE("end\n"); - - smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ - clear_bit(I915_WEDGED, &i915->gpu_error.flags); - - return true; -} - -bool i915_gem_unset_wedged(struct drm_i915_private *i915) -{ - struct i915_gpu_error *error = &i915->gpu_error; - bool result; - - mutex_lock(&error->wedge_mutex); - result = __i915_gem_unset_wedged(i915); - mutex_unlock(&error->wedge_mutex); - - return result; -} - -static int do_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask) -{ - int err, i; - - gt_revoke(i915); - - err = intel_gpu_reset(i915, ALL_ENGINES); - for (i = 0; err && i < RESET_MAX_RETRIES; i++) { - msleep(10 * (i + 1)); - err = intel_gpu_reset(i915, ALL_ENGINES); - } - if (err) - return err; - - return gt_reset(i915, stalled_mask); -} - -/** - * i915_reset - reset chip after a hang - * @i915: #drm_i915_private to reset - * @stalled_mask: mask of the stalled engines with the guilty requests - * @reason: user error message for why we are resetting - * - * Reset the chip. Useful if a hang is detected. Marks the device as wedged - * on failure. - * - * Procedure is fairly simple: - * - reset the chip using the reset reg - * - re-init context state - * - re-init hardware status page - * - re-init ring buffer - * - re-init interrupt state - * - re-init display - */ -void i915_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask, - const char *reason) -{ - struct i915_gpu_error *error = &i915->gpu_error; - int ret; - - GEM_TRACE("flags=%lx\n", error->flags); - - might_sleep(); - assert_rpm_wakelock_held(i915); - GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); - - /* Clear any previous failed attempts at recovery. Time to try again. */ - if (!__i915_gem_unset_wedged(i915)) - return; - - if (reason) - dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); - error->reset_count++; - - reset_prepare(i915); - - if (!intel_has_gpu_reset(i915)) { - if (i915_modparams.reset) - dev_err(i915->drm.dev, "GPU reset not supported\n"); - else - DRM_DEBUG_DRIVER("GPU reset disabled\n"); - goto error; - } - - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) - intel_runtime_pm_disable_interrupts(i915); - - if (do_reset(i915, stalled_mask)) { - dev_err(i915->drm.dev, "Failed to reset chip\n"); - goto taint; - } - - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) - intel_runtime_pm_enable_interrupts(i915); - - intel_overlay_reset(i915); - - /* - * Next we need to restore the context, but we don't use those - * yet either... - * - * Ring buffer needs to be re-initialized in the KMS case, or if X - * was running at the time of the reset (i.e. we weren't VT - * switched away). - */ - ret = i915_gem_init_hw(i915); - if (ret) { - DRM_ERROR("Failed to initialise HW following reset (%d)\n", - ret); - goto error; - } - - i915_queue_hangcheck(i915); - -finish: - reset_finish(i915); - if (!__i915_wedged(error)) - reset_restart(i915); - return; - -taint: - /* - * History tells us that if we cannot reset the GPU now, we - * never will. This then impacts everything that is run - * subsequently. On failing the reset, we mark the driver - * as wedged, preventing further execution on the GPU. - * We also want to go one step further and add a taint to the - * kernel so that any subsequent faults can be traced back to - * this failure. This is important for CI, where if the - * GPU/driver fails we would like to reboot and restart testing - * rather than continue on into oblivion. 
For everyone else, - * the system should still plod along, but they have been warned! - */ - add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -error: - __i915_gem_set_wedged(i915); - goto finish; -} - -static inline int intel_gt_reset_engine(struct drm_i915_private *i915, - struct intel_engine_cs *engine) -{ - return intel_gpu_reset(i915, engine->mask); -} - -/** - * i915_reset_engine - reset GPU engine to recover from a hang - * @engine: engine to reset - * @msg: reason for GPU reset; or NULL for no dev_notice() - * - * Reset a specific GPU engine. Useful if a hang is detected. - * Returns zero on successful reset or otherwise an error code. - * - * Procedure is: - * - identifies the request that caused the hang and it is dropped - * - reset engine (which will force the engine to idle) - * - re-init/configure engine - */ -int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) -{ - struct i915_gpu_error *error = &engine->i915->gpu_error; - int ret; - - GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); - GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); - - reset_prepare_engine(engine); - - if (msg) - dev_notice(engine->i915->drm.dev, - "Resetting %s for %s\n", engine->name, msg); - error->reset_engine_count[engine->id]++; - - if (!engine->i915->guc.execbuf_client) - ret = intel_gt_reset_engine(engine->i915, engine); - else - ret = intel_guc_reset_engine(&engine->i915->guc, engine); - if (ret) { - /* If we fail here, we expect to fallback to a global reset */ - DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", - engine->i915->guc.execbuf_client ? "GuC " : "", - engine->name, ret); - goto out; - } - - /* - * The request that caused the hang is stuck on elsp, we know the - * active request and can drop it, adjust head to skip the offending - * request to resume executing remaining requests in the queue. - */ - intel_engine_reset(engine, true); - - /* - * The engine and its registers (and workarounds in case of render) - * have been reset to their default values. Follow the init_ring - * process to program RING_MODE, HWSP and re-enable submission. 
- */ - ret = engine->init_hw(engine); - if (ret) - goto out; - -out: - intel_engine_cancel_stop_cs(engine); - reset_finish_engine(engine); - return ret; -} - -static void i915_reset_device(struct drm_i915_private *i915, - u32 engine_mask, - const char *reason) -{ - struct i915_gpu_error *error = &i915->gpu_error; - struct kobject *kobj = &i915->drm.primary->kdev->kobj; - char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; - char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; - char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; - struct i915_wedge_me w; - - kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); - - DRM_DEBUG_DRIVER("resetting chip\n"); - kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); - - /* Use a watchdog to ensure that our reset completes */ - i915_wedge_on_timeout(&w, i915, 5 * HZ) { - intel_prepare_reset(i915); - - /* Flush everyone using a resource about to be clobbered */ - synchronize_srcu_expedited(&error->reset_backoff_srcu); - - mutex_lock(&error->wedge_mutex); - i915_reset(i915, engine_mask, reason); - mutex_unlock(&error->wedge_mutex); - - intel_finish_reset(i915); - } - - if (!test_bit(I915_WEDGED, &error->flags)) - kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); -} - -static void clear_register(struct intel_uncore *uncore, i915_reg_t reg) -{ - intel_uncore_rmw(uncore, reg, 0, 0); -} - -void i915_clear_error_registers(struct drm_i915_private *i915) -{ - struct intel_uncore *uncore = &i915->uncore; - u32 eir; - - if (!IS_GEN(i915, 2)) - clear_register(uncore, PGTBL_ER); - - if (INTEL_GEN(i915) < 4) - clear_register(uncore, IPEIR(RENDER_RING_BASE)); - else - clear_register(uncore, IPEIR_I965); - - clear_register(uncore, EIR); - eir = intel_uncore_read(uncore, EIR); - if (eir) { - /* - * some errors might have become stuck, - * mask them. - */ - DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); - rmw_set(uncore, EMR, eir); - intel_uncore_write(uncore, GEN2_IIR, - I915_MASTER_ERROR_INTERRUPT); - } - - if (INTEL_GEN(i915) >= 8) { - rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID); - intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG); - } else if (INTEL_GEN(i915) >= 6) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - rmw_clear(uncore, - RING_FAULT_REG(engine), RING_FAULT_VALID); - intel_uncore_posting_read(uncore, - RING_FAULT_REG(engine)); - } - } -} - -/** - * i915_handle_error - handle a gpu error - * @i915: i915 device private - * @engine_mask: mask representing engines that are hung - * @flags: control flags - * @fmt: Error message format string - * - * Do some basic checking of register state at error time and - * dump it to the syslog. Also call i915_capture_error_state() to make - * sure we get a record and make it available in debugfs. Fire a uevent - * so userspace knows something bad happened (should trigger collection - * of a ring dump etc.). - */ -void i915_handle_error(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned long flags, - const char *fmt, ...) 
-{ - struct i915_gpu_error *error = &i915->gpu_error; - struct intel_engine_cs *engine; - intel_wakeref_t wakeref; - intel_engine_mask_t tmp; - char error_msg[80]; - char *msg = NULL; - - if (fmt) { - va_list args; - - va_start(args, fmt); - vscnprintf(error_msg, sizeof(error_msg), fmt, args); - va_end(args); - - msg = error_msg; - } - - /* - * In most cases it's guaranteed that we get here with an RPM - * reference held, for example because there is a pending GPU - * request that won't finish until the reset is done. This - * isn't the case at least when we get here by doing a - * simulated reset via debugfs, so get an RPM reference. - */ - wakeref = intel_runtime_pm_get(i915); - - engine_mask &= INTEL_INFO(i915)->engine_mask; - - if (flags & I915_ERROR_CAPTURE) { - i915_capture_error_state(i915, engine_mask, msg); - i915_clear_error_registers(i915); - } - - /* - * Try engine reset when available. We fall back to full reset if - * single reset fails. - */ - if (intel_has_reset_engine(i915) && !__i915_wedged(error)) { - for_each_engine_masked(engine, i915, engine_mask, tmp) { - BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); - if (test_and_set_bit(I915_RESET_ENGINE + engine->id, - &error->flags)) - continue; - - if (i915_reset_engine(engine, msg) == 0) - engine_mask &= ~engine->mask; - - clear_bit(I915_RESET_ENGINE + engine->id, - &error->flags); - wake_up_bit(&error->flags, - I915_RESET_ENGINE + engine->id); - } - } - - if (!engine_mask) - goto out; - - /* Full reset needs the mutex, stop any other user trying to do so. */ - if (test_and_set_bit(I915_RESET_BACKOFF, &error->flags)) { - wait_event(error->reset_queue, - !test_bit(I915_RESET_BACKOFF, &error->flags)); - goto out; /* piggy-back on the other reset */ - } - - /* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */ - synchronize_rcu_expedited(); - - /* Prevent any other reset-engine attempt. */ - for_each_engine(engine, i915, tmp) { - while (test_and_set_bit(I915_RESET_ENGINE + engine->id, - &error->flags)) - wait_on_bit(&error->flags, - I915_RESET_ENGINE + engine->id, - TASK_UNINTERRUPTIBLE); - } - - i915_reset_device(i915, engine_mask, msg); - - for_each_engine(engine, i915, tmp) { - clear_bit(I915_RESET_ENGINE + engine->id, - &error->flags); - } - - clear_bit(I915_RESET_BACKOFF, &error->flags); - wake_up_all(&error->reset_queue); - -out: - intel_runtime_pm_put(i915, wakeref); -} - -int i915_reset_trylock(struct drm_i915_private *i915) -{ - struct i915_gpu_error *error = &i915->gpu_error; - int srcu; - - might_lock(&error->reset_backoff_srcu); - might_sleep(); - - rcu_read_lock(); - while (test_bit(I915_RESET_BACKOFF, &error->flags)) { - rcu_read_unlock(); - - if (wait_event_interruptible(error->reset_queue, - !test_bit(I915_RESET_BACKOFF, - &error->flags))) - return -EINTR; - - rcu_read_lock(); - } - srcu = srcu_read_lock(&error->reset_backoff_srcu); - rcu_read_unlock(); - - return srcu; -} - -void i915_reset_unlock(struct drm_i915_private *i915, int tag) -__releases(&i915->gpu_error.reset_backoff_srcu) -{ - struct i915_gpu_error *error = &i915->gpu_error; - - srcu_read_unlock(&error->reset_backoff_srcu, tag); -} - -int i915_terminally_wedged(struct drm_i915_private *i915) -{ - struct i915_gpu_error *error = &i915->gpu_error; - - might_sleep(); - - if (!__i915_wedged(error)) - return 0; - - /* Reset still in progress? Maybe we will recover? */ - if (!test_bit(I915_RESET_BACKOFF, &error->flags)) - return -EIO; - - /* XXX intel_reset_finish() still takes struct_mutex!!! 
*/ - if (mutex_is_locked(&i915->drm.struct_mutex)) - return -EAGAIN; - - if (wait_event_interruptible(error->reset_queue, - !test_bit(I915_RESET_BACKOFF, - &error->flags))) - return -EINTR; - - return __i915_wedged(error) ? -EIO : 0; -} - -bool i915_reset_flush(struct drm_i915_private *i915) -{ - int err; - - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); - - flush_workqueue(i915->wq); - GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart)); - - mutex_lock(&i915->drm.struct_mutex); - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - MAX_SCHEDULE_TIMEOUT); - mutex_unlock(&i915->drm.struct_mutex); - - return !err; -} - -static void i915_wedge_me(struct work_struct *work) -{ - struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); - - dev_err(w->i915->drm.dev, - "%s timed out, cancelling all in-flight rendering.\n", - w->name); - i915_gem_set_wedged(w->i915); -} - -void __i915_init_wedge(struct i915_wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const char *name) -{ - w->i915 = i915; - w->name = name; - - INIT_DELAYED_WORK_ONSTACK(&w->work, i915_wedge_me); - schedule_delayed_work(&w->work, timeout); -} - -void __i915_fini_wedge(struct i915_wedge_me *w) -{ - cancel_delayed_work_sync(&w->work); - destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; -} diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h deleted file mode 100644 index 3c0450289b8f..000000000000 --- a/drivers/gpu/drm/i915/i915_reset.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2008-2018 Intel Corporation - */ - -#ifndef I915_RESET_H -#define I915_RESET_H - -#include -#include -#include - -#include "intel_engine_types.h" - -struct drm_i915_private; -struct i915_request; -struct intel_engine_cs; -struct intel_guc; - -__printf(4, 5) -void i915_handle_error(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned long flags, - const char *fmt, ...); -#define I915_ERROR_CAPTURE BIT(0) - -void i915_clear_error_registers(struct drm_i915_private *i915); - -void i915_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask, - const char *reason); -int i915_reset_engine(struct intel_engine_cs *engine, - const char *reason); - -void i915_reset_request(struct i915_request *rq, bool guilty); -bool i915_reset_flush(struct drm_i915_private *i915); - -int __must_check i915_reset_trylock(struct drm_i915_private *i915); -void i915_reset_unlock(struct drm_i915_private *i915, int tag); - -int i915_terminally_wedged(struct drm_i915_private *i915); - -bool intel_has_gpu_reset(struct drm_i915_private *i915); -bool intel_has_reset_engine(struct drm_i915_private *i915); - -int intel_gpu_reset(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask); - -int intel_reset_guc(struct drm_i915_private *i915); - -struct i915_wedge_me { - struct delayed_work work; - struct drm_i915_private *i915; - const char *name; -}; - -void __i915_init_wedge(struct i915_wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const char *name); -void __i915_fini_wedge(struct i915_wedge_me *w); - -#define i915_wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__i915_init_wedge((W), (DEV), (TIMEOUT), __func__); \ - (W)->i915; \ - __i915_fini_wedge((W))) - -#endif /* I915_RESET_H */ diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index f1af3916a808..166a457884b2 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ 
b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -9,8 +9,8 @@ #include +#include "gt/intel_engine_types.h" #include "i915_priolist_types.h" -#include "intel_engine_types.h" struct drm_i915_private; struct i915_request; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 12893304c8f8..b5286f3d8146 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -8,9 +8,10 @@ #include +#include "gt/intel_engine.h" + #include "i915_drv.h" #include "intel_drv.h" -#include "intel_ringbuffer.h" #undef TRACE_SYSTEM #define TRACE_SYSTEM i915 diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 36726392e737..d4d308b6d1d8 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -22,11 +22,12 @@ * */ +#include "gt/intel_engine.h" + #include "i915_vma.h" #include "i915_drv.h" #include "i915_globals.h" -#include "intel_ringbuffer.h" #include "intel_frontbuffer.h" #include diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c deleted file mode 100644 index 3cbffd400b1b..000000000000 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ /dev/null @@ -1,337 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#include -#include - -#include "i915_drv.h" - -static void irq_enable(struct intel_engine_cs *engine) -{ - if (!engine->irq_enable) - return; - - /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); - engine->irq_enable(engine); - spin_unlock(&engine->i915->irq_lock); -} - -static void irq_disable(struct intel_engine_cs *engine) -{ - if (!engine->irq_disable) - return; - - /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); - engine->irq_disable(engine); - spin_unlock(&engine->i915->irq_lock); -} - -static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) -{ - lockdep_assert_held(&b->irq_lock); - - GEM_BUG_ON(!b->irq_enabled); - if (!--b->irq_enabled) - irq_disable(container_of(b, - struct intel_engine_cs, - breadcrumbs)); - - b->irq_armed = false; -} - -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - if (!b->irq_armed) - return; - - spin_lock_irq(&b->irq_lock); - if (b->irq_armed) - __intel_breadcrumbs_disarm_irq(b); - spin_unlock_irq(&b->irq_lock); -} - -static inline bool __request_completed(const struct i915_request *rq) -{ - return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); -} - -void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct intel_context *ce, *cn; - struct list_head *pos, *next; - LIST_HEAD(signal); - - spin_lock(&b->irq_lock); - - if (b->irq_armed && list_empty(&b->signalers)) - __intel_breadcrumbs_disarm_irq(b); - - list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { - GEM_BUG_ON(list_empty(&ce->signals)); - - list_for_each_safe(pos, next, &ce->signals) { - struct i915_request *rq = - list_entry(pos, typeof(*rq), signal_link); - - if (!__request_completed(rq)) - break; - - GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, - &rq->fence.flags)); - - /* - * Queue for execution after dropping the signaling - * spinlock as the callback chain may end up adding - * more signalers to the same context or engine. - */ - i915_request_get(rq); - - /* - * We may race with direct invocation of - * dma_fence_signal(), e.g. i915_request_retire(), - * so we need to acquire our reference to the request - * before we cancel the breadcrumb. - */ - clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); - list_add_tail(&rq->signal_link, &signal); - } - - /* - * We process the list deletion in bulk, only using a list_add - * (not list_move) above but keeping the status of - * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit. 
- */ - if (!list_is_first(pos, &ce->signals)) { - /* Advance the list to the first incomplete request */ - __list_del_many(&ce->signals, pos); - if (&ce->signals == pos) /* now empty */ - list_del_init(&ce->signal_link); - } - } - - spin_unlock(&b->irq_lock); - - list_for_each_safe(pos, next, &signal) { - struct i915_request *rq = - list_entry(pos, typeof(*rq), signal_link); - - dma_fence_signal(&rq->fence); - i915_request_put(rq); - } -} - -void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) -{ - local_irq_disable(); - intel_engine_breadcrumbs_irq(engine); - local_irq_enable(); -} - -static void signal_irq_work(struct irq_work *work) -{ - struct intel_engine_cs *engine = - container_of(work, typeof(*engine), breadcrumbs.irq_work); - - intel_engine_breadcrumbs_irq(engine); -} - -void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - spin_lock_irq(&b->irq_lock); - if (!b->irq_enabled++) - irq_enable(engine); - GEM_BUG_ON(!b->irq_enabled); /* no overflow! */ - spin_unlock_irq(&b->irq_lock); -} - -void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - spin_lock_irq(&b->irq_lock); - GEM_BUG_ON(!b->irq_enabled); /* no underflow! */ - if (!--b->irq_enabled) - irq_disable(engine); - spin_unlock_irq(&b->irq_lock); -} - -static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) -{ - struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, breadcrumbs); - - lockdep_assert_held(&b->irq_lock); - if (b->irq_armed) - return; - - /* - * The breadcrumb irq will be disarmed on the interrupt after the - * waiters are signaled. This gives us a single interrupt window in - * which we can add a new waiter and avoid the cost of re-enabling - * the irq. - */ - b->irq_armed = true; - - /* - * Since we are waiting on a request, the GPU should be busy - * and should have its own rpm reference. This is tracked - * by i915->gt.awake, we can forgo holding our own wakref - * for the interrupt as before i915->gt.awake is released (when - * the driver is idle) we disarm the breadcrumbs. 
- */ - - if (!b->irq_enabled++) - irq_enable(engine); -} - -void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - spin_lock_init(&b->irq_lock); - INIT_LIST_HEAD(&b->signalers); - - init_irq_work(&b->irq_work, signal_irq_work); -} - -void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned long flags; - - spin_lock_irqsave(&b->irq_lock, flags); - - if (b->irq_enabled) - irq_enable(engine); - else - irq_disable(engine); - - spin_unlock_irqrestore(&b->irq_lock, flags); -} - -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) -{ -} - -bool i915_request_enable_breadcrumb(struct i915_request *rq) -{ - struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; - - GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); - - if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) - return true; - - spin_lock(&b->irq_lock); - if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) && - !__request_completed(rq)) { - struct intel_context *ce = rq->hw_context; - struct list_head *pos; - - __intel_breadcrumbs_arm_irq(b); - - /* - * We keep the seqno in retirement order, so we can break - * inside intel_engine_breadcrumbs_irq as soon as we've passed - * the last completed request (or seen a request that hasn't - * event started). We could iterate the timeline->requests list, - * but keeping a separate signalers_list has the advantage of - * hopefully being much smaller than the full list and so - * provides faster iteration and detection when there are no - * more interrupts required for this context. - * - * We typically expect to add new signalers in order, so we - * start looking for our insertion point from the tail of - * the list. - */ - list_for_each_prev(pos, &ce->signals) { - struct i915_request *it = - list_entry(pos, typeof(*it), signal_link); - - if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno)) - break; - } - list_add(&rq->signal_link, pos); - if (pos == &ce->signals) /* catch transitions from empty list */ - list_move_tail(&ce->signal_link, &b->signalers); - - set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); - } - spin_unlock(&b->irq_lock); - - return !__request_completed(rq); -} - -void i915_request_cancel_breadcrumb(struct i915_request *rq) -{ - struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; - - if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) - return; - - spin_lock(&b->irq_lock); - if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { - struct intel_context *ce = rq->hw_context; - - list_del(&rq->signal_link); - if (list_empty(&ce->signals)) - list_del_init(&ce->signal_link); - - clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); - } - spin_unlock(&b->irq_lock); -} - -void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, - struct drm_printer *p) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct intel_context *ce; - struct i915_request *rq; - - if (list_empty(&b->signalers)) - return; - - drm_printf(p, "Signals:\n"); - - spin_lock_irq(&b->irq_lock); - list_for_each_entry(ce, &b->signalers, signal_link) { - list_for_each_entry(rq, &ce->signals, signal_link) { - drm_printf(p, "\t[%llx:%llx%s] @ %dms\n", - rq->fence.context, rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? 
"*" : - "", - jiffies_to_msecs(jiffies - rq->emitted_jiffies)); - } - } - spin_unlock_irq(&b->irq_lock); -} diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c deleted file mode 100644 index 961d1445833d..000000000000 --- a/drivers/gpu/drm/i915/intel_context.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#include "i915_drv.h" -#include "i915_gem_context.h" -#include "i915_globals.h" -#include "intel_context.h" -#include "intel_ringbuffer.h" - -static struct i915_global_context { - struct i915_global base; - struct kmem_cache *slab_ce; -} global; - -struct intel_context *intel_context_alloc(void) -{ - return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL); -} - -void intel_context_free(struct intel_context *ce) -{ - kmem_cache_free(global.slab_ce, ce); -} - -struct intel_context * -intel_context_lookup(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - struct intel_context *ce = NULL; - struct rb_node *p; - - spin_lock(&ctx->hw_contexts_lock); - p = ctx->hw_contexts.rb_node; - while (p) { - struct intel_context *this = - rb_entry(p, struct intel_context, node); - - if (this->engine == engine) { - GEM_BUG_ON(this->gem_context != ctx); - ce = this; - break; - } - - if (this->engine < engine) - p = p->rb_right; - else - p = p->rb_left; - } - spin_unlock(&ctx->hw_contexts_lock); - - return ce; -} - -struct intel_context * -__intel_context_insert(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct intel_context *ce) -{ - struct rb_node **p, *parent; - int err = 0; - - spin_lock(&ctx->hw_contexts_lock); - - parent = NULL; - p = &ctx->hw_contexts.rb_node; - while (*p) { - struct intel_context *this; - - parent = *p; - this = rb_entry(parent, struct intel_context, node); - - if (this->engine == engine) { - err = -EEXIST; - ce = this; - break; - } - - if (this->engine < engine) - p = &parent->rb_right; - else - p = &parent->rb_left; - } - if (!err) { - rb_link_node(&ce->node, parent, p); - rb_insert_color(&ce->node, &ctx->hw_contexts); - } - - spin_unlock(&ctx->hw_contexts_lock); - - return ce; -} - -void __intel_context_remove(struct intel_context *ce) -{ - struct i915_gem_context *ctx = ce->gem_context; - - spin_lock(&ctx->hw_contexts_lock); - rb_erase(&ce->node, &ctx->hw_contexts); - spin_unlock(&ctx->hw_contexts_lock); -} - -static struct intel_context * -intel_context_instance(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - struct intel_context *ce, *pos; - - ce = intel_context_lookup(ctx, engine); - if (likely(ce)) - return ce; - - ce = intel_context_alloc(); - if (!ce) - return ERR_PTR(-ENOMEM); - - intel_context_init(ce, ctx, engine); - - pos = __intel_context_insert(ctx, engine, ce); - if (unlikely(pos != ce)) /* Beaten! 
Use their HW context instead */ - intel_context_free(ce); - - GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos); - return pos; -} - -struct intel_context * -intel_context_pin_lock(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) - __acquires(ce->pin_mutex) -{ - struct intel_context *ce; - - ce = intel_context_instance(ctx, engine); - if (IS_ERR(ce)) - return ce; - - if (mutex_lock_interruptible(&ce->pin_mutex)) - return ERR_PTR(-EINTR); - - return ce; -} - -struct intel_context * -intel_context_pin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - struct intel_context *ce; - int err; - - ce = intel_context_instance(ctx, engine); - if (IS_ERR(ce)) - return ce; - - if (likely(atomic_inc_not_zero(&ce->pin_count))) - return ce; - - if (mutex_lock_interruptible(&ce->pin_mutex)) - return ERR_PTR(-EINTR); - - if (likely(!atomic_read(&ce->pin_count))) { - err = ce->ops->pin(ce); - if (err) - goto err; - - i915_gem_context_get(ctx); - GEM_BUG_ON(ce->gem_context != ctx); - - mutex_lock(&ctx->mutex); - list_add(&ce->active_link, &ctx->active_engines); - mutex_unlock(&ctx->mutex); - - intel_context_get(ce); - smp_mb__before_atomic(); /* flush pin before it is visible */ - } - - atomic_inc(&ce->pin_count); - GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */ - - mutex_unlock(&ce->pin_mutex); - return ce; - -err: - mutex_unlock(&ce->pin_mutex); - return ERR_PTR(err); -} - -void intel_context_unpin(struct intel_context *ce) -{ - if (likely(atomic_add_unless(&ce->pin_count, -1, 1))) - return; - - /* We may be called from inside intel_context_pin() to evict another */ - intel_context_get(ce); - mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING); - - if (likely(atomic_dec_and_test(&ce->pin_count))) { - ce->ops->unpin(ce); - - mutex_lock(&ce->gem_context->mutex); - list_del(&ce->active_link); - mutex_unlock(&ce->gem_context->mutex); - - i915_gem_context_put(ce->gem_context); - intel_context_put(ce); - } - - mutex_unlock(&ce->pin_mutex); - intel_context_put(ce); -} - -static void intel_context_retire(struct i915_active_request *active, - struct i915_request *rq) -{ - struct intel_context *ce = - container_of(active, typeof(*ce), active_tracker); - - intel_context_unpin(ce); -} - -void -intel_context_init(struct intel_context *ce, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - kref_init(&ce->ref); - - ce->gem_context = ctx; - ce->engine = engine; - ce->ops = engine->cops; - ce->sseu = engine->sseu; - - INIT_LIST_HEAD(&ce->signal_link); - INIT_LIST_HEAD(&ce->signals); - - mutex_init(&ce->pin_mutex); - - i915_active_request_init(&ce->active_tracker, - NULL, intel_context_retire); -} - -static void i915_global_context_shrink(void) -{ - kmem_cache_shrink(global.slab_ce); -} - -static void i915_global_context_exit(void) -{ - kmem_cache_destroy(global.slab_ce); -} - -static struct i915_global_context global = { { - .shrink = i915_global_context_shrink, - .exit = i915_global_context_exit, -} }; - -int __init i915_global_context_init(void) -{ - global.slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN); - if (!global.slab_ce) - return -ENOMEM; - - i915_global_register(&global.base); - return 0; -} diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h deleted file mode 100644 index ebc861b1a49e..000000000000 --- a/drivers/gpu/drm/i915/intel_context.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#ifndef __INTEL_CONTEXT_H__ -#define 
__INTEL_CONTEXT_H__ - -#include - -#include "intel_context_types.h" -#include "intel_engine_types.h" - -struct intel_context *intel_context_alloc(void); -void intel_context_free(struct intel_context *ce); - -void intel_context_init(struct intel_context *ce, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine); - -/** - * intel_context_lookup - Find the matching HW context for this (ctx, engine) - * @ctx - the parent GEM context - * @engine - the target HW engine - * - * May return NULL if the HW context hasn't been instantiated (i.e. unused). - */ -struct intel_context * -intel_context_lookup(struct i915_gem_context *ctx, - struct intel_engine_cs *engine); - -/** - * intel_context_pin_lock - Stablises the 'pinned' status of the HW context - * @ctx - the parent GEM context - * @engine - the target HW engine - * - * Acquire a lock on the pinned status of the HW context, such that the context - * can neither be bound to the GPU or unbound whilst the lock is held, i.e. - * intel_context_is_pinned() remains stable. - */ -struct intel_context * -intel_context_pin_lock(struct i915_gem_context *ctx, - struct intel_engine_cs *engine); - -static inline bool -intel_context_is_pinned(struct intel_context *ce) -{ - return atomic_read(&ce->pin_count); -} - -static inline void intel_context_pin_unlock(struct intel_context *ce) -__releases(ce->pin_mutex) -{ - mutex_unlock(&ce->pin_mutex); -} - -struct intel_context * -__intel_context_insert(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct intel_context *ce); -void -__intel_context_remove(struct intel_context *ce); - -struct intel_context * -intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine); - -static inline void __intel_context_pin(struct intel_context *ce) -{ - GEM_BUG_ON(!intel_context_is_pinned(ce)); - atomic_inc(&ce->pin_count); -} - -void intel_context_unpin(struct intel_context *ce); - -static inline struct intel_context *intel_context_get(struct intel_context *ce) -{ - kref_get(&ce->ref); - return ce; -} - -static inline void intel_context_put(struct intel_context *ce) -{ - kref_put(&ce->ref, ce->ops->destroy); -} - -#endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h deleted file mode 100644 index 9ec4f787c908..000000000000 --- a/drivers/gpu/drm/i915/intel_context_types.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#ifndef __INTEL_CONTEXT_TYPES__ -#define __INTEL_CONTEXT_TYPES__ - -#include -#include -#include -#include -#include - -#include "i915_active_types.h" -#include "intel_sseu.h" - -struct i915_gem_context; -struct i915_vma; -struct intel_context; -struct intel_ring; - -struct intel_context_ops { - int (*pin)(struct intel_context *ce); - void (*unpin)(struct intel_context *ce); - - void (*reset)(struct intel_context *ce); - void (*destroy)(struct kref *kref); -}; - -struct intel_context { - struct kref ref; - - struct i915_gem_context *gem_context; - struct intel_engine_cs *engine; - struct intel_engine_cs *active; - - struct list_head active_link; - struct list_head signal_link; - struct list_head signals; - - struct i915_vma *state; - struct intel_ring *ring; - - u32 *lrc_reg_state; - u64 lrc_desc; - - atomic_t pin_count; - struct mutex pin_mutex; /* guards pinning and associated on-gpuing */ - - /** - * active_tracker: Active tracker for the external rq activity - * on this intel_context object. 
- */ - struct i915_active_request active_tracker; - - const struct intel_context_ops *ops; - struct rb_node node; - - /** sseu: Control eu/slice partitioning */ - struct intel_sseu sseu; -}; - -#endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 1598c7079ffd..5a2e17d6146b 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -27,9 +27,11 @@ #include -#include "intel_engine_types.h" +#include "gt/intel_engine_types.h" +#include "gt/intel_context_types.h" +#include "gt/intel_sseu.h" + #include "intel_display.h" -#include "intel_sseu.h" struct drm_printer; struct drm_i915_private; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 62d663e506ab..c3d1d38ccf4d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -46,7 +46,6 @@ #include "i915_drv.h" #include "i915_gem_clflush.h" -#include "i915_reset.h" #include "i915_trace.h" #include "intel_atomic_plane.h" #include "intel_color.h" diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c deleted file mode 100644 index 202b4b7a24f1..000000000000 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ /dev/null @@ -1,1761 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include - -#include "i915_drv.h" -#include "i915_reset.h" -#include "intel_ringbuffer.h" -#include "intel_lrc.h" - -/* Haswell does have the CXT_SIZE register however it does not appear to be - * valid. Now, docs explain in dwords what is in the context object. The full - * size is 70720 bytes, however, the power context and execlist context will - * never be saved (power context is stored elsewhere, and execlists don't work - * on HSW) - so the final size, including the extra state required for the - * Resource Streamer, is 66944 bytes, which rounds to 17 pages. 
- */ -#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) - -#define DEFAULT_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) -#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) -#define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE) -#define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE) - -#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) - -struct engine_class_info { - const char *name; - int (*init_legacy)(struct intel_engine_cs *engine); - int (*init_execlists)(struct intel_engine_cs *engine); - - u8 uabi_class; -}; - -static const struct engine_class_info intel_engine_classes[] = { - [RENDER_CLASS] = { - .name = "rcs", - .init_execlists = logical_render_ring_init, - .init_legacy = intel_init_render_ring_buffer, - .uabi_class = I915_ENGINE_CLASS_RENDER, - }, - [COPY_ENGINE_CLASS] = { - .name = "bcs", - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_blt_ring_buffer, - .uabi_class = I915_ENGINE_CLASS_COPY, - }, - [VIDEO_DECODE_CLASS] = { - .name = "vcs", - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_bsd_ring_buffer, - .uabi_class = I915_ENGINE_CLASS_VIDEO, - }, - [VIDEO_ENHANCEMENT_CLASS] = { - .name = "vecs", - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_vebox_ring_buffer, - .uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE, - }, -}; - -#define MAX_MMIO_BASES 3 -struct engine_info { - unsigned int hw_id; - u8 class; - u8 instance; - /* mmio bases table *must* be sorted in reverse gen order */ - struct engine_mmio_base { - u32 gen : 8; - u32 base : 24; - } mmio_bases[MAX_MMIO_BASES]; -}; - -static const struct engine_info intel_engines[] = { - [RCS0] = { - .hw_id = RCS0_HW, - .class = RENDER_CLASS, - .instance = 0, - .mmio_bases = { - { .gen = 1, .base = RENDER_RING_BASE } - }, - }, - [BCS0] = { - .hw_id = BCS0_HW, - .class = COPY_ENGINE_CLASS, - .instance = 0, - .mmio_bases = { - { .gen = 6, .base = BLT_RING_BASE } - }, - }, - [VCS0] = { - .hw_id = VCS0_HW, - .class = VIDEO_DECODE_CLASS, - .instance = 0, - .mmio_bases = { - { .gen = 11, .base = GEN11_BSD_RING_BASE }, - { .gen = 6, .base = GEN6_BSD_RING_BASE }, - { .gen = 4, .base = BSD_RING_BASE } - }, - }, - [VCS1] = { - .hw_id = VCS1_HW, - .class = VIDEO_DECODE_CLASS, - .instance = 1, - .mmio_bases = { - { .gen = 11, .base = GEN11_BSD2_RING_BASE }, - { .gen = 8, .base = GEN8_BSD2_RING_BASE } - }, - }, - [VCS2] = { - .hw_id = VCS2_HW, - .class = VIDEO_DECODE_CLASS, - .instance = 2, - .mmio_bases = { - { .gen = 11, .base = GEN11_BSD3_RING_BASE } - }, - }, - [VCS3] = { - .hw_id = VCS3_HW, - .class = VIDEO_DECODE_CLASS, - .instance = 3, - .mmio_bases = { - { .gen = 11, .base = GEN11_BSD4_RING_BASE } - }, - }, - [VECS0] = { - .hw_id = VECS0_HW, - .class = VIDEO_ENHANCEMENT_CLASS, - .instance = 0, - .mmio_bases = { - { .gen = 11, .base = GEN11_VEBOX_RING_BASE }, - { .gen = 7, .base = VEBOX_RING_BASE } - }, - }, - [VECS1] = { - .hw_id = VECS1_HW, - .class = VIDEO_ENHANCEMENT_CLASS, - .instance = 1, - .mmio_bases = { - { .gen = 11, .base = GEN11_VEBOX2_RING_BASE } - }, - }, -}; - -/** - * ___intel_engine_context_size() - return the size of the context for an engine - * @dev_priv: i915 device private - * @class: engine class - * - * Each engine class may require a different amount of space for a context - * image. 
- * - * Return: size (in bytes) of an engine class specific context image - * - * Note: this size includes the HWSP, which is part of the context image - * in LRC mode, but does not include the "shared data page" used with - * GuC submission. The caller should account for this if using the GuC. - */ -static u32 -__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) -{ - u32 cxt_size; - - BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE); - - switch (class) { - case RENDER_CLASS: - switch (INTEL_GEN(dev_priv)) { - default: - MISSING_CASE(INTEL_GEN(dev_priv)); - return DEFAULT_LR_CONTEXT_RENDER_SIZE; - case 11: - return GEN11_LR_CONTEXT_RENDER_SIZE; - case 10: - return GEN10_LR_CONTEXT_RENDER_SIZE; - case 9: - return GEN9_LR_CONTEXT_RENDER_SIZE; - case 8: - return GEN8_LR_CONTEXT_RENDER_SIZE; - case 7: - if (IS_HASWELL(dev_priv)) - return HSW_CXT_TOTAL_SIZE; - - cxt_size = I915_READ(GEN7_CXT_SIZE); - return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64, - PAGE_SIZE); - case 6: - cxt_size = I915_READ(CXT_SIZE); - return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64, - PAGE_SIZE); - case 5: - case 4: - case 3: - case 2: - /* For the special day when i810 gets merged. */ - case 1: - return 0; - } - break; - default: - MISSING_CASE(class); - /* fall through */ - case VIDEO_DECODE_CLASS: - case VIDEO_ENHANCEMENT_CLASS: - case COPY_ENGINE_CLASS: - if (INTEL_GEN(dev_priv) < 8) - return 0; - return GEN8_LR_CONTEXT_OTHER_SIZE; - } -} - -static u32 __engine_mmio_base(struct drm_i915_private *i915, - const struct engine_mmio_base *bases) -{ - int i; - - for (i = 0; i < MAX_MMIO_BASES; i++) - if (INTEL_GEN(i915) >= bases[i].gen) - break; - - GEM_BUG_ON(i == MAX_MMIO_BASES); - GEM_BUG_ON(!bases[i].base); - - return bases[i].base; -} - -static void __sprint_engine_name(char *name, const struct engine_info *info) -{ - WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u", - intel_engine_classes[info->class].name, - info->instance) >= INTEL_ENGINE_CS_MAX_NAME); -} - -void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask) -{ - /* - * Though they added more rings on g4x/ilk, they did not add - * per-engine HWSTAM until gen6. 
- */ - if (INTEL_GEN(engine->i915) < 6 && engine->class != RENDER_CLASS) - return; - - if (INTEL_GEN(engine->i915) >= 3) - ENGINE_WRITE(engine, RING_HWSTAM, mask); - else - ENGINE_WRITE16(engine, RING_HWSTAM, mask); -} - -static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine) -{ - /* Mask off all writes into the unknown HWSP */ - intel_engine_set_hwsp_writemask(engine, ~0u); -} - -static int -intel_engine_setup(struct drm_i915_private *dev_priv, - enum intel_engine_id id) -{ - const struct engine_info *info = &intel_engines[id]; - struct intel_engine_cs *engine; - - GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); - - BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); - BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); - - if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS)) - return -EINVAL; - - if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE)) - return -EINVAL; - - if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance])) - return -EINVAL; - - GEM_BUG_ON(dev_priv->engine[id]); - engine = kzalloc(sizeof(*engine), GFP_KERNEL); - if (!engine) - return -ENOMEM; - - BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES); - - engine->id = id; - engine->mask = BIT(id); - engine->i915 = dev_priv; - engine->uncore = &dev_priv->uncore; - __sprint_engine_name(engine->name, info); - engine->hw_id = engine->guc_id = info->hw_id; - engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases); - engine->class = info->class; - engine->instance = info->instance; - - engine->uabi_class = intel_engine_classes[info->class].uabi_class; - - engine->context_size = __intel_engine_context_size(dev_priv, - engine->class); - if (WARN_ON(engine->context_size > BIT(20))) - engine->context_size = 0; - if (engine->context_size) - DRIVER_CAPS(dev_priv)->has_logical_contexts = true; - - /* Nothing to do here, execute in order of dependencies */ - engine->schedule = NULL; - - seqlock_init(&engine->stats.lock); - - ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); - - /* Scrub mmio state on takeover */ - intel_engine_sanitize_mmio(engine); - - dev_priv->engine_class[info->class][info->instance] = engine; - dev_priv->engine[id] = engine; - return 0; -} - -/** - * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers - * @dev_priv: i915 device private - * - * Return: non-zero if the initialization failed. - */ -int intel_engines_init_mmio(struct drm_i915_private *dev_priv) -{ - struct intel_device_info *device_info = mkwrite_device_info(dev_priv); - const unsigned int engine_mask = INTEL_INFO(dev_priv)->engine_mask; - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned int mask = 0; - unsigned int i; - int err; - - WARN_ON(engine_mask == 0); - WARN_ON(engine_mask & - GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES)); - - if (i915_inject_load_failure()) - return -ENODEV; - - for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { - if (!HAS_ENGINE(dev_priv, i)) - continue; - - err = intel_engine_setup(dev_priv, i); - if (err) - goto cleanup; - - mask |= BIT(i); - } - - /* - * Catch failures to update intel_engines table when the new engines - * are added to the driver by a warning and disabling the forgotten - * engines. 
- */ - if (WARN_ON(mask != engine_mask)) - device_info->engine_mask = mask; - - /* We always presume we have at least RCS available for later probing */ - if (WARN_ON(!HAS_ENGINE(dev_priv, RCS0))) { - err = -ENODEV; - goto cleanup; - } - - RUNTIME_INFO(dev_priv)->num_engines = hweight32(mask); - - i915_check_and_clear_faults(dev_priv); - - return 0; - -cleanup: - for_each_engine(engine, dev_priv, id) - kfree(engine); - return err; -} - -/** - * intel_engines_init() - init the Engine Command Streamers - * @dev_priv: i915 device private - * - * Return: non-zero if the initialization failed. - */ -int intel_engines_init(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id, err_id; - int err; - - for_each_engine(engine, dev_priv, id) { - const struct engine_class_info *class_info = - &intel_engine_classes[engine->class]; - int (*init)(struct intel_engine_cs *engine); - - if (HAS_EXECLISTS(dev_priv)) - init = class_info->init_execlists; - else - init = class_info->init_legacy; - - err = -EINVAL; - err_id = id; - - if (GEM_DEBUG_WARN_ON(!init)) - goto cleanup; - - err = init(engine); - if (err) - goto cleanup; - - GEM_BUG_ON(!engine->submit_request); - } - - return 0; - -cleanup: - for_each_engine(engine, dev_priv, id) { - if (id >= err_id) { - kfree(engine); - dev_priv->engine[id] = NULL; - } else { - dev_priv->gt.cleanup_engine(engine); - } - } - return err; -} - -static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) -{ - i915_gem_batch_pool_init(&engine->batch_pool, engine); -} - -static void intel_engine_init_execlist(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - - execlists->port_mask = 1; - GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists))); - GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); - - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; -} - -static void cleanup_status_page(struct intel_engine_cs *engine) -{ - struct i915_vma *vma; - - /* Prevent writes into HWSP after returning the page to the system */ - intel_engine_set_hwsp_writemask(engine, ~0u); - - vma = fetch_and_zero(&engine->status_page.vma); - if (!vma) - return; - - if (!HWS_NEEDS_PHYSICAL(engine->i915)) - i915_vma_unpin(vma); - - i915_gem_object_unpin_map(vma->obj); - __i915_gem_object_release_unless_active(vma->obj); -} - -static int pin_ggtt_status_page(struct intel_engine_cs *engine, - struct i915_vma *vma) -{ - unsigned int flags; - - flags = PIN_GLOBAL; - if (!HAS_LLC(engine->i915)) - /* - * On g33, we cannot place HWS above 256MiB, so - * restrict its pinning to the low mappable arena. - * Though this restriction is not documented for - * gen4, gen5, or byt, they also behave similarly - * and hang if the HWS is placed at the top of the - * GTT. To generalise, it appears that all !llc - * platforms have issues with us placing the HWS - * above the mappable region (even though we never - * actually map it). - */ - flags |= PIN_MAPPABLE; - else - flags |= PIN_HIGH; - - return i915_vma_pin(vma, 0, 0, flags); -} - -static int init_status_page(struct intel_engine_cs *engine) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - void *vaddr; - int ret; - - /* - * Though the HWS register does support 36bit addresses, historically - * we have had hangs and corruption reported due to wild writes if - * the HWS is placed above 4G. 
We only allow objects to be allocated - * in GFP_DMA32 for i965, and no earlier physical address users had - * access to more than 4G. - */ - obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate status page\n"); - return PTR_ERR(obj); - } - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); - - vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err; - } - - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto err; - } - - engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE); - engine->status_page.vma = vma; - - if (!HWS_NEEDS_PHYSICAL(engine->i915)) { - ret = pin_ggtt_status_page(engine, vma); - if (ret) - goto err_unpin; - } - - return 0; - -err_unpin: - i915_gem_object_unpin_map(obj); -err: - i915_gem_object_put(obj); - return ret; -} - -/** - * intel_engines_setup_common - setup engine state not requiring hw access - * @engine: Engine to setup. - * - * Initializes @engine@ structure members shared between legacy and execlists - * submission modes which do not require hardware access. - * - * Typically done early in the submission mode specific engine setup stage. - */ -int intel_engine_setup_common(struct intel_engine_cs *engine) -{ - int err; - - err = init_status_page(engine); - if (err) - return err; - - err = i915_timeline_init(engine->i915, - &engine->timeline, - engine->status_page.vma); - if (err) - goto err_hwsp; - - i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); - - intel_engine_init_breadcrumbs(engine); - intel_engine_init_execlist(engine); - intel_engine_init_hangcheck(engine); - intel_engine_init_batch_pool(engine); - intel_engine_init_cmd_parser(engine); - - /* Use the whole device by default */ - engine->sseu = - intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu); - - return 0; - -err_hwsp: - cleanup_status_page(engine); - return err; -} - -void intel_engines_set_scheduler_caps(struct drm_i915_private *i915) -{ - static const struct { - u8 engine; - u8 sched; - } map[] = { -#define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) } - MAP(PREEMPTION, PREEMPTION), - MAP(SEMAPHORES, SEMAPHORES), -#undef MAP - }; - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 enabled, disabled; - - enabled = 0; - disabled = 0; - for_each_engine(engine, i915, id) { /* all engines must agree! 
*/ - int i; - - if (engine->schedule) - enabled |= (I915_SCHEDULER_CAP_ENABLED | - I915_SCHEDULER_CAP_PRIORITY); - else - disabled |= (I915_SCHEDULER_CAP_ENABLED | - I915_SCHEDULER_CAP_PRIORITY); - - for (i = 0; i < ARRAY_SIZE(map); i++) { - if (engine->flags & BIT(map[i].engine)) - enabled |= BIT(map[i].sched); - else - disabled |= BIT(map[i].sched); - } - } - - i915->caps.scheduler = enabled & ~disabled; - if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED)) - i915->caps.scheduler = 0; -} - -struct measure_breadcrumb { - struct i915_request rq; - struct i915_timeline timeline; - struct intel_ring ring; - u32 cs[1024]; -}; - -static int measure_breadcrumb_dw(struct intel_engine_cs *engine) -{ - struct measure_breadcrumb *frame; - int dw = -ENOMEM; - - GEM_BUG_ON(!engine->i915->gt.scratch); - - frame = kzalloc(sizeof(*frame), GFP_KERNEL); - if (!frame) - return -ENOMEM; - - if (i915_timeline_init(engine->i915, - &frame->timeline, - engine->status_page.vma)) - goto out_frame; - - INIT_LIST_HEAD(&frame->ring.request_list); - frame->ring.timeline = &frame->timeline; - frame->ring.vaddr = frame->cs; - frame->ring.size = sizeof(frame->cs); - frame->ring.effective_size = frame->ring.size; - intel_ring_update_space(&frame->ring); - - frame->rq.i915 = engine->i915; - frame->rq.engine = engine; - frame->rq.ring = &frame->ring; - frame->rq.timeline = &frame->timeline; - - dw = i915_timeline_pin(&frame->timeline); - if (dw < 0) - goto out_timeline; - - dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; - - i915_timeline_unpin(&frame->timeline); - -out_timeline: - i915_timeline_fini(&frame->timeline); -out_frame: - kfree(frame); - return dw; -} - -static int pin_context(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct intel_context **out) -{ - struct intel_context *ce; - - ce = intel_context_pin(ctx, engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - *out = ce; - return 0; -} - -/** - * intel_engines_init_common - initialize cengine state which might require hw access - * @engine: Engine to initialize. - * - * Initializes @engine@ structure members shared between legacy and execlists - * submission modes which do require hardware access. - * - * Typcally done at later stages of submission mode specific engine setup. - * - * Returns zero on success or an error code on failure. - */ -int intel_engine_init_common(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - int ret; - - /* We may need to do things with the shrinker which - * require us to immediately switch back to the default - * context. This can cause a problem as pinning the - * default context also requires GTT space which may not - * be available. To avoid this we always pin the default - * context. - */ - ret = pin_context(i915->kernel_context, engine, - &engine->kernel_context); - if (ret) - return ret; - - /* - * Similarly the preempt context must always be available so that - * we can interrupt the engine at any time. However, as preemption - * is optional, we allow it to fail. 
- */ - if (i915->preempt_context) - pin_context(i915->preempt_context, engine, - &engine->preempt_context); - - ret = measure_breadcrumb_dw(engine); - if (ret < 0) - goto err_unpin; - - engine->emit_fini_breadcrumb_dw = ret; - - engine->set_default_submission(engine); - - return 0; - -err_unpin: - if (engine->preempt_context) - intel_context_unpin(engine->preempt_context); - intel_context_unpin(engine->kernel_context); - return ret; -} - -void intel_gt_resume(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* - * After resume, we may need to poke into the pinned kernel - * contexts to paper over any damage caused by the sudden suspend. - * Only the kernel contexts should remain pinned over suspend, - * allowing us to fixup the user contexts on their first pin. - */ - for_each_engine(engine, i915, id) { - struct intel_context *ce; - - ce = engine->kernel_context; - if (ce) - ce->ops->reset(ce); - - ce = engine->preempt_context; - if (ce) - ce->ops->reset(ce); - } -} - -/** - * intel_engines_cleanup_common - cleans up the engine state created by - * the common initiailizers. - * @engine: Engine to cleanup. - * - * This cleans up everything created by the common helpers. - */ -void intel_engine_cleanup_common(struct intel_engine_cs *engine) -{ - cleanup_status_page(engine); - - intel_engine_fini_breadcrumbs(engine); - intel_engine_cleanup_cmd_parser(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); - - if (engine->default_state) - i915_gem_object_put(engine->default_state); - - if (engine->preempt_context) - intel_context_unpin(engine->preempt_context); - intel_context_unpin(engine->kernel_context); - - i915_timeline_fini(&engine->timeline); - - intel_wa_list_free(&engine->ctx_wa_list); - intel_wa_list_free(&engine->wa_list); - intel_wa_list_free(&engine->whitelist); -} - -u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - - u64 acthd; - - if (INTEL_GEN(i915) >= 8) - acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW); - else if (INTEL_GEN(i915) >= 4) - acthd = ENGINE_READ(engine, RING_ACTHD); - else - acthd = ENGINE_READ(engine, ACTHD); - - return acthd; -} - -u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine) -{ - u64 bbaddr; - - if (INTEL_GEN(engine->i915) >= 8) - bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW); - else - bbaddr = ENGINE_READ(engine, RING_BBADDR); - - return bbaddr; -} - -int intel_engine_stop_cs(struct intel_engine_cs *engine) -{ - struct intel_uncore *uncore = engine->uncore; - const u32 base = engine->mmio_base; - const i915_reg_t mode = RING_MI_MODE(base); - int err; - - if (INTEL_GEN(engine->i915) < 3) - return -ENODEV; - - GEM_TRACE("%s\n", engine->name); - - intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); - - err = 0; - if (__intel_wait_for_register_fw(uncore, - mode, MODE_IDLE, MODE_IDLE, - 1000, 0, - NULL)) { - GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name); - err = -ETIMEDOUT; - } - - /* A final mmio read to let GPU writes be hopefully flushed to memory */ - intel_uncore_posting_read_fw(uncore, mode); - - return err; -} - -void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine) -{ - GEM_TRACE("%s\n", engine->name); - - ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); -} - -const char *i915_cache_level_str(struct drm_i915_private *i915, int type) -{ - switch (type) { - case I915_CACHE_NONE: return " uncached"; - case 
I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped"; - case I915_CACHE_L3_LLC: return " L3+LLC"; - case I915_CACHE_WT: return " WT"; - default: return ""; - } -} - -u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv) -{ - const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; - u32 mcr_s_ss_select; - u32 slice = fls(sseu->slice_mask); - u32 subslice = fls(sseu->subslice_mask[slice]); - - if (IS_GEN(dev_priv, 10)) - mcr_s_ss_select = GEN8_MCR_SLICE(slice) | - GEN8_MCR_SUBSLICE(subslice); - else if (INTEL_GEN(dev_priv) >= 11) - mcr_s_ss_select = GEN11_MCR_SLICE(slice) | - GEN11_MCR_SUBSLICE(subslice); - else - mcr_s_ss_select = 0; - - return mcr_s_ss_select; -} - -static inline u32 -read_subslice_reg(struct drm_i915_private *dev_priv, int slice, - int subslice, i915_reg_t reg) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - u32 mcr_slice_subslice_mask; - u32 mcr_slice_subslice_select; - u32 default_mcr_s_ss_select; - u32 mcr; - u32 ret; - enum forcewake_domains fw_domains; - - if (INTEL_GEN(dev_priv) >= 11) { - mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK | - GEN11_MCR_SUBSLICE_MASK; - mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) | - GEN11_MCR_SUBSLICE(subslice); - } else { - mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK | - GEN8_MCR_SUBSLICE_MASK; - mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) | - GEN8_MCR_SUBSLICE(subslice); - } - - default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(dev_priv); - - fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, - FW_REG_READ); - fw_domains |= intel_uncore_forcewake_for_reg(uncore, - GEN8_MCR_SELECTOR, - FW_REG_READ | FW_REG_WRITE); - - spin_lock_irq(&uncore->lock); - intel_uncore_forcewake_get__locked(uncore, fw_domains); - - mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); - - WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) != - default_mcr_s_ss_select); - - mcr &= ~mcr_slice_subslice_mask; - mcr |= mcr_slice_subslice_select; - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); - - ret = intel_uncore_read_fw(uncore, reg); - - mcr &= ~mcr_slice_subslice_mask; - mcr |= default_mcr_s_ss_select; - - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); - - intel_uncore_forcewake_put__locked(uncore, fw_domains); - spin_unlock_irq(&uncore->lock); - - return ret; -} - -/* NB: please notice the memset */ -void intel_engine_get_instdone(struct intel_engine_cs *engine, - struct intel_instdone *instdone) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct intel_uncore *uncore = engine->uncore; - u32 mmio_base = engine->mmio_base; - int slice; - int subslice; - - memset(instdone, 0, sizeof(*instdone)); - - switch (INTEL_GEN(dev_priv)) { - default: - instdone->instdone = - intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); - - if (engine->id != RCS0) - break; - - instdone->slice_common = - intel_uncore_read(uncore, GEN7_SC_INSTDONE); - for_each_instdone_slice_subslice(dev_priv, slice, subslice) { - instdone->sampler[slice][subslice] = - read_subslice_reg(dev_priv, slice, subslice, - GEN7_SAMPLER_INSTDONE); - instdone->row[slice][subslice] = - read_subslice_reg(dev_priv, slice, subslice, - GEN7_ROW_INSTDONE); - } - break; - case 7: - instdone->instdone = - intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); - - if (engine->id != RCS0) - break; - - instdone->slice_common = - intel_uncore_read(uncore, GEN7_SC_INSTDONE); - instdone->sampler[0][0] = - intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE); - instdone->row[0][0] = - intel_uncore_read(uncore, GEN7_ROW_INSTDONE); - - 
break; - case 6: - case 5: - case 4: - instdone->instdone = - intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); - if (engine->id == RCS0) - /* HACK: Using the wrong struct member */ - instdone->slice_common = - intel_uncore_read(uncore, GEN4_INSTDONE1); - break; - case 3: - case 2: - instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE); - break; - } -} - -static bool ring_is_idle(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - intel_wakeref_t wakeref; - bool idle = true; - - if (I915_SELFTEST_ONLY(!engine->mmio_base)) - return true; - - /* If the whole device is asleep, the engine must be idle */ - wakeref = intel_runtime_pm_get_if_in_use(dev_priv); - if (!wakeref) - return true; - - /* First check that no commands are left in the ring */ - if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) != - (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR)) - idle = false; - - /* No bit for gen2, so assume the CS parser is idle */ - if (INTEL_GEN(dev_priv) > 2 && - !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE)) - idle = false; - - intel_runtime_pm_put(dev_priv, wakeref); - - return idle; -} - -/** - * intel_engine_is_idle() - Report if the engine has finished process all work - * @engine: the intel_engine_cs - * - * Return true if there are no requests pending, nothing left to be submitted - * to hardware, and that the engine is idle. - */ -bool intel_engine_is_idle(struct intel_engine_cs *engine) -{ - /* More white lies, if wedged, hw state is inconsistent */ - if (i915_reset_failed(engine->i915)) - return true; - - /* Waiting to drain ELSP? */ - if (READ_ONCE(engine->execlists.active)) { - struct tasklet_struct *t = &engine->execlists.tasklet; - - local_bh_disable(); - if (tasklet_trylock(t)) { - /* Must wait for any GPU reset in progress. */ - if (__tasklet_is_enabled(t)) - t->func(t->data); - tasklet_unlock(t); - } - local_bh_enable(); - - /* Otherwise flush the tasklet if it was on another cpu */ - tasklet_unlock_wait(t); - - if (READ_ONCE(engine->execlists.active)) - return false; - } - - /* ELSP is empty, but there are ready requests? E.g. after reset */ - if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) - return false; - - /* Ring stopped? */ - return ring_is_idle(engine); -} - -bool intel_engines_are_idle(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* - * If the driver is wedged, HW state may be very inconsistent and - * report that it is still busy, even though we have stopped using it. 
- */ - if (i915_reset_failed(i915)) - return true; - - /* Already parked (and passed an idleness test); must still be idle */ - if (!READ_ONCE(i915->gt.awake)) - return true; - - for_each_engine(engine, i915, id) { - if (!intel_engine_is_idle(engine)) - return false; - } - - return true; -} - -void intel_engines_reset_default_submission(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) - engine->set_default_submission(engine); -} - -static bool reset_engines(struct drm_i915_private *i915) -{ - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) - return false; - - return intel_gpu_reset(i915, ALL_ENGINES) == 0; -} - -/** - * intel_engines_sanitize: called after the GPU has lost power - * @i915: the i915 device - * @force: ignore a failed reset and sanitize engine state anyway - * - * Anytime we reset the GPU, either with an explicit GPU reset or through a - * PCI power cycle, the GPU loses state and we must reset our state tracking - * to match. Note that calling intel_engines_sanitize() if the GPU has not - * been reset results in much confusion! - */ -void intel_engines_sanitize(struct drm_i915_private *i915, bool force) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - GEM_TRACE("\n"); - - if (!reset_engines(i915) && !force) - return; - - for_each_engine(engine, i915, id) - intel_engine_reset(engine, false); -} - -/** - * intel_engines_park: called when the GT is transitioning from busy->idle - * @i915: the i915 device - * - * The GT is now idle and about to go to sleep (maybe never to wake again?). - * Time for us to tidy and put away our toys (release resources back to the - * system). - */ -void intel_engines_park(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - /* Flush the residual irq tasklets first. */ - intel_engine_disarm_breadcrumbs(engine); - tasklet_kill(&engine->execlists.tasklet); - - /* - * We are committed now to parking the engines, make sure there - * will be no more interrupts arriving later and the engines - * are truly idle. - */ - if (wait_for(intel_engine_is_idle(engine), 10)) { - struct drm_printer p = drm_debug_printer(__func__); - - dev_err(i915->drm.dev, - "%s is not idle before parking\n", - engine->name); - intel_engine_dump(engine, &p, NULL); - } - - /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); - - if (engine->park) - engine->park(engine); - - if (engine->pinned_default_state) { - i915_gem_object_unpin_map(engine->default_state); - engine->pinned_default_state = NULL; - } - - i915_gem_batch_pool_fini(&engine->batch_pool); - engine->execlists.no_priolist = false; - } - - i915->gt.active_engines = 0; -} - -/** - * intel_engines_unpark: called when the GT is transitioning from idle->busy - * @i915: the i915 device - * - * The GT was idle and now about to fire up with some new user requests. - */ -void intel_engines_unpark(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - void *map; - - /* Pin the default state for fast resets from atomic context. 
*/ - map = NULL; - if (engine->default_state) - map = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); - if (!IS_ERR_OR_NULL(map)) - engine->pinned_default_state = map; - - if (engine->unpark) - engine->unpark(engine); - - intel_engine_init_hangcheck(engine); - } -} - -/** - * intel_engine_lost_context: called when the GPU is reset into unknown state - * @engine: the engine - * - * We have either reset the GPU or otherwise about to lose state tracking of - * the current GPU logical state (e.g. suspend). On next use, it is therefore - * imperative that we make no presumptions about the current state and load - * from scratch. - */ -void intel_engine_lost_context(struct intel_engine_cs *engine) -{ - struct intel_context *ce; - - lockdep_assert_held(&engine->i915->drm.struct_mutex); - - ce = fetch_and_zero(&engine->last_retired_context); - if (ce) - intel_context_unpin(ce); -} - -bool intel_engine_can_store_dword(struct intel_engine_cs *engine) -{ - switch (INTEL_GEN(engine->i915)) { - case 2: - return false; /* uses physical not virtual addresses */ - case 3: - /* maybe only uses physical not virtual addresses */ - return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915)); - case 6: - return engine->class != VIDEO_DECODE_CLASS; /* b0rked */ - default: - return true; - } -} - -unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned int which; - - which = 0; - for_each_engine(engine, i915, id) - if (engine->default_state) - which |= BIT(engine->uabi_class); - - return which; -} - -static int print_sched_attr(struct drm_i915_private *i915, - const struct i915_sched_attr *attr, - char *buf, int x, int len) -{ - if (attr->priority == I915_PRIORITY_INVALID) - return x; - - x += snprintf(buf + x, len - x, - " prio=%d", attr->priority); - - return x; -} - -static void print_request(struct drm_printer *m, - struct i915_request *rq, - const char *prefix) -{ - const char *name = rq->fence.ops->get_timeline_name(&rq->fence); - char buf[80] = ""; - int x = 0; - - x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); - - drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n", - prefix, - rq->fence.context, rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? "*" : - "", - test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &rq->fence.flags) ? 
"+" : "", - buf, - jiffies_to_msecs(jiffies - rq->emitted_jiffies), - name); -} - -static void hexdump(struct drm_printer *m, const void *buf, size_t len) -{ - const size_t rowsize = 8 * sizeof(u32); - const void *prev = NULL; - bool skip = false; - size_t pos; - - for (pos = 0; pos < len; pos += rowsize) { - char line[128]; - - if (prev && !memcmp(prev, buf + pos, rowsize)) { - if (!skip) { - drm_printf(m, "*\n"); - skip = true; - } - continue; - } - - WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, - rowsize, sizeof(u32), - line, sizeof(line), - false) >= sizeof(line)); - drm_printf(m, "[%04zx] %s\n", pos, line); - - prev = buf + pos; - skip = false; - } -} - -static void intel_engine_print_registers(const struct intel_engine_cs *engine, - struct drm_printer *m) -{ - struct drm_i915_private *dev_priv = engine->i915; - const struct intel_engine_execlists * const execlists = - &engine->execlists; - u64 addr; - - if (engine->id == RCS0 && IS_GEN_RANGE(dev_priv, 4, 7)) - drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID)); - drm_printf(m, "\tRING_START: 0x%08x\n", - ENGINE_READ(engine, RING_START)); - drm_printf(m, "\tRING_HEAD: 0x%08x\n", - ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR); - drm_printf(m, "\tRING_TAIL: 0x%08x\n", - ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR); - drm_printf(m, "\tRING_CTL: 0x%08x%s\n", - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : ""); - if (INTEL_GEN(engine->i915) > 2) { - drm_printf(m, "\tRING_MODE: 0x%08x%s\n", - ENGINE_READ(engine, RING_MI_MODE), - ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : ""); - } - - if (INTEL_GEN(dev_priv) >= 6) { - drm_printf(m, "\tRING_IMR: %08x\n", - ENGINE_READ(engine, RING_IMR)); - } - - addr = intel_engine_get_active_head(engine); - drm_printf(m, "\tACTHD: 0x%08x_%08x\n", - upper_32_bits(addr), lower_32_bits(addr)); - addr = intel_engine_get_last_batch_head(engine); - drm_printf(m, "\tBBADDR: 0x%08x_%08x\n", - upper_32_bits(addr), lower_32_bits(addr)); - if (INTEL_GEN(dev_priv) >= 8) - addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW); - else if (INTEL_GEN(dev_priv) >= 4) - addr = ENGINE_READ(engine, RING_DMA_FADD); - else - addr = ENGINE_READ(engine, DMA_FADD_I8XX); - drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n", - upper_32_bits(addr), lower_32_bits(addr)); - if (INTEL_GEN(dev_priv) >= 4) { - drm_printf(m, "\tIPEIR: 0x%08x\n", - ENGINE_READ(engine, RING_IPEIR)); - drm_printf(m, "\tIPEHR: 0x%08x\n", - ENGINE_READ(engine, RING_IPEHR)); - } else { - drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR)); - drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR)); - } - - if (HAS_EXECLISTS(dev_priv)) { - const u32 *hws = - &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; - const u8 num_entries = execlists->csb_size; - unsigned int idx; - u8 read, write; - - drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n", - ENGINE_READ(engine, RING_EXECLIST_STATUS_LO), - ENGINE_READ(engine, RING_EXECLIST_STATUS_HI), - num_entries); - - read = execlists->csb_head; - write = READ_ONCE(*execlists->csb_write); - - drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? 
%s (%s)\n", - read, write, - yesno(test_bit(TASKLET_STATE_SCHED, - &engine->execlists.tasklet.state)), - enableddisabled(!atomic_read(&engine->execlists.tasklet.count))); - if (read >= num_entries) - read = 0; - if (write >= num_entries) - write = 0; - if (read > write) - write += num_entries; - while (read < write) { - idx = ++read % num_entries; - drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n", - idx, hws[idx * 2], hws[idx * 2 + 1]); - } - - rcu_read_lock(); - for (idx = 0; idx < execlists_num_ports(execlists); idx++) { - struct i915_request *rq; - unsigned int count; - - rq = port_unpack(&execlists->port[idx], &count); - if (rq) { - char hdr[80]; - - snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ", - idx, count, - i915_ggtt_offset(rq->ring->vma), - rq->timeline->hwsp_offset, - hwsp_seqno(rq)); - print_request(m, rq, hdr); - } else { - drm_printf(m, "\t\tELSP[%d] idle\n", idx); - } - } - drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active); - rcu_read_unlock(); - } else if (INTEL_GEN(dev_priv) > 6) { - drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", - ENGINE_READ(engine, RING_PP_DIR_BASE)); - drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", - ENGINE_READ(engine, RING_PP_DIR_BASE_READ)); - drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", - ENGINE_READ(engine, RING_PP_DIR_DCLV)); - } -} - -static void print_request_ring(struct drm_printer *m, struct i915_request *rq) -{ - void *ring; - int size; - - drm_printf(m, - "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n", - rq->head, rq->postfix, rq->tail, - rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, - rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); - - size = rq->tail - rq->head; - if (rq->tail < rq->head) - size += rq->ring->size; - - ring = kmalloc(size, GFP_ATOMIC); - if (ring) { - const void *vaddr = rq->ring->vaddr; - unsigned int head = rq->head; - unsigned int len = 0; - - if (rq->tail < head) { - len = rq->ring->size - head; - memcpy(ring, vaddr + head, len); - head = 0; - } - memcpy(ring + len, vaddr + head, size - len); - - hexdump(m, ring, size); - kfree(ring); - } -} - -void intel_engine_dump(struct intel_engine_cs *engine, - struct drm_printer *m, - const char *header, ...) 
-{ - struct i915_gpu_error * const error = &engine->i915->gpu_error; - struct i915_request *rq; - intel_wakeref_t wakeref; - - if (header) { - va_list ap; - - va_start(ap, header); - drm_vprintf(m, header, &ap); - va_end(ap); - } - - if (i915_reset_failed(engine->i915)) - drm_printf(m, "*** WEDGED ***\n"); - - drm_printf(m, "\tHangcheck %x:%x [%d ms]\n", - engine->hangcheck.last_seqno, - engine->hangcheck.next_seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); - drm_printf(m, "\tReset count: %d (global %d)\n", - i915_reset_engine_count(error, engine), - i915_reset_count(error)); - - rcu_read_lock(); - - drm_printf(m, "\tRequests:\n"); - - rq = list_first_entry(&engine->timeline.requests, - struct i915_request, link); - if (&rq->link != &engine->timeline.requests) - print_request(m, rq, "\t\tfirst "); - - rq = list_last_entry(&engine->timeline.requests, - struct i915_request, link); - if (&rq->link != &engine->timeline.requests) - print_request(m, rq, "\t\tlast "); - - rq = intel_engine_find_active_request(engine); - if (rq) { - print_request(m, rq, "\t\tactive "); - - drm_printf(m, "\t\tring->start: 0x%08x\n", - i915_ggtt_offset(rq->ring->vma)); - drm_printf(m, "\t\tring->head: 0x%08x\n", - rq->ring->head); - drm_printf(m, "\t\tring->tail: 0x%08x\n", - rq->ring->tail); - drm_printf(m, "\t\tring->emit: 0x%08x\n", - rq->ring->emit); - drm_printf(m, "\t\tring->space: 0x%08x\n", - rq->ring->space); - drm_printf(m, "\t\tring->hwsp: 0x%08x\n", - rq->timeline->hwsp_offset); - - print_request_ring(m, rq); - } - - rcu_read_unlock(); - - wakeref = intel_runtime_pm_get_if_in_use(engine->i915); - if (wakeref) { - intel_engine_print_registers(engine, m); - intel_runtime_pm_put(engine->i915, wakeref); - } else { - drm_printf(m, "\tDevice is asleep; skipping register dump\n"); - } - - intel_execlists_show_requests(engine, m, print_request, 8); - - drm_printf(m, "HWSP:\n"); - hexdump(m, engine->status_page.addr, PAGE_SIZE); - - drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); - - intel_engine_print_breadcrumbs(engine, m); -} - -static u8 user_class_map[] = { - [I915_ENGINE_CLASS_RENDER] = RENDER_CLASS, - [I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS, - [I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS, - [I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS, -}; - -struct intel_engine_cs * -intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) -{ - if (class >= ARRAY_SIZE(user_class_map)) - return NULL; - - class = user_class_map[class]; - - GEM_BUG_ON(class > MAX_ENGINE_CLASS); - - if (instance > MAX_ENGINE_INSTANCE) - return NULL; - - return i915->engine_class[class][instance]; -} - -/** - * intel_enable_engine_stats() - Enable engine busy tracking on engine - * @engine: engine to enable stats collection - * - * Start collecting the engine busyness data for @engine. - * - * Returns 0 on success or a negative error code. 
- */ -int intel_enable_engine_stats(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists *execlists = &engine->execlists; - unsigned long flags; - int err = 0; - - if (!intel_engine_supports_stats(engine)) - return -ENODEV; - - spin_lock_irqsave(&engine->timeline.lock, flags); - write_seqlock(&engine->stats.lock); - - if (unlikely(engine->stats.enabled == ~0)) { - err = -EBUSY; - goto unlock; - } - - if (engine->stats.enabled++ == 0) { - const struct execlist_port *port = execlists->port; - unsigned int num_ports = execlists_num_ports(execlists); - - engine->stats.enabled_at = ktime_get(); - - /* XXX submission method oblivious? */ - while (num_ports-- && port_isset(port)) { - engine->stats.active++; - port++; - } - - if (engine->stats.active) - engine->stats.start = engine->stats.enabled_at; - } - -unlock: - write_sequnlock(&engine->stats.lock); - spin_unlock_irqrestore(&engine->timeline.lock, flags); - - return err; -} - -static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) -{ - ktime_t total = engine->stats.total; - - /* - * If the engine is executing something at the moment - * add it to the total. - */ - if (engine->stats.active) - total = ktime_add(total, - ktime_sub(ktime_get(), engine->stats.start)); - - return total; -} - -/** - * intel_engine_get_busy_time() - Return current accumulated engine busyness - * @engine: engine to report on - * - * Returns accumulated time @engine was busy since engine stats were enabled. - */ -ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine) -{ - unsigned int seq; - ktime_t total; - - do { - seq = read_seqbegin(&engine->stats.lock); - total = __intel_engine_get_busy_time(engine); - } while (read_seqretry(&engine->stats.lock, seq)); - - return total; -} - -/** - * intel_disable_engine_stats() - Disable engine busy tracking on engine - * @engine: engine to disable stats collection - * - * Stops collecting the engine busyness data for @engine. - */ -void intel_disable_engine_stats(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (!intel_engine_supports_stats(engine)) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - WARN_ON_ONCE(engine->stats.enabled == 0); - if (--engine->stats.enabled == 0) { - engine->stats.total = __intel_engine_get_busy_time(engine); - engine->stats.active = 0; - } - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - -static bool match_ring(struct i915_request *rq) -{ - u32 ring = ENGINE_READ(rq->engine, RING_START); - - return ring == i915_ggtt_offset(rq->ring->vma); -} - -struct i915_request * -intel_engine_find_active_request(struct intel_engine_cs *engine) -{ - struct i915_request *request, *active = NULL; - unsigned long flags; - - /* - * We are called by the error capture, reset and to dump engine - * state at random points in time. In particular, note that neither is - * crucially ordered with an interrupt. After a hang, the GPU is dead - * and we assume that no more writes can happen (we waited long enough - * for all writes that were in transaction to be flushed) - adding an - * extra delay for a recent interrupt is pointless. Hence, we do - * not need an engine->irq_seqno_barrier() before the seqno reads. - * At all other times, we must assume the GPU is still running, but - * we only care about the snapshot of this moment. 
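As a minimal usage sketch of the engine busyness API above (illustrative only; the
helper name and the msleep()/pr_info() reporting are ad hoc, not taken from the
driver):

	/*
	 * Enable stats, sample the accumulated busy time twice and report the
	 * delta. Needs <linux/delay.h> and <linux/ktime.h>.
	 */
	static void demo_engine_busyness(struct intel_engine_cs *engine)
	{
		ktime_t t0, t1;

		if (intel_enable_engine_stats(engine))
			return; /* e.g. -ENODEV if the engine lacks stats support */

		t0 = intel_engine_get_busy_time(engine);
		msleep(1000);
		t1 = intel_engine_get_busy_time(engine);
		pr_info("%s busy for %lld ns over the last second\n",
			engine->name, ktime_to_ns(ktime_sub(t1, t0)));

		intel_disable_engine_stats(engine);
	}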
- */ - spin_lock_irqsave(&engine->timeline.lock, flags); - list_for_each_entry(request, &engine->timeline.requests, link) { - if (i915_request_completed(request)) - continue; - - if (!i915_request_started(request)) - break; - - /* More than one preemptible request may match! */ - if (!match_ring(request)) - break; - - active = request; - break; - } - spin_unlock_irqrestore(&engine->timeline.lock, flags); - - return active; -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_engine.c" -#include "selftests/intel_engine_cs.c" -#endif diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h deleted file mode 100644 index d07a01b3ed0b..000000000000 --- a/drivers/gpu/drm/i915/intel_engine_types.h +++ /dev/null @@ -1,549 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#ifndef __INTEL_ENGINE_TYPES__ -#define __INTEL_ENGINE_TYPES__ - -#include -#include -#include -#include -#include - -#include "i915_gem.h" -#include "i915_priolist_types.h" -#include "i915_selftest.h" -#include "i915_timeline_types.h" -#include "intel_sseu.h" -#include "intel_workarounds_types.h" - -#include "i915_gem_batch_pool.h" -#include "i915_pmu.h" - -#define I915_MAX_SLICES 3 -#define I915_MAX_SUBSLICES 8 - -#define I915_CMD_HASH_ORDER 9 - -struct dma_fence; -struct drm_i915_reg_table; -struct i915_gem_context; -struct i915_request; -struct i915_sched_attr; -struct intel_uncore; - -typedef u8 intel_engine_mask_t; -#define ALL_ENGINES ((intel_engine_mask_t)~0ul) - -struct intel_hw_status_page { - struct i915_vma *vma; - u32 *addr; -}; - -struct intel_instdone { - u32 instdone; - /* The following exist only in the RCS engine */ - u32 slice_common; - u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; - u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; -}; - -struct intel_engine_hangcheck { - u64 acthd; - u32 last_seqno; - u32 next_seqno; - unsigned long action_timestamp; - struct intel_instdone instdone; -}; - -struct intel_ring { - struct kref ref; - struct i915_vma *vma; - void *vaddr; - - struct i915_timeline *timeline; - struct list_head request_list; - struct list_head active_link; - - u32 head; - u32 tail; - u32 emit; - - u32 space; - u32 size; - u32 effective_size; -}; - -/* - * we use a single page to load ctx workarounds so all of these - * values are referred in terms of dwords - * - * struct i915_wa_ctx_bb: - * offset: specifies batch starting position, also helpful in case - * if we want to have multiple batches at different offsets based on - * some criteria. It is not a requirement at the moment but provides - * an option for future use. - * size: size of the batch in DWORDS - */ -struct i915_ctx_workarounds { - struct i915_wa_ctx_bb { - u32 offset; - u32 size; - } indirect_ctx, per_ctx; - struct i915_vma *vma; -}; - -#define I915_MAX_VCS 4 -#define I915_MAX_VECS 2 - -/* - * Engine IDs definitions. - * Keep instances of the same type engine together. - */ -enum intel_engine_id { - RCS0 = 0, - BCS0, - VCS0, - VCS1, - VCS2, - VCS3, -#define _VCS(n) (VCS0 + (n)) - VECS0, - VECS1, -#define _VECS(n) (VECS0 + (n)) - I915_NUM_ENGINES -}; - -struct st_preempt_hang { - struct completion completion; - unsigned int count; - bool inject_hang; -}; - -/** - * struct intel_engine_execlists - execlist submission queue and port state - * - * The struct intel_engine_execlists represents the combined logical state of - * driver and the hardware state for execlist mode of submission. 
- */ -struct intel_engine_execlists { - /** - * @tasklet: softirq tasklet for bottom handler - */ - struct tasklet_struct tasklet; - - /** - * @default_priolist: priority list for I915_PRIORITY_NORMAL - */ - struct i915_priolist default_priolist; - - /** - * @no_priolist: priority lists disabled - */ - bool no_priolist; - - /** - * @submit_reg: gen-specific execlist submission register - * set to the ExecList Submission Port (elsp) register pre-Gen11 and to - * the ExecList Submission Queue Contents register array for Gen11+ - */ - u32 __iomem *submit_reg; - - /** - * @ctrl_reg: the enhanced execlists control register, used to load the - * submit queue on the HW and to request preemptions to idle - */ - u32 __iomem *ctrl_reg; - - /** - * @port: execlist port states - * - * For each hardware ELSP (ExecList Submission Port) we keep - * track of the last request and the number of times we submitted - * that port to hw. We then count the number of times the hw reports - * a context completion or preemption. As only one context can - * be active on hw, we limit resubmission of context to port[0]. This - * is called Lite Restore, of the context. - */ - struct execlist_port { - /** - * @request_count: combined request and submission count - */ - struct i915_request *request_count; -#define EXECLIST_COUNT_BITS 2 -#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) -#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) -#define port_set(p, packed) ((p)->request_count = (packed)) -#define port_isset(p) ((p)->request_count) -#define port_index(p, execlists) ((p) - (execlists)->port) - - /** - * @context_id: context ID for port - */ - GEM_DEBUG_DECL(u32 context_id); - -#define EXECLIST_MAX_PORTS 2 - } port[EXECLIST_MAX_PORTS]; - - /** - * @active: is the HW active? We consider the HW as active after - * submitting any context for execution and until we have seen the - * last context completion event. After that, we do not expect any - * more events until we submit, and so can park the HW. - * - * As we have a small number of different sources from which we feed - * the HW, we track the state of each inside a single bitfield. - */ - unsigned int active; -#define EXECLISTS_ACTIVE_USER 0 -#define EXECLISTS_ACTIVE_PREEMPT 1 -#define EXECLISTS_ACTIVE_HWACK 2 - - /** - * @port_mask: number of execlist ports - 1 - */ - unsigned int port_mask; - - /** - * @queue_priority_hint: Highest pending priority. - * - * When we add requests into the queue, or adjust the priority of - * executing requests, we compute the maximum priority of those - * pending requests. We can then use this value to determine if - * we need to preempt the executing requests to service the queue. - * However, since the we may have recorded the priority of an inflight - * request we wanted to preempt but since completed, at the time of - * dequeuing the priority hint may no longer may match the highest - * available request priority. - */ - int queue_priority_hint; - - /** - * @queue: queue of requests, in priority lists - */ - struct rb_root_cached queue; - - /** - * @csb_write: control register for Context Switch buffer - * - * Note this register may be either mmio or HWSP shadow. 
- */ - u32 *csb_write; - - /** - * @csb_status: status array for Context Switch buffer - * - * Note these register may be either mmio or HWSP shadow. - */ - u32 *csb_status; - - /** - * @preempt_complete_status: expected CSB upon completing preemption - */ - u32 preempt_complete_status; - - /** - * @csb_size: context status buffer FIFO size - */ - u8 csb_size; - - /** - * @csb_head: context status buffer head - */ - u8 csb_head; - - I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;) -}; - -#define INTEL_ENGINE_CS_MAX_NAME 8 - -struct intel_engine_cs { - struct drm_i915_private *i915; - struct intel_uncore *uncore; - char name[INTEL_ENGINE_CS_MAX_NAME]; - - enum intel_engine_id id; - unsigned int hw_id; - unsigned int guc_id; - intel_engine_mask_t mask; - - u8 uabi_class; - - u8 class; - u8 instance; - u32 context_size; - u32 mmio_base; - - struct intel_sseu sseu; - - struct intel_ring *buffer; - - struct i915_timeline timeline; - - struct intel_context *kernel_context; /* pinned */ - struct intel_context *preempt_context; /* pinned; optional */ - - struct drm_i915_gem_object *default_state; - void *pinned_default_state; - - /* Rather than have every client wait upon all user interrupts, - * with the herd waking after every interrupt and each doing the - * heavyweight seqno dance, we delegate the task (of being the - * bottom-half of the user interrupt) to the first client. After - * every interrupt, we wake up one client, who does the heavyweight - * coherent seqno read and either goes back to sleep (if incomplete), - * or wakes up all the completed clients in parallel, before then - * transferring the bottom-half status to the next client in the queue. - * - * Compared to walking the entire list of waiters in a single dedicated - * bottom-half, we reduce the latency of the first waiter by avoiding - * a context switch, but incur additional coherent seqno reads when - * following the chain of request breadcrumbs. Since it is most likely - * that we have a single client waiting on each seqno, then reducing - * the overhead of waking that client is much preferred. - */ - struct intel_breadcrumbs { - spinlock_t irq_lock; - struct list_head signalers; - - struct irq_work irq_work; /* for use from inside irq_lock */ - - unsigned int irq_enabled; - - bool irq_armed; - } breadcrumbs; - - struct intel_engine_pmu { - /** - * @enable: Bitmask of enable sample events on this engine. - * - * Bits correspond to sample event types, for instance - * I915_SAMPLE_QUEUED is bit 0 etc. - */ - u32 enable; - /** - * @enable_count: Reference count for the enabled samplers. - * - * Index number corresponds to @enum drm_i915_pmu_engine_sample. - */ - unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT]; - /** - * @sample: Counter values for sampling events. - * - * Our internal timer stores the current counters in this field. - * - * Index number corresponds to @enum drm_i915_pmu_engine_sample. - */ - struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; - } pmu; - - /* - * A pool of objects to use as shadow copies of client batch buffers - * when the command parser is enabled. Prevents the client from - * modifying the batch contents after software parsing. 
- */ - struct i915_gem_batch_pool batch_pool; - - struct intel_hw_status_page status_page; - struct i915_ctx_workarounds wa_ctx; - struct i915_wa_list ctx_wa_list; - struct i915_wa_list wa_list; - struct i915_wa_list whitelist; - - u32 irq_keep_mask; /* always keep these interrupts */ - u32 irq_enable_mask; /* bitmask to enable ring interrupt */ - void (*irq_enable)(struct intel_engine_cs *engine); - void (*irq_disable)(struct intel_engine_cs *engine); - - int (*init_hw)(struct intel_engine_cs *engine); - - struct { - void (*prepare)(struct intel_engine_cs *engine); - void (*reset)(struct intel_engine_cs *engine, bool stalled); - void (*finish)(struct intel_engine_cs *engine); - } reset; - - void (*park)(struct intel_engine_cs *engine); - void (*unpark)(struct intel_engine_cs *engine); - - void (*set_default_submission)(struct intel_engine_cs *engine); - - const struct intel_context_ops *cops; - - int (*request_alloc)(struct i915_request *rq); - int (*init_context)(struct i915_request *rq); - - int (*emit_flush)(struct i915_request *request, u32 mode); -#define EMIT_INVALIDATE BIT(0) -#define EMIT_FLUSH BIT(1) -#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) - int (*emit_bb_start)(struct i915_request *rq, - u64 offset, u32 length, - unsigned int dispatch_flags); -#define I915_DISPATCH_SECURE BIT(0) -#define I915_DISPATCH_PINNED BIT(1) - int (*emit_init_breadcrumb)(struct i915_request *rq); - u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, - u32 *cs); - unsigned int emit_fini_breadcrumb_dw; - - /* Pass the request to the hardware queue (e.g. directly into - * the legacy ringbuffer or to the end of an execlist). - * - * This is called from an atomic context with irqs disabled; must - * be irq safe. - */ - void (*submit_request)(struct i915_request *rq); - - /* - * Call when the priority on a request has changed and it and its - * dependencies may need rescheduling. Note the request itself may - * not be ready to run! - */ - void (*schedule)(struct i915_request *request, - const struct i915_sched_attr *attr); - - /* - * Cancel all requests on the hardware, or queued for execution. - * This should only cancel the ready requests that have been - * submitted to the engine (via the engine->submit_request callback). - * This is called when marking the device as wedged. - */ - void (*cancel_requests)(struct intel_engine_cs *engine); - - void (*cleanup)(struct intel_engine_cs *engine); - - struct intel_engine_execlists execlists; - - /* Contexts are pinned whilst they are active on the GPU. The last - * context executed remains active whilst the GPU is idle - the - * switch away and write to the context object only occurs on the - * next execution. Contexts are only unpinned on retirement of the - * following request ensuring that we can always write to the object - * on the context switch even after idling. Across suspend, we switch - * to the kernel context and trash it as the save may not happen - * before the hardware is powered down. 
- */ - struct intel_context *last_retired_context; - - /* status_notifier: list of callbacks for context-switch changes */ - struct atomic_notifier_head context_status_notifier; - - struct intel_engine_hangcheck hangcheck; - -#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) -#define I915_ENGINE_SUPPORTS_STATS BIT(1) -#define I915_ENGINE_HAS_PREEMPTION BIT(2) -#define I915_ENGINE_HAS_SEMAPHORES BIT(3) -#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) - unsigned int flags; - - /* - * Table of commands the command parser needs to know about - * for this engine. - */ - DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); - - /* - * Table of registers allowed in commands that read/write registers. - */ - const struct drm_i915_reg_table *reg_tables; - int reg_table_count; - - /* - * Returns the bitmask for the length field of the specified command. - * Return 0 for an unrecognized/invalid command. - * - * If the command parser finds an entry for a command in the engine's - * cmd_tables, it gets the command's length based on the table entry. - * If not, it calls this function to determine the per-engine length - * field encoding for the command (i.e. different opcode ranges use - * certain bits to encode the command length in the header). - */ - u32 (*get_cmd_length_mask)(u32 cmd_header); - - struct { - /** - * @lock: Lock protecting the below fields. - */ - seqlock_t lock; - /** - * @enabled: Reference count indicating number of listeners. - */ - unsigned int enabled; - /** - * @active: Number of contexts currently scheduled in. - */ - unsigned int active; - /** - * @enabled_at: Timestamp when busy stats were enabled. - */ - ktime_t enabled_at; - /** - * @start: Timestamp of the last idle to active transition. - * - * Idle is defined as active == 0, active is active > 0. - */ - ktime_t start; - /** - * @total: Total time this engine was busy. - * - * Accumulated time not counting the most recent block in cases - * where engine is currently busy (active > 0). - */ - ktime_t total; - } stats; -}; - -static inline bool -intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) -{ - return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; -} - -static inline bool -intel_engine_supports_stats(const struct intel_engine_cs *engine) -{ - return engine->flags & I915_ENGINE_SUPPORTS_STATS; -} - -static inline bool -intel_engine_has_preemption(const struct intel_engine_cs *engine) -{ - return engine->flags & I915_ENGINE_HAS_PREEMPTION; -} - -static inline bool -intel_engine_has_semaphores(const struct intel_engine_cs *engine) -{ - return engine->flags & I915_ENGINE_HAS_SEMAPHORES; -} - -static inline bool -intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine) -{ - return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; -} - -#define instdone_slice_mask(dev_priv__) \ - (IS_GEN(dev_priv__, 7) ? \ - 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) - -#define instdone_subslice_mask(dev_priv__) \ - (IS_GEN(dev_priv__, 7) ? \ - 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0]) - -#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ - for ((slice__) = 0, (subslice__) = 0; \ - (slice__) < I915_MAX_SLICES; \ - (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? 
(subslice__) + 1 : 0, \ - (slice__) += ((subslice__) == 0)) \ - for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \ - (BIT(subslice__) & instdone_subslice_mask(dev_priv__))) - -#endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h deleted file mode 100644 index a34ece53a771..000000000000 --- a/drivers/gpu/drm/i915/intel_gpu_commands.h +++ /dev/null @@ -1,278 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright � 2003-2018 Intel Corporation - */ - -#ifndef _INTEL_GPU_COMMANDS_H_ -#define _INTEL_GPU_COMMANDS_H_ - -/* - * Instruction field definitions used by the command parser - */ -#define INSTR_CLIENT_SHIFT 29 -#define INSTR_MI_CLIENT 0x0 -#define INSTR_BC_CLIENT 0x2 -#define INSTR_RC_CLIENT 0x3 -#define INSTR_SUBCLIENT_SHIFT 27 -#define INSTR_SUBCLIENT_MASK 0x18000000 -#define INSTR_MEDIA_SUBCLIENT 0x2 -#define INSTR_26_TO_24_MASK 0x7000000 -#define INSTR_26_TO_24_SHIFT 24 - -/* - * Memory interface instructions used by the kernel - */ -#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) -/* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */ -#define MI_GLOBAL_GTT (1<<22) - -#define MI_NOOP MI_INSTR(0, 0) -#define MI_USER_INTERRUPT MI_INSTR(0x02, 0) -#define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0) -#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16) -#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) -#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) -#define MI_WAIT_FOR_PLANE_A_SCANLINES (1<<1) -#define MI_FLUSH MI_INSTR(0x04, 0) -#define MI_READ_FLUSH (1 << 0) -#define MI_EXE_FLUSH (1 << 1) -#define MI_NO_WRITE_FLUSH (1 << 2) -#define MI_SCENE_COUNT (1 << 3) /* just increment scene count */ -#define MI_END_SCENE (1 << 4) /* flush binner and incr scene count */ -#define MI_INVALIDATE_ISP (1 << 5) /* invalidate indirect state pointers */ -#define MI_REPORT_HEAD MI_INSTR(0x07, 0) -#define MI_ARB_ON_OFF MI_INSTR(0x08, 0) -#define MI_ARB_ENABLE (1<<0) -#define MI_ARB_DISABLE (0<<0) -#define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0) -#define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0) -#define MI_SUSPEND_FLUSH_EN (1<<0) -#define MI_SET_APPID MI_INSTR(0x0e, 0) -#define MI_OVERLAY_FLIP MI_INSTR(0x11, 0) -#define MI_OVERLAY_CONTINUE (0x0<<21) -#define MI_OVERLAY_ON (0x1<<21) -#define MI_OVERLAY_OFF (0x2<<21) -#define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0) -#define MI_DISPLAY_FLIP MI_INSTR(0x14, 2) -#define MI_DISPLAY_FLIP_I915 MI_INSTR(0x14, 1) -#define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20) -/* IVB has funny definitions for which plane to flip. 
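/*
 * Aside: a standalone sketch of how MI_INSTR() above packs the MI opcode
 * into bits 28:23 of the command header dword.  The expected values are
 * derived purely from the macros quoted above; nothing here is new API.
 */
#include <assert.h>
#include <stdint.h>

#define SKETCH_MI_INSTR(opcode, flags) (((uint32_t)(opcode) << 23) | (flags))

int main(void)
{
	/* MI_NOOP: opcode 0, no flags -> all-zero dword */
	assert(SKETCH_MI_INSTR(0, 0) == 0x00000000);
	/* MI_BATCH_BUFFER_END: opcode 0x0a -> 0x0a << 23 */
	assert(SKETCH_MI_INSTR(0x0a, 0) == 0x05000000);
	/* MI_ARB_ON_OFF with MI_ARB_ENABLE: opcode 0x08, bit 0 set */
	assert((SKETCH_MI_INSTR(0x08, 0) | 1u) == 0x04000001);
	return 0;
}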
*/ -#define MI_DISPLAY_FLIP_IVB_PLANE_A (0 << 19) -#define MI_DISPLAY_FLIP_IVB_PLANE_B (1 << 19) -#define MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19) -#define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19) -#define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19) -#define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19) -/* SKL ones */ -#define MI_DISPLAY_FLIP_SKL_PLANE_1_A (0 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_1_B (1 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_1_C (2 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_2_A (4 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_2_B (5 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_2_C (6 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_3_A (7 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_3_B (8 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_3_C (9 << 8) -#define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6, gen7 */ -#define MI_SEMAPHORE_GLOBAL_GTT (1<<22) -#define MI_SEMAPHORE_UPDATE (1<<21) -#define MI_SEMAPHORE_COMPARE (1<<20) -#define MI_SEMAPHORE_REGISTER (1<<18) -#define MI_SEMAPHORE_SYNC_VR (0<<16) /* RCS wait for VCS (RVSYNC) */ -#define MI_SEMAPHORE_SYNC_VER (1<<16) /* RCS wait for VECS (RVESYNC) */ -#define MI_SEMAPHORE_SYNC_BR (2<<16) /* RCS wait for BCS (RBSYNC) */ -#define MI_SEMAPHORE_SYNC_BV (0<<16) /* VCS wait for BCS (VBSYNC) */ -#define MI_SEMAPHORE_SYNC_VEV (1<<16) /* VCS wait for VECS (VVESYNC) */ -#define MI_SEMAPHORE_SYNC_RV (2<<16) /* VCS wait for RCS (VRSYNC) */ -#define MI_SEMAPHORE_SYNC_RB (0<<16) /* BCS wait for RCS (BRSYNC) */ -#define MI_SEMAPHORE_SYNC_VEB (1<<16) /* BCS wait for VECS (BVESYNC) */ -#define MI_SEMAPHORE_SYNC_VB (2<<16) /* BCS wait for VCS (BVSYNC) */ -#define MI_SEMAPHORE_SYNC_BVE (0<<16) /* VECS wait for BCS (VEBSYNC) */ -#define MI_SEMAPHORE_SYNC_VVE (1<<16) /* VECS wait for VCS (VEVSYNC) */ -#define MI_SEMAPHORE_SYNC_RVE (2<<16) /* VECS wait for RCS (VERSYNC) */ -#define MI_SEMAPHORE_SYNC_INVALID (3<<16) -#define MI_SEMAPHORE_SYNC_MASK (3<<16) -#define MI_SET_CONTEXT MI_INSTR(0x18, 0) -#define MI_MM_SPACE_GTT (1<<8) -#define MI_MM_SPACE_PHYSICAL (0<<8) -#define MI_SAVE_EXT_STATE_EN (1<<3) -#define MI_RESTORE_EXT_STATE_EN (1<<2) -#define MI_FORCE_RESTORE (1<<1) -#define MI_RESTORE_INHIBIT (1<<0) -#define HSW_MI_RS_SAVE_STATE_EN (1<<3) -#define HSW_MI_RS_RESTORE_STATE_EN (1<<2) -#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ -#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) -#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ -#define MI_SEMAPHORE_POLL (1 << 15) -#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) -#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) -#define MI_SEMAPHORE_SAD_LT_SDD (2 << 12) -#define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12) -#define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12) -#define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) -#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) -#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) -#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ -#define MI_USE_GGTT (1 << 22) /* g4x+ */ -#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) -/* - * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: - * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw - * simply ignores the register load under certain conditions. - * - One can actually load arbitrary many arbitrary registers: Simply issue x - * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! 
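/*
 * Aside: a standalone sketch of emitting MI_LOAD_REGISTER_IMM the way the
 * comment above advises - one MI_NOOP first, then one address/value pair.
 * The register offset 0x2244 and the value are placeholders chosen purely
 * for illustration; the encoding follows the MI_LOAD_REGISTER_IMM() macro
 * defined just below (opcode 0x22, length 2*x - 1).
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_MI_INSTR(op, flags)     (((uint32_t)(op) << 23) | (flags))
#define SKETCH_MI_NOOP                 SKETCH_MI_INSTR(0, 0)
#define SKETCH_MI_LOAD_REGISTER_IMM(x) SKETCH_MI_INSTR(0x22, 2 * (x) - 1)
#define SKETCH_MI_BATCH_BUFFER_END     SKETCH_MI_INSTR(0x0a, 0)

int main(void)
{
	const uint32_t batch[] = {
		SKETCH_MI_NOOP,                 /* guard NOOP before the LRI */
		SKETCH_MI_LOAD_REGISTER_IMM(1), /* one register/value pair */
		0x2244,                         /* placeholder mmio offset */
		0xdeadbeef,                     /* placeholder value */
		SKETCH_MI_BATCH_BUFFER_END,
	};

	for (unsigned int i = 0; i < sizeof(batch) / sizeof(batch[0]); i++)
		printf("dword[%u] = 0x%08x\n", i, batch[i]);
	return 0;
}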
- */ -#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) -#define MI_LRI_FORCE_POSTED (1<<12) -#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) -#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) -#define MI_SRM_LRM_GLOBAL_GTT (1<<22) -#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */ -#define MI_FLUSH_DW_STORE_INDEX (1<<21) -#define MI_INVALIDATE_TLB (1<<18) -#define MI_FLUSH_DW_OP_STOREDW (1<<14) -#define MI_FLUSH_DW_OP_MASK (3<<14) -#define MI_FLUSH_DW_NOTIFY (1<<8) -#define MI_INVALIDATE_BSD (1<<7) -#define MI_FLUSH_DW_USE_GTT (1<<2) -#define MI_FLUSH_DW_USE_PPGTT (0<<2) -#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1) -#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2) -#define MI_BATCH_BUFFER MI_INSTR(0x30, 1) -#define MI_BATCH_NON_SECURE (1) -/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ -#define MI_BATCH_NON_SECURE_I965 (1<<8) -#define MI_BATCH_PPGTT_HSW (1<<8) -#define MI_BATCH_NON_SECURE_HSW (1<<13) -#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) -#define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ -#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) -#define MI_BATCH_RESOURCE_STREAMER (1<<10) - -/* - * 3D instructions used by the kernel - */ -#define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags)) - -#define GEN9_MEDIA_POOL_STATE ((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4) -#define GEN9_MEDIA_POOL_ENABLE (1 << 31) -#define GFX_OP_RASTER_RULES ((0x3<<29)|(0x7<<24)) -#define GFX_OP_SCISSOR ((0x3<<29)|(0x1c<<24)|(0x10<<19)) -#define SC_UPDATE_SCISSOR (0x1<<1) -#define SC_ENABLE_MASK (0x1<<0) -#define SC_ENABLE (0x1<<0) -#define GFX_OP_LOAD_INDIRECT ((0x3<<29)|(0x1d<<24)|(0x7<<16)) -#define GFX_OP_SCISSOR_INFO ((0x3<<29)|(0x1d<<24)|(0x81<<16)|(0x1)) -#define SCI_YMIN_MASK (0xffff<<16) -#define SCI_XMIN_MASK (0xffff<<0) -#define SCI_YMAX_MASK (0xffff<<16) -#define SCI_XMAX_MASK (0xffff<<0) -#define GFX_OP_SCISSOR_ENABLE ((0x3<<29)|(0x1c<<24)|(0x10<<19)) -#define GFX_OP_SCISSOR_RECT ((0x3<<29)|(0x1d<<24)|(0x81<<16)|1) -#define GFX_OP_COLOR_FACTOR ((0x3<<29)|(0x1d<<24)|(0x1<<16)|0x0) -#define GFX_OP_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) -#define GFX_OP_MAP_INFO ((0x3<<29)|(0x1d<<24)|0x4) -#define GFX_OP_DESTBUFFER_VARS ((0x3<<29)|(0x1d<<24)|(0x85<<16)|0x0) -#define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1) -#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) -#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) - -#define COLOR_BLT_CMD (2<<29 | 0x40<<22 | (5-2)) -#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4) -#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) -#define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5) -#define BLT_WRITE_A (2<<20) -#define BLT_WRITE_RGB (1<<20) -#define BLT_WRITE_RGBA (BLT_WRITE_RGB | BLT_WRITE_A) -#define BLT_DEPTH_8 (0<<24) -#define BLT_DEPTH_16_565 (1<<24) -#define BLT_DEPTH_16_1555 (2<<24) -#define BLT_DEPTH_32 (3<<24) -#define BLT_ROP_SRC_COPY (0xcc<<16) -#define BLT_ROP_COLOR_COPY (0xf0<<16) -#define XY_SRC_COPY_BLT_SRC_TILED (1<<15) /* 965+ only */ -#define XY_SRC_COPY_BLT_DST_TILED (1<<11) /* 965+ only */ -#define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2) -#define ASYNC_FLIP (1<<22) -#define DISPLAY_PLANE_A (0<<20) -#define DISPLAY_PLANE_B (1<<20) -#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) -#define PIPE_CONTROL_FLUSH_L3 (1<<27) -#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ -#define PIPE_CONTROL_MMIO_WRITE (1<<23) -#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) -#define 
PIPE_CONTROL_CS_STALL (1<<20) -#define PIPE_CONTROL_TLB_INVALIDATE (1<<18) -#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) -#define PIPE_CONTROL_QW_WRITE (1<<14) -#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) -#define PIPE_CONTROL_DEPTH_STALL (1<<13) -#define PIPE_CONTROL_WRITE_FLUSH (1<<12) -#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ -#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */ -#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ -#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) -#define PIPE_CONTROL_NOTIFY (1<<8) -#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ -#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) -#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) -#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) -#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) -#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) -#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) -#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */ - -/* - * Commands used only by the command parser - */ -#define MI_SET_PREDICATE MI_INSTR(0x01, 0) -#define MI_ARB_CHECK MI_INSTR(0x05, 0) -#define MI_RS_CONTROL MI_INSTR(0x06, 0) -#define MI_URB_ATOMIC_ALLOC MI_INSTR(0x09, 0) -#define MI_PREDICATE MI_INSTR(0x0C, 0) -#define MI_RS_CONTEXT MI_INSTR(0x0F, 0) -#define MI_TOPOLOGY_FILTER MI_INSTR(0x0D, 0) -#define MI_LOAD_SCAN_LINES_EXCL MI_INSTR(0x13, 0) -#define MI_URB_CLEAR MI_INSTR(0x19, 0) -#define MI_UPDATE_GTT MI_INSTR(0x23, 0) -#define MI_CLFLUSH MI_INSTR(0x27, 0) -#define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0) -#define MI_REPORT_PERF_COUNT_GGTT (1<<0) -#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0) -#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0) -#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0) -#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0) -#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0) - -#define PIPELINE_SELECT ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16)) -#define GFX_OP_3DSTATE_VF_STATISTICS ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16)) -#define MEDIA_VFE_STATE ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16)) -#define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18) -#define GPGPU_OBJECT ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16)) -#define GPGPU_WALKER ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16)) -#define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x39<<16)) -#define GFX_OP_3DSTATE_DX9_CONSTANTF_PS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x3A<<16)) -#define GFX_OP_3DSTATE_SO_DECL_LIST \ - ((0x3<<29)|(0x3<<27)|(0x1<<24)|(0x17<<16)) - -#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x43<<16)) -#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x44<<16)) -#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x45<<16)) -#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x46<<16)) -#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16)) - -#define MFX_WAIT ((0x3<<29)|(0x1<<27)|(0x0<<16)) - -#define COLOR_BLT ((0x2<<29)|(0x40<<22)) -#define SRC_COPY_BLT ((0x2<<29)|(0x43<<22)) - -#endif /* _INTEL_GPU_COMMANDS_H_ */ diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 37f60cb8e9e1..1b6d6403ee92 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -25,8 +25,9 @@ #include #include +#include "gt/intel_lrc_reg.h" + #include "intel_guc_submission.h" -#include "intel_lrc_reg.h" #include 
"i915_drv.h" #define GUC_PREEMPT_FINISHED 0x1 diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h index aa5e6749c925..7d823a513b9c 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/intel_guc_submission.h @@ -27,9 +27,10 @@ #include +#include "gt/intel_engine_types.h" + #include "i915_gem.h" #include "i915_selftest.h" -#include "intel_engine_types.h" struct drm_i915_private; diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c deleted file mode 100644 index 3d51ed1428d4..000000000000 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "i915_drv.h" -#include "i915_reset.h" - -struct hangcheck { - u64 acthd; - u32 seqno; - enum intel_engine_hangcheck_action action; - unsigned long action_timestamp; - int deadlock; - struct intel_instdone instdone; - bool wedged:1; - bool stalled:1; -}; - -static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) -{ - u32 tmp = current_instdone | *old_instdone; - bool unchanged; - - unchanged = tmp == *old_instdone; - *old_instdone |= tmp; - - return unchanged; -} - -static bool subunits_stuck(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct intel_instdone instdone; - struct intel_instdone *accu_instdone = &engine->hangcheck.instdone; - bool stuck; - int slice; - int subslice; - - if (engine->id != RCS0) - return true; - - intel_engine_get_instdone(engine, &instdone); - - /* There might be unstable subunit states even when - * actual head is not moving. Filter out the unstable ones by - * accumulating the undone -> done transitions and only - * consider those as progress. 
- */ - stuck = instdone_unchanged(instdone.instdone, - &accu_instdone->instdone); - stuck &= instdone_unchanged(instdone.slice_common, - &accu_instdone->slice_common); - - for_each_instdone_slice_subslice(dev_priv, slice, subslice) { - stuck &= instdone_unchanged(instdone.sampler[slice][subslice], - &accu_instdone->sampler[slice][subslice]); - stuck &= instdone_unchanged(instdone.row[slice][subslice], - &accu_instdone->row[slice][subslice]); - } - - return stuck; -} - -static enum intel_engine_hangcheck_action -head_stuck(struct intel_engine_cs *engine, u64 acthd) -{ - if (acthd != engine->hangcheck.acthd) { - - /* Clear subunit states on head movement */ - memset(&engine->hangcheck.instdone, 0, - sizeof(engine->hangcheck.instdone)); - - return ENGINE_ACTIVE_HEAD; - } - - if (!subunits_stuck(engine)) - return ENGINE_ACTIVE_SUBUNITS; - - return ENGINE_DEAD; -} - -static enum intel_engine_hangcheck_action -engine_stuck(struct intel_engine_cs *engine, u64 acthd) -{ - struct drm_i915_private *dev_priv = engine->i915; - enum intel_engine_hangcheck_action ha; - u32 tmp; - - ha = head_stuck(engine, acthd); - if (ha != ENGINE_DEAD) - return ha; - - if (IS_GEN(dev_priv, 2)) - return ENGINE_DEAD; - - /* Is the chip hanging on a WAIT_FOR_EVENT? - * If so we can simply poke the RB_WAIT bit - * and break the hang. This should work on - * all but the second generation chipsets. - */ - tmp = ENGINE_READ(engine, RING_CTL); - if (tmp & RING_WAIT) { - i915_handle_error(dev_priv, engine->mask, 0, - "stuck wait on %s", engine->name); - ENGINE_WRITE(engine, RING_CTL, tmp); - return ENGINE_WAIT_KICK; - } - - return ENGINE_DEAD; -} - -static void hangcheck_load_sample(struct intel_engine_cs *engine, - struct hangcheck *hc) -{ - hc->acthd = intel_engine_get_active_head(engine); - hc->seqno = intel_engine_get_hangcheck_seqno(engine); -} - -static void hangcheck_store_sample(struct intel_engine_cs *engine, - const struct hangcheck *hc) -{ - engine->hangcheck.acthd = hc->acthd; - engine->hangcheck.last_seqno = hc->seqno; -} - -static enum intel_engine_hangcheck_action -hangcheck_get_action(struct intel_engine_cs *engine, - const struct hangcheck *hc) -{ - if (engine->hangcheck.last_seqno != hc->seqno) - return ENGINE_ACTIVE_SEQNO; - - if (intel_engine_is_idle(engine)) - return ENGINE_IDLE; - - return engine_stuck(engine, hc->acthd); -} - -static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, - struct hangcheck *hc) -{ - unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT; - - hc->action = hangcheck_get_action(engine, hc); - - /* We always increment the progress - * if the engine is busy and still processing - * the same request, so that no single request - * can run indefinitely (such as a chain of - * batches). The only time we do not increment - * the hangcheck score on this ring, if this - * engine is in a legitimate wait for another - * engine. In that case the waiting engine is a - * victim and we want to be sure we catch the - * right culprit. Then every time we do kick - * the ring, make it as a progress as the seqno - * advancement might ensure and if not, it - * will catch the hanging engine. 
- */ - - switch (hc->action) { - case ENGINE_IDLE: - case ENGINE_ACTIVE_SEQNO: - /* Clear head and subunit states on seqno movement */ - hc->acthd = 0; - - memset(&engine->hangcheck.instdone, 0, - sizeof(engine->hangcheck.instdone)); - - /* Intentional fall through */ - case ENGINE_WAIT_KICK: - case ENGINE_WAIT: - engine->hangcheck.action_timestamp = jiffies; - break; - - case ENGINE_ACTIVE_HEAD: - case ENGINE_ACTIVE_SUBUNITS: - /* - * Seqno stuck with still active engine gets leeway, - * in hopes that it is just a long shader. - */ - timeout = I915_SEQNO_DEAD_TIMEOUT; - break; - - case ENGINE_DEAD: - break; - - default: - MISSING_CASE(hc->action); - } - - hc->stalled = time_after(jiffies, - engine->hangcheck.action_timestamp + timeout); - hc->wedged = time_after(jiffies, - engine->hangcheck.action_timestamp + - I915_ENGINE_WEDGED_TIMEOUT); -} - -static void hangcheck_declare_hang(struct drm_i915_private *i915, - unsigned int hung, - unsigned int stuck) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - char msg[80]; - int len; - - /* If some rings hung but others were still busy, only - * blame the hanging rings in the synopsis. - */ - if (stuck != hung) - hung &= ~stuck; - len = scnprintf(msg, sizeof(msg), - "%s on ", stuck == hung ? "no progress" : "hang"); - for_each_engine_masked(engine, i915, hung, tmp) - len += scnprintf(msg + len, sizeof(msg) - len, - "%s, ", engine->name); - msg[len-2] = '\0'; - - return i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg); -} - -/* - * This is called when the chip hasn't reported back with completed - * batchbuffers in a long time. We keep track per ring seqno progress and - * if there are no progress, hangcheck score for that ring is increased. - * Further, acthd is inspected to see if the ring is stuck. On stuck case - * we kick the ring. If we see no progress on three subsequent calls - * we assume chip is wedged and try to fix it by resetting the chip. - */ -static void i915_hangcheck_elapsed(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), - gpu_error.hangcheck_work.work); - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned int hung = 0, stuck = 0, wedged = 0; - - if (!i915_modparams.enable_hangcheck) - return; - - if (!READ_ONCE(dev_priv->gt.awake)) - return; - - if (i915_terminally_wedged(dev_priv)) - return; - - /* As enabling the GPU requires fairly extensive mmio access, - * periodically arm the mmio checker to see if we are triggering - * any invalid access. 
- */ - intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore); - - for_each_engine(engine, dev_priv, id) { - struct hangcheck hc; - - intel_engine_signal_breadcrumbs(engine); - - hangcheck_load_sample(engine, &hc); - hangcheck_accumulate_sample(engine, &hc); - hangcheck_store_sample(engine, &hc); - - if (hc.stalled) { - hung |= engine->mask; - if (hc.action != ENGINE_DEAD) - stuck |= engine->mask; - } - - if (hc.wedged) - wedged |= engine->mask; - } - - if (GEM_SHOW_DEBUG() && (hung | stuck)) { - struct drm_printer p = drm_debug_printer("hangcheck"); - - for_each_engine(engine, dev_priv, id) { - if (intel_engine_is_idle(engine)) - continue; - - intel_engine_dump(engine, &p, "%s\n", engine->name); - } - } - - if (wedged) { - dev_err(dev_priv->drm.dev, - "GPU recovery timed out," - " cancelling all in-flight rendering.\n"); - GEM_TRACE_DUMP(); - i915_gem_set_wedged(dev_priv); - } - - if (hung) - hangcheck_declare_hang(dev_priv, hung, stuck); - - /* Reset timer in case GPU hangs without another request being added */ - i915_queue_hangcheck(dev_priv); -} - -void intel_engine_init_hangcheck(struct intel_engine_cs *engine) -{ - memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); - engine->hangcheck.action_timestamp = jiffies; -} - -void intel_hangcheck_init(struct drm_i915_private *i915) -{ - INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work, - i915_hangcheck_elapsed); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_hangcheck.c" -#endif diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c deleted file mode 100644 index 18a9dc6ca877..000000000000 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ /dev/null @@ -1,2909 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Ben Widawsky - * Michel Thierry - * Thomas Daniel - * Oscar Mateo - * - */ - -/** - * DOC: Logical Rings, Logical Ring Contexts and Execlists - * - * Motivation: - * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". - * These expanded contexts enable a number of new abilities, especially - * "Execlists" (also implemented in this file). 
- * - * One of the main differences with the legacy HW contexts is that logical - * ring contexts incorporate many more things to the context's state, like - * PDPs or ringbuffer control registers: - * - * The reason why PDPs are included in the context is straightforward: as - * PPGTTs (per-process GTTs) are actually per-context, having the PDPs - * contained there mean you don't need to do a ppgtt->switch_mm yourself, - * instead, the GPU will do it for you on the context switch. - * - * But, what about the ringbuffer control registers (head, tail, etc..)? - * shouldn't we just need a set of those per engine command streamer? This is - * where the name "Logical Rings" starts to make sense: by virtualizing the - * rings, the engine cs shifts to a new "ring buffer" with every context - * switch. When you want to submit a workload to the GPU you: A) choose your - * context, B) find its appropriate virtualized ring, C) write commands to it - * and then, finally, D) tell the GPU to switch to that context. - * - * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch - * to a contexts is via a context execution list, ergo "Execlists". - * - * LRC implementation: - * Regarding the creation of contexts, we have: - * - * - One global default context. - * - One local default context for each opened fd. - * - One local extra context for each context create ioctl call. - * - * Now that ringbuffers belong per-context (and not per-engine, like before) - * and that contexts are uniquely tied to a given engine (and not reusable, - * like before) we need: - * - * - One ringbuffer per-engine inside each context. - * - One backing object per-engine inside each context. - * - * The global default context starts its life with these new objects fully - * allocated and populated. The local default context for each opened fd is - * more complex, because we don't know at creation time which engine is going - * to use them. To handle this, we have implemented a deferred creation of LR - * contexts: - * - * The local context starts its life as a hollow or blank holder, that only - * gets populated for a given engine once we receive an execbuffer. If later - * on we receive another execbuffer ioctl for the same context but a different - * engine, we allocate/populate a new ringbuffer and context backing object and - * so on. - * - * Finally, regarding local contexts created using the ioctl call: as they are - * only allowed with the render ring, we can allocate & populate them right - * away (no need to defer anything, at least for now). - * - * Execlists implementation: - * Execlists are the new method by which, on gen8+ hardware, workloads are - * submitted for execution (as opposed to the legacy, ringbuffer-based, method). - * This method works as follows: - * - * When a request is committed, its commands (the BB start and any leading or - * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer - * for the appropriate context. The tail pointer in the hardware context is not - * updated at this time, but instead, kept by the driver in the ringbuffer - * structure. A structure representing this request is added to a request queue - * for the appropriate engine: this structure contains a copy of the context's - * tail after the request was written to the ring buffer and a pointer to the - * context itself. - * - * If the engine's request queue was empty before the request was added, the - * queue is processed immediately. 
Otherwise the queue will be processed during - * a context switch interrupt. In any case, elements on the queue will get sent - * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a - * globally unique 20-bits submission ID. - * - * When execution of a request completes, the GPU updates the context status - * buffer with a context complete event and generates a context switch interrupt. - * During the interrupt handling, the driver examines the events in the buffer: - * for each context complete event, if the announced ID matches that on the head - * of the request queue, then that request is retired and removed from the queue. - * - * After processing, if any requests were retired and the queue is not empty - * then a new execution list can be submitted. The two requests at the front of - * the queue are next to be submitted but since a context may not occur twice in - * an execution list, if subsequent requests have the same ID as the first then - * the two requests must be combined. This is done simply by discarding requests - * at the head of the queue until either only one requests is left (in which case - * we use a NULL second context) or the first two requests have unique IDs. - * - * By always executing the first two requests in the queue the driver ensures - * that the GPU is kept as busy as possible. In the case where a single context - * completes but a second context is still executing, the request for this second - * context will be at the head of the queue when we remove the first one. This - * request will then be resubmitted along with a new request for a different context, - * which will cause the hardware to continue executing the second request and queue - * the new request (the GPU detects the condition of a context getting preempted - * with the same context and optimizes the context switch flow by not doing - * preemption, but just sampling the new tail pointer). 
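/*
 * Aside: a toy, standalone model of the ELSP pairing rule described above,
 * not the driver's actual dequeue code.  Consecutive requests from the same
 * context collapse into a single port (one RING_TAIL update covers them
 * all), and the two ports submitted to the ELSP must carry different
 * context IDs; if only one context is ready, port[1] stays empty.
 */
#include <stdio.h>

struct sketch_request {
	int ctx_id;
};

/* Pick up to two ports from the head of @queue of length @count. */
static void sketch_fill_elsp(const struct sketch_request *queue, int count,
			     const struct sketch_request *port[2])
{
	int used = 0;

	port[0] = port[1] = NULL;
	for (int i = 0; i < count && used < 2; i++) {
		if (used && port[used - 1]->ctx_id == queue[i].ctx_id)
			port[used - 1] = &queue[i]; /* coalesce: later tail wins */
		else
			port[used++] = &queue[i];
	}
}

int main(void)
{
	const struct sketch_request queue[] = {
		{ .ctx_id = 1 }, { .ctx_id = 1 }, { .ctx_id = 2 }, { .ctx_id = 3 },
	};
	const struct sketch_request *port[2];

	sketch_fill_elsp(queue, 4, port);
	/* port[0] ends up as the second ctx 1 request, port[1] as ctx 2 */
	printf("port0 ctx=%d port1 ctx=%d\n",
	       port[0]->ctx_id, port[1] ? port[1]->ctx_id : -1);
	return 0;
}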
- * - */ -#include - -#include -#include "i915_drv.h" -#include "i915_gem_render_state.h" -#include "i915_reset.h" -#include "i915_vgpu.h" -#include "intel_lrc_reg.h" -#include "intel_mocs.h" -#include "intel_workarounds.h" - -#define RING_EXECLIST_QFULL (1 << 0x2) -#define RING_EXECLIST1_VALID (1 << 0x3) -#define RING_EXECLIST0_VALID (1 << 0x4) -#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE) -#define RING_EXECLIST1_ACTIVE (1 << 0x11) -#define RING_EXECLIST0_ACTIVE (1 << 0x12) - -#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0) -#define GEN8_CTX_STATUS_PREEMPTED (1 << 1) -#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2) -#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3) -#define GEN8_CTX_STATUS_COMPLETE (1 << 4) -#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) - -#define GEN8_CTX_STATUS_COMPLETED_MASK \ - (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) - -/* Typical size of the average request (2 pipecontrols and a MI_BB) */ -#define EXECLISTS_REQUEST_SIZE 64 /* bytes */ -#define WA_TAIL_DWORDS 2 -#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) - -#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE) - -static int execlists_context_deferred_alloc(struct intel_context *ce, - struct intel_engine_cs *engine); -static void execlists_init_reg_state(u32 *reg_state, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring); - -static inline struct i915_priolist *to_priolist(struct rb_node *rb) -{ - return rb_entry(rb, struct i915_priolist, node); -} - -static inline int rq_prio(const struct i915_request *rq) -{ - return rq->sched.attr.priority; -} - -static int effective_prio(const struct i915_request *rq) -{ - int prio = rq_prio(rq); - - /* - * On unwinding the active request, we give it a priority bump - * equivalent to a freshly submitted request. This protects it from - * being gazumped again, but it would be preferable if we didn't - * let it be gazumped in the first place! - * - * See __unwind_incomplete_requests() - */ - if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(rq)) { - /* - * After preemption, we insert the active request at the - * end of the new priority level. This means that we will be - * _lower_ priority than the preemptee all things equal (and - * so the preemption is valid), so adjust our comparison - * accordingly. - */ - prio |= ACTIVE_PRIORITY; - prio--; - } - - /* Restrict mere WAIT boosts from triggering preemption */ - return prio | __NO_PREEMPTION; -} - -static int queue_prio(const struct intel_engine_execlists *execlists) -{ - struct i915_priolist *p; - struct rb_node *rb; - - rb = rb_first_cached(&execlists->queue); - if (!rb) - return INT_MIN; - - /* - * As the priolist[] are inverted, with the highest priority in [0], - * we have to flip the index value to become priority. - */ - p = to_priolist(rb); - return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); -} - -static inline bool need_preempt(const struct intel_engine_cs *engine, - const struct i915_request *rq) -{ - int last_prio; - - if (!engine->preempt_context) - return false; - - if (i915_request_completed(rq)) - return false; - - /* - * Check if the current priority hint merits a preemption attempt. - * - * We record the highest value priority we saw during rescheduling - * prior to this dequeue, therefore we know that if it is strictly - * less than the current tail of ESLP[0], we do not need to force - * a preempt-to-idle cycle. - * - * However, the priority hint is a mere hint that we may need to - * preempt. 
If that hint is stale or we may be trying to preempt - * ourselves, ignore the request. - */ - last_prio = effective_prio(rq); - if (!__execlists_need_preempt(engine->execlists.queue_priority_hint, - last_prio)) - return false; - - /* - * Check against the first request in ELSP[1], it will, thanks to the - * power of PI, be the highest priority of that context. - */ - if (!list_is_last(&rq->link, &engine->timeline.requests) && - rq_prio(list_next_entry(rq, link)) > last_prio) - return true; - - /* - * If the inflight context did not trigger the preemption, then maybe - * it was the set of queued requests? Pick the highest priority in - * the queue (the first active priolist) and see if it deserves to be - * running instead of ELSP[0]. - * - * The highest priority request in the queue can not be either - * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same - * context, it's priority would not exceed ELSP[0] aka last_prio. - */ - return queue_prio(&engine->execlists) > last_prio; -} - -__maybe_unused static inline bool -assert_priority_queue(const struct i915_request *prev, - const struct i915_request *next) -{ - const struct intel_engine_execlists *execlists = - &prev->engine->execlists; - - /* - * Without preemption, the prev may refer to the still active element - * which we refuse to let go. - * - * Even with preemption, there are times when we think it is better not - * to preempt and leave an ostensibly lower priority request in flight. - */ - if (port_request(execlists->port) == prev) - return true; - - return rq_prio(prev) >= rq_prio(next); -} - -/* - * The context descriptor encodes various attributes of a context, - * including its GTT address and some flags. Because it's fairly - * expensive to calculate, we'll just do it once and cache the result, - * which remains valid until the context is unpinned. - * - * This is what a descriptor looks like, from LSB to MSB:: - * - * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) - * bits 12-31: LRCA, GTT address of (the HWSP of) this context - * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC) - * bits 53-54: mbz, reserved for use by hardware - * bits 55-63: group ID, currently unused and set to 0 - * - * Starting from Gen11, the upper dword of the descriptor has a new format: - * - * bits 32-36: reserved - * bits 37-47: SW context ID - * bits 48:53: engine instance - * bit 54: mbz, reserved for use by hardware - * bits 55-60: SW counter - * bits 61-63: engine class - * - * engine info, SW context ID and SW counter need to form a unique number - * (Context ID) per lrc. - */ -static u64 -lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) -{ - struct i915_gem_context *ctx = ce->gem_context; - u64 desc; - - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); - BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH))); - - desc = ctx->desc_template; /* bits 0-11 */ - GEM_BUG_ON(desc & GENMASK_ULL(63, 12)); - - desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; - /* bits 12-31 */ - GEM_BUG_ON(desc & GENMASK_ULL(63, 32)); - - /* - * The following 32bits are copied into the OA reports (dword 2). - * Consider updating oa_get_render_ctx_id in i915_perf.c when changing - * anything below. 
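/*
 * Aside: a standalone worked example of the gen8 descriptor layout
 * documented above (bits 0-11 flags, bits 12-31 LRCA, bits 32-52 ctx ID).
 * The template, LRCA and hw_id values are placeholders for illustration;
 * only the bit positions are taken from the layout comment above.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint64_t desc_template = 0x021;      /* placeholder flag bits 0-11 */
	const uint64_t lrca          = 0x00345000; /* placeholder GGTT offset, 4KiB aligned */
	const uint64_t hw_id         = 42;         /* placeholder context ID */
	uint64_t desc;

	desc  = desc_template;  /* bits 0-11  */
	desc |= lrca;           /* bits 12-31 */
	desc |= hw_id << 32;    /* bits 32-52 */

	/* The fields unpack cleanly because they occupy disjoint bits. */
	assert((desc & 0xfff) == desc_template);
	assert((desc & 0xfffff000ull) == lrca);
	assert(((desc >> 32) & 0x1fffff) == hw_id);
	return 0;
}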
- */ - if (INTEL_GEN(engine->i915) >= 11) { - GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); - desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; - /* bits 37-47 */ - - desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; - /* bits 48-53 */ - - /* TODO: decide what to do with SW counter (bits 55-60) */ - - desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; - /* bits 61-63 */ - } else { - GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH)); - desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ - } - - return desc; -} - -static void unwind_wa_tail(struct i915_request *rq) -{ - rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); - assert_ring_tail_valid(rq->ring, rq->tail); -} - -static struct i915_request * -__unwind_incomplete_requests(struct intel_engine_cs *engine) -{ - struct i915_request *rq, *rn, *active = NULL; - struct list_head *uninitialized_var(pl); - int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY; - - lockdep_assert_held(&engine->timeline.lock); - - list_for_each_entry_safe_reverse(rq, rn, - &engine->timeline.requests, - link) { - if (i915_request_completed(rq)) - break; - - __i915_request_unsubmit(rq); - unwind_wa_tail(rq); - - GEM_BUG_ON(rq->hw_context->active); - - GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); - if (rq_prio(rq) != prio) { - prio = rq_prio(rq); - pl = i915_sched_lookup_priolist(engine, prio); - } - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - - list_add(&rq->sched.link, pl); - - active = rq; - } - - /* - * The active request is now effectively the start of a new client - * stream, so give it the equivalent small priority bump to prevent - * it being gazumped a second time by another peer. - * - * Note we have to be careful not to apply a priority boost to a request - * still spinning on its semaphores. If the request hasn't started, that - * means it is still waiting for its dependencies to be signaled, and - * if we apply a priority boost to this request, we will boost it past - * its signalers and so break PI. - * - * One consequence of this preemption boost is that we may jump - * over lesser priorities (such as I915_PRIORITY_WAIT), effectively - * making those priorities non-preemptible. They will be moved forward - * in the priority queue, but they will not gain immediate access to - * the GPU. - */ - if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) { - prio |= ACTIVE_PRIORITY; - active->sched.attr.priority = prio; - list_move_tail(&active->sched.link, - i915_sched_lookup_priolist(engine, prio)); - } - - return active; -} - -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) -{ - struct intel_engine_cs *engine = - container_of(execlists, typeof(*engine), execlists); - - return __unwind_incomplete_requests(engine); -} - -static inline void -execlists_context_status_change(struct i915_request *rq, unsigned long status) -{ - /* - * Only used when GVT-g is enabled now. When GVT-g is disabled, - * The compiler should eliminate this function as dead-code. 
- */ - if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) - return; - - atomic_notifier_call_chain(&rq->engine->context_status_notifier, - status, rq); -} - -inline void -execlists_user_begin(struct intel_engine_execlists *execlists, - const struct execlist_port *port) -{ - execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER); -} - -inline void -execlists_user_end(struct intel_engine_execlists *execlists) -{ - execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); -} - -static inline void -execlists_context_schedule_in(struct i915_request *rq) -{ - GEM_BUG_ON(rq->hw_context->active); - - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); - intel_engine_context_in(rq->engine); - rq->hw_context->active = rq->engine; -} - -static inline void -execlists_context_schedule_out(struct i915_request *rq, unsigned long status) -{ - rq->hw_context->active = NULL; - intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, status); - trace_i915_request_out(rq); -} - -static u64 execlists_update_context(struct i915_request *rq) -{ - struct intel_context *ce = rq->hw_context; - - ce->lrc_reg_state[CTX_RING_TAIL + 1] = - intel_ring_set_tail(rq->ring, rq->tail); - - /* - * Make sure the context image is complete before we submit it to HW. - * - * Ostensibly, writes (including the WCB) should be flushed prior to - * an uncached write such as our mmio register access, the empirical - * evidence (esp. on Braswell) suggests that the WC write into memory - * may not be visible to the HW prior to the completion of the UC - * register write and that we may begin execution from the context - * before its image is complete leading to invalid PD chasing. - * - * Furthermore, Braswell, at least, wants a full mb to be sure that - * the writes are coherent in memory (visible to the GPU) prior to - * execution, and not just visible to other CPUs (as is the result of - * wmb). - */ - mb(); - return ce->lrc_desc; -} - -static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) -{ - if (execlists->ctrl_reg) { - writel(lower_32_bits(desc), execlists->submit_reg + port * 2); - writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); - } else { - writel(upper_32_bits(desc), execlists->submit_reg); - writel(lower_32_bits(desc), execlists->submit_reg); - } -} - -static void execlists_submit_ports(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists *execlists = &engine->execlists; - struct execlist_port *port = execlists->port; - unsigned int n; - - /* - * We can skip acquiring intel_runtime_pm_get() here as it was taken - * on our behalf by the request (see i915_gem_mark_busy()) and it will - * not be relinquished until the device is idle (see - * i915_gem_idle_work_handler()). As a precaution, we make sure - * that all ELSP are drained i.e. we have processed the CSB, - * before allowing ourselves to idle and calling intel_runtime_pm_put(). - */ - GEM_BUG_ON(!engine->i915->gt.awake); - - /* - * ELSQ note: the submit queue is not cleared after being submitted - * to the HW so we need to make sure we always clean it up. This is - * currently ensured by the fact that we always write the same number - * of elsq entries, keep this in mind before changing the loop below. 
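/*
 * Aside: a standalone model of what the port_pack()/port_unpack() helpers
 * used in the loop below appear to do - keep a small submission count in
 * the otherwise-unused low bits of an aligned request pointer.  This is an
 * assumption made for illustration only, not the driver's implementation.
 */
#include <assert.h>
#include <stdint.h>

#define SKETCH_COUNT_BITS 2
#define SKETCH_COUNT_MASK ((1ul << SKETCH_COUNT_BITS) - 1)

static uintptr_t sketch_port_pack(void *rq, unsigned int count)
{
	assert(((uintptr_t)rq & SKETCH_COUNT_MASK) == 0); /* needs alignment */
	return (uintptr_t)rq | (count & SKETCH_COUNT_MASK);
}

static void *sketch_port_unpack(uintptr_t packed, unsigned int *count)
{
	*count = packed & SKETCH_COUNT_MASK;
	return (void *)(packed & ~SKETCH_COUNT_MASK);
}

int main(void)
{
	static int dummy_rq __attribute__((aligned(4)));
	unsigned int count;
	uintptr_t port;

	/* First submission: count 0 -> 1, i.e. "schedule the context in". */
	port = sketch_port_pack(&dummy_rq, 0);
	assert(sketch_port_unpack(port, &count) == &dummy_rq && count == 0);
	port = sketch_port_pack(&dummy_rq, count + 1);

	/* A lite-restore resubmission only bumps the count. */
	assert(sketch_port_unpack(port, &count) == &dummy_rq && count == 1);
	return 0;
}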
- */ - for (n = execlists_num_ports(execlists); n--; ) { - struct i915_request *rq; - unsigned int count; - u64 desc; - - rq = port_unpack(&port[n], &count); - if (rq) { - GEM_BUG_ON(count > !n); - if (!count++) - execlists_context_schedule_in(rq); - port_set(&port[n], port_pack(rq, count)); - desc = execlists_update_context(rq); - GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - - GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", - engine->name, n, - port[n].context_id, count, - rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq), - rq_prio(rq)); - } else { - GEM_BUG_ON(!n); - desc = 0; - } - - write_desc(execlists, desc, n); - } - - /* we need to manually load the submit queue */ - if (execlists->ctrl_reg) - writel(EL_CTRL_LOAD, execlists->ctrl_reg); - - execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); -} - -static bool ctx_single_port_submission(const struct intel_context *ce) -{ - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - i915_gem_context_force_single_submission(ce->gem_context)); -} - -static bool can_merge_ctx(const struct intel_context *prev, - const struct intel_context *next) -{ - if (prev != next) - return false; - - if (ctx_single_port_submission(prev)) - return false; - - return true; -} - -static bool can_merge_rq(const struct i915_request *prev, - const struct i915_request *next) -{ - GEM_BUG_ON(!assert_priority_queue(prev, next)); - - if (!can_merge_ctx(prev->hw_context, next->hw_context)) - return false; - - return true; -} - -static void port_assign(struct execlist_port *port, struct i915_request *rq) -{ - GEM_BUG_ON(rq == port_request(port)); - - if (port_isset(port)) - i915_request_put(port_request(port)); - - port_set(port, port_pack(i915_request_get(rq), port_count(port))); -} - -static void inject_preempt_context(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists *execlists = &engine->execlists; - struct intel_context *ce = engine->preempt_context; - unsigned int n; - - GEM_BUG_ON(execlists->preempt_complete_status != - upper_32_bits(ce->lrc_desc)); - - /* - * Switch to our empty preempt context so - * the state of the GPU is known (idle). - */ - GEM_TRACE("%s\n", engine->name); - for (n = execlists_num_ports(execlists); --n; ) - write_desc(execlists, 0, n); - - write_desc(execlists, ce->lrc_desc, n); - - /* we need to manually load the submit queue */ - if (execlists->ctrl_reg) - writel(EL_CTRL_LOAD, execlists->ctrl_reg); - - execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); - execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); - - (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); -} - -static void complete_preempt_context(struct intel_engine_execlists *execlists) -{ - GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); - - if (inject_preempt_hang(execlists)) - return; - - execlists_cancel_port_requests(execlists); - __unwind_incomplete_requests(container_of(execlists, - struct intel_engine_cs, - execlists)); -} - -static void execlists_dequeue(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; - const struct execlist_port * const last_port = - &execlists->port[execlists->port_mask]; - struct i915_request *last = port_request(port); - struct rb_node *rb; - bool submit = false; - - /* - * Hardware submission is through 2 ports. Conceptually each port - * has a (RING_START, RING_HEAD, RING_TAIL) tuple. 
RING_START is - * static for a context, and unique to each, so we only execute - * requests belonging to a single context from each ring. RING_HEAD - * is maintained by the CS in the context image, it marks the place - * where it got up to last time, and through RING_TAIL we tell the CS - * where we want to execute up to this time. - * - * In this list the requests are in order of execution. Consecutive - * requests from the same context are adjacent in the ringbuffer. We - * can combine these requests into a single RING_TAIL update: - * - * RING_HEAD...req1...req2 - * ^- RING_TAIL - * since to execute req2 the CS must first execute req1. - * - * Our goal then is to point each port to the end of a consecutive - * sequence of requests as being the most optimal (fewest wake ups - * and context switches) submission. - */ - - if (last) { - /* - * Don't resubmit or switch until all outstanding - * preemptions (lite-restore) are seen. Then we - * know the next preemption status we see corresponds - * to this ELSP update. - */ - GEM_BUG_ON(!execlists_is_active(execlists, - EXECLISTS_ACTIVE_USER)); - GEM_BUG_ON(!port_count(&port[0])); - - /* - * If we write to ELSP a second time before the HW has had - * a chance to respond to the previous write, we can confuse - * the HW and hit "undefined behaviour". After writing to ELSP, - * we must then wait until we see a context-switch event from - * the HW to indicate that it has had a chance to respond. - */ - if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) - return; - - if (need_preempt(engine, last)) { - inject_preempt_context(engine); - return; - } - - /* - * In theory, we could coalesce more requests onto - * the second port (the first port is active, with - * no preemptions pending). However, that means we - * then have to deal with the possible lite-restore - * of the second port (as we submit the ELSP, there - * may be a context-switch) but also we may complete - * the resubmission before the context-switch. Ergo, - * coalescing onto the second port will cause a - * preemption event, but we cannot predict whether - * that will affect port[0] or port[1]. - * - * If the second port is already active, we can wait - * until the next context-switch before contemplating - * new requests. The GPU will be busy and we should be - * able to resubmit the new ELSP before it idles, - * avoiding pipeline bubbles (momentary pauses where - * the driver is unable to keep up the supply of new - * work). However, we have to double check that the - * priorities of the ports haven't been switch. - */ - if (port_count(&port[1])) - return; - - /* - * WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent - * ring:HEAD == rq:TAIL as we resubmit the - * request. See gen8_emit_fini_breadcrumb() for - * where we prepare the padding after the - * end of the request. - */ - last->tail = last->wa_tail; - } - - while ((rb = rb_first_cached(&execlists->queue))) { - struct i915_priolist *p = to_priolist(rb); - struct i915_request *rq, *rn; - int i; - - priolist_for_each_request_consume(rq, rn, p, i) { - /* - * Can we combine this request with the current port? - * It has to be the same context/ringbuffer and not - * have any exceptions (e.g. GVT saying never to - * combine contexts). - * - * If we can combine the requests, we can execute both - * by updating the RING_TAIL to point to the end of the - * second request, and so we never need to tell the - * hardware about the first. 
- */ - if (last && !can_merge_rq(last, rq)) { - /* - * If we are on the second port and cannot - * combine this request with the last, then we - * are done. - */ - if (port == last_port) - goto done; - - /* - * We must not populate both ELSP[] with the - * same LRCA, i.e. we must submit 2 different - * contexts if we submit 2 ELSP. - */ - if (last->hw_context == rq->hw_context) - goto done; - - /* - * If GVT overrides us we only ever submit - * port[0], leaving port[1] empty. Note that we - * also have to be careful that we don't queue - * the same context (even though a different - * request) to the second port. - */ - if (ctx_single_port_submission(last->hw_context) || - ctx_single_port_submission(rq->hw_context)) - goto done; - - - if (submit) - port_assign(port, last); - port++; - - GEM_BUG_ON(port_isset(port)); - } - - list_del_init(&rq->sched.link); - - __i915_request_submit(rq); - trace_i915_request_in(rq, port_index(port, execlists)); - - last = rq; - submit = true; - } - - rb_erase_cached(&p->node, &execlists->queue); - i915_priolist_free(p); - } - -done: - /* - * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. - * - * We choose the priority hint such that if we add a request of greater - * priority than this, we kick the submission tasklet to decide on - * the right order of submitting the requests to hardware. We must - * also be prepared to reorder requests as they are in-flight on the - * HW. We derive the priority hint then as the first "hole" in - * the HW submission ports and if there are no available slots, - * the priority of the lowest executing request, i.e. last. - * - * When we do receive a higher priority request ready to run from the - * user, see queue_request(), the priority hint is bumped to that - * request triggering preemption on the next dequeue (or subsequent - * interrupt for secondary ports). - */ - execlists->queue_priority_hint = queue_prio(execlists); - - if (submit) { - port_assign(port, last); - execlists_submit_ports(engine); - } - - /* We must always keep the beast fed if we have work piled up */ - GEM_BUG_ON(rb_first_cached(&execlists->queue) && - !port_isset(execlists->port)); - - /* Re-evaluate the executing context setup after each preemptive kick */ - if (last) - execlists_user_begin(execlists, execlists->port); - - /* If the engine is now idle, so should be the flag; and vice versa. */ - GEM_BUG_ON(execlists_is_active(&engine->execlists, - EXECLISTS_ACTIVE_USER) == - !port_isset(engine->execlists.port)); -} - -void -execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) -{ - struct execlist_port *port = execlists->port; - unsigned int num_ports = execlists_num_ports(execlists); - - while (num_ports-- && port_isset(port)) { - struct i915_request *rq = port_request(port); - - GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n", - rq->engine->name, - (unsigned int)(port - execlists->port), - rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq)); - - GEM_BUG_ON(!execlists->active); - execlists_context_schedule_out(rq, - i915_request_completed(rq) ? 
- INTEL_CONTEXT_SCHEDULE_OUT : - INTEL_CONTEXT_SCHEDULE_PREEMPTED); - - i915_request_put(rq); - - memset(port, 0, sizeof(*port)); - port++; - } - - execlists_clear_all_active(execlists); -} - -static inline void -invalidate_csb_entries(const u32 *first, const u32 *last) -{ - clflush((void *)first); - clflush((void *)last); -} - -static inline bool -reset_in_progress(const struct intel_engine_execlists *execlists) -{ - return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); -} - -static void process_csb(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; - const u32 * const buf = execlists->csb_status; - const u8 num_entries = execlists->csb_size; - u8 head, tail; - - lockdep_assert_held(&engine->timeline.lock); - - /* - * Note that csb_write, csb_status may be either in HWSP or mmio. - * When reading from the csb_write mmio register, we have to be - * careful to only use the GEN8_CSB_WRITE_PTR portion, which is - * the low 4bits. As it happens we know the next 4bits are always - * zero and so we can simply masked off the low u8 of the register - * and treat it identically to reading from the HWSP (without having - * to use explicit shifting and masking, and probably bifurcating - * the code to handle the legacy mmio read). - */ - head = execlists->csb_head; - tail = READ_ONCE(*execlists->csb_write); - GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail); - if (unlikely(head == tail)) - return; - - /* - * Hopefully paired with a wmb() in HW! - * - * We must complete the read of the write pointer before any reads - * from the CSB, so that we do not see stale values. Without an rmb - * (lfence) the HW may speculatively perform the CSB[] reads *before* - * we perform the READ_ONCE(*csb_write). - */ - rmb(); - - do { - struct i915_request *rq; - unsigned int status; - unsigned int count; - - if (++head == num_entries) - head = 0; - - /* - * We are flying near dragons again. - * - * We hold a reference to the request in execlist_port[] - * but no more than that. We are operating in softirq - * context and so cannot hold any mutex or sleep. That - * prevents us stopping the requests we are processing - * in port[] from being retired simultaneously (the - * breadcrumb will be complete before we see the - * context-switch). As we only hold the reference to the - * request, any pointer chasing underneath the request - * is subject to a potential use-after-free. Thus we - * store all of the bookkeeping within port[] as - * required, and avoid using unguarded pointers beneath - * request itself. The same applies to the atomic - * status notifier. - */ - - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", - engine->name, head, - buf[2 * head + 0], buf[2 * head + 1], - execlists->active); - - status = buf[2 * head]; - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | - GEN8_CTX_STATUS_PREEMPTED)) - execlists_set_active(execlists, - EXECLISTS_ACTIVE_HWACK); - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) - execlists_clear_active(execlists, - EXECLISTS_ACTIVE_HWACK); - - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) - continue; - - /* We should never get a COMPLETED | IDLE_ACTIVE! 
*/ - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); - - if (status & GEN8_CTX_STATUS_COMPLETE && - buf[2*head + 1] == execlists->preempt_complete_status) { - GEM_TRACE("%s preempt-idle\n", engine->name); - complete_preempt_context(execlists); - continue; - } - - if (status & GEN8_CTX_STATUS_PREEMPTED && - execlists_is_active(execlists, - EXECLISTS_ACTIVE_PREEMPT)) - continue; - - GEM_BUG_ON(!execlists_is_active(execlists, - EXECLISTS_ACTIVE_USER)); - - rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", - engine->name, - port->context_id, count, - rq ? rq->fence.context : 0, - rq ? rq->fence.seqno : 0, - rq ? hwsp_seqno(rq) : 0, - rq ? rq_prio(rq) : 0); - - /* Check the context/desc id for this event matches */ - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); - - GEM_BUG_ON(count == 0); - if (--count == 0) { - /* - * On the final event corresponding to the - * submission of this context, we expect either - * an element-switch event or a completion - * event (and on completion, the active-idle - * marker). No more preemptions, lite-restore - * or otherwise. - */ - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); - GEM_BUG_ON(port_isset(&port[1]) && - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); - GEM_BUG_ON(!port_isset(&port[1]) && - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); - - /* - * We rely on the hardware being strongly - * ordered, that the breadcrumb write is - * coherent (visible from the CPU) before the - * user interrupt and CSB is processed. - */ - GEM_BUG_ON(!i915_request_completed(rq)); - - execlists_context_schedule_out(rq, - INTEL_CONTEXT_SCHEDULE_OUT); - i915_request_put(rq); - - GEM_TRACE("%s completed ctx=%d\n", - engine->name, port->context_id); - - port = execlists_port_complete(execlists, port); - if (port_isset(port)) - execlists_user_begin(execlists, port); - else - execlists_user_end(execlists); - } else { - port_set(port, port_pack(rq, count)); - } - } while (head != tail); - - execlists->csb_head = head; - - /* - * Gen11 has proven to fail wrt global observation point between - * entry and tail update, failing on the ordering and thus - * we see an old entry in the context status buffer. - * - * Forcibly evict out entries for the next gpu csb update, - * to increase the odds that we get a fresh entries with non - * working hardware. The cost for doing so comes out mostly with - * the wash as hardware, working or not, will need to do the - * invalidation before. - */ - invalidate_csb_entries(&buf[0], &buf[num_entries - 1]); -} - -static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) -{ - lockdep_assert_held(&engine->timeline.lock); - - process_csb(engine); - if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) - execlists_dequeue(engine); -} - -/* - * Check the unread Context Status Buffers and manage the submission of new - * contexts to the ELSP accordingly. 
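/*
 * Aside: a standalone model of the context-status-buffer walk performed by
 * process_csb() above - consume entries from the cached head up to the
 * hardware-written tail, wrapping at the buffer size.  Entry decoding and
 * the read barrier are omitted; only the ring-index arithmetic is modelled.
 */
#include <assert.h>

static unsigned int sketch_csb_walk(unsigned int head, unsigned int tail,
				    unsigned int num_entries)
{
	unsigned int processed = 0;

	while (head != tail) {
		if (++head == num_entries)
			head = 0;
		processed++; /* here the real code decodes buf[2 * head] */
	}
	return processed;
}

int main(void)
{
	/* 6-entry buffer, head at 4, hardware tail at 1: entries 5, 0, 1. */
	assert(sketch_csb_walk(4, 1, 6) == 3);
	return 0;
}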
- */ -static void execlists_submission_tasklet(unsigned long data) -{ - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - unsigned long flags; - - GEM_TRACE("%s awake?=%d, active=%x\n", - engine->name, - !!engine->i915->gt.awake, - engine->execlists.active); - - spin_lock_irqsave(&engine->timeline.lock, flags); - __execlists_submission_tasklet(engine); - spin_unlock_irqrestore(&engine->timeline.lock, flags); -} - -static void queue_request(struct intel_engine_cs *engine, - struct i915_sched_node *node, - int prio) -{ - list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio)); -} - -static void __submit_queue_imm(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - - if (reset_in_progress(execlists)) - return; /* defer until we restart the engine following reset */ - - if (execlists->tasklet.func == execlists_submission_tasklet) - __execlists_submission_tasklet(engine); - else - tasklet_hi_schedule(&execlists->tasklet); -} - -static void submit_queue(struct intel_engine_cs *engine, int prio) -{ - if (prio > engine->execlists.queue_priority_hint) { - engine->execlists.queue_priority_hint = prio; - __submit_queue_imm(engine); - } -} - -static void execlists_submit_request(struct i915_request *request) -{ - struct intel_engine_cs *engine = request->engine; - unsigned long flags; - - /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline.lock, flags); - - queue_request(engine, &request->sched, rq_prio(request)); - - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - GEM_BUG_ON(list_empty(&request->sched.link)); - - submit_queue(engine, rq_prio(request)); - - spin_unlock_irqrestore(&engine->timeline.lock, flags); -} - -static void __execlists_context_fini(struct intel_context *ce) -{ - intel_ring_put(ce->ring); - - GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); - i915_gem_object_put(ce->state->obj); -} - -static void execlists_context_destroy(struct kref *kref) -{ - struct intel_context *ce = container_of(kref, typeof(*ce), ref); - - GEM_BUG_ON(intel_context_is_pinned(ce)); - - if (ce->state) - __execlists_context_fini(ce); - - intel_context_free(ce); -} - -static int __context_pin(struct i915_vma *vma) -{ - unsigned int flags; - int err; - - flags = PIN_GLOBAL | PIN_HIGH; - flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); - - err = i915_vma_pin(vma, 0, 0, flags); - if (err) - return err; - - vma->obj->pin_global++; - vma->obj->mm.dirty = true; - - return 0; -} - -static void __context_unpin(struct i915_vma *vma) -{ - vma->obj->pin_global--; - __i915_vma_unpin(vma); -} - -static void execlists_context_unpin(struct intel_context *ce) -{ - struct intel_engine_cs *engine; - - /* - * The tasklet may still be using a pointer to our state, via an - * old request. However, since we know we only unpin the context - * on retirement of the following request, we know that the last - * request referencing us will have had a completion CS interrupt. - * If we see that it is still active, it means that the tasklet hasn't - * had the chance to run yet; let it run before we teardown the - * reference it may use. 
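
submit_queue() above only kicks the submission path when the new request's priority beats the queue_priority_hint already recorded for the engine. A small stand-alone sketch of that gating logic (illustrative types, not the driver's):

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

struct sched_state {
    int queue_priority_hint;   /* highest priority we have already kicked for */
};

/* Returns true if the submission path actually needs to be kicked. */
static bool submit_queue_like(struct sched_state *s, int prio)
{
    if (prio <= s->queue_priority_hint)
        return false;          /* something at least as urgent is already queued */

    s->queue_priority_hint = prio;
    return true;               /* caller would schedule the tasklet here */
}

int main(void)
{
    struct sched_state s = { .queue_priority_hint = INT_MIN };

    printf("%d\n", submit_queue_like(&s, 0));   /* 1: first request kicks   */
    printf("%d\n", submit_queue_like(&s, -5));  /* 0: lower priority, no-op */
    printf("%d\n", submit_queue_like(&s, 10));  /* 1: higher priority kicks */
    return 0;
}
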
- */ - engine = READ_ONCE(ce->active); - if (unlikely(engine)) { - unsigned long flags; - - spin_lock_irqsave(&engine->timeline.lock, flags); - process_csb(engine); - spin_unlock_irqrestore(&engine->timeline.lock, flags); - - GEM_BUG_ON(READ_ONCE(ce->active)); - } - - i915_gem_context_unpin_hw_id(ce->gem_context); - - intel_ring_unpin(ce->ring); - - i915_gem_object_unpin_map(ce->state->obj); - __context_unpin(ce->state); -} - -static void -__execlists_update_reg_state(struct intel_context *ce, - struct intel_engine_cs *engine) -{ - struct intel_ring *ring = ce->ring; - u32 *regs = ce->lrc_reg_state; - - GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); - GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); - - regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma); - regs[CTX_RING_HEAD + 1] = ring->head; - regs[CTX_RING_TAIL + 1] = ring->tail; - - /* RPCS */ - if (engine->class == RENDER_CLASS) - regs[CTX_R_PWR_CLK_STATE + 1] = - intel_sseu_make_rpcs(engine->i915, &ce->sseu); -} - -static int -__execlists_context_pin(struct intel_context *ce, - struct intel_engine_cs *engine) -{ - void *vaddr; - int ret; - - GEM_BUG_ON(!ce->gem_context->ppgtt); - - ret = execlists_context_deferred_alloc(ce, engine); - if (ret) - goto err; - GEM_BUG_ON(!ce->state); - - ret = __context_pin(ce->state); - if (ret) - goto err; - - vaddr = i915_gem_object_pin_map(ce->state->obj, - i915_coherent_map_type(engine->i915) | - I915_MAP_OVERRIDE); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto unpin_vma; - } - - ret = intel_ring_pin(ce->ring); - if (ret) - goto unpin_map; - - ret = i915_gem_context_pin_hw_id(ce->gem_context); - if (ret) - goto unpin_ring; - - ce->lrc_desc = lrc_descriptor(ce, engine); - ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - __execlists_update_reg_state(ce, engine); - - return 0; - -unpin_ring: - intel_ring_unpin(ce->ring); -unpin_map: - i915_gem_object_unpin_map(ce->state->obj); -unpin_vma: - __context_unpin(ce->state); -err: - return ret; -} - -static int execlists_context_pin(struct intel_context *ce) -{ - return __execlists_context_pin(ce, ce->engine); -} - -static void execlists_context_reset(struct intel_context *ce) -{ - /* - * Because we emit WA_TAIL_DWORDS there may be a disparity - * between our bookkeeping in ce->ring->head and ce->ring->tail and - * that stored in context. As we only write new commands from - * ce->ring->tail onwards, everything before that is junk. If the GPU - * starts reading from its RING_HEAD from the context, it may try to - * execute that junk and die. - * - * The contexts that are stilled pinned on resume belong to the - * kernel, and are local to each engine. All other contexts will - * have their head/tail sanitized upon pinning before use, so they - * will never see garbage, - * - * So to avoid that we reset the context images upon resume. For - * simplicity, we just zero everything out. - */ - intel_ring_reset(ce->ring, 0); - __execlists_update_reg_state(ce, ce->engine); -} - -static const struct intel_context_ops execlists_context_ops = { - .pin = execlists_context_pin, - .unpin = execlists_context_unpin, - - .reset = execlists_context_reset, - .destroy = execlists_context_destroy, -}; - -static int gen8_emit_init_breadcrumb(struct i915_request *rq) -{ - u32 *cs; - - GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb); - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* - * Check if we have been preempted before we even get started. 
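
__execlists_context_pin() above acquires several resources in sequence and, on failure, releases only the ones already taken by jumping to progressively earlier labels. A generic sketch of that goto-unwind idiom, using plain allocations in place of the driver objects:

#include <stdio.h>
#include <stdlib.h>

/* Each step may fail; on failure, undo only what already succeeded. */
static int pin_context_like(void)
{
    void *state = NULL, *map = NULL, *ring = NULL;
    int err;

    state = malloc(64);
    if (!state) { err = -1; goto err_out; }

    map = malloc(64);
    if (!map) { err = -2; goto err_state; }

    ring = malloc(64);
    if (!ring) { err = -3; goto err_map; }

    /* ... everything pinned; for this sketch just release it all again ... */
    free(ring);
    free(map);
    free(state);
    return 0;

err_map:
    free(map);
err_state:
    free(state);
err_out:
    return err;
}

int main(void)
{
    return pin_context_like() ? EXIT_FAILURE : EXIT_SUCCESS;
}
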
- * - * After this point i915_request_started() reports true, even if - * we get preempted and so are no longer running. - */ - *cs++ = MI_ARB_CHECK; - *cs++ = MI_NOOP; - - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = rq->timeline->hwsp_offset; - *cs++ = 0; - *cs++ = rq->fence.seqno - 1; - - intel_ring_advance(rq, cs); - - /* Record the updated position of the request's payload */ - rq->infix = intel_ring_offset(rq, cs); - - return 0; -} - -static int emit_pdps(struct i915_request *rq) -{ - const struct intel_engine_cs * const engine = rq->engine; - struct i915_hw_ppgtt * const ppgtt = rq->gem_context->ppgtt; - int err, i; - u32 *cs; - - GEM_BUG_ON(intel_vgpu_active(rq->i915)); - - /* - * Beware ye of the dragons, this sequence is magic! - * - * Small changes to this sequence can cause anything from - * GPU hangs to forcewake errors and machine lockups! - */ - - /* Flush any residual operations from the context load */ - err = engine->emit_flush(rq, EMIT_FLUSH); - if (err) - return err; - - /* Magic required to prevent forcewake errors! */ - err = engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - return err; - - cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Ensure the LRI have landed before we invalidate & continue */ - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; - for (i = GEN8_3LVL_PDPES; i--; ) { - const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - u32 base = engine->mmio_base; - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); - *cs++ = lower_32_bits(pd_daddr); - } - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - /* Be doubly sure the LRI have landed before proceeding */ - err = engine->emit_flush(rq, EMIT_FLUSH); - if (err) - return err; - - /* Re-invalidate the TLB for luck */ - return engine->emit_flush(rq, EMIT_INVALIDATE); -} - -static int execlists_request_alloc(struct i915_request *request) -{ - int ret; - - GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); - - /* - * Flush enough space to reduce the likelihood of waiting after - * we start building the request - in which case we will just - * have to repeat work. - */ - request->reserved_space += EXECLISTS_REQUEST_SIZE; - - /* - * Note that after this point, we have committed to using - * this request as it is being used to both track the - * state of engine initialisation and liveness of the - * golden renderstate above. Think twice before you try - * to cancel/unwind this request now. - */ - - /* Unconditionally invalidate GPU caches and TLBs. */ - if (i915_vm_is_4lvl(&request->gem_context->ppgtt->vm)) - ret = request->engine->emit_flush(request, EMIT_INVALIDATE); - else - ret = emit_pdps(request); - if (ret) - return ret; - - request->reserved_space -= EXECLISTS_REQUEST_SIZE; - return 0; -} - -/* - * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after - * PIPE_CONTROL instruction. This is required for the flush to happen correctly - * but there is a slight complication as this is applied in WA batch where the - * values are only initialized once so we cannot take register value at the - * beginning and reuse it further; hence we save its value to memory, upload a - * constant value with bit21 set and then we restore it back with the saved value. - * To simplify the WA, a constant value is formed by using the default value - * of this register. 
This shouldn't be a problem because we are only modifying - * it for a short period and this batch in non-premptible. We can ofcourse - * use additional instructions that read the actual value of the register - * at that time and set our bit of interest but it makes the WA complicated. - * - * This WA is also required for Gen9 so extracting as a function avoids - * code duplication. - */ -static u32 * -gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) -{ - /* NB no one else is allowed to scribble over scratch + 256! */ - *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; - *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = i915_scratch_offset(engine->i915) + 256; - *batch++ = 0; - - *batch++ = MI_LOAD_REGISTER_IMM(1); - *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; - - batch = gen8_emit_pipe_control(batch, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE, - 0); - - *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; - *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = i915_scratch_offset(engine->i915) + 256; - *batch++ = 0; - - return batch; -} - -/* - * Typically we only have one indirect_ctx and per_ctx batch buffer which are - * initialized at the beginning and shared across all contexts but this field - * helps us to have multiple batches at different offsets and select them based - * on a criteria. At the moment this batch always start at the beginning of the page - * and at this point we don't have multiple wa_ctx batch buffers. - * - * The number of WA applied are not known at the beginning; we use this field - * to return the no of DWORDS written. - * - * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END - * so it adds NOOPs as padding to make it cacheline aligned. - * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together - * makes a complete batch buffer. 
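
The comment above notes that the indirect-context batch carries no MI_BATCH_BUFFER_END and is instead padded with NOOPs to a cacheline boundary. A minimal sketch of that emit-then-pad pattern; the command values are made up and only the padding logic matters:

#include <stdint.h>
#include <stdio.h>

#define CACHELINE_BYTES 64
#define NOOP            0x00000000u   /* MI_NOOP is an all-zero dword */

/* Append one dword to the batch. */
static uint32_t *emit(uint32_t *batch, uint32_t dw)
{
    *batch++ = dw;
    return batch;
}

/* Pad with NOOPs until the write pointer is cacheline aligned. */
static uint32_t *pad_to_cacheline(uint32_t *batch)
{
    while ((uintptr_t)batch % CACHELINE_BYTES)
        batch = emit(batch, NOOP);
    return batch;
}

int main(void)
{
    /* Assume the buffer itself starts cacheline aligned. */
    static _Alignas(CACHELINE_BYTES) uint32_t buf[64];
    uint32_t *cs = buf;

    cs = emit(cs, 0x11000001);        /* pretend command */
    cs = emit(cs, 0xdeadbeef);        /* pretend payload */
    cs = pad_to_cacheline(cs);

    printf("batch length: %zu dwords\n", (size_t)(cs - buf));
    return 0;
}
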
- */ -static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) -{ - /* WaDisableCtxRestoreArbitration:bdw,chv */ - *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - - /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */ - if (IS_BROADWELL(engine->i915)) - batch = gen8_emit_flush_coherentl3_wa(engine, batch); - - /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ - /* Actual scratch location is at 128 bytes offset */ - batch = gen8_emit_pipe_control(batch, - PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE, - i915_scratch_offset(engine->i915) + - 2 * CACHELINE_BYTES); - - *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - - /* Pad to end of cacheline */ - while ((unsigned long)batch % CACHELINE_BYTES) - *batch++ = MI_NOOP; - - /* - * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because - * execution depends on the length specified in terms of cache lines - * in the register CTX_RCS_INDIRECT_CTX - */ - - return batch; -} - -struct lri { - i915_reg_t reg; - u32 value; -}; - -static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count) -{ - GEM_BUG_ON(!count || count > 63); - - *batch++ = MI_LOAD_REGISTER_IMM(count); - do { - *batch++ = i915_mmio_reg_offset(lri->reg); - *batch++ = lri->value; - } while (lri++, --count); - *batch++ = MI_NOOP; - - return batch; -} - -static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) -{ - static const struct lri lri[] = { - /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ - { - COMMON_SLICE_CHICKEN2, - __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE, - 0), - }, - - /* BSpec: 11391 */ - { - FF_SLICE_CHICKEN, - __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX, - FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX), - }, - - /* BSpec: 11299 */ - { - _3D_CHICKEN3, - __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX, - _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX), - } - }; - - *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - - /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ - batch = gen8_emit_flush_coherentl3_wa(engine, batch); - - batch = emit_lri(batch, lri, ARRAY_SIZE(lri)); - - /* WaMediaPoolStateCmdInWABB:bxt,glk */ - if (HAS_POOLED_EU(engine->i915)) { - /* - * EU pool configuration is setup along with golden context - * during context initialization. This value depends on - * device type (2x6 or 3x6) and needs to be updated based - * on which subslice is disabled especially for 2x6 - * devices, however it is safe to load default - * configuration of 3x6 device instead of masking off - * corresponding bits because HW ignores bits of a disabled - * subslice and drops down to appropriate config. Please - * see render_state_setup() in i915_gem_render_state.c for - * possible configurations, to avoid duplication they are - * not shown here again. - */ - *batch++ = GEN9_MEDIA_POOL_STATE; - *batch++ = GEN9_MEDIA_POOL_ENABLE; - *batch++ = 0x00777000; - *batch++ = 0; - *batch++ = 0; - *batch++ = 0; - } - - *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - - /* Pad to end of cacheline */ - while ((unsigned long)batch % CACHELINE_BYTES) - *batch++ = MI_NOOP; - - return batch; -} - -static u32 * -gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) -{ - int i; - - /* - * WaPipeControlBefore3DStateSamplePattern: cnl - * - * Ensure the engine is idle prior to programming a - * 3DSTATE_SAMPLE_PATTERN during a context restore. 
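
emit_lri() above turns a table of {register, value} pairs into one load-register-immediate packet: a single header followed by the pairs back to back. A sketch of that encoding; the LOAD_REGISTER_IMM() constant below is hypothetical (it is not the real MI opcode) and the offsets/values are made up:

#include <stdint.h>
#include <stdio.h>

struct lri_entry {
    uint32_t reg;      /* mmio offset */
    uint32_t value;
};

/* Hypothetical header constant purely for illustration. */
#define LOAD_REGISTER_IMM(n)  (0x22000000u | (2u * (n) - 1))

static uint32_t *emit_lri_like(uint32_t *cs, const struct lri_entry *lri,
                               unsigned int count)
{
    *cs++ = LOAD_REGISTER_IMM(count);   /* one header for the whole table */
    for (unsigned int i = 0; i < count; i++) {
        *cs++ = lri[i].reg;             /* destination register offset */
        *cs++ = lri[i].value;           /* immediate value to write */
    }
    return cs;
}

int main(void)
{
    static const struct lri_entry table[] = {
        { 0x7014, 0x00010001 },         /* made-up offsets and values */
        { 0x20e0, 0x00200020 },
    };
    uint32_t cs[16], *end;

    end = emit_lri_like(cs, table, 2);
    printf("emitted %zu dwords\n", (size_t)(end - cs));
    return 0;
}
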
- */ - batch = gen8_emit_pipe_control(batch, - PIPE_CONTROL_CS_STALL, - 0); - /* - * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for - * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in - * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is - * confusing. Since gen8_emit_pipe_control() already advances the - * batch by 6 dwords, we advance the other 10 here, completing a - * cacheline. It's not clear if the workaround requires this padding - * before other commands, or if it's just the regular padding we would - * already have for the workaround bb, so leave it here for now. - */ - for (i = 0; i < 10; i++) - *batch++ = MI_NOOP; - - /* Pad to end of cacheline */ - while ((unsigned long)batch % CACHELINE_BYTES) - *batch++ = MI_NOOP; - - return batch; -} - -#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE) - -static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int err; - - obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (err) - goto err; - - engine->wa_ctx.vma = vma; - return 0; - -err: - i915_gem_object_put(obj); - return err; -} - -static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) -{ - i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); -} - -typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); - -static int intel_init_workaround_bb(struct intel_engine_cs *engine) -{ - struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx, - &wa_ctx->per_ctx }; - wa_bb_func_t wa_bb_fn[2]; - struct page *page; - void *batch, *batch_ptr; - unsigned int i; - int ret; - - if (GEM_DEBUG_WARN_ON(engine->id != RCS0)) - return -EINVAL; - - switch (INTEL_GEN(engine->i915)) { - case 11: - return 0; - case 10: - wa_bb_fn[0] = gen10_init_indirectctx_bb; - wa_bb_fn[1] = NULL; - break; - case 9: - wa_bb_fn[0] = gen9_init_indirectctx_bb; - wa_bb_fn[1] = NULL; - break; - case 8: - wa_bb_fn[0] = gen8_init_indirectctx_bb; - wa_bb_fn[1] = NULL; - break; - default: - MISSING_CASE(INTEL_GEN(engine->i915)); - return 0; - } - - ret = lrc_setup_wa_ctx(engine); - if (ret) { - DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret); - return ret; - } - - page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0); - batch = batch_ptr = kmap_atomic(page); - - /* - * Emit the two workaround batch buffers, recording the offset from the - * start of the workaround batch buffer object for each and their - * respective sizes. 
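
The comment above describes the loop that follows: intel_init_workaround_bb() walks a small table of per-generation emitter callbacks, recording each sub-batch's offset and size as it goes. A stripped-down sketch of that dispatch-and-record loop, with dword-granular offsets and invented emitters:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t *(*emit_fn)(uint32_t *batch);

static uint32_t *emit_a(uint32_t *batch) { *batch++ = 0x1; return batch; }
static uint32_t *emit_b(uint32_t *batch) { *batch++ = 0x2; *batch++ = 0x3; return batch; }

struct sub_batch { size_t offset, size; };   /* in dwords, for simplicity */

int main(void)
{
    const emit_fn fn[] = { emit_a, emit_b, NULL };   /* NULL = nothing to emit */
    struct sub_batch bb[3];
    uint32_t buf[32], *cur = buf;

    for (size_t i = 0; i < 3; i++) {
        bb[i].offset = (size_t)(cur - buf);
        if (fn[i])
            cur = fn[i](cur);
        bb[i].size = (size_t)(cur - buf) - bb[i].offset;
        printf("sub-batch %zu: offset=%zu size=%zu\n",
               i, bb[i].offset, bb[i].size);
    }
    return 0;
}
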
- */ - for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { - wa_bb[i]->offset = batch_ptr - batch; - if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, - CACHELINE_BYTES))) { - ret = -EINVAL; - break; - } - if (wa_bb_fn[i]) - batch_ptr = wa_bb_fn[i](engine, batch_ptr); - wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); - } - - BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE); - - kunmap_atomic(batch); - if (ret) - lrc_destroy_wa_ctx(engine); - - return ret; -} - -static void enable_execlists(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ - - if (INTEL_GEN(dev_priv) >= 11) - I915_WRITE(RING_MODE_GEN7(engine), - _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); - else - I915_WRITE(RING_MODE_GEN7(engine), - _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); - - I915_WRITE(RING_MI_MODE(engine->mmio_base), - _MASKED_BIT_DISABLE(STOP_RING)); - - I915_WRITE(RING_HWS_PGA(engine->mmio_base), - i915_ggtt_offset(engine->status_page.vma)); - POSTING_READ(RING_HWS_PGA(engine->mmio_base)); -} - -static bool unexpected_starting_state(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - bool unexpected = false; - - if (I915_READ(RING_MI_MODE(engine->mmio_base)) & STOP_RING) { - DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n"); - unexpected = true; - } - - return unexpected; -} - -static int gen8_init_common_ring(struct intel_engine_cs *engine) -{ - intel_engine_apply_workarounds(engine); - intel_engine_apply_whitelist(engine); - - intel_mocs_init_engine(engine); - - intel_engine_reset_breadcrumbs(engine); - - if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) { - struct drm_printer p = drm_debug_printer(__func__); - - intel_engine_dump(engine, &p, NULL); - } - - enable_execlists(engine); - - return 0; -} - -static void execlists_reset_prepare(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - unsigned long flags; - - GEM_TRACE("%s: depth<-%d\n", engine->name, - atomic_read(&execlists->tasklet.count)); - - /* - * Prevent request submission to the hardware until we have - * completed the reset in i915_gem_reset_finish(). If a request - * is completed by one engine, it may then queue a request - * to a second via its execlists->tasklet *just* as we are - * calling engine->init_hw() and also writing the ELSP. - * Turning off the execlists->tasklet until the reset is over - * prevents the race. - */ - __tasklet_disable_sync_once(&execlists->tasklet); - GEM_BUG_ON(!reset_in_progress(execlists)); - - intel_engine_stop_cs(engine); - - /* And flush any current direct submission. */ - spin_lock_irqsave(&engine->timeline.lock, flags); - spin_unlock_irqrestore(&engine->timeline.lock, flags); -} - -static bool lrc_regs_ok(const struct i915_request *rq) -{ - const struct intel_ring *ring = rq->ring; - const u32 *regs = rq->hw_context->lrc_reg_state; - - /* Quick spot check for the common signs of context corruption */ - - if (regs[CTX_RING_BUFFER_CONTROL + 1] != - (RING_CTL_SIZE(ring->size) | RING_VALID)) - return false; - - if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma)) - return false; - - return true; -} - -static void reset_csb_pointers(struct intel_engine_execlists *execlists) -{ - const unsigned int reset_value = execlists->csb_size - 1; - - /* - * After a reset, the HW starts writing into CSB entry [0]. 
We - * therefore have to set our HEAD pointer back one entry so that - * the *first* entry we check is entry 0. To complicate this further, - * as we don't wait for the first interrupt after reset, we have to - * fake the HW write to point back to the last entry so that our - * inline comparison of our cached head position against the last HW - * write works even before the first interrupt. - */ - execlists->csb_head = reset_value; - WRITE_ONCE(*execlists->csb_write, reset_value); - wmb(); /* Make sure this is visible to HW (paranoia?) */ - - invalidate_csb_entries(&execlists->csb_status[0], - &execlists->csb_status[reset_value]); -} - -static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - struct intel_context *ce; - struct i915_request *rq; - u32 *regs; - - process_csb(engine); /* drain preemption events */ - - /* Following the reset, we need to reload the CSB read/write pointers */ - reset_csb_pointers(&engine->execlists); - - /* - * Save the currently executing context, even if we completed - * its request, it was still running at the time of the - * reset and will have been clobbered. - */ - if (!port_isset(execlists->port)) - goto out_clear; - - ce = port_request(execlists->port)->hw_context; - - /* - * Catch up with any missed context-switch interrupts. - * - * Ideally we would just read the remaining CSB entries now that we - * know the gpu is idle. However, the CSB registers are sometimes^W - * often trashed across a GPU reset! Instead we have to rely on - * guessing the missed context-switch events by looking at what - * requests were completed. - */ - execlists_cancel_port_requests(execlists); - - /* Push back any incomplete requests for replay after the reset. */ - rq = __unwind_incomplete_requests(engine); - if (!rq) - goto out_replay; - - if (rq->hw_context != ce) { /* caught just before a CS event */ - rq = NULL; - goto out_replay; - } - - /* - * If this request hasn't started yet, e.g. it is waiting on a - * semaphore, we need to avoid skipping the request or else we - * break the signaling chain. However, if the context is corrupt - * the request will not restart and we will be stuck with a wedged - * device. It is quite often the case that if we issue a reset - * while the GPU is loading the context image, that the context - * image becomes corrupt. - * - * Otherwise, if we have not started yet, the request should replay - * perfectly and we do not need to flag the result as being erroneous. - */ - if (!i915_request_started(rq) && lrc_regs_ok(rq)) - goto out_replay; - - /* - * If the request was innocent, we leave the request in the ELSP - * and will try to replay it on restarting. The context image may - * have been corrupted by the reset, in which case we may have - * to service a new GPU hang, but more likely we can continue on - * without impact. - * - * If the request was guilty, we presume the context is corrupt - * and have to at least restore the RING register in the context - * image back to the expected values to skip over the guilty request. - */ - i915_reset_request(rq, stalled); - if (!stalled && lrc_regs_ok(rq)) - goto out_replay; - - /* - * We want a simple context + ring to execute the breadcrumb update. - * We cannot rely on the context being intact across the GPU hang, - * so clear it and rebuild just what we need for the breadcrumb. 
- * All pending requests for this context will be zapped, and any - * future request will be after userspace has had the opportunity - * to recreate its own state. - */ - regs = ce->lrc_reg_state; - if (engine->pinned_default_state) { - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - } - execlists_init_reg_state(regs, ce, engine, ce->ring); - - /* Rerun the request; its payload has been neutered (if guilty). */ -out_replay: - ce->ring->head = - rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail; - intel_ring_update_space(ce->ring); - __execlists_update_reg_state(ce, engine); - -out_clear: - execlists_clear_all_active(execlists); -} - -static void execlists_reset(struct intel_engine_cs *engine, bool stalled) -{ - unsigned long flags; - - GEM_TRACE("%s\n", engine->name); - - spin_lock_irqsave(&engine->timeline.lock, flags); - - __execlists_reset(engine, stalled); - - spin_unlock_irqrestore(&engine->timeline.lock, flags); -} - -static void nop_submission_tasklet(unsigned long data) -{ - /* The driver is wedged; don't process any more events. */ -} - -static void execlists_cancel_requests(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request *rq, *rn; - struct rb_node *rb; - unsigned long flags; - - GEM_TRACE("%s\n", engine->name); - - /* - * Before we call engine->cancel_requests(), we should have exclusive - * access to the submission state. This is arranged for us by the - * caller disabling the interrupt generation, the tasklet and other - * threads that may then access the same state, giving us a free hand - * to reset state. However, we still need to let lockdep be aware that - * we know this state may be accessed in hardirq context, so we - * disable the irq around this manipulation and we want to keep - * the spinlock focused on its duties and not accidentally conflate - * coverage to the submission's irq state. (Similarly, although we - * shouldn't need to disable irq around the manipulation of the - * submission's irq state, we also wish to remind ourselves that - * it is irq state.) - */ - spin_lock_irqsave(&engine->timeline.lock, flags); - - __execlists_reset(engine, true); - - /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->timeline.requests, link) { - if (!i915_request_signaled(rq)) - dma_fence_set_error(&rq->fence, -EIO); - - i915_request_mark_complete(rq); - } - - /* Flush the queued requests to the timeline list (for retiring). 
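
execlists_cancel_requests() above sweeps every request that has not already signalled, tags it with -EIO and marks it complete so nothing is left pending once the driver is wedged. The same sweep over a plain array, as a sketch (the -5 below merely stands in for -EIO):

#include <stdio.h>

struct fake_request {
    int error;          /* 0 = no error recorded */
    int completed;
};

static void cancel_all(struct fake_request *rq, int count)
{
    for (int i = 0; i < count; i++) {
        if (!rq[i].completed)
            rq[i].error = -5;      /* only un-signalled work gets the error */
        rq[i].completed = 1;       /* nothing will ever run it now */
    }
}

int main(void)
{
    struct fake_request q[3] = { { .completed = 1 }, { 0 }, { 0 } };

    cancel_all(q, 3);
    for (int i = 0; i < 3; i++)
        printf("req %d: completed=%d error=%d\n", i, q[i].completed, q[i].error);
    return 0;
}
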
*/ - while ((rb = rb_first_cached(&execlists->queue))) { - struct i915_priolist *p = to_priolist(rb); - int i; - - priolist_for_each_request_consume(rq, rn, p, i) { - list_del_init(&rq->sched.link); - __i915_request_submit(rq); - dma_fence_set_error(&rq->fence, -EIO); - i915_request_mark_complete(rq); - } - - rb_erase_cached(&p->node, &execlists->queue); - i915_priolist_free(p); - } - - /* Remaining _unready_ requests will be nop'ed when submitted */ - - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; - GEM_BUG_ON(port_isset(execlists->port)); - - GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); - execlists->tasklet.func = nop_submission_tasklet; - - spin_unlock_irqrestore(&engine->timeline.lock, flags); -} - -static void execlists_reset_finish(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - - /* - * After a GPU reset, we may have requests to replay. Do so now while - * we still have the forcewake to be sure that the GPU is not allowed - * to sleep before we restart and reload a context. - */ - GEM_BUG_ON(!reset_in_progress(execlists)); - if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) - execlists->tasklet.func(execlists->tasklet.data); - - if (__tasklet_enable(&execlists->tasklet)) - /* And kick in case we missed a new request submission. */ - tasklet_hi_schedule(&execlists->tasklet); - GEM_TRACE("%s: depth->%d\n", engine->name, - atomic_read(&execlists->tasklet.count)); -} - -static int gen8_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* - * WaDisableCtxRestoreArbitration:bdw,chv - * - * We don't need to perform MI_ARB_ENABLE as often as we do (in - * particular all the gen that do not need the w/a at all!), if we - * took care to make sure that on every switch into this context - * (both ordinary and for preemption) that arbitrartion was enabled - * we would be fine. However, for gen8 there is another w/a that - * requires us to not preempt inside GPGPU execution, so we keep - * arbitration disabled for gen8 batches. Arbitration will be - * re-enabled before we close the request - * (engine->emit_fini_breadcrumb). - */ - *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - - /* FIXME(BDW+): Address space and security selectors. */ - *cs++ = MI_BATCH_BUFFER_START_GEN8 | - (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); - *cs++ = lower_32_bits(offset); - *cs++ = upper_32_bits(offset); - - intel_ring_advance(rq, cs); - - return 0; -} - -static int gen9_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - - *cs++ = MI_BATCH_BUFFER_START_GEN8 | - (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8)); - *cs++ = lower_32_bits(offset); - *cs++ = upper_32_bits(offset); - - *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - return 0; -} - -static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, - ~(engine->irq_enable_mask | engine->irq_keep_mask)); - ENGINE_POSTING_READ(engine, RING_IMR); -} - -static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); -} - -static int gen8_emit_flush(struct i915_request *request, u32 mode) -{ - u32 cmd, *cs; - - cs = intel_ring_begin(request, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - cmd = MI_FLUSH_DW + 1; - - /* We always require a command barrier so that subsequent - * commands, such as breadcrumb interrupts, are strictly ordered - * wrt the contents of the write cache being flushed to memory - * (and thus being coherent from the CPU). - */ - cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; - - if (mode & EMIT_INVALIDATE) { - cmd |= MI_INVALIDATE_TLB; - if (request->engine->class == VIDEO_DECODE_CLASS) - cmd |= MI_INVALIDATE_BSD; - } - - *cs++ = cmd; - *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = 0; /* upper addr */ - *cs++ = 0; /* value */ - intel_ring_advance(request, cs); - - return 0; -} - -static int gen8_emit_flush_render(struct i915_request *request, - u32 mode) -{ - struct intel_engine_cs *engine = request->engine; - u32 scratch_addr = - i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES; - bool vf_flush_wa = false, dc_flush_wa = false; - u32 *cs, flags = 0; - int len; - - flags |= PIPE_CONTROL_CS_STALL; - - if (mode & EMIT_FLUSH) { - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; - flags |= PIPE_CONTROL_FLUSH_ENABLE; - } - - if (mode & EMIT_INVALIDATE) { - flags |= PIPE_CONTROL_TLB_INVALIDATE; - flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; - - /* - * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL - * pipe control. - */ - if (IS_GEN(request->i915, 9)) - vf_flush_wa = true; - - /* WaForGAMHang:kbl */ - if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0)) - dc_flush_wa = true; - } - - len = 6; - - if (vf_flush_wa) - len += 6; - - if (dc_flush_wa) - len += 12; - - cs = intel_ring_begin(request, len); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - if (vf_flush_wa) - cs = gen8_emit_pipe_control(cs, 0, 0); - - if (dc_flush_wa) - cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, - 0); - - cs = gen8_emit_pipe_control(cs, flags, scratch_addr); - - if (dc_flush_wa) - cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); - - intel_ring_advance(request, cs); - - return 0; -} - -/* - * Reserve space for 2 NOOPs at the end of each request to be - * used as a workaround for not being allowed to do lite - * restore with HEAD==TAIL (WaIdleLiteRestore). - */ -static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) -{ - /* Ensure there's always at least one preemption point per-request. 
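
gen8_emit_flush_render() above sizes its command stream before emitting anything: a 6-dword base, plus 6 more when the gen9 VF-cache workaround applies and 12 more for the KBL DC-flush workaround. The same computation in isolation:

#include <stdbool.h>
#include <stdio.h>

/* Mirror of the "base + optional workaround" length computation. */
static int flush_len(bool vf_flush_wa, bool dc_flush_wa)
{
    int len = 6;               /* the main PIPE_CONTROL-sized packet */

    if (vf_flush_wa)
        len += 6;              /* leading NULL flush */
    if (dc_flush_wa)
        len += 12;             /* DC flush before + CS stall after */

    return len;
}

int main(void)
{
    printf("plain: %d dwords\n", flush_len(false, false));
    printf("gen9:  %d dwords\n", flush_len(true, false));
    printf("kbl:   %d dwords\n", flush_len(true, true));
    return 0;
}
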
*/ - *cs++ = MI_ARB_CHECK; - *cs++ = MI_NOOP; - request->wa_tail = intel_ring_offset(request, cs); - - return cs; -} - -static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) -{ - cs = gen8_emit_ggtt_write(cs, - request->fence.seqno, - request->timeline->hwsp_offset, - 0); - - cs = gen8_emit_ggtt_write(cs, - intel_engine_next_hangcheck_seqno(request->engine), - I915_GEM_HWS_HANGCHECK_ADDR, - MI_FLUSH_DW_STORE_INDEX); - - - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - - request->tail = intel_ring_offset(request, cs); - assert_ring_tail_valid(request->ring, request->tail); - - return gen8_emit_wa_tail(request, cs); -} - -static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) -{ - cs = gen8_emit_ggtt_write_rcs(cs, - request->fence.seqno, - request->timeline->hwsp_offset, - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_CS_STALL); - - cs = gen8_emit_ggtt_write_rcs(cs, - intel_engine_next_hangcheck_seqno(request->engine), - I915_GEM_HWS_HANGCHECK_ADDR, - PIPE_CONTROL_STORE_DATA_INDEX); - - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - - request->tail = intel_ring_offset(request, cs); - assert_ring_tail_valid(request->ring, request->tail); - - return gen8_emit_wa_tail(request, cs); -} - -static int gen8_init_rcs_context(struct i915_request *rq) -{ - int ret; - - ret = intel_engine_emit_ctx_wa(rq); - if (ret) - return ret; - - ret = intel_rcs_context_init_mocs(rq); - /* - * Failing to program the MOCS is non-fatal.The system will not - * run at peak performance. So generate an error and carry on. - */ - if (ret) - DRM_ERROR("MOCS failed to program: expect performance issues.\n"); - - return i915_gem_render_state_emit(rq); -} - -/** - * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer - * @engine: Engine Command Streamer. - */ -void intel_logical_ring_cleanup(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv; - - /* - * Tasklet cannot be active at this point due intel_mark_active/idle - * so this is just for documentation. - */ - if (WARN_ON(test_bit(TASKLET_STATE_SCHED, - &engine->execlists.tasklet.state))) - tasklet_kill(&engine->execlists.tasklet); - - dev_priv = engine->i915; - - if (engine->buffer) { - WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); - } - - if (engine->cleanup) - engine->cleanup(engine); - - intel_engine_cleanup_common(engine); - - lrc_destroy_wa_ctx(engine); - - engine->i915 = NULL; - dev_priv->engine[engine->id] = NULL; - kfree(engine); -} - -void intel_execlists_set_default_submission(struct intel_engine_cs *engine) -{ - engine->submit_request = execlists_submit_request; - engine->cancel_requests = execlists_cancel_requests; - engine->schedule = i915_schedule; - engine->execlists.tasklet.func = execlists_submission_tasklet; - - engine->reset.prepare = execlists_reset_prepare; - engine->reset.reset = execlists_reset; - engine->reset.finish = execlists_reset_finish; - - engine->park = NULL; - engine->unpark = NULL; - - engine->flags |= I915_ENGINE_SUPPORTS_STATS; - if (!intel_vgpu_active(engine->i915)) - engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - if (engine->preempt_context && - HAS_LOGICAL_RING_PREEMPTION(engine->i915)) - engine->flags |= I915_ENGINE_HAS_PREEMPTION; -} - -static void -logical_ring_default_vfuncs(struct intel_engine_cs *engine) -{ - /* Default vfuncs which can be overriden by each engine. 
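
logical_ring_default_vfuncs() below fills an ops structure with common defaults which specific engines (the render ring, for example) then partially override. A generic sketch of that defaults-then-override pattern with invented function names:

#include <stdio.h>

struct engine_ops {
    void (*emit_flush)(void);
    void (*emit_fini)(void);
};

static void generic_flush(void) { puts("generic flush"); }
static void generic_fini(void)  { puts("generic fini");  }
static void render_flush(void)  { puts("render flush");  }

/* Every engine starts from the same defaults... */
static void default_vfuncs(struct engine_ops *ops)
{
    ops->emit_flush = generic_flush;
    ops->emit_fini = generic_fini;
}

/* ...and a specific engine class overrides only what it must. */
static void render_overrides(struct engine_ops *ops)
{
    ops->emit_flush = render_flush;
}

int main(void)
{
    struct engine_ops rcs;

    default_vfuncs(&rcs);
    render_overrides(&rcs);
    rcs.emit_flush();   /* render flush */
    rcs.emit_fini();    /* generic fini */
    return 0;
}
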
*/ - engine->init_hw = gen8_init_common_ring; - - engine->reset.prepare = execlists_reset_prepare; - engine->reset.reset = execlists_reset; - engine->reset.finish = execlists_reset_finish; - - engine->cops = &execlists_context_ops; - engine->request_alloc = execlists_request_alloc; - - engine->emit_flush = gen8_emit_flush; - engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; - engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; - - engine->set_default_submission = intel_execlists_set_default_submission; - - if (INTEL_GEN(engine->i915) < 11) { - engine->irq_enable = gen8_logical_ring_enable_irq; - engine->irq_disable = gen8_logical_ring_disable_irq; - } else { - /* - * TODO: On Gen11 interrupt masks need to be clear - * to allow C6 entry. Keep interrupts enabled at - * and take the hit of generating extra interrupts - * until a more refined solution exists. - */ - } - if (IS_GEN(engine->i915, 8)) - engine->emit_bb_start = gen8_emit_bb_start; - else - engine->emit_bb_start = gen9_emit_bb_start; -} - -static inline void -logical_ring_default_irqs(struct intel_engine_cs *engine) -{ - unsigned int shift = 0; - - if (INTEL_GEN(engine->i915) < 11) { - const u8 irq_shifts[] = { - [RCS0] = GEN8_RCS_IRQ_SHIFT, - [BCS0] = GEN8_BCS_IRQ_SHIFT, - [VCS0] = GEN8_VCS0_IRQ_SHIFT, - [VCS1] = GEN8_VCS1_IRQ_SHIFT, - [VECS0] = GEN8_VECS_IRQ_SHIFT, - }; - - shift = irq_shifts[engine->id]; - } - - engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; - engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; -} - -static int -logical_ring_setup(struct intel_engine_cs *engine) -{ - int err; - - err = intel_engine_setup_common(engine); - if (err) - return err; - - /* Intentionally left blank. */ - engine->buffer = NULL; - - tasklet_init(&engine->execlists.tasklet, - execlists_submission_tasklet, (unsigned long)engine); - - logical_ring_default_vfuncs(engine); - logical_ring_default_irqs(engine); - - return 0; -} - -static int logical_ring_init(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - struct intel_engine_execlists * const execlists = &engine->execlists; - u32 base = engine->mmio_base; - int ret; - - ret = intel_engine_init_common(engine); - if (ret) - return ret; - - intel_engine_init_workarounds(engine); - - if (HAS_LOGICAL_RING_ELSQ(i915)) { - execlists->submit_reg = i915->uncore.regs + - i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)); - execlists->ctrl_reg = i915->uncore.regs + - i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)); - } else { - execlists->submit_reg = i915->uncore.regs + - i915_mmio_reg_offset(RING_ELSP(base)); - } - - execlists->preempt_complete_status = ~0u; - if (engine->preempt_context) - execlists->preempt_complete_status = - upper_32_bits(engine->preempt_context->lrc_desc); - - execlists->csb_status = - &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; - - execlists->csb_write = - &engine->status_page.addr[intel_hws_csb_write_index(i915)]; - - if (INTEL_GEN(engine->i915) < 11) - execlists->csb_size = GEN8_CSB_ENTRIES; - else - execlists->csb_size = GEN11_CSB_ENTRIES; - - reset_csb_pointers(execlists); - - return 0; -} - -int logical_render_ring_init(struct intel_engine_cs *engine) -{ - int ret; - - ret = logical_ring_setup(engine); - if (ret) - return ret; - - /* Override some for render ring. 
*/ - engine->init_context = gen8_init_rcs_context; - engine->emit_flush = gen8_emit_flush_render; - engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; - - ret = logical_ring_init(engine); - if (ret) - return ret; - - ret = intel_init_workaround_bb(engine); - if (ret) { - /* - * We continue even if we fail to initialize WA batch - * because we only expect rare glitches but nothing - * critical to prevent us from using GPU - */ - DRM_ERROR("WA batch buffer initialization failed: %d\n", - ret); - } - - intel_engine_init_whitelist(engine); - - return 0; -} - -int logical_xcs_ring_init(struct intel_engine_cs *engine) -{ - int err; - - err = logical_ring_setup(engine); - if (err) - return err; - - return logical_ring_init(engine); -} - -static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) -{ - u32 indirect_ctx_offset; - - switch (INTEL_GEN(engine->i915)) { - default: - MISSING_CASE(INTEL_GEN(engine->i915)); - /* fall through */ - case 11: - indirect_ctx_offset = - GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - break; - case 10: - indirect_ctx_offset = - GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - break; - case 9: - indirect_ctx_offset = - GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - break; - case 8: - indirect_ctx_offset = - GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - break; - } - - return indirect_ctx_offset; -} - -static void execlists_init_reg_state(u32 *regs, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring) -{ - struct i915_hw_ppgtt *ppgtt = ce->gem_context->ppgtt; - bool rcs = engine->class == RENDER_CLASS; - u32 base = engine->mmio_base; - - /* A context is actually a big batch buffer with several - * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The - * values we are setting here are only for the first context restore: - * on a subsequent save, the GPU will recreate this batchbuffer with new - * values (including all the missing MI_LOAD_REGISTER_IMM commands that - * we are not initializing here). - */ - regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 
14 : 11) | - MI_LRI_FORCE_POSTED; - - CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base), - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH)); - if (INTEL_GEN(engine->i915) < 11) { - regs[CTX_CONTEXT_CONTROL + 1] |= - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | - CTX_CTRL_RS_CTX_ENABLE); - } - CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); - CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0); - CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0); - CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base), - RING_CTL_SIZE(ring->size) | RING_VALID); - CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); - CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); - CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); - CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); - CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); - CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); - if (rcs) { - struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - - CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0); - CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET, - RING_INDIRECT_CTX_OFFSET(base), 0); - if (wa_ctx->indirect_ctx.size) { - u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - - regs[CTX_RCS_INDIRECT_CTX + 1] = - (ggtt_offset + wa_ctx->indirect_ctx.offset) | - (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - - regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] = - intel_lr_indirect_ctx_offset(engine) << 6; - } - - CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0); - if (wa_ctx->per_ctx.size) { - u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - - regs[CTX_BB_PER_CTX_PTR + 1] = - (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; - } - } - - regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; - - CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); - /* PDP values well be assigned later if needed */ - CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0); - CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0); - CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0); - CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0); - CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0); - CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0); - CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0); - CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0); - - if (i915_vm_is_4lvl(&ppgtt->vm)) { - /* 64b PPGTT (48bit canonical) - * PDP0_DESCRIPTOR contains the base address to PML4 and - * other PDP Descriptors are ignored. - */ - ASSIGN_CTX_PML4(ppgtt, regs); - } else { - ASSIGN_CTX_PDP(ppgtt, regs, 3); - ASSIGN_CTX_PDP(ppgtt, regs, 2); - ASSIGN_CTX_PDP(ppgtt, regs, 1); - ASSIGN_CTX_PDP(ppgtt, regs, 0); - } - - if (rcs) { - regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); - - i915_oa_init_reg_state(engine, ce, regs); - } - - regs[CTX_END] = MI_BATCH_BUFFER_END; - if (INTEL_GEN(engine->i915) >= 10) - regs[CTX_END] |= BIT(0); -} - -static int -populate_lr_context(struct intel_context *ce, - struct drm_i915_gem_object *ctx_obj, - struct intel_engine_cs *engine, - struct intel_ring *ring) -{ - void *vaddr; - u32 *regs; - int ret; - - vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - DRM_DEBUG_DRIVER("Could not map object pages! 
(%d)\n", ret); - return ret; - } - - if (engine->default_state) { - /* - * We only want to copy over the template context state; - * skipping over the headers reserved for GuC communication, - * leaving those as zero. - */ - const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE; - void *defaults; - - defaults = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); - if (IS_ERR(defaults)) { - ret = PTR_ERR(defaults); - goto err_unpin_ctx; - } - - memcpy(vaddr + start, defaults + start, engine->context_size); - i915_gem_object_unpin_map(engine->default_state); - } - - /* The second page of the context object contains some fields which must - * be set up prior to the first execution. */ - regs = vaddr + LRC_STATE_PN * PAGE_SIZE; - execlists_init_reg_state(regs, ce, engine, ring); - if (!engine->default_state) - regs[CTX_CONTEXT_CONTROL + 1] |= - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - if (ce->gem_context == engine->i915->preempt_context && - INTEL_GEN(engine->i915) < 11) - regs[CTX_CONTEXT_CONTROL + 1] |= - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | - CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); - - ret = 0; -err_unpin_ctx: - __i915_gem_object_flush_map(ctx_obj, - LRC_HEADER_PAGES * PAGE_SIZE, - engine->context_size); - i915_gem_object_unpin_map(ctx_obj); - return ret; -} - -static struct i915_timeline *get_timeline(struct i915_gem_context *ctx) -{ - if (ctx->timeline) - return i915_timeline_get(ctx->timeline); - else - return i915_timeline_create(ctx->i915, NULL); -} - -static int execlists_context_deferred_alloc(struct intel_context *ce, - struct intel_engine_cs *engine) -{ - struct drm_i915_gem_object *ctx_obj; - struct i915_vma *vma; - u32 context_size; - struct intel_ring *ring; - struct i915_timeline *timeline; - int ret; - - if (ce->state) - return 0; - - context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); - - /* - * Before the actual start of the context image, we insert a few pages - * for our own use and for sharing with the GuC. 
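
populate_lr_context() above copies the template context image but deliberately skips the header region reserved for GuC communication, leaving it zeroed. A minimal sketch of that offset copy, with made-up sizes:

#include <stdio.h>
#include <string.h>

#define HEADER_SIZE  16          /* stand-in for LRC_HEADER_PAGES * PAGE_SIZE */
#define IMAGE_SIZE   64

int main(void)
{
    unsigned char defaults[IMAGE_SIZE], image[IMAGE_SIZE] = { 0 };

    memset(defaults, 0xaa, sizeof(defaults));

    /* Copy only the template state, leaving the header region zeroed. */
    memcpy(image + HEADER_SIZE, defaults + HEADER_SIZE,
           IMAGE_SIZE - HEADER_SIZE);

    printf("header byte: %#x, state byte: %#x\n",
           (unsigned)image[0], (unsigned)image[HEADER_SIZE]);
    return 0;
}
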
- */ - context_size += LRC_HEADER_PAGES * PAGE_SIZE; - - ctx_obj = i915_gem_object_create(engine->i915, context_size); - if (IS_ERR(ctx_obj)) - return PTR_ERR(ctx_obj); - - vma = i915_vma_instance(ctx_obj, &engine->i915->ggtt.vm, NULL); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto error_deref_obj; - } - - timeline = get_timeline(ce->gem_context); - if (IS_ERR(timeline)) { - ret = PTR_ERR(timeline); - goto error_deref_obj; - } - - ring = intel_engine_create_ring(engine, - timeline, - ce->gem_context->ring_size); - i915_timeline_put(timeline); - if (IS_ERR(ring)) { - ret = PTR_ERR(ring); - goto error_deref_obj; - } - - ret = populate_lr_context(ce, ctx_obj, engine, ring); - if (ret) { - DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret); - goto error_ring_free; - } - - ce->ring = ring; - ce->state = vma; - - return 0; - -error_ring_free: - intel_ring_put(ring); -error_deref_obj: - i915_gem_object_put(ctx_obj); - return ret; -} - -void intel_execlists_show_requests(struct intel_engine_cs *engine, - struct drm_printer *m, - void (*show_request)(struct drm_printer *m, - struct i915_request *rq, - const char *prefix), - unsigned int max) -{ - const struct intel_engine_execlists *execlists = &engine->execlists; - struct i915_request *rq, *last; - unsigned long flags; - unsigned int count; - struct rb_node *rb; - - spin_lock_irqsave(&engine->timeline.lock, flags); - - last = NULL; - count = 0; - list_for_each_entry(rq, &engine->timeline.requests, link) { - if (count++ < max - 1) - show_request(m, rq, "\t\tE "); - else - last = rq; - } - if (last) { - if (count > max) { - drm_printf(m, - "\t\t...skipping %d executing requests...\n", - count - max); - } - show_request(m, last, "\t\tE "); - } - - last = NULL; - count = 0; - if (execlists->queue_priority_hint != INT_MIN) - drm_printf(m, "\t\tQueue priority hint: %d\n", - execlists->queue_priority_hint); - for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - int i; - - priolist_for_each_request(rq, p, i) { - if (count++ < max - 1) - show_request(m, rq, "\t\tQ "); - else - last = rq; - } - } - if (last) { - if (count > max) { - drm_printf(m, - "\t\t...skipping %d queued requests...\n", - count - max); - } - show_request(m, last, "\t\tQ "); - } - - spin_unlock_irqrestore(&engine->timeline.lock, flags); -} - -void intel_lr_context_reset(struct intel_engine_cs *engine, - struct intel_context *ce, - u32 head, - bool scrub) -{ - /* - * We want a simple context + ring to execute the breadcrumb update. - * We cannot rely on the context being intact across the GPU hang, - * so clear it and rebuild just what we need for the breadcrumb. - * All pending requests for this context will be zapped, and any - * future request will be after userspace has had the opportunity - * to recreate its own state. - */ - if (scrub) { - u32 *regs = ce->lrc_reg_state; - - if (engine->pinned_default_state) { - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - } - execlists_init_reg_state(regs, ce, engine, ce->ring); - } - - /* Rerun the request; its payload has been neutered (if guilty). 
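
intel_execlists_show_requests() above prints at most max entries: the first max - 1, a one-line summary of how many were skipped, and then the most recent one. The same truncation logic over a plain array of non-negative items:

#include <stdio.h>

/* Print the first max-1 items, summarise the skipped middle, show the last. */
static void show_bounded(const int *items, int count, int max)
{
    int shown = 0, last = -1;      /* -1 works as "none" for non-negative items */

    for (int i = 0; i < count; i++) {
        if (shown++ < max - 1)
            printf("item %d\n", items[i]);
        else
            last = items[i];       /* remember the most recent one */
    }

    if (last != -1) {
        if (shown > max)
            printf("...skipping %d items...\n", shown - max);
        printf("item %d\n", last);
    }
}

int main(void)
{
    int items[] = { 1, 2, 3, 4, 5, 6, 7 };

    show_bounded(items, 7, 3);     /* prints 1, 2, "...skipping 4...", 7 */
    return 0;
}
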
*/ - ce->ring->head = head; - intel_ring_update_space(ce->ring); - - __execlists_update_reg_state(ce, engine); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_lrc.c" -#endif diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h deleted file mode 100644 index 99f75ee9d087..000000000000 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef _INTEL_LRC_H_ -#define _INTEL_LRC_H_ - -#include "intel_ringbuffer.h" -#include "i915_gem_context.h" - -/* Execlists regs */ -#define RING_ELSP(base) _MMIO((base) + 0x230) -#define RING_EXECLIST_STATUS_LO(base) _MMIO((base) + 0x234) -#define RING_EXECLIST_STATUS_HI(base) _MMIO((base) + 0x234 + 4) -#define RING_CONTEXT_CONTROL(base) _MMIO((base) + 0x244) -#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3) -#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) -#define CTX_CTRL_RS_CTX_ENABLE (1 << 1) -#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) -#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) -#define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) -#define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) - -#define EL_CTRL_LOAD (1 << 0) - -/* The docs specify that the write pointer wraps around after 5h, "After status - * is written out to the last available status QW at offset 5h, this pointer - * wraps to 0." - * - * Therefore, one must infer than even though there are 3 bits available, 6 and - * 7 appear to be * reserved. - */ -#define GEN8_CSB_ENTRIES 6 -#define GEN8_CSB_PTR_MASK 0x7 -#define GEN8_CSB_READ_PTR_MASK (GEN8_CSB_PTR_MASK << 8) -#define GEN8_CSB_WRITE_PTR_MASK (GEN8_CSB_PTR_MASK << 0) - -#define GEN11_CSB_ENTRIES 12 -#define GEN11_CSB_PTR_MASK 0xf -#define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8) -#define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0) - -enum { - INTEL_CONTEXT_SCHEDULE_IN = 0, - INTEL_CONTEXT_SCHEDULE_OUT, - INTEL_CONTEXT_SCHEDULE_PREEMPTED, -}; - -/* Logical Rings */ -void intel_logical_ring_cleanup(struct intel_engine_cs *engine); -int logical_render_ring_init(struct intel_engine_cs *engine); -int logical_xcs_ring_init(struct intel_engine_cs *engine); - -/* Logical Ring Contexts */ - -/* - * We allocate a header at the start of the context image for our own - * use, therefore the actual location of the logical state is offset - * from the start of the VMA. 
The layout is - * - * | [guc] | [hwsp] [logical state] | - * |<- our header ->|<- context image ->| - * - */ -/* The first page is used for sharing data with the GuC */ -#define LRC_GUCSHR_PN (0) -#define LRC_GUCSHR_SZ (1) -/* At the start of the context image is its per-process HWS page */ -#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + LRC_GUCSHR_SZ) -#define LRC_PPHWSP_SZ (1) -/* Finally we have the logical state for the context */ -#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ) - -/* - * Currently we include the PPHWSP in __intel_engine_context_size() so - * the size of the header is synonymous with the start of the PPHWSP. - */ -#define LRC_HEADER_PAGES LRC_PPHWSP_PN - -struct drm_printer; - -struct drm_i915_private; -struct i915_gem_context; - -void intel_execlists_set_default_submission(struct intel_engine_cs *engine); - -void intel_lr_context_reset(struct intel_engine_cs *engine, - struct intel_context *ce, - u32 head, - bool scrub); - -void intel_execlists_show_requests(struct intel_engine_cs *engine, - struct drm_printer *m, - void (*show_request)(struct drm_printer *m, - struct i915_request *rq, - const char *prefix), - unsigned int max); - -#endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/intel_lrc_reg.h deleted file mode 100644 index 5ef932d810a7..000000000000 --- a/drivers/gpu/drm/i915/intel_lrc_reg.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2014-2018 Intel Corporation - */ - -#ifndef _INTEL_LRC_REG_H_ -#define _INTEL_LRC_REG_H_ - -#include - -/* GEN8+ Reg State Context */ -#define CTX_LRI_HEADER_0 0x01 -#define CTX_CONTEXT_CONTROL 0x02 -#define CTX_RING_HEAD 0x04 -#define CTX_RING_TAIL 0x06 -#define CTX_RING_BUFFER_START 0x08 -#define CTX_RING_BUFFER_CONTROL 0x0a -#define CTX_BB_HEAD_U 0x0c -#define CTX_BB_HEAD_L 0x0e -#define CTX_BB_STATE 0x10 -#define CTX_SECOND_BB_HEAD_U 0x12 -#define CTX_SECOND_BB_HEAD_L 0x14 -#define CTX_SECOND_BB_STATE 0x16 -#define CTX_BB_PER_CTX_PTR 0x18 -#define CTX_RCS_INDIRECT_CTX 0x1a -#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c -#define CTX_LRI_HEADER_1 0x21 -#define CTX_CTX_TIMESTAMP 0x22 -#define CTX_PDP3_UDW 0x24 -#define CTX_PDP3_LDW 0x26 -#define CTX_PDP2_UDW 0x28 -#define CTX_PDP2_LDW 0x2a -#define CTX_PDP1_UDW 0x2c -#define CTX_PDP1_LDW 0x2e -#define CTX_PDP0_UDW 0x30 -#define CTX_PDP0_LDW 0x32 -#define CTX_LRI_HEADER_2 0x41 -#define CTX_R_PWR_CLK_STATE 0x42 -#define CTX_END 0x44 - -#define CTX_REG(reg_state, pos, reg, val) do { \ - u32 *reg_state__ = (reg_state); \ - const u32 pos__ = (pos); \ - (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \ - (reg_state__)[(pos__) + 1] = (val); \ -} while (0) - -#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \ - u32 *reg_state__ = (reg_state); \ - const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \ - (reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \ - (reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \ -} while (0) - -#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \ - u32 *reg_state__ = (reg_state); \ - const u64 addr__ = px_dma(&ppgtt->pml4); \ - (reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \ - (reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \ -} while (0) - -#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 -#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 -#define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19 -#define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x1A - -#endif /* _INTEL_LRC_REG_H_ */ diff --git 
a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c deleted file mode 100644 index 274ba78500c0..000000000000 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ /dev/null @@ -1,564 +0,0 @@ -/* - * Copyright (c) 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "intel_mocs.h" -#include "intel_lrc.h" -#include "intel_ringbuffer.h" - -/* structures required */ -struct drm_i915_mocs_entry { - u32 control_value; - u16 l3cc_value; - u16 used; -}; - -struct drm_i915_mocs_table { - unsigned int size; - unsigned int n_entries; - const struct drm_i915_mocs_entry *table; -}; - -/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ -#define _LE_CACHEABILITY(value) ((value) << 0) -#define _LE_TGT_CACHE(value) ((value) << 2) -#define LE_LRUM(value) ((value) << 4) -#define LE_AOM(value) ((value) << 6) -#define LE_RSC(value) ((value) << 7) -#define LE_SCC(value) ((value) << 8) -#define LE_PFM(value) ((value) << 11) -#define LE_SCF(value) ((value) << 14) -#define LE_COS(value) ((value) << 15) -#define LE_SSE(value) ((value) << 17) - -/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ -#define L3_ESC(value) ((value) << 0) -#define L3_SCC(value) ((value) << 1) -#define _L3_CACHEABILITY(value) ((value) << 4) - -/* Helper defines */ -#define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */ -#define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ - -/* (e)LLC caching options */ -#define LE_0_PAGETABLE _LE_CACHEABILITY(0) -#define LE_1_UC _LE_CACHEABILITY(1) -#define LE_2_WT _LE_CACHEABILITY(2) -#define LE_3_WB _LE_CACHEABILITY(3) - -/* Target cache */ -#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0) -#define LE_TC_1_LLC _LE_TGT_CACHE(1) -#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2) -#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3) - -/* L3 caching options */ -#define L3_0_DIRECT _L3_CACHEABILITY(0) -#define L3_1_UC _L3_CACHEABILITY(1) -#define L3_2_RESERVED _L3_CACHEABILITY(2) -#define L3_3_WB _L3_CACHEABILITY(3) - -#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ - [__idx] = { \ - .control_value = __control_value, \ - .l3cc_value = __l3cc_value, \ - .used = 1, \ - } - -/* - * MOCS tables - * - * These are the MOCS tables that are programmed across all the rings. - * The control value is programmed to all the rings that support the - * MOCS registers. While the l3cc_values are only programmed to the - * LNCFCMOCS0 - LNCFCMOCS32 registers. 
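The LE_*/L3_* helpers above are plain shift-and-or field packers. A minimal standalone sketch of how one table entry's numeric values come together, using the same shifts but renamed macros and an arbitrary choice of fields, might look like this:

/* Standalone sketch of how a MOCS entry encodes its fields. */
#include <stdint.h>
#include <stdio.h>

/* Same shifts as the table code above (LeCC control word). */
#define LE_CACHEABILITY(v) ((uint32_t)(v) << 0)  /* 0 = pagetable .. 3 = WB */
#define LE_TGT_CACHE(v)    ((uint32_t)(v) << 2)  /* 1 = LLC */
#define LE_LRUM(v)         ((uint32_t)(v) << 4)  /* LRU age management */

/* L3 control word (two entries share one LNCFCMOCS register). */
#define L3_CACHEABILITY(v) ((uint16_t)(v) << 4)  /* 1 = UC, 3 = WB */

int main(void)
{
	/* "Base - L3 + LLC" style entry: LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) */
	uint32_t control = LE_CACHEABILITY(3) | LE_TGT_CACHE(1) | LE_LRUM(3);
	uint16_t l3cc = L3_CACHEABILITY(3);

	/* prints control=0x37 l3cc=0x30 */
	printf("control=0x%02x l3cc=0x%02x\n", (unsigned)control, (unsigned)l3cc);
	return 0;
}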
- * - * These tables are intended to be kept reasonably consistent across - * HW platforms, and for ICL+, be identical across OSes. To achieve - * that, for Icelake and above, list of entries is published as part - * of bspec. - * - * Entries not part of the following tables are undefined as far as - * userspace is concerned and shouldn't be relied upon. For the time - * being they will be initialized to PTE. - * - * The last two entries are reserved by the hardware. For ICL+ they - * should be initialized according to bspec and never used, for older - * platforms they should never be written to. - * - * NOTE: These tables are part of bspec and defined as part of hardware - * interface for ICL+. For older platforms, they are part of kernel - * ABI. It is expected that, for specific hardware platform, existing - * entries will remain constant and the table will only be updated by - * adding new entries, filling unused positions. - */ -#define GEN9_MOCS_ENTRIES \ - MOCS_ENTRY(I915_MOCS_UNCACHED, \ - LE_1_UC | LE_TC_2_LLC_ELLC, \ - L3_1_UC), \ - MOCS_ENTRY(I915_MOCS_PTE, \ - LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ - L3_3_WB) - -static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - GEN9_MOCS_ENTRIES, - MOCS_ENTRY(I915_MOCS_CACHED, - LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), - L3_3_WB) -}; - -/* NOTE: the LE_TGT_CACHE is not used on Broxton */ -static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - GEN9_MOCS_ENTRIES, - MOCS_ENTRY(I915_MOCS_CACHED, - LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), - L3_3_WB) -}; - -#define GEN11_MOCS_ENTRIES \ - /* Base - Uncached (Deprecated) */ \ - MOCS_ENTRY(I915_MOCS_UNCACHED, \ - LE_1_UC | LE_TC_1_LLC, \ - L3_1_UC), \ - /* Base - L3 + LeCC:PAT (Deprecated) */ \ - MOCS_ENTRY(I915_MOCS_PTE, \ - LE_0_PAGETABLE | LE_TC_1_LLC, \ - L3_3_WB), \ - /* Base - L3 + LLC */ \ - MOCS_ENTRY(2, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ - L3_3_WB), \ - /* Base - Uncached */ \ - MOCS_ENTRY(3, \ - LE_1_UC | LE_TC_1_LLC, \ - L3_1_UC), \ - /* Base - L3 */ \ - MOCS_ENTRY(4, \ - LE_1_UC | LE_TC_1_LLC, \ - L3_3_WB), \ - /* Base - LLC */ \ - MOCS_ENTRY(5, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ - L3_1_UC), \ - /* Age 0 - LLC */ \ - MOCS_ENTRY(6, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ - L3_1_UC), \ - /* Age 0 - L3 + LLC */ \ - MOCS_ENTRY(7, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ - L3_3_WB), \ - /* Age: Don't Chg. - LLC */ \ - MOCS_ENTRY(8, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ - L3_1_UC), \ - /* Age: Don't Chg. 
- L3 + LLC */ \ - MOCS_ENTRY(9, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ - L3_3_WB), \ - /* No AOM - LLC */ \ - MOCS_ENTRY(10, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ - L3_1_UC), \ - /* No AOM - L3 + LLC */ \ - MOCS_ENTRY(11, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ - L3_3_WB), \ - /* No AOM; Age 0 - LLC */ \ - MOCS_ENTRY(12, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ - L3_1_UC), \ - /* No AOM; Age 0 - L3 + LLC */ \ - MOCS_ENTRY(13, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ - L3_3_WB), \ - /* No AOM; Age:DC - LLC */ \ - MOCS_ENTRY(14, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ - L3_1_UC), \ - /* No AOM; Age:DC - L3 + LLC */ \ - MOCS_ENTRY(15, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ - L3_3_WB), \ - /* Self-Snoop - L3 + LLC */ \ - MOCS_ENTRY(18, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ - L3_3_WB), \ - /* Skip Caching - L3 + LLC(12.5%) */ \ - MOCS_ENTRY(19, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \ - L3_3_WB), \ - /* Skip Caching - L3 + LLC(25%) */ \ - MOCS_ENTRY(20, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \ - L3_3_WB), \ - /* Skip Caching - L3 + LLC(50%) */ \ - MOCS_ENTRY(21, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \ - L3_3_WB), \ - /* Skip Caching - L3 + LLC(75%) */ \ - MOCS_ENTRY(22, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \ - L3_3_WB), \ - /* Skip Caching - L3 + LLC(87.5%) */ \ - MOCS_ENTRY(23, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \ - L3_3_WB), \ - /* HW Reserved - SW program but never use */ \ - MOCS_ENTRY(62, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ - L3_1_UC), \ - /* HW Reserved - SW program but never use */ \ - MOCS_ENTRY(63, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ - L3_1_UC) - -static const struct drm_i915_mocs_entry icelake_mocs_table[] = { - GEN11_MOCS_ENTRIES -}; - -/** - * get_mocs_settings() - * @dev_priv: i915 device. - * @table: Output table that will be made to point at appropriate - * MOCS values for the device. - * - * This function will return the values of the MOCS table that needs to - * be programmed for the platform. It will return the values that need - * to be programmed and if they need to be programmed. - * - * Return: true if there are applicable MOCS settings for the device. 
- */ -static bool get_mocs_settings(struct drm_i915_private *dev_priv, - struct drm_i915_mocs_table *table) -{ - bool result = false; - - if (INTEL_GEN(dev_priv) >= 11) { - table->size = ARRAY_SIZE(icelake_mocs_table); - table->table = icelake_mocs_table; - table->n_entries = GEN11_NUM_MOCS_ENTRIES; - result = true; - } else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { - table->size = ARRAY_SIZE(skylake_mocs_table); - table->n_entries = GEN9_NUM_MOCS_ENTRIES; - table->table = skylake_mocs_table; - result = true; - } else if (IS_GEN9_LP(dev_priv)) { - table->size = ARRAY_SIZE(broxton_mocs_table); - table->n_entries = GEN9_NUM_MOCS_ENTRIES; - table->table = broxton_mocs_table; - result = true; - } else { - WARN_ONCE(INTEL_GEN(dev_priv) >= 9, - "Platform that should have a MOCS table does not.\n"); - } - - /* WaDisableSkipCaching:skl,bxt,kbl,glk */ - if (IS_GEN(dev_priv, 9)) { - int i; - - for (i = 0; i < table->size; i++) - if (WARN_ON(table->table[i].l3cc_value & - (L3_ESC(1) | L3_SCC(0x7)))) - return false; - } - - return result; -} - -static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) -{ - switch (engine_id) { - case RCS0: - return GEN9_GFX_MOCS(index); - case VCS0: - return GEN9_MFX0_MOCS(index); - case BCS0: - return GEN9_BLT_MOCS(index); - case VECS0: - return GEN9_VEBOX_MOCS(index); - case VCS1: - return GEN9_MFX1_MOCS(index); - case VCS2: - return GEN11_MFX2_MOCS(index); - default: - MISSING_CASE(engine_id); - return INVALID_MMIO_REG; - } -} - -/* - * Get control_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. - */ -static u32 get_entry_control(const struct drm_i915_mocs_table *table, - unsigned int index) -{ - if (table->table[index].used) - return table->table[index].control_value; - - return table->table[I915_MOCS_PTE].control_value; -} - -/** - * intel_mocs_init_engine() - emit the mocs control table - * @engine: The engine for whom to emit the registers. - * - * This function simply emits a MI_LOAD_REGISTER_IMM command for the - * given table starting at the given address. - */ -void intel_mocs_init_engine(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct drm_i915_mocs_table table; - unsigned int index; - u32 unused_value; - - if (!get_mocs_settings(dev_priv, &table)) - return; - - /* Set unused values to PTE */ - unused_value = table.table[I915_MOCS_PTE].control_value; - - for (index = 0; index < table.size; index++) { - u32 value = get_entry_control(&table, index); - - I915_WRITE(mocs_register(engine->id, index), value); - } - - /* All remaining entries are also unused */ - for (; index < table.n_entries; index++) - I915_WRITE(mocs_register(engine->id, index), unused_value); -} - -/** - * emit_mocs_control_table() - emit the mocs control table - * @rq: Request to set up the MOCS table for. - * @table: The values to program into the control regs. - * - * This function simply emits a MI_LOAD_REGISTER_IMM command for the - * given table starting at the given address. - * - * Return: 0 on success, otherwise the error status. 
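get_entry_control() above implements the "undefined entries become PTE" rule from the table documentation. A self-contained userspace model of that fallback lookup, with made-up index names and values, could look like the following sketch.

/* Userspace model of the "unused index falls back to the PTE entry" lookup. */
#include <stdint.h>
#include <stdio.h>

#define MOCS_IDX_UNCACHED 0
#define MOCS_IDX_PTE      1  /* fallback entry, standing in for I915_MOCS_PTE */

struct mocs_entry {
	uint32_t control_value;
	uint16_t l3cc_value;
	uint16_t used;
};

static const struct mocs_entry table[8] = {
	/* the control/l3cc numbers here are arbitrary, for illustration only */
	[MOCS_IDX_UNCACHED] = { .control_value = 0x05, .l3cc_value = 0x10, .used = 1 },
	[MOCS_IDX_PTE]      = { .control_value = 0x32, .l3cc_value = 0x30, .used = 1 },
	/* indices 2..7 stay zero-initialised, i.e. unused */
};

static uint32_t entry_control(unsigned int index)
{
	if (table[index].used)
		return table[index].control_value;
	return table[MOCS_IDX_PTE].control_value; /* undefined slots become PTE */
}

int main(void)
{
	printf("idx 0 -> 0x%02x, idx 5 -> 0x%02x\n",
	       (unsigned)entry_control(0), (unsigned)entry_control(5));
	return 0;
}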
- */ -static int emit_mocs_control_table(struct i915_request *rq, - const struct drm_i915_mocs_table *table) -{ - enum intel_engine_id engine = rq->engine->id; - unsigned int index; - u32 unused_value; - u32 *cs; - - if (GEM_WARN_ON(table->size > table->n_entries)) - return -ENODEV; - - /* Set unused values to PTE */ - unused_value = table->table[I915_MOCS_PTE].control_value; - - cs = intel_ring_begin(rq, 2 + 2 * table->n_entries); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries); - - for (index = 0; index < table->size; index++) { - u32 value = get_entry_control(table, index); - - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = value; - } - - /* All remaining entries are also unused */ - for (; index < table->n_entries; index++) { - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = unused_value; - } - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -/* - * Get l3cc_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. - */ -static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, - unsigned int index) -{ - if (table->table[index].used) - return table->table[index].l3cc_value; - - return table->table[I915_MOCS_PTE].l3cc_value; -} - -static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, - u16 low, - u16 high) -{ - return low | high << 16; -} - -/** - * emit_mocs_l3cc_table() - emit the mocs control table - * @rq: Request to set up the MOCS table for. - * @table: The values to program into the control regs. - * - * This function simply emits a MI_LOAD_REGISTER_IMM command for the - * given table starting at the given address. This register set is - * programmed in pairs. - * - * Return: 0 on success, otherwise the error status. - */ -static int emit_mocs_l3cc_table(struct i915_request *rq, - const struct drm_i915_mocs_table *table) -{ - u16 unused_value; - unsigned int i; - u32 *cs; - - if (GEM_WARN_ON(table->size > table->n_entries)) - return -ENODEV; - - /* Set unused values to PTE */ - unused_value = table->table[I915_MOCS_PTE].l3cc_value; - - cs = intel_ring_begin(rq, 2 + table->n_entries); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2); - - for (i = 0; i < table->size / 2; i++) { - u16 low = get_entry_l3cc(table, 2 * i); - u16 high = get_entry_l3cc(table, 2 * i + 1); - - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, low, high); - } - - /* Odd table size - 1 left over */ - if (table->size & 0x01) { - u16 low = get_entry_l3cc(table, 2 * i); - - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, low, unused_value); - i++; - } - - /* All remaining entries are also unused */ - for (; i < table->n_entries / 2; i++) { - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, unused_value, unused_value); - } - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -/** - * intel_mocs_init_l3cc_table() - program the mocs control table - * @dev_priv: i915 device private - * - * This function simply programs the mocs registers for the given table - * starting at the given address. This register set is programmed in pairs. - * - * These registers may get programmed more than once, it is simpler to - * re-program 32 registers than maintain the state of when they were programmed. - * We are always reprogramming with the same values and this only on context - * start. 
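emit_mocs_l3cc_table() above packs two 16-bit l3cc words per 32-bit LNCFCMOCS register and pads an odd-sized table with the unused value. The same pairing logic, pulled out into a standalone sketch (pack_l3cc() is a hypothetical helper, not a driver function), is shown below.

/* Sketch of packing two 16-bit l3cc values per 32-bit register, including the
 * odd-sized-table case, modelled on the loop structure above. */
#include <stdint.h>
#include <stdio.h>

static uint32_t l3cc_combine(uint16_t low, uint16_t high)
{
	return (uint32_t)low | ((uint32_t)high << 16);
}

/* Pack 'count' values into regs[], padding the tail with 'unused'. */
static unsigned int pack_l3cc(uint32_t *regs, unsigned int nregs,
			      const uint16_t *vals, unsigned int count,
			      uint16_t unused)
{
	unsigned int i = 0;

	for (; i < count / 2 && i < nregs; i++)
		regs[i] = l3cc_combine(vals[2 * i], vals[2 * i + 1]);

	if ((count & 1) && i < nregs)        /* one value left over */
		regs[i++] = l3cc_combine(vals[count - 1], unused);

	for (; i < nregs; i++)               /* remaining registers stay "unused" */
		regs[i] = l3cc_combine(unused, unused);

	return i;
}

int main(void)
{
	const uint16_t vals[3] = { 0x10, 0x30, 0x30 };
	uint32_t regs[4];

	pack_l3cc(regs, 4, vals, 3, 0x30);
	/* prints reg0=0x00300010 reg1=0x00300030 */
	printf("reg0=0x%08x reg1=0x%08x\n", (unsigned)regs[0], (unsigned)regs[1]);
	return 0;
}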
- * - * Return: Nothing. - */ -void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) -{ - struct drm_i915_mocs_table table; - unsigned int i; - u16 unused_value; - - if (!get_mocs_settings(dev_priv, &table)) - return; - - /* Set unused values to PTE */ - unused_value = table.table[I915_MOCS_PTE].l3cc_value; - - for (i = 0; i < table.size / 2; i++) { - u16 low = get_entry_l3cc(&table, 2 * i); - u16 high = get_entry_l3cc(&table, 2 * i + 1); - - I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, low, high)); - } - - /* Odd table size - 1 left over */ - if (table.size & 0x01) { - u16 low = get_entry_l3cc(&table, 2 * i); - - I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, low, unused_value)); - i++; - } - - /* All remaining entries are also unused */ - for (; i < table.n_entries / 2; i++) - I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, unused_value, unused_value)); -} - -/** - * intel_rcs_context_init_mocs() - program the MOCS register. - * @rq: Request to set up the MOCS tables for. - * - * This function will emit a batch buffer with the values required for - * programming the MOCS register values for all the currently supported - * rings. - * - * These registers are partially stored in the RCS context, so they are - * emitted at the same time so that when a context is created these registers - * are set up. These registers have to be emitted into the start of the - * context as setting the ELSP will re-init some of these registers back - * to the hw values. - * - * Return: 0 on success, otherwise the error status. - */ -int intel_rcs_context_init_mocs(struct i915_request *rq) -{ - struct drm_i915_mocs_table t; - int ret; - - if (get_mocs_settings(rq->i915, &t)) { - /* Program the RCS control registers */ - ret = emit_mocs_control_table(rq, &t); - if (ret) - return ret; - - /* Now program the l3cc registers */ - ret = emit_mocs_l3cc_table(rq, &t); - if (ret) - return ret; - } - - return 0; -} diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/intel_mocs.h deleted file mode 100644 index 3d99d1271b2b..000000000000 --- a/drivers/gpu/drm/i915/intel_mocs.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef INTEL_MOCS_H -#define INTEL_MOCS_H - -/** - * DOC: Memory Objects Control State (MOCS) - * - * Motivation: - * In previous Gens the MOCS settings was a value that was set by user land as - * part of the batch. In Gen9 this has changed to be a single table (per ring) - * that all batches now reference by index instead of programming the MOCS - * directly. - * - * The one wrinkle in this is that only PART of the MOCS tables are included - * in context (The GFX_MOCS_0 - GFX_MOCS_64 and the LNCFCMOCS0 - LNCFCMOCS32 - * registers). The rest are not (the settings for the other rings). - * - * This table needs to be set at system start-up because the way the table - * interacts with the contexts and the GmmLib interface. - * - * - * Implementation: - * - * The tables (one per supported platform) are defined in intel_mocs.c - * and are programmed in the first batch after the context is loaded - * (with the hardware workarounds). This will then let the usual - * context handling keep the MOCS in step. - */ - -#include "i915_drv.h" - -int intel_rcs_context_init_mocs(struct i915_request *rq); -void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv); -void intel_mocs_init_engine(struct intel_engine_cs *engine); - -#endif diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c deleted file mode 100644 index 3844581f622c..000000000000 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ /dev/null @@ -1,2339 +0,0 @@ -/* - * Copyright © 2008-2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * Zou Nan hai - * Xiang Hai hao - * - */ - -#include - -#include - -#include "i915_drv.h" -#include "i915_gem_render_state.h" -#include "i915_reset.h" -#include "i915_trace.h" -#include "intel_drv.h" -#include "intel_workarounds.h" - -/* Rough estimate of the typical request size, performing a flush, - * set-context and then emitting the batch. 
- */ -#define LEGACY_REQUEST_SIZE 200 - -unsigned int intel_ring_update_space(struct intel_ring *ring) -{ - unsigned int space; - - space = __intel_ring_space(ring->head, ring->emit, ring->size); - - ring->space = space; - return space; -} - -static int -gen2_render_ring_flush(struct i915_request *rq, u32 mode) -{ - unsigned int num_store_dw; - u32 cmd, *cs; - - cmd = MI_FLUSH; - num_store_dw = 0; - if (mode & EMIT_INVALIDATE) - cmd |= MI_READ_FLUSH; - if (mode & EMIT_FLUSH) - num_store_dw = 4; - - cs = intel_ring_begin(rq, 2 + 3 * num_store_dw); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = cmd; - while (num_store_dw--) { - *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cs++ = i915_scratch_offset(rq->i915); - *cs++ = 0; - } - *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH; - - intel_ring_advance(rq, cs); - - return 0; -} - -static int -gen4_render_ring_flush(struct i915_request *rq, u32 mode) -{ - u32 cmd, *cs; - int i; - - /* - * read/write caches: - * - * I915_GEM_DOMAIN_RENDER is always invalidated, but is - * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is - * also flushed at 2d versus 3d pipeline switches. - * - * read-only caches: - * - * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if - * MI_READ_FLUSH is set, and is always flushed on 965. - * - * I915_GEM_DOMAIN_COMMAND may not exist? - * - * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is - * invalidated when MI_EXE_FLUSH is set. - * - * I915_GEM_DOMAIN_VERTEX, which exists on 965, is - * invalidated with every MI_FLUSH. - * - * TLBs: - * - * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND - * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and - * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER - * are flushed at any MI_FLUSH. - */ - - cmd = MI_FLUSH; - if (mode & EMIT_INVALIDATE) { - cmd |= MI_EXE_FLUSH; - if (IS_G4X(rq->i915) || IS_GEN(rq->i915, 5)) - cmd |= MI_INVALIDATE_ISP; - } - - i = 2; - if (mode & EMIT_INVALIDATE) - i += 20; - - cs = intel_ring_begin(rq, i); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = cmd; - - /* - * A random delay to let the CS invalidate take effect? Without this - * delay, the GPU relocation path fails as the CS does not see - * the updated contents. Just as important, if we apply the flushes - * to the EMIT_FLUSH branch (i.e. immediately after the relocation - * write and before the invalidate on the next batch), the relocations - * still fail. This implies that is a delay following invalidation - * that is required to reset the caches as opposed to a delay to - * ensure the memory is written. - */ - if (mode & EMIT_INVALIDATE) { - *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; - *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - *cs++ = 0; - - for (i = 0; i < 12; i++) - *cs++ = MI_FLUSH; - - *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; - *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - *cs++ = 0; - } - - *cs++ = cmd; - - intel_ring_advance(rq, cs); - - return 0; -} - -/* - * Emits a PIPE_CONTROL with a non-zero post-sync operation, for - * implementing two workarounds on gen6. From section 1.4.7.1 - * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: - * - * [DevSNB-C+{W/A}] Before any depth stall flush (including those - * produced by non-pipelined state commands), software needs to first - * send a PIPE_CONTROL with no bits set except Post-Sync Operation != - * 0. 
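intel_ring_update_space() above boils down to a masked subtraction on a power-of-two ring. A minimal sketch of that free-space calculation, assuming a power-of-two size and a small guard gap (the exact guard the driver keeps is not reproduced here), follows.

/* Minimal model of free-space accounting in a power-of-two ring buffer:
 * keep a small guard gap so the emit pointer never catches up to head. */
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE  (32u * 4096u)   /* must be a power of two */
#define GUARD      64u             /* e.g. one cacheline, assumed value */

static uint32_t ring_space(uint32_t head, uint32_t emit)
{
	/* Wraps correctly because RING_SIZE is a power of two. */
	return (head - emit - GUARD) & (RING_SIZE - 1);
}

int main(void)
{
	printf("free after emitting 64 bytes: %u\n", ring_space(0, 64));
	printf("free when emit is one guard gap behind head: %u\n",
	       ring_space(128, 64));
	return 0;
}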
- * - * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable - * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. - * - * And the workaround for these two requires this workaround first: - * - * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent - * BEFORE the pipe-control with a post-sync op and no write-cache - * flushes. - * - * And this last workaround is tricky because of the requirements on - * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM - * volume 2 part 1: - * - * "1 of the following must also be set: - * - Render Target Cache Flush Enable ([12] of DW1) - * - Depth Cache Flush Enable ([0] of DW1) - * - Stall at Pixel Scoreboard ([1] of DW1) - * - Depth Stall ([13] of DW1) - * - Post-Sync Operation ([13] of DW1) - * - Notify Enable ([8] of DW1)" - * - * The cache flushes require the workaround flush that triggered this - * one, so we can't use it. Depth stall would trigger the same. - * Post-sync nonzero is what triggered this second workaround, so we - * can't use that one either. Notify enable is IRQs, which aren't - * really our business. That leaves only stall at scoreboard. - */ -static int -gen6_emit_post_sync_nonzero_flush(struct i915_request *rq) -{ - u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; - u32 *cs; - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(5); - *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; - *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; /* low dword */ - *cs++ = 0; /* high dword */ - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(5); - *cs++ = PIPE_CONTROL_QW_WRITE; - *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - *cs++ = 0; - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -static int -gen6_render_ring_flush(struct i915_request *rq, u32 mode) -{ - u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; - u32 *cs, flags = 0; - int ret; - - /* Force SNB workarounds for PIPE_CONTROL flushes */ - ret = gen6_emit_post_sync_nonzero_flush(rq); - if (ret) - return ret; - - /* Just flush everything. Experiments have shown that reducing the - * number of bits based on the write domains has little performance - * impact. - */ - if (mode & EMIT_FLUSH) { - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - /* - * Ensure that any following seqno writes only happen - * when the render cache is indeed flushed. - */ - flags |= PIPE_CONTROL_CS_STALL; - } - if (mode & EMIT_INVALIDATE) { - flags |= PIPE_CONTROL_TLB_INVALIDATE; - flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - /* - * TLB invalidate requires a post-sync write. 
- */ - flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; - } - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = flags; - *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - intel_ring_advance(rq, cs); - - return 0; -} - -static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - /* First we do the gen6_emit_post_sync_nonzero_flush w/a */ - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; - *cs++ = 0; - *cs++ = 0; - - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_QW_WRITE; - *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - - /* Finally we can flush and with it emit the breadcrumb */ - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL); - *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = rq->fence.seqno; - - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_STORE_DATA_INDEX; - *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} - -static int -gen7_render_ring_cs_stall_wa(struct i915_request *rq) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; - *cs++ = 0; - *cs++ = 0; - intel_ring_advance(rq, cs); - - return 0; -} - -static int -gen7_render_ring_flush(struct i915_request *rq, u32 mode) -{ - u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; - u32 *cs, flags = 0; - - /* - * Ensure that any following seqno writes only happen when the render - * cache is indeed flushed. - * - * Workaround: 4th PIPE_CONTROL command (except the ones with only - * read-cache invalidate bits set) must have the CS_STALL bit set. We - * don't try to be clever and just set it unconditionally. - */ - flags |= PIPE_CONTROL_CS_STALL; - - /* Just flush everything. Experiments have shown that reducing the - * number of bits based on the write domains has little performance - * impact. - */ - if (mode & EMIT_FLUSH) { - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; - flags |= PIPE_CONTROL_FLUSH_ENABLE; - } - if (mode & EMIT_INVALIDATE) { - flags |= PIPE_CONTROL_TLB_INVALIDATE; - flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; - /* - * TLB invalidate requires a post-sync write. - */ - flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; - - flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; - - /* Workaround: we must issue a pipe_control with CS-stall bit - * set before a pipe_control command that has the state cache - * invalidate bit set. 
*/ - gen7_render_ring_cs_stall_wa(rq); - } - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = flags; - *cs++ = scratch_addr; - *cs++ = 0; - intel_ring_advance(rq, cs); - - return 0; -} - -static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL); - *cs++ = rq->timeline->hwsp_offset; - *cs++ = rq->fence.seqno; - - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = (PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_STORE_DATA_INDEX | - PIPE_CONTROL_GLOBAL_GTT_IVB); - *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; - *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} - -static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = rq->fence.seqno; - - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} - -#define GEN7_XCS_WA 32 -static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - int i; - - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = rq->fence.seqno; - - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - - for (i = 0; i < GEN7_XCS_WA; i++) { - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR; - *cs++ = rq->fence.seqno; - } - - *cs++ = MI_FLUSH_DW; - *cs++ = 0; - *cs++ = 0; - - *cs++ = MI_USER_INTERRUPT; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} -#undef GEN7_XCS_WA - -static void set_hwstam(struct intel_engine_cs *engine, u32 mask) -{ - /* - * Keep the render interrupt unmasked as this papers over - * lost interrupts following a reset. 
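The breadcrumb emitters above all follow the same shape: append a store of the fence seqno to the status page, then a user interrupt, using the "*cs++" idiom. The sketch below models only that shape; the FAKE_MI_* opcode values are placeholders, not the real command encodings.

/* Userspace sketch of the "*cs++" emission idiom: append dwords into a
 * buffer and return the advanced pointer. */
#include <stdint.h>
#include <stdio.h>

#define FAKE_MI_STORE_DWORD_INDEX 0x21000000u  /* placeholder opcode */
#define FAKE_MI_USER_INTERRUPT    0x02000000u  /* placeholder opcode */
#define FAKE_MI_NOOP              0x00000000u

static uint32_t *emit_breadcrumb(uint32_t *cs, uint32_t seqno_addr, uint32_t seqno)
{
	*cs++ = FAKE_MI_STORE_DWORD_INDEX;  /* store the seqno into the status page */
	*cs++ = seqno_addr;
	*cs++ = seqno;

	*cs++ = FAKE_MI_USER_INTERRUPT;     /* then poke the CPU */
	*cs++ = FAKE_MI_NOOP;               /* keep the tail qword-aligned */

	return cs;
}

int main(void)
{
	uint32_t ring[8];
	uint32_t *end = emit_breadcrumb(ring, 0x30, 42);

	printf("emitted %ld dwords\n", (long)(end - ring));
	return 0;
}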
- */ - if (engine->class == RENDER_CLASS) { - if (INTEL_GEN(engine->i915) >= 6) - mask &= ~BIT(0); - else - mask &= ~I915_USER_INTERRUPT; - } - - intel_engine_set_hwsp_writemask(engine, mask); -} - -static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys) -{ - struct drm_i915_private *dev_priv = engine->i915; - u32 addr; - - addr = lower_32_bits(phys); - if (INTEL_GEN(dev_priv) >= 4) - addr |= (phys >> 28) & 0xf0; - - I915_WRITE(HWS_PGA, addr); -} - -static struct page *status_page(struct intel_engine_cs *engine) -{ - struct drm_i915_gem_object *obj = engine->status_page.vma->obj; - - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - return sg_page(obj->mm.pages->sgl); -} - -static void ring_setup_phys_status_page(struct intel_engine_cs *engine) -{ - set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine)))); - set_hwstam(engine, ~0u); -} - -static void set_hwsp(struct intel_engine_cs *engine, u32 offset) -{ - struct drm_i915_private *dev_priv = engine->i915; - i915_reg_t hwsp; - - /* - * The ring status page addresses are no longer next to the rest of - * the ring registers as of gen7. - */ - if (IS_GEN(dev_priv, 7)) { - switch (engine->id) { - /* - * No more rings exist on Gen7. Default case is only to shut up - * gcc switch check warning. - */ - default: - GEM_BUG_ON(engine->id); - /* fallthrough */ - case RCS0: - hwsp = RENDER_HWS_PGA_GEN7; - break; - case BCS0: - hwsp = BLT_HWS_PGA_GEN7; - break; - case VCS0: - hwsp = BSD_HWS_PGA_GEN7; - break; - case VECS0: - hwsp = VEBOX_HWS_PGA_GEN7; - break; - } - } else if (IS_GEN(dev_priv, 6)) { - hwsp = RING_HWS_PGA_GEN6(engine->mmio_base); - } else { - hwsp = RING_HWS_PGA(engine->mmio_base); - } - - I915_WRITE(hwsp, offset); - POSTING_READ(hwsp); -} - -static void flush_cs_tlb(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - if (!IS_GEN_RANGE(dev_priv, 6, 7)) - return; - - /* ring should be idle before issuing a sync flush*/ - WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); - - ENGINE_WRITE(engine, RING_INSTPM, - _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | - INSTPM_SYNC_FLUSH)); - if (intel_wait_for_register(engine->uncore, - RING_INSTPM(engine->mmio_base), - INSTPM_SYNC_FLUSH, 0, - 1000)) - DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n", - engine->name); -} - -static void ring_setup_status_page(struct intel_engine_cs *engine) -{ - set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma)); - set_hwstam(engine, ~0u); - - flush_cs_tlb(engine); -} - -static bool stop_ring(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - if (INTEL_GEN(dev_priv) > 2) { - ENGINE_WRITE(engine, - RING_MI_MODE, _MASKED_BIT_ENABLE(STOP_RING)); - if (intel_wait_for_register(engine->uncore, - RING_MI_MODE(engine->mmio_base), - MODE_IDLE, - MODE_IDLE, - 1000)) { - DRM_ERROR("%s : timed out trying to stop ring\n", - engine->name); - - /* - * Sometimes we observe that the idle flag is not - * set even though the ring is empty. So double - * check before giving up. 
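stop_ring() above relies on polling a status bit with a timeout and then double-checking HEAD against TAIL. The core poll-until-match pattern, reduced to a standalone sketch with a stubbed register read and an illustrative bit position, looks like this:

/* Stand-in for the poll-until-idle pattern used by stop_ring(): read a
 * register until the masked value matches or a retry budget runs out.
 * read_reg() is a stub standing in for the MMIO read. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MODE_IDLE_BIT (1u << 9)   /* illustrative bit position only */

static uint32_t fake_reg;

static uint32_t read_reg(void)
{
	/* Pretend the engine goes idle after a few reads. */
	static int reads;

	if (++reads >= 3)
		fake_reg |= MODE_IDLE_BIT;
	return fake_reg;
}

static bool wait_for_reg(uint32_t mask, uint32_t value, unsigned int retries)
{
	while (retries--) {
		if ((read_reg() & mask) == value)
			return true;
		/* a real implementation would sleep or delay here */
	}
	return false;
}

int main(void)
{
	printf("idle: %s\n",
	       wait_for_reg(MODE_IDLE_BIT, MODE_IDLE_BIT, 10) ? "yes" : "no");
	return 0;
}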
- */ - if (ENGINE_READ(engine, RING_HEAD) != - ENGINE_READ(engine, RING_TAIL)) - return false; - } - } - - ENGINE_WRITE(engine, RING_HEAD, ENGINE_READ(engine, RING_TAIL)); - - ENGINE_WRITE(engine, RING_HEAD, 0); - ENGINE_WRITE(engine, RING_TAIL, 0); - - /* The ring must be empty before it is disabled */ - ENGINE_WRITE(engine, RING_CTL, 0); - - return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0; -} - -static int init_ring_common(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct intel_ring *ring = engine->buffer; - int ret = 0; - - intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); - - if (!stop_ring(engine)) { - /* G45 ring initialization often fails to reset head to zero */ - DRM_DEBUG_DRIVER("%s head not reset to zero " - "ctl %08x head %08x tail %08x start %08x\n", - engine->name, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_HEAD), - ENGINE_READ(engine, RING_TAIL), - ENGINE_READ(engine, RING_START)); - - if (!stop_ring(engine)) { - DRM_ERROR("failed to set %s head to zero " - "ctl %08x head %08x tail %08x start %08x\n", - engine->name, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_HEAD), - ENGINE_READ(engine, RING_TAIL), - ENGINE_READ(engine, RING_START)); - ret = -EIO; - goto out; - } - } - - if (HWS_NEEDS_PHYSICAL(dev_priv)) - ring_setup_phys_status_page(engine); - else - ring_setup_status_page(engine); - - intel_engine_reset_breadcrumbs(engine); - - /* Enforce ordering by reading HEAD register back */ - ENGINE_READ(engine, RING_HEAD); - - /* Initialize the ring. This must happen _after_ we've cleared the ring - * registers with the above sequence (the readback of the HEAD registers - * also enforces ordering), otherwise the hw might lose the new ring - * register values. */ - ENGINE_WRITE(engine, RING_START, i915_ggtt_offset(ring->vma)); - - /* WaClearRingBufHeadRegAtInit:ctg,elk */ - if (ENGINE_READ(engine, RING_HEAD)) - DRM_DEBUG_DRIVER("%s initialization failed [head=%08x], fudging\n", - engine->name, ENGINE_READ(engine, RING_HEAD)); - - /* Check that the ring offsets point within the ring! */ - GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); - GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); - intel_ring_update_space(ring); - - /* First wake the ring up to an empty/idle ring */ - ENGINE_WRITE(engine, RING_HEAD, ring->head); - ENGINE_WRITE(engine, RING_TAIL, ring->head); - ENGINE_POSTING_READ(engine, RING_TAIL); - - ENGINE_WRITE(engine, RING_CTL, RING_CTL_SIZE(ring->size) | RING_VALID); - - /* If the head is still not zero, the ring is dead */ - if (intel_wait_for_register(engine->uncore, - RING_CTL(engine->mmio_base), - RING_VALID, RING_VALID, - 50)) { - DRM_ERROR("%s initialization failed " - "ctl %08x (valid? 
%d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", - engine->name, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_CTL) & RING_VALID, - ENGINE_READ(engine, RING_HEAD), ring->head, - ENGINE_READ(engine, RING_TAIL), ring->tail, - ENGINE_READ(engine, RING_START), - i915_ggtt_offset(ring->vma)); - ret = -EIO; - goto out; - } - - if (INTEL_GEN(dev_priv) > 2) - ENGINE_WRITE(engine, - RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); - - /* Now awake, let it get started */ - if (ring->tail != ring->head) { - ENGINE_WRITE(engine, RING_TAIL, ring->tail); - ENGINE_POSTING_READ(engine, RING_TAIL); - } - - /* Papering over lost _interrupts_ immediately following the restart */ - intel_engine_queue_breadcrumbs(engine); -out: - intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); - - return ret; -} - -static void reset_prepare(struct intel_engine_cs *engine) -{ - intel_engine_stop_cs(engine); -} - -static void reset_ring(struct intel_engine_cs *engine, bool stalled) -{ - struct i915_timeline *tl = &engine->timeline; - struct i915_request *pos, *rq; - unsigned long flags; - u32 head; - - rq = NULL; - spin_lock_irqsave(&tl->lock, flags); - list_for_each_entry(pos, &tl->requests, link) { - if (!i915_request_completed(pos)) { - rq = pos; - break; - } - } - - /* - * The guilty request will get skipped on a hung engine. - * - * Users of client default contexts do not rely on logical - * state preserved between batches so it is safe to execute - * queued requests following the hang. Non default contexts - * rely on preserved state, so skipping a batch loses the - * evolution of the state and it needs to be considered corrupted. - * Executing more queued batches on top of corrupted state is - * risky. But we take the risk by trying to advance through - * the queued requests in order to make the client behaviour - * more predictable around resets, by not throwing away random - * amount of batches it has prepared for execution. Sophisticated - * clients can use gem_reset_stats_ioctl and dma fence status - * (exported via sync_file info ioctl on explicit fences) to observe - * when it loses the context state and should rebuild accordingly. - * - * The context ban, and ultimately the client ban, mechanism are safety - * valves if client submission ends up resulting in nothing more than - * subsequent hangs. - */ - - if (rq) { - /* - * Try to restore the logical GPU state to match the - * continuation of the request queue. If we skip the - * context/PD restore, then the next request may try to execute - * assuming that its context is valid and loaded on the GPU and - * so may try to access invalid memory, prompting repeated GPU - * hangs. - * - * If the request was guilty, we still restore the logical - * state in case the next request requires it (e.g. the - * aliasing ppgtt), but skip over the hung batch. - * - * If the request was innocent, we try to replay the request - * with the restored context. 
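The reset path described above picks its restart point from the first incomplete request, or from the tail when nothing is outstanding. A simplified model of that decision (fake_request and choose_restart_head() are invented names, and the ring size is assumed to be a power of two) is sketched below.

/* Sketch of the restart-head choice made on reset: resume from the first
 * request that had not completed, otherwise from the ring tail, wrapped
 * into the ring. Types are deliberately simplified. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fake_request {
	uint32_t head;       /* ring offset where this request's commands start */
	bool completed;
};

static uint32_t ring_wrap(uint32_t size, uint32_t offset)
{
	return offset & (size - 1);   /* size must be a power of two */
}

static uint32_t choose_restart_head(const struct fake_request *reqs, int count,
				    uint32_t ring_size, uint32_t ring_tail)
{
	for (int i = 0; i < count; i++) {
		if (!reqs[i].completed)
			return ring_wrap(ring_size, reqs[i].head);
	}
	/* Nothing outstanding: restart from the tail, i.e. an empty ring. */
	return ring_wrap(ring_size, ring_tail);
}

int main(void)
{
	struct fake_request reqs[] = {
		{ .head = 0x100, .completed = true },
		{ .head = 0x240, .completed = false },
	};

	printf("head=0x%x\n",
	       (unsigned)choose_restart_head(reqs, 2, 4096, 0x300));
	return 0;
}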
- */ - i915_reset_request(rq, stalled); - - GEM_BUG_ON(rq->ring != engine->buffer); - head = rq->head; - } else { - head = engine->buffer->tail; - } - engine->buffer->head = intel_ring_wrap(engine->buffer, head); - - spin_unlock_irqrestore(&tl->lock, flags); -} - -static void reset_finish(struct intel_engine_cs *engine) -{ -} - -static int intel_rcs_ctx_init(struct i915_request *rq) -{ - int ret; - - ret = intel_engine_emit_ctx_wa(rq); - if (ret != 0) - return ret; - - ret = i915_gem_render_state_emit(rq); - if (ret) - return ret; - - return 0; -} - -static int init_render_ring(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ - if (IS_GEN_RANGE(dev_priv, 4, 6)) - I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); - - /* We need to disable the AsyncFlip performance optimisations in order - * to use MI_WAIT_FOR_EVENT within the CS. It should already be - * programmed to '1' on all products. - * - * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv - */ - if (IS_GEN_RANGE(dev_priv, 6, 7)) - I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); - - /* Required for the hardware to program scanline values for waiting */ - /* WaEnableFlushTlbInvalidationMode:snb */ - if (IS_GEN(dev_priv, 6)) - I915_WRITE(GFX_MODE, - _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT)); - - /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ - if (IS_GEN(dev_priv, 7)) - I915_WRITE(GFX_MODE_GEN7, - _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) | - _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); - - if (IS_GEN(dev_priv, 6)) { - /* From the Sandybridge PRM, volume 1 part 3, page 24: - * "If this bit is set, STCunit will have LRA as replacement - * policy. [...] This bit must be reset. LRA replacement - * policy is not supported." - */ - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); - } - - if (IS_GEN_RANGE(dev_priv, 6, 7)) - I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - - return init_ring_common(engine); -} - -static void cancel_requests(struct intel_engine_cs *engine) -{ - struct i915_request *request; - unsigned long flags; - - spin_lock_irqsave(&engine->timeline.lock, flags); - - /* Mark all submitted requests as skipped. */ - list_for_each_entry(request, &engine->timeline.requests, link) { - if (!i915_request_signaled(request)) - dma_fence_set_error(&request->fence, -EIO); - - i915_request_mark_complete(request); - } - - /* Remaining _unready_ requests will be nop'ed when submitted */ - - spin_unlock_irqrestore(&engine->timeline.lock, flags); -} - -static void i9xx_submit_request(struct i915_request *request) -{ - i915_request_submit(request); - - ENGINE_WRITE(request->engine, RING_TAIL, - intel_ring_set_tail(request->ring, request->tail)); -} - -static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - - *cs++ = MI_FLUSH; - - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR; - *cs++ = rq->fence.seqno; - - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; - *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - - *cs++ = MI_USER_INTERRUPT; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} - -#define GEN5_WA_STORES 8 /* must be at least 1! 
*/ -static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - int i; - - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - - *cs++ = MI_FLUSH; - - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; - *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - - BUILD_BUG_ON(GEN5_WA_STORES < 1); - for (i = 0; i < GEN5_WA_STORES; i++) { - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR; - *cs++ = rq->fence.seqno; - } - - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} -#undef GEN5_WA_STORES - -static void -gen5_irq_enable(struct intel_engine_cs *engine) -{ - gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask); -} - -static void -gen5_irq_disable(struct intel_engine_cs *engine) -{ - gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask); -} - -static void -i9xx_irq_enable(struct intel_engine_cs *engine) -{ - engine->i915->irq_mask &= ~engine->irq_enable_mask; - intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); - intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR); -} - -static void -i9xx_irq_disable(struct intel_engine_cs *engine) -{ - engine->i915->irq_mask |= engine->irq_enable_mask; - intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); -} - -static void -i8xx_irq_enable(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - dev_priv->irq_mask &= ~engine->irq_enable_mask; - I915_WRITE16(GEN2_IMR, dev_priv->irq_mask); - POSTING_READ16(RING_IMR(engine->mmio_base)); -} - -static void -i8xx_irq_disable(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - dev_priv->irq_mask |= engine->irq_enable_mask; - I915_WRITE16(GEN2_IMR, dev_priv->irq_mask); -} - -static int -bsd_ring_flush(struct i915_request *rq, u32 mode) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_FLUSH; - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - return 0; -} - -static void -gen6_irq_enable(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, - ~(engine->irq_enable_mask | engine->irq_keep_mask)); - - /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ - ENGINE_POSTING_READ(engine, RING_IMR); - - gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask); -} - -static void -gen6_irq_disable(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); - gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask); -} - -static void -hsw_vebox_irq_enable(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask); - - /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ - ENGINE_POSTING_READ(engine, RING_IMR); - - gen6_unmask_pm_irq(engine->i915, engine->irq_enable_mask); -} - -static void -hsw_vebox_irq_disable(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, ~0); - gen6_mask_pm_irq(engine->i915, engine->irq_enable_mask); -} - -static int -i965_emit_bb_start(struct i915_request *rq, - u64 offset, u32 length, - unsigned int dispatch_flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & - I915_DISPATCH_SECURE ? 
0 : MI_BATCH_NON_SECURE_I965); - *cs++ = offset; - intel_ring_advance(rq, cs); - - return 0; -} - -/* Just userspace ABI convention to limit the wa batch bo to a resonable size */ -#define I830_BATCH_LIMIT SZ_256K -#define I830_TLB_ENTRIES (2) -#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) -static int -i830_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - unsigned int dispatch_flags) -{ - u32 *cs, cs_offset = i915_scratch_offset(rq->i915); - - GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE); - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Evict the invalid PTE TLBs */ - *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA; - *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096; - *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */ - *cs++ = cs_offset; - *cs++ = 0xdeadbeef; - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { - if (len > I830_BATCH_LIMIT) - return -ENOSPC; - - cs = intel_ring_begin(rq, 6 + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Blit the batch (which has now all relocs applied) to the - * stable batch scratch bo area (so that the CS never - * stumbles over its tlb invalidation bug) ... - */ - *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA; - *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; - *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; - *cs++ = cs_offset; - *cs++ = 4096; - *cs++ = offset; - - *cs++ = MI_FLUSH; - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - /* ... and execute it. */ - offset = cs_offset; - } - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; - *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : - MI_BATCH_NON_SECURE); - intel_ring_advance(rq, cs); - - return 0; -} - -static int -i915_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - unsigned int dispatch_flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; - *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : - MI_BATCH_NON_SECURE); - intel_ring_advance(rq, cs); - - return 0; -} - -int intel_ring_pin(struct intel_ring *ring) -{ - struct i915_vma *vma = ring->vma; - enum i915_map_type map = i915_coherent_map_type(vma->vm->i915); - unsigned int flags; - void *addr; - int ret; - - GEM_BUG_ON(ring->vaddr); - - ret = i915_timeline_pin(ring->timeline); - if (ret) - return ret; - - flags = PIN_GLOBAL; - - /* Ring wraparound at offset 0 sometimes hangs. No idea why. 
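The bb_start helpers above differ mainly in how they fold the dispatch flags into the first command dword: secure batches drop the non-secure bit. A standalone sketch of that selection, with placeholder opcode values rather than the real MI_* encodings, follows.

/* Model of the dispatch-flag check in the bb_start helpers: secure batches
 * omit the non-secure bit, everything else sets it. */
#include <stdint.h>
#include <stdio.h>

#define FAKE_MI_BATCH_BUFFER_START 0x31000000u  /* placeholder opcode */
#define FAKE_MI_BATCH_GTT          (1u << 7)    /* placeholder bit */
#define FAKE_MI_BATCH_NON_SECURE   (1u << 0)    /* placeholder bit */

#define DISPATCH_SECURE            (1u << 0)

static void emit_bb_start(uint32_t cmd[2], uint32_t offset, unsigned int dispatch_flags)
{
	cmd[0] = FAKE_MI_BATCH_BUFFER_START | FAKE_MI_BATCH_GTT |
		 (dispatch_flags & DISPATCH_SECURE ? 0 : FAKE_MI_BATCH_NON_SECURE);
	cmd[1] = offset;
}

int main(void)
{
	uint32_t cmd[2];

	emit_bb_start(cmd, 0x10000, 0);
	printf("unprivileged: %08x %08x\n", (unsigned)cmd[0], (unsigned)cmd[1]);

	emit_bb_start(cmd, 0x10000, DISPATCH_SECURE);
	printf("secure:       %08x %08x\n", (unsigned)cmd[0], (unsigned)cmd[1]);
	return 0;
}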
*/ - flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); - - if (vma->obj->stolen) - flags |= PIN_MAPPABLE; - else - flags |= PIN_HIGH; - - ret = i915_vma_pin(vma, 0, 0, flags); - if (unlikely(ret)) - goto unpin_timeline; - - if (i915_vma_is_map_and_fenceable(vma)) - addr = (void __force *)i915_vma_pin_iomap(vma); - else - addr = i915_gem_object_pin_map(vma->obj, map); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto unpin_ring; - } - - vma->obj->pin_global++; - - ring->vaddr = addr; - return 0; - -unpin_ring: - i915_vma_unpin(vma); -unpin_timeline: - i915_timeline_unpin(ring->timeline); - return ret; -} - -void intel_ring_reset(struct intel_ring *ring, u32 tail) -{ - GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); - - ring->tail = tail; - ring->head = tail; - ring->emit = tail; - intel_ring_update_space(ring); -} - -void intel_ring_unpin(struct intel_ring *ring) -{ - GEM_BUG_ON(!ring->vma); - GEM_BUG_ON(!ring->vaddr); - - /* Discard any unused bytes beyond that submitted to hw. */ - intel_ring_reset(ring, ring->tail); - - if (i915_vma_is_map_and_fenceable(ring->vma)) - i915_vma_unpin_iomap(ring->vma); - else - i915_gem_object_unpin_map(ring->vma->obj); - ring->vaddr = NULL; - - ring->vma->obj->pin_global--; - i915_vma_unpin(ring->vma); - - i915_timeline_unpin(ring->timeline); -} - -static struct i915_vma * -intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) -{ - struct i915_address_space *vm = &dev_priv->ggtt.vm; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - - obj = i915_gem_object_create_stolen(dev_priv, size); - if (!obj) - obj = i915_gem_object_create_internal(dev_priv, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - /* - * Mark ring buffers as read-only from GPU side (so no stray overwrites) - * if supported by the platform's GGTT. - */ - if (vm->has_read_only) - i915_gem_object_set_readonly(obj); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return vma; -} - -struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, - struct i915_timeline *timeline, - int size) -{ - struct intel_ring *ring; - struct i915_vma *vma; - - GEM_BUG_ON(!is_power_of_2(size)); - GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); - GEM_BUG_ON(timeline == &engine->timeline); - lockdep_assert_held(&engine->i915->drm.struct_mutex); - - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) - return ERR_PTR(-ENOMEM); - - kref_init(&ring->ref); - INIT_LIST_HEAD(&ring->request_list); - ring->timeline = i915_timeline_get(timeline); - - ring->size = size; - /* Workaround an erratum on the i830 which causes a hang if - * the TAIL pointer points to within the last 2 cachelines - * of the buffer. 
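The i830/845 erratum just described is handled by shrinking the usable ring so TAIL can never land in the last two cachelines. A small sketch of that sizing rule and the resulting tail check (sizes and alignment here are illustrative) is shown below.

/* Sketch of the tail-placement constraint: on the affected parts, treat the
 * last two cachelines as unusable so the TAIL pointer can never land there. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CACHELINE 64u

struct fake_ring {
	uint32_t size;            /* power of two */
	uint32_t effective_size;  /* size minus the keep-out region */
};

static void ring_init(struct fake_ring *ring, uint32_t size, bool has_erratum)
{
	ring->size = size;
	ring->effective_size = size;
	if (has_erratum)
		ring->effective_size -= 2 * CACHELINE;
}

static bool tail_valid(const struct fake_ring *ring, uint32_t tail)
{
	return tail < ring->effective_size && (tail & 7) == 0; /* qword aligned */
}

int main(void)
{
	struct fake_ring ring;

	ring_init(&ring, 4096, true);
	printf("tail 0xf80 ok? %d\n", tail_valid(&ring, 0xf80)); /* at the limit: 0 */
	printf("tail 0xf00 ok? %d\n", tail_valid(&ring, 0xf00)); /* inside: 1 */
	return 0;
}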
- */ - ring->effective_size = size; - if (IS_I830(engine->i915) || IS_I845G(engine->i915)) - ring->effective_size -= 2 * CACHELINE_BYTES; - - intel_ring_update_space(ring); - - vma = intel_ring_create_vma(engine->i915, size); - if (IS_ERR(vma)) { - kfree(ring); - return ERR_CAST(vma); - } - ring->vma = vma; - - return ring; -} - -void intel_ring_free(struct kref *ref) -{ - struct intel_ring *ring = container_of(ref, typeof(*ring), ref); - struct drm_i915_gem_object *obj = ring->vma->obj; - - i915_vma_close(ring->vma); - __i915_gem_object_release_unless_active(obj); - - i915_timeline_put(ring->timeline); - kfree(ring); -} - -static void __ring_context_fini(struct intel_context *ce) -{ - GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); - i915_gem_object_put(ce->state->obj); -} - -static void ring_context_destroy(struct kref *ref) -{ - struct intel_context *ce = container_of(ref, typeof(*ce), ref); - - GEM_BUG_ON(intel_context_is_pinned(ce)); - - if (ce->state) - __ring_context_fini(ce); - - intel_context_free(ce); -} - -static int __context_pin_ppgtt(struct i915_gem_context *ctx) -{ - struct i915_hw_ppgtt *ppgtt; - int err = 0; - - ppgtt = ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt; - if (ppgtt) - err = gen6_ppgtt_pin(ppgtt); - - return err; -} - -static void __context_unpin_ppgtt(struct i915_gem_context *ctx) -{ - struct i915_hw_ppgtt *ppgtt; - - ppgtt = ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt; - if (ppgtt) - gen6_ppgtt_unpin(ppgtt); -} - -static int __context_pin(struct intel_context *ce) -{ - struct i915_vma *vma; - int err; - - vma = ce->state; - if (!vma) - return 0; - - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (err) - return err; - - /* - * And mark is as a globally pinned object to let the shrinker know - * it cannot reclaim the object until we release it. - */ - vma->obj->pin_global++; - vma->obj->mm.dirty = true; - - return 0; -} - -static void __context_unpin(struct intel_context *ce) -{ - struct i915_vma *vma; - - vma = ce->state; - if (!vma) - return; - - vma->obj->pin_global--; - i915_vma_unpin(vma); -} - -static void ring_context_unpin(struct intel_context *ce) -{ - __context_unpin_ppgtt(ce->gem_context); - __context_unpin(ce); -} - -static struct i915_vma * -alloc_context_vma(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int err; - - obj = i915_gem_object_create(i915, engine->context_size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - /* - * Try to make the context utilize L3 as well as LLC. - * - * On VLV we don't have L3 controls in the PTEs so we - * shouldn't touch the cache level, especially as that - * would make the object snooped which might have a - * negative performance impact. - * - * Snooping is required on non-llc platforms in execlist - * mode, but since all GGTT accesses use PAT entry 0 we - * get snooping anyway regardless of cache_level. - * - * This is only applicable for Ivy Bridge devices since - * later platforms don't have L3 control bits in the PTE. 
- */ - if (IS_IVYBRIDGE(i915)) - i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC); - - if (engine->default_state) { - void *defaults, *vaddr; - - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_obj; - } - - defaults = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); - if (IS_ERR(defaults)) { - err = PTR_ERR(defaults); - goto err_map; - } - - memcpy(vaddr, defaults, engine->context_size); - i915_gem_object_unpin_map(engine->default_state); - - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - } - - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; - } - - return vma; - -err_map: - i915_gem_object_unpin_map(obj); -err_obj: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - -static int ring_context_pin(struct intel_context *ce) -{ - struct intel_engine_cs *engine = ce->engine; - int err; - - /* One ringbuffer to rule them all */ - GEM_BUG_ON(!engine->buffer); - ce->ring = engine->buffer; - - if (!ce->state && engine->context_size) { - struct i915_vma *vma; - - vma = alloc_context_vma(engine); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - ce->state = vma; - } - - err = __context_pin(ce); - if (err) - return err; - - err = __context_pin_ppgtt(ce->gem_context); - if (err) - goto err_unpin; - - return 0; - -err_unpin: - __context_unpin(ce); - return err; -} - -static void ring_context_reset(struct intel_context *ce) -{ - intel_ring_reset(ce->ring, 0); -} - -static const struct intel_context_ops ring_context_ops = { - .pin = ring_context_pin, - .unpin = ring_context_unpin, - - .reset = ring_context_reset, - .destroy = ring_context_destroy, -}; - -static int intel_init_ring_buffer(struct intel_engine_cs *engine) -{ - struct i915_timeline *timeline; - struct intel_ring *ring; - int err; - - err = intel_engine_setup_common(engine); - if (err) - return err; - - timeline = i915_timeline_create(engine->i915, engine->status_page.vma); - if (IS_ERR(timeline)) { - err = PTR_ERR(timeline); - goto err; - } - GEM_BUG_ON(timeline->has_initial_breadcrumb); - - ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); - i915_timeline_put(timeline); - if (IS_ERR(ring)) { - err = PTR_ERR(ring); - goto err; - } - - err = intel_ring_pin(ring); - if (err) - goto err_ring; - - GEM_BUG_ON(engine->buffer); - engine->buffer = ring; - - err = intel_engine_init_common(engine); - if (err) - goto err_unpin; - - GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma); - - return 0; - -err_unpin: - intel_ring_unpin(ring); -err_ring: - intel_ring_put(ring); -err: - intel_engine_cleanup_common(engine); - return err; -} - -void intel_engine_cleanup(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WARN_ON(INTEL_GEN(dev_priv) > 2 && - (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); - - intel_ring_unpin(engine->buffer); - intel_ring_put(engine->buffer); - - if (engine->cleanup) - engine->cleanup(engine); - - intel_engine_cleanup_common(engine); - - dev_priv->engine[engine->id] = NULL; - kfree(engine); -} - -static int load_pd_dir(struct i915_request *rq, - const struct i915_hw_ppgtt *ppgtt) -{ - const struct intel_engine_cs * const engine = rq->engine; - u32 *cs; - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)); - *cs++ = PP_DIR_DCLV_2G; - - *cs++ = 
MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); - *cs++ = ppgtt->pd.base.ggtt_offset << 10; - - intel_ring_advance(rq, cs); - - return 0; -} - -static int flush_pd_dir(struct i915_request *rq) -{ - const struct intel_engine_cs * const engine = rq->engine; - u32 *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Stall until the page table load is complete */ - *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); - *cs++ = i915_scratch_offset(rq->i915); - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - return 0; -} - -static inline int mi_set_context(struct i915_request *rq, u32 flags) -{ - struct drm_i915_private *i915 = rq->i915; - struct intel_engine_cs *engine = rq->engine; - enum intel_engine_id id; - const int num_engines = - IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0; - bool force_restore = false; - int len; - u32 *cs; - - flags |= MI_MM_SPACE_GTT; - if (IS_HASWELL(i915)) - /* These flags are for resource streamer on HSW+ */ - flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN; - else - flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN; - - len = 4; - if (IS_GEN(i915, 7)) - len += 2 + (num_engines ? 4 * num_engines + 6 : 0); - if (flags & MI_FORCE_RESTORE) { - GEM_BUG_ON(flags & MI_RESTORE_INHIBIT); - flags &= ~MI_FORCE_RESTORE; - force_restore = true; - len += 2; - } - - cs = intel_ring_begin(rq, len); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ - if (IS_GEN(i915, 7)) { - *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - if (num_engines) { - struct intel_engine_cs *signaller; - - *cs++ = MI_LOAD_REGISTER_IMM(num_engines); - for_each_engine(signaller, i915, id) { - if (signaller == engine) - continue; - - *cs++ = i915_mmio_reg_offset( - RING_PSMI_CTL(signaller->mmio_base)); - *cs++ = _MASKED_BIT_ENABLE( - GEN6_PSMI_SLEEP_MSG_DISABLE); - } - } - } - - if (force_restore) { - /* - * The HW doesn't handle being told to restore the current - * context very well. Quite often it likes goes to go off and - * sulk, especially when it is meant to be reloading PP_DIR. - * A very simple fix to force the reload is to simply switch - * away from the current context and back again. - * - * Note that the kernel_context will contain random state - * following the INHIBIT_RESTORE. We accept this since we - * never use the kernel_context state; it is merely a - * placeholder we use to flush other contexts. - */ - *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(engine->kernel_context->state) | - MI_MM_SPACE_GTT | - MI_RESTORE_INHIBIT; - } - - *cs++ = MI_NOOP; - *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags; - /* - * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP - * WaMiSetContext_Hang:snb,ivb,vlv - */ - *cs++ = MI_NOOP; - - if (IS_GEN(i915, 7)) { - if (num_engines) { - struct intel_engine_cs *signaller; - i915_reg_t last_reg = {}; /* keep gcc quiet */ - - *cs++ = MI_LOAD_REGISTER_IMM(num_engines); - for_each_engine(signaller, i915, id) { - if (signaller == engine) - continue; - - last_reg = RING_PSMI_CTL(signaller->mmio_base); - *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = _MASKED_BIT_DISABLE( - GEN6_PSMI_SLEEP_MSG_DISABLE); - } - - /* Insert a delay before the next switch! 
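/*
 * The *cs++ sequences above hand-assemble command packets dword by dword:
 * a header naming the command and its length, the payload (register, value)
 * pairs, then a NOOP to keep the stream qword aligned. A minimal sketch of
 * that emission pattern; the example_* names and the header encoding are
 * illustrative stand-ins, not the driver's MI_* definitions.
 */
#include <stdint.h>

#define EXAMPLE_LRI_HEADER(n_regs)	(0x22u << 23 | (2u * (n_regs) - 1))
#define EXAMPLE_NOOP			0u

static uint32_t *example_emit_lri(uint32_t *cs, const uint32_t *regs,
				  const uint32_t *values, unsigned int count)
{
	unsigned int i;

	*cs++ = EXAMPLE_LRI_HEADER(count);
	for (i = 0; i < count; i++) {
		*cs++ = regs[i];	/* MMIO offset of the register */
		*cs++ = values[i];	/* immediate value to load into it */
	}
	*cs++ = EXAMPLE_NOOP;		/* pad to an even dword count */

	return cs;
}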
*/ - *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; - *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = i915_scratch_offset(rq->i915); - *cs++ = MI_NOOP; - } - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - } - - intel_ring_advance(rq, cs); - - return 0; -} - -static int remap_l3(struct i915_request *rq, int slice) -{ - u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; - int i; - - if (!remap_info) - return 0; - - cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* - * Note: We do not worry about the concurrent register cacheline hang - * here because no other code should access these registers other than - * at initialization time. - */ - *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); - for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { - *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); - *cs++ = remap_info[i]; - } - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -static int switch_context(struct i915_request *rq) -{ - struct intel_engine_cs *engine = rq->engine; - struct i915_gem_context *ctx = rq->gem_context; - struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; - unsigned int unwind_mm = 0; - u32 hw_flags = 0; - int ret, i; - - lockdep_assert_held(&rq->i915->drm.struct_mutex); - GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); - - if (ppgtt) { - int loops; - - /* - * Baytail takes a little more convincing that it really needs - * to reload the PD between contexts. It is not just a little - * longer, as adding more stalls after the load_pd_dir (i.e. - * adding a long loop around flush_pd_dir) is not as effective - * as reloading the PD umpteen times. 32 is derived from - * experimentation (gem_exec_parallel/fds) and has no good - * explanation. - */ - loops = 1; - if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915)) - loops = 32; - - do { - ret = load_pd_dir(rq, ppgtt); - if (ret) - goto err; - } while (--loops); - - if (ppgtt->pd_dirty_engines & engine->mask) { - unwind_mm = engine->mask; - ppgtt->pd_dirty_engines &= ~unwind_mm; - hw_flags = MI_FORCE_RESTORE; - } - } - - if (rq->hw_context->state) { - GEM_BUG_ON(engine->id != RCS0); - - /* - * The kernel context(s) is treated as pure scratch and is not - * expected to retain any state (as we sacrifice it during - * suspend and on resume it may be corrupted). This is ok, - * as nothing actually executes using the kernel context; it - * is purely used for flushing user contexts. - */ - if (i915_gem_context_is_kernel(ctx)) - hw_flags = MI_RESTORE_INHIBIT; - - ret = mi_set_context(rq, hw_flags); - if (ret) - goto err_mm; - } - - if (ppgtt) { - ret = engine->emit_flush(rq, EMIT_INVALIDATE); - if (ret) - goto err_mm; - - ret = flush_pd_dir(rq); - if (ret) - goto err_mm; - - /* - * Not only do we need a full barrier (post-sync write) after - * invalidating the TLBs, but we need to wait a little bit - * longer. Whether this is merely delaying us, or the - * subsequent flush is a key part of serialising with the - * post-sync op, this extra pass appears vital before a - * mm switch! 
- */ - ret = engine->emit_flush(rq, EMIT_INVALIDATE); - if (ret) - goto err_mm; - - ret = engine->emit_flush(rq, EMIT_FLUSH); - if (ret) - goto err_mm; - } - - if (ctx->remap_slice) { - for (i = 0; i < MAX_L3_SLICES; i++) { - if (!(ctx->remap_slice & BIT(i))) - continue; - - ret = remap_l3(rq, i); - if (ret) - goto err_mm; - } - - ctx->remap_slice = 0; - } - - return 0; - -err_mm: - if (unwind_mm) - ppgtt->pd_dirty_engines |= unwind_mm; -err: - return ret; -} - -static int ring_request_alloc(struct i915_request *request) -{ - int ret; - - GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); - GEM_BUG_ON(request->timeline->has_initial_breadcrumb); - - /* - * Flush enough space to reduce the likelihood of waiting after - * we start building the request - in which case we will just - * have to repeat work. - */ - request->reserved_space += LEGACY_REQUEST_SIZE; - - ret = switch_context(request); - if (ret) - return ret; - - /* Unconditionally invalidate GPU caches and TLBs. */ - ret = request->engine->emit_flush(request, EMIT_INVALIDATE); - if (ret) - return ret; - - request->reserved_space -= LEGACY_REQUEST_SIZE; - return 0; -} - -static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) -{ - struct i915_request *target; - long timeout; - - lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex); - - if (intel_ring_update_space(ring) >= bytes) - return 0; - - GEM_BUG_ON(list_empty(&ring->request_list)); - list_for_each_entry(target, &ring->request_list, ring_link) { - /* Would completion of this request free enough space? */ - if (bytes <= __intel_ring_space(target->postfix, - ring->emit, ring->size)) - break; - } - - if (WARN_ON(&target->ring_link == &ring->request_list)) - return -ENOSPC; - - timeout = i915_request_wait(target, - I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (timeout < 0) - return timeout; - - i915_request_retire_upto(target); - - intel_ring_update_space(ring); - GEM_BUG_ON(ring->space < bytes); - return 0; -} - -u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) -{ - struct intel_ring *ring = rq->ring; - const unsigned int remain_usable = ring->effective_size - ring->emit; - const unsigned int bytes = num_dwords * sizeof(u32); - unsigned int need_wrap = 0; - unsigned int total_bytes; - u32 *cs; - - /* Packets must be qword aligned. */ - GEM_BUG_ON(num_dwords & 1); - - total_bytes = bytes + rq->reserved_space; - GEM_BUG_ON(total_bytes > ring->effective_size); - - if (unlikely(total_bytes > remain_usable)) { - const int remain_actual = ring->size - ring->emit; - - if (bytes > remain_usable) { - /* - * Not enough space for the basic request. So need to - * flush out the remainder and then wait for - * base + reserved. - */ - total_bytes += remain_actual; - need_wrap = remain_actual | 1; - } else { - /* - * The base request will fit but the reserved space - * falls off the end. So we don't need an immediate - * wrap and only need to effectively wait for the - * reserved size from the start of ringbuffer. - */ - total_bytes = rq->reserved_space + remain_actual; - } - } - - if (unlikely(total_bytes > ring->space)) { - int ret; - - /* - * Space is reserved in the ringbuffer for finalising the - * request, as that cannot be allowed to fail. During request - * finalisation, reserved_space is set to 0 to stop the - * overallocation and the assumption is that then we never need - * to wait (which has the risk of failing with EINTR). - * - * See also i915_request_alloc() and i915_request_add(). 
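/*
 * A compact sketch of the space accounting described in the comment above:
 * the packet plus the per-request reserve must fit in the usable tail of the
 * ring, otherwise either the remainder up to the hard end is written off (to
 * be NOOP-filled before wrapping to offset 0) or we simply wait for the
 * reserve to drain from the start of the ring. The example_* types are
 * simplified stand-ins, not the driver's structures.
 */
#include <stdint.h>

struct example_ring {
	uint32_t size;		/* power-of-two backing store size */
	uint32_t effective_size;/* size minus any erratum reserve */
	uint32_t emit;		/* current write offset */
};

struct example_space_plan {
	uint32_t total_bytes;	/* bytes that must be free before emitting */
	uint32_t wrap_bytes;	/* bytes to NOOP-fill before wrapping, 0 if none */
};

static struct example_space_plan
example_plan_emit(const struct example_ring *ring,
		  uint32_t bytes, uint32_t reserved)
{
	const uint32_t remain_usable = ring->effective_size - ring->emit;
	const uint32_t remain_actual = ring->size - ring->emit;
	struct example_space_plan plan = { .total_bytes = bytes + reserved };

	if (plan.total_bytes > remain_usable) {
		if (bytes > remain_usable) {
			/* Even the packet alone does not fit: pad out the
			 * tail and restart emission from offset 0. */
			plan.total_bytes += remain_actual;
			plan.wrap_bytes = remain_actual;
		} else {
			/* The packet fits, only the reserve spills over:
			 * wait until the reserve is free from the start of
			 * the ring, no immediate wrap is needed. */
			plan.total_bytes = reserved + remain_actual;
		}
	}

	return plan;
}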
- */ - GEM_BUG_ON(!rq->reserved_space); - - ret = wait_for_space(ring, total_bytes); - if (unlikely(ret)) - return ERR_PTR(ret); - } - - if (unlikely(need_wrap)) { - need_wrap &= ~1; - GEM_BUG_ON(need_wrap > ring->space); - GEM_BUG_ON(ring->emit + need_wrap > ring->size); - GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64))); - - /* Fill the tail with MI_NOOP */ - memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64)); - ring->space -= need_wrap; - ring->emit = 0; - } - - GEM_BUG_ON(ring->emit > ring->size - bytes); - GEM_BUG_ON(ring->space < bytes); - cs = ring->vaddr + ring->emit; - GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs))); - ring->emit += bytes; - ring->space -= bytes; - - return cs; -} - -/* Align the ring tail to a cacheline boundary */ -int intel_ring_cacheline_align(struct i915_request *rq) -{ - int num_dwords; - void *cs; - - num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); - if (num_dwords == 0) - return 0; - - num_dwords = CACHELINE_DWORDS - num_dwords; - GEM_BUG_ON(num_dwords & 1); - - cs = intel_ring_begin(rq, num_dwords); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); - intel_ring_advance(rq, cs); - - GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); - return 0; -} - -static void gen6_bsd_submit_request(struct i915_request *request) -{ - struct intel_uncore *uncore = request->engine->uncore; - - intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - - /* Every tail move must follow the sequence below */ - - /* Disable notification that the ring is IDLE. The GT - * will then assume that it is busy and bring it out of rc6. - */ - intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL, - _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); - - /* Clear the context id. Here be magic! */ - intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0); - - /* Wait for the ring not to be idle, i.e. for it to wake up. */ - if (__intel_wait_for_register_fw(uncore, - GEN6_BSD_SLEEP_PSMI_CONTROL, - GEN6_BSD_SLEEP_INDICATOR, - 0, - 1000, 0, NULL)) - DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); - - /* Now that the ring is fully powered up, update the tail */ - i9xx_submit_request(request); - - /* Let the ring send IDLE messages to the GT again, - * and so let it sleep to conserve power when idle. - */ - intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL, - _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); - - intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); -} - -static int mi_flush_dw(struct i915_request *rq, u32 flags) -{ - u32 cmd, *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - cmd = MI_FLUSH_DW; - - /* - * We always require a command barrier so that subsequent - * commands, such as breadcrumb interrupts, are strictly ordered - * wrt the contents of the write cache being flushed to memory - * (and thus being coherent from the CPU). - */ - cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; - - /* - * Bspec vol 1c.3 - blitter engine command streamer: - * "If ENABLED, all TLBs will be invalidated once the flush - * operation is complete. This bit is only valid when the - * Post-Sync Operation field is a value of 1h or 3h." - */ - cmd |= flags; - - *cs++ = cmd; - *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = 0; - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - return 0; -} - -static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags) -{ - return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? 
invflags : 0); -} - -static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode) -{ - return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD); -} - -static int -hsw_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - unsigned int dispatch_flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW); - /* bit0-7 is the length on GEN6+ */ - *cs++ = offset; - intel_ring_advance(rq, cs); - - return 0; -} - -static int -gen6_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - unsigned int dispatch_flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE_I965); - /* bit0-7 is the length on GEN6+ */ - *cs++ = offset; - intel_ring_advance(rq, cs); - - return 0; -} - -/* Blitter support (SandyBridge+) */ - -static int gen6_ring_flush(struct i915_request *rq, u32 mode) -{ - return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB); -} - -static void intel_ring_init_irq(struct drm_i915_private *dev_priv, - struct intel_engine_cs *engine) -{ - if (INTEL_GEN(dev_priv) >= 6) { - engine->irq_enable = gen6_irq_enable; - engine->irq_disable = gen6_irq_disable; - } else if (INTEL_GEN(dev_priv) >= 5) { - engine->irq_enable = gen5_irq_enable; - engine->irq_disable = gen5_irq_disable; - } else if (INTEL_GEN(dev_priv) >= 3) { - engine->irq_enable = i9xx_irq_enable; - engine->irq_disable = i9xx_irq_disable; - } else { - engine->irq_enable = i8xx_irq_enable; - engine->irq_disable = i8xx_irq_disable; - } -} - -static void i9xx_set_default_submission(struct intel_engine_cs *engine) -{ - engine->submit_request = i9xx_submit_request; - engine->cancel_requests = cancel_requests; - - engine->park = NULL; - engine->unpark = NULL; -} - -static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) -{ - i9xx_set_default_submission(engine); - engine->submit_request = gen6_bsd_submit_request; -} - -static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, - struct intel_engine_cs *engine) -{ - /* gen8+ are only supported with execlists */ - GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8); - - intel_ring_init_irq(dev_priv, engine); - - engine->init_hw = init_ring_common; - engine->reset.prepare = reset_prepare; - engine->reset.reset = reset_ring; - engine->reset.finish = reset_finish; - - engine->cops = &ring_context_ops; - engine->request_alloc = ring_request_alloc; - - /* - * Using a global execution timeline; the previous final breadcrumb is - * equivalent to our next initial bread so we can elide - * engine->emit_init_breadcrumb(). 
- */ - engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb; - if (IS_GEN(dev_priv, 5)) - engine->emit_fini_breadcrumb = gen5_emit_breadcrumb; - - engine->set_default_submission = i9xx_set_default_submission; - - if (INTEL_GEN(dev_priv) >= 6) - engine->emit_bb_start = gen6_emit_bb_start; - else if (INTEL_GEN(dev_priv) >= 4) - engine->emit_bb_start = i965_emit_bb_start; - else if (IS_I830(dev_priv) || IS_I845G(dev_priv)) - engine->emit_bb_start = i830_emit_bb_start; - else - engine->emit_bb_start = i915_emit_bb_start; -} - -int intel_init_render_ring_buffer(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - intel_ring_default_vfuncs(dev_priv, engine); - - if (HAS_L3_DPF(dev_priv)) - engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; - - engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; - - if (INTEL_GEN(dev_priv) >= 7) { - engine->init_context = intel_rcs_ctx_init; - engine->emit_flush = gen7_render_ring_flush; - engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb; - } else if (IS_GEN(dev_priv, 6)) { - engine->init_context = intel_rcs_ctx_init; - engine->emit_flush = gen6_render_ring_flush; - engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb; - } else if (IS_GEN(dev_priv, 5)) { - engine->emit_flush = gen4_render_ring_flush; - } else { - if (INTEL_GEN(dev_priv) < 4) - engine->emit_flush = gen2_render_ring_flush; - else - engine->emit_flush = gen4_render_ring_flush; - engine->irq_enable_mask = I915_USER_INTERRUPT; - } - - if (IS_HASWELL(dev_priv)) - engine->emit_bb_start = hsw_emit_bb_start; - - engine->init_hw = init_render_ring; - - ret = intel_init_ring_buffer(engine); - if (ret) - return ret; - - return 0; -} - -int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - intel_ring_default_vfuncs(dev_priv, engine); - - if (INTEL_GEN(dev_priv) >= 6) { - /* gen6 bsd needs a special wa for tail updates */ - if (IS_GEN(dev_priv, 6)) - engine->set_default_submission = gen6_bsd_set_default_submission; - engine->emit_flush = gen6_bsd_ring_flush; - engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; - - if (IS_GEN(dev_priv, 6)) - engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; - else - engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; - } else { - engine->emit_flush = bsd_ring_flush; - if (IS_GEN(dev_priv, 5)) - engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; - else - engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; - } - - return intel_init_ring_buffer(engine); -} - -int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - GEM_BUG_ON(INTEL_GEN(dev_priv) < 6); - - intel_ring_default_vfuncs(dev_priv, engine); - - engine->emit_flush = gen6_ring_flush; - engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - - if (IS_GEN(dev_priv, 6)) - engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; - else - engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; - - return intel_init_ring_buffer(engine); -} - -int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - GEM_BUG_ON(INTEL_GEN(dev_priv) < 7); - - intel_ring_default_vfuncs(dev_priv, engine); - - engine->emit_flush = gen6_ring_flush; - engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; - engine->irq_enable = hsw_vebox_irq_enable; - engine->irq_disable = hsw_vebox_irq_disable; - - engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; - - return 
intel_init_ring_buffer(engine); -} diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h deleted file mode 100644 index 72c7c337ace9..000000000000 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ /dev/null @@ -1,583 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -#ifndef _INTEL_RINGBUFFER_H_ -#define _INTEL_RINGBUFFER_H_ - -#include - -#include -#include -#include -#include - -#include "i915_gem_batch_pool.h" -#include "i915_pmu.h" -#include "i915_reg.h" -#include "i915_request.h" -#include "i915_selftest.h" -#include "i915_timeline.h" -#include "intel_engine_types.h" -#include "intel_gpu_commands.h" -#include "intel_workarounds.h" - -struct drm_printer; - -/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, - * but keeps the logic simple. Indeed, the whole purpose of this macro is just - * to give some inclination as to some of the magic values used in the various - * workarounds! - */ -#define CACHELINE_BYTES 64 -#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) - -/* - * The register defines to be used with the following macros need to accept a - * base param, e.g: - * - * REG_FOO(base) _MMIO((base) + ) - * ENGINE_READ(engine, REG_FOO); - * - * register arrays are to be defined and accessed as follows: - * - * REG_BAR(base, i) _MMIO((base) + + (i) * ) - * ENGINE_READ_IDX(engine, REG_BAR, i) - */ - -#define __ENGINE_REG_OP(op__, engine__, ...) \ - intel_uncore_##op__((engine__)->uncore, __VA_ARGS__) - -#define __ENGINE_READ_OP(op__, engine__, reg__) \ - __ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base)) - -#define ENGINE_READ16(...) __ENGINE_READ_OP(read16, __VA_ARGS__) -#define ENGINE_READ(...) __ENGINE_READ_OP(read, __VA_ARGS__) -#define ENGINE_READ_FW(...) __ENGINE_READ_OP(read_fw, __VA_ARGS__) -#define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read, __VA_ARGS__) - -#define ENGINE_READ64(engine__, lower_reg__, upper_reg__) \ - __ENGINE_REG_OP(read64_2x32, (engine__), \ - lower_reg__((engine__)->mmio_base), \ - upper_reg__((engine__)->mmio_base)) - -#define ENGINE_READ_IDX(engine__, reg__, idx__) \ - __ENGINE_REG_OP(read, (engine__), reg__((engine__)->mmio_base, (idx__))) - -#define __ENGINE_WRITE_OP(op__, engine__, reg__, val__) \ - __ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base), (val__)) - -#define ENGINE_WRITE16(...) __ENGINE_WRITE_OP(write16, __VA_ARGS__) -#define ENGINE_WRITE(...) __ENGINE_WRITE_OP(write, __VA_ARGS__) -#define ENGINE_WRITE_FW(...) __ENGINE_WRITE_OP(write_fw, __VA_ARGS__) - -/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to - * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. 
- */ -enum intel_engine_hangcheck_action { - ENGINE_IDLE = 0, - ENGINE_WAIT, - ENGINE_ACTIVE_SEQNO, - ENGINE_ACTIVE_HEAD, - ENGINE_ACTIVE_SUBUNITS, - ENGINE_WAIT_KICK, - ENGINE_DEAD, -}; - -static inline const char * -hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) -{ - switch (a) { - case ENGINE_IDLE: - return "idle"; - case ENGINE_WAIT: - return "wait"; - case ENGINE_ACTIVE_SEQNO: - return "active seqno"; - case ENGINE_ACTIVE_HEAD: - return "active head"; - case ENGINE_ACTIVE_SUBUNITS: - return "active subunits"; - case ENGINE_WAIT_KICK: - return "wait kick"; - case ENGINE_DEAD: - return "dead"; - } - - return "unknown"; -} - -void intel_engines_set_scheduler_caps(struct drm_i915_private *i915); - -static inline bool __execlists_need_preempt(int prio, int last) -{ - /* - * Allow preemption of low -> normal -> high, but we do - * not allow low priority tasks to preempt other low priority - * tasks under the impression that latency for low priority - * tasks does not matter (as much as background throughput), - * so kiss. - * - * More naturally we would write - * prio >= max(0, last); - * except that we wish to prevent triggering preemption at the same - * priority level: the task that is running should remain running - * to preserve FIFO ordering of dependencies. - */ - return prio > max(I915_PRIORITY_NORMAL - 1, last); -} - -static inline void -execlists_set_active(struct intel_engine_execlists *execlists, - unsigned int bit) -{ - __set_bit(bit, (unsigned long *)&execlists->active); -} - -static inline bool -execlists_set_active_once(struct intel_engine_execlists *execlists, - unsigned int bit) -{ - return !__test_and_set_bit(bit, (unsigned long *)&execlists->active); -} - -static inline void -execlists_clear_active(struct intel_engine_execlists *execlists, - unsigned int bit) -{ - __clear_bit(bit, (unsigned long *)&execlists->active); -} - -static inline void -execlists_clear_all_active(struct intel_engine_execlists *execlists) -{ - execlists->active = 0; -} - -static inline bool -execlists_is_active(const struct intel_engine_execlists *execlists, - unsigned int bit) -{ - return test_bit(bit, (unsigned long *)&execlists->active); -} - -void execlists_user_begin(struct intel_engine_execlists *execlists, - const struct execlist_port *port); -void execlists_user_end(struct intel_engine_execlists *execlists); - -void -execlists_cancel_port_requests(struct intel_engine_execlists * const execlists); - -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); - -static inline unsigned int -execlists_num_ports(const struct intel_engine_execlists * const execlists) -{ - return execlists->port_mask + 1; -} - -static inline struct execlist_port * -execlists_port_complete(struct intel_engine_execlists * const execlists, - struct execlist_port * const port) -{ - const unsigned int m = execlists->port_mask; - - GEM_BUG_ON(port_index(port, execlists) != 0); - GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); - - memmove(port, port + 1, m * sizeof(struct execlist_port)); - memset(port + m, 0, sizeof(struct execlist_port)); - - return port; -} - -static inline u32 -intel_read_status_page(const struct intel_engine_cs *engine, int reg) -{ - /* Ensure that the compiler doesn't optimize away the load. */ - return READ_ONCE(engine->status_page.addr[reg]); -} - -static inline void -intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) -{ - /* Writing into the status page should be done sparingly. 
Since - * we do when we are uncertain of the device state, we take a bit - * of extra paranoia to try and ensure that the HWS takes the value - * we give and that it doesn't end up trapped inside the CPU! - */ - if (static_cpu_has(X86_FEATURE_CLFLUSH)) { - mb(); - clflush(&engine->status_page.addr[reg]); - engine->status_page.addr[reg] = value; - clflush(&engine->status_page.addr[reg]); - mb(); - } else { - WRITE_ONCE(engine->status_page.addr[reg], value); - } -} - -/* - * Reads a dword out of the status page, which is written to from the command - * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or - * MI_STORE_DATA_IMM. - * - * The following dwords have a reserved meaning: - * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes. - * 0x04: ring 0 head pointer - * 0x05: ring 1 head pointer (915-class) - * 0x06: ring 2 head pointer (915-class) - * 0x10-0x1b: Context status DWords (GM45) - * 0x1f: Last written status offset. (GM45) - * 0x20-0x2f: Reserved (Gen6+) - * - * The area from dword 0x30 to 0x3ff is available for driver usage. - */ -#define I915_GEM_HWS_PREEMPT 0x32 -#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) -#define I915_GEM_HWS_HANGCHECK 0x34 -#define I915_GEM_HWS_HANGCHECK_ADDR (I915_GEM_HWS_HANGCHECK * sizeof(u32)) -#define I915_GEM_HWS_SEQNO 0x40 -#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) -#define I915_GEM_HWS_SCRATCH 0x80 -#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32)) - -#define I915_HWS_CSB_BUF0_INDEX 0x10 -#define I915_HWS_CSB_WRITE_INDEX 0x1f -#define CNL_HWS_CSB_WRITE_INDEX 0x2f - -struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, - struct i915_timeline *timeline, - int size); -int intel_ring_pin(struct intel_ring *ring); -void intel_ring_reset(struct intel_ring *ring, u32 tail); -unsigned int intel_ring_update_space(struct intel_ring *ring); -void intel_ring_unpin(struct intel_ring *ring); -void intel_ring_free(struct kref *ref); - -static inline struct intel_ring *intel_ring_get(struct intel_ring *ring) -{ - kref_get(&ring->ref); - return ring; -} - -static inline void intel_ring_put(struct intel_ring *ring) -{ - kref_put(&ring->ref, intel_ring_free); -} - -void intel_engine_stop(struct intel_engine_cs *engine); -void intel_engine_cleanup(struct intel_engine_cs *engine); - -int __must_check intel_ring_cacheline_align(struct i915_request *rq); - -u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n); - -static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) -{ - /* Dummy function. - * - * This serves as a placeholder in the code so that the reader - * can compare against the preceding intel_ring_begin() and - * check that the number of dwords emitted matches the space - * reserved for the command packet (i.e. the value passed to - * intel_ring_begin()). - */ - GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); -} - -static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) -{ - return pos & (ring->size - 1); -} - -static inline bool -intel_ring_offset_valid(const struct intel_ring *ring, - unsigned int pos) -{ - if (pos & -ring->size) /* must be strictly within the ring */ - return false; - - if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */ - return false; - - return true; -} - -static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) -{ - /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. 
*/ - u32 offset = addr - rq->ring->vaddr; - GEM_BUG_ON(offset > rq->ring->size); - return intel_ring_wrap(rq->ring, offset); -} - -static inline void -assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) -{ - GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); - - /* - * "Ring Buffer Use" - * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 - * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 - * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the - * same cacheline, the Head Pointer must not be greater than the Tail - * Pointer." - * - * We use ring->head as the last known location of the actual RING_HEAD, - * it may have advanced but in the worst case it is equally the same - * as ring->head and so we should never program RING_TAIL to advance - * into the same cacheline as ring->head. - */ -#define cacheline(a) round_down(a, CACHELINE_BYTES) - GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && - tail < ring->head); -#undef cacheline -} - -static inline unsigned int -intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) -{ - /* Whilst writes to the tail are strictly order, there is no - * serialisation between readers and the writers. The tail may be - * read by i915_request_retire() just as it is being updated - * by execlists, as although the breadcrumb is complete, the context - * switch hasn't been seen. - */ - assert_ring_tail_valid(ring, tail); - ring->tail = tail; - return tail; -} - -static inline unsigned int -__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) -{ - /* - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the - * same cacheline, the Head Pointer must not be greater than the Tail - * Pointer." 
- */ - GEM_BUG_ON(!is_power_of_2(size)); - return (head - tail - CACHELINE_BYTES) & (size - 1); -} - -int intel_engine_setup_common(struct intel_engine_cs *engine); -int intel_engine_init_common(struct intel_engine_cs *engine); -void intel_engine_cleanup_common(struct intel_engine_cs *engine); - -int intel_init_render_ring_buffer(struct intel_engine_cs *engine); -int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); -int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); -int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); - -int intel_engine_stop_cs(struct intel_engine_cs *engine); -void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine); - -void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask); - -u64 intel_engine_get_active_head(const struct intel_engine_cs *engine); -u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine); - -void intel_engine_get_instdone(struct intel_engine_cs *engine, - struct intel_instdone *instdone); - -void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); - -void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); -void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); - -void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); - -static inline void -intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine) -{ - irq_work_queue(&engine->breadcrumbs.irq_work); -} - -void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine); - -void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); - -void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, - struct drm_printer *p); - -static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) -{ - memset(batch, 0, 6 * sizeof(u32)); - - batch[0] = GFX_OP_PIPE_CONTROL(6); - batch[1] = flags; - batch[2] = offset; - - return batch + 6; -} - -static inline u32 * -gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) -{ - /* We're using qword write, offset should be aligned to 8 bytes. */ - GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); - - /* w/a for post sync ops following a GPGPU operation we - * need a prior CS_STALL, which is emitted by the flush - * following the batch. - */ - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; - *cs++ = gtt_offset; - *cs++ = 0; - *cs++ = value; - /* We're thrashing one dword of HWS. */ - *cs++ = 0; - - return cs; -} - -static inline u32 * -gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) -{ - /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. 
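/*
 * The "head and tail must not share a cacheline" rule quoted for
 * __intel_ring_space() above reduces to one masked subtraction on a
 * power-of-two ring, always keeping a cacheline of slack. A small sketch
 * with a worked example; the names are illustrative only.
 */
#include <assert.h>
#include <stdint.h>

#define EXAMPLE_CACHELINE	64u

/* Free bytes between TAIL and HEAD, leaving one cacheline as the guard. */
static uint32_t example_ring_space(uint32_t head, uint32_t tail, uint32_t size)
{
	assert((size & (size - 1)) == 0);	/* size must be a power of two */
	return (head - tail - EXAMPLE_CACHELINE) & (size - 1);
}

/*
 * Worked example for a 4 KiB ring: head = 256, tail = 1024 gives
 * 256 - 1024 - 64 = -832, which the mask folds to 4096 - 832 = 3264 free
 * bytes, i.e. everything except the 768 in-flight bytes and the 64-byte
 * guard cacheline.
 */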
*/ - GEM_BUG_ON(gtt_offset & (1 << 5)); - /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ - GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); - - *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags; - *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT; - *cs++ = 0; - *cs++ = value; - - return cs; -} - -static inline void intel_engine_reset(struct intel_engine_cs *engine, - bool stalled) -{ - if (engine->reset.reset) - engine->reset.reset(engine, stalled); -} - -void intel_engines_sanitize(struct drm_i915_private *i915, bool force); -void intel_gt_resume(struct drm_i915_private *i915); - -bool intel_engine_is_idle(struct intel_engine_cs *engine); -bool intel_engines_are_idle(struct drm_i915_private *dev_priv); - -void intel_engine_lost_context(struct intel_engine_cs *engine); - -void intel_engines_park(struct drm_i915_private *i915); -void intel_engines_unpark(struct drm_i915_private *i915); - -void intel_engines_reset_default_submission(struct drm_i915_private *i915); -unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); - -bool intel_engine_can_store_dword(struct intel_engine_cs *engine); - -__printf(3, 4) -void intel_engine_dump(struct intel_engine_cs *engine, - struct drm_printer *m, - const char *header, ...); - -struct intel_engine_cs * -intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance); - -static inline void intel_engine_context_in(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (READ_ONCE(engine->stats.enabled) == 0) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - if (engine->stats.active++ == 0) - engine->stats.start = ktime_get(); - GEM_BUG_ON(engine->stats.active == 0); - } - - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - -static inline void intel_engine_context_out(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (READ_ONCE(engine->stats.enabled) == 0) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - ktime_t last; - - if (engine->stats.active && --engine->stats.active == 0) { - /* - * Decrement the active context count and in case GPU - * is now idle add up to the running total. - */ - last = ktime_sub(ktime_get(), engine->stats.start); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } else if (engine->stats.active == 0) { - /* - * After turning on engine stats, context out might be - * the first event in which case we account from the - * time stats gathering was turned on. 
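/*
 * intel_engine_context_in()/out() above keep a running total of engine
 * busyness: the first context in records a start timestamp, the last context
 * out folds the elapsed time into the total. A simplified single-threaded
 * sketch of that bookkeeping (the driver's seqlock and enabled_at handling
 * are omitted; the example_* names are illustrative).
 */
#include <stdint.h>
#include <time.h>

struct example_engine_stats {
	unsigned int active;	/* contexts currently on the engine */
	uint64_t start_ns;	/* timestamp of the first context in */
	uint64_t total_ns;	/* accumulated busy time */
};

static uint64_t example_now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

static void example_context_in(struct example_engine_stats *st)
{
	if (st->active++ == 0)
		st->start_ns = example_now_ns();
}

static void example_context_out(struct example_engine_stats *st)
{
	if (st->active && --st->active == 0)
		st->total_ns += example_now_ns() - st->start_ns;
}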
- */ - last = ktime_sub(ktime_get(), engine->stats.enabled_at); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } - } - - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - -int intel_enable_engine_stats(struct intel_engine_cs *engine); -void intel_disable_engine_stats(struct intel_engine_cs *engine); - -ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); - -struct i915_request * -intel_engine_find_active_request(struct intel_engine_cs *engine); - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) - -static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists) -{ - if (!execlists->preempt_hang.inject_hang) - return false; - - complete(&execlists->preempt_hang.completion); - return true; -} - -#else - -static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists) -{ - return false; -} - -#endif - -static inline u32 -intel_engine_next_hangcheck_seqno(struct intel_engine_cs *engine) -{ - return engine->hangcheck.next_seqno = - next_pseudo_random32(engine->hangcheck.next_seqno); -} - -static inline u32 -intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine) -{ - return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK); -} - -#endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_sseu.c b/drivers/gpu/drm/i915/intel_sseu.c deleted file mode 100644 index 7f448f3bea0b..000000000000 --- a/drivers/gpu/drm/i915/intel_sseu.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#include "i915_drv.h" -#include "intel_lrc_reg.h" -#include "intel_sseu.h" - -u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, - const struct intel_sseu *req_sseu) -{ - const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; - bool subslice_pg = sseu->has_subslice_pg; - struct intel_sseu ctx_sseu; - u8 slices, subslices; - u32 rpcs = 0; - - /* - * No explicit RPCS request is needed to ensure full - * slice/subslice/EU enablement prior to Gen9. - */ - if (INTEL_GEN(i915) < 9) - return 0; - - /* - * If i915/perf is active, we want a stable powergating configuration - * on the system. - * - * We could choose full enablement, but on ICL we know there are use - * cases which disable slices for functional, apart for performance - * reasons. So in this case we select a known stable subset. - */ - if (!i915->perf.oa.exclusive_stream) { - ctx_sseu = *req_sseu; - } else { - ctx_sseu = intel_sseu_from_device_info(sseu); - - if (IS_GEN(i915, 11)) { - /* - * We only need subslice count so it doesn't matter - * which ones we select - just turn off low bits in the - * amount of half of all available subslices per slice. - */ - ctx_sseu.subslice_mask = - ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2)); - ctx_sseu.slice_mask = 0x1; - } - } - - slices = hweight8(ctx_sseu.slice_mask); - subslices = hweight8(ctx_sseu.subslice_mask); - - /* - * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits - * wide and Icelake has up to eight subslices, specfial programming is - * needed in order to correctly enable all subslices. - * - * According to documentation software must consider the configuration - * as 2x4x8 and hardware will translate this to 1x8x8. - * - * Furthemore, even though SScount is three bits, maximum documented - * value for it is four. From this some rules/restrictions follow: - * - * 1. - * If enabled subslice count is greater than four, two whole slices must - * be enabled instead. - * - * 2. 
- * When more than one slice is enabled, hardware ignores the subslice - * count altogether. - * - * From these restrictions it follows that it is not possible to enable - * a count of subslices between the SScount maximum of four restriction, - * and the maximum available number on a particular SKU. Either all - * subslices are enabled, or a count between one and four on the first - * slice. - */ - if (IS_GEN(i915, 11) && - slices == 1 && - subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) { - GEM_BUG_ON(subslices & 1); - - subslice_pg = false; - slices *= 2; - } - - /* - * Starting in Gen9, render power gating can leave - * slice/subslice/EU in a partially enabled state. We - * must make an explicit request through RPCS for full - * enablement. - */ - if (sseu->has_slice_pg) { - u32 mask, val = slices; - - if (INTEL_GEN(i915) >= 11) { - mask = GEN11_RPCS_S_CNT_MASK; - val <<= GEN11_RPCS_S_CNT_SHIFT; - } else { - mask = GEN8_RPCS_S_CNT_MASK; - val <<= GEN8_RPCS_S_CNT_SHIFT; - } - - GEM_BUG_ON(val & ~mask); - val &= mask; - - rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val; - } - - if (subslice_pg) { - u32 val = subslices; - - val <<= GEN8_RPCS_SS_CNT_SHIFT; - - GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK); - val &= GEN8_RPCS_SS_CNT_MASK; - - rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val; - } - - if (sseu->has_eu_pg) { - u32 val; - - val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; - GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); - val &= GEN8_RPCS_EU_MIN_MASK; - - rpcs |= val; - - val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; - GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); - val &= GEN8_RPCS_EU_MAX_MASK; - - rpcs |= val; - - rpcs |= GEN8_RPCS_ENABLE; - } - - return rpcs; -} diff --git a/drivers/gpu/drm/i915/intel_sseu.h b/drivers/gpu/drm/i915/intel_sseu.h deleted file mode 100644 index 73bc824094e8..000000000000 --- a/drivers/gpu/drm/i915/intel_sseu.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#ifndef __INTEL_SSEU_H__ -#define __INTEL_SSEU_H__ - -#include - -struct drm_i915_private; - -#define GEN_MAX_SLICES (6) /* CNL upper bound */ -#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ - -struct sseu_dev_info { - u8 slice_mask; - u8 subslice_mask[GEN_MAX_SLICES]; - u16 eu_total; - u8 eu_per_subslice; - u8 min_eu_in_pool; - /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ - u8 subslice_7eu[3]; - u8 has_slice_pg:1; - u8 has_subslice_pg:1; - u8 has_eu_pg:1; - - /* Topology fields */ - u8 max_slices; - u8 max_subslices; - u8 max_eus_per_subslice; - - /* We don't have more than 8 eus per subslice at the moment and as we - * store eus enabled using bits, no need to multiply by eus per - * subslice. - */ - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; -}; - -/* - * Powergating configuration for a particular (context,engine). 
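/*
 * intel_sseu_make_rpcs() above builds the power-gating request by shifting
 * each enabled-unit count into its own field and OR-ing in the matching
 * enable bit. A generic sketch of that packing pattern with made-up field
 * positions; the real GEN8/GEN11 shifts and masks live in the register
 * headers and are not reproduced here.
 */
#include <assert.h>
#include <stdint.h>

/* Made-up field layout purely for illustration. */
#define EXAMPLE_SLICE_SHIFT	0
#define EXAMPLE_SLICE_MASK	(0x7u << EXAMPLE_SLICE_SHIFT)
#define EXAMPLE_SUBSLICE_SHIFT	3
#define EXAMPLE_SUBSLICE_MASK	(0xfu << EXAMPLE_SUBSLICE_SHIFT)
#define EXAMPLE_ENABLE		(1u << 31)

static uint32_t example_pack_powergate(unsigned int slices,
				       unsigned int subslices)
{
	uint32_t rpcs = 0;
	uint32_t val;

	val = slices << EXAMPLE_SLICE_SHIFT;
	assert(!(val & ~EXAMPLE_SLICE_MASK));	/* count must fit its field */
	rpcs |= EXAMPLE_ENABLE | (val & EXAMPLE_SLICE_MASK);

	val = subslices << EXAMPLE_SUBSLICE_SHIFT;
	assert(!(val & ~EXAMPLE_SUBSLICE_MASK));
	rpcs |= val & EXAMPLE_SUBSLICE_MASK;

	return rpcs;
}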
- */ -struct intel_sseu { - u8 slice_mask; - u8 subslice_mask; - u8 min_eus_per_subslice; - u8 max_eus_per_subslice; -}; - -static inline struct intel_sseu -intel_sseu_from_device_info(const struct sseu_dev_info *sseu) -{ - struct intel_sseu value = { - .slice_mask = sseu->slice_mask, - .subslice_mask = sseu->subslice_mask[0], - .min_eus_per_subslice = sseu->max_eus_per_subslice, - .max_eus_per_subslice = sseu->max_eus_per_subslice, - }; - - return value; -} - -u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, - const struct intel_sseu *req_sseu); - -#endif /* __INTEL_SSEU_H__ */ diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 488dffba04d2..30a8e376d19f 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -22,11 +22,11 @@ * */ +#include "gt/intel_reset.h" #include "intel_uc.h" #include "intel_guc_submission.h" #include "intel_guc.h" #include "i915_drv.h" -#include "i915_reset.h" static void guc_free_load_err_log(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c deleted file mode 100644 index b3cbed1ee1c9..000000000000 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ /dev/null @@ -1,1402 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2014-2018 Intel Corporation - */ - -#include "i915_drv.h" -#include "intel_workarounds.h" - -/** - * DOC: Hardware workarounds - * - * This file is intended as a central place to implement most [1]_ of the - * required workarounds for hardware to work as originally intended. They fall - * in five basic categories depending on how/when they are applied: - * - * - Workarounds that touch registers that are saved/restored to/from the HW - * context image. The list is emitted (via Load Register Immediate commands) - * everytime a new context is created. - * - GT workarounds. The list of these WAs is applied whenever these registers - * revert to default values (on GPU reset, suspend/resume [2]_, etc..). - * - Display workarounds. The list is applied during display clock-gating - * initialization. - * - Workarounds that whitelist a privileged register, so that UMDs can manage - * them directly. This is just a special case of a MMMIO workaround (as we - * write the list of these to/be-whitelisted registers to some special HW - * registers). - * - Workaround batchbuffers, that get executed automatically by the hardware - * on every HW context restore. - * - * .. [1] Please notice that there are other WAs that, due to their nature, - * cannot be applied from a central place. Those are peppered around the rest - * of the code, as needed. - * - * .. [2] Technically, some registers are powercontext saved & restored, so they - * survive a suspend/resume. In practice, writing them again is not too - * costly and simplifies things. We can revisit this in the future. - * - * Layout - * '''''' - * - * Keep things in this file ordered by WA type, as per the above (context, GT, - * display, register whitelist, batchbuffer). Then, inside each type, keep the - * following order: - * - * - Infrastructure functions and macros - * - WAs per platform in standard gen/chrono order - * - Public functions to init or apply the given workaround type. - */ - -static void wa_init_start(struct i915_wa_list *wal, const char *name) -{ - wal->name = name; -} - -#define WA_LIST_CHUNK (1 << 4) - -static void wa_init_finish(struct i915_wa_list *wal) -{ - /* Trim unused entries. 
*/ - if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) { - struct i915_wa *list = kmemdup(wal->list, - wal->count * sizeof(*list), - GFP_KERNEL); - - if (list) { - kfree(wal->list); - wal->list = list; - } - } - - if (!wal->count) - return; - - DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n", - wal->wa_count, wal->name); -} - -static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) -{ - unsigned int addr = i915_mmio_reg_offset(wa->reg); - unsigned int start = 0, end = wal->count; - const unsigned int grow = WA_LIST_CHUNK; - struct i915_wa *wa_; - - GEM_BUG_ON(!is_power_of_2(grow)); - - if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */ - struct i915_wa *list; - - list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa), - GFP_KERNEL); - if (!list) { - DRM_ERROR("No space for workaround init!\n"); - return; - } - - if (wal->list) - memcpy(list, wal->list, sizeof(*wa) * wal->count); - - wal->list = list; - } - - while (start < end) { - unsigned int mid = start + (end - start) / 2; - - if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) { - start = mid + 1; - } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) { - end = mid; - } else { - wa_ = &wal->list[mid]; - - if ((wa->mask & ~wa_->mask) == 0) { - DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n", - i915_mmio_reg_offset(wa_->reg), - wa_->mask, wa_->val); - - wa_->val &= ~wa->mask; - } - - wal->wa_count++; - wa_->val |= wa->val; - wa_->mask |= wa->mask; - wa_->read |= wa->read; - return; - } - } - - wal->wa_count++; - wa_ = &wal->list[wal->count++]; - *wa_ = *wa; - - while (wa_-- > wal->list) { - GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) == - i915_mmio_reg_offset(wa_[1].reg)); - if (i915_mmio_reg_offset(wa_[1].reg) > - i915_mmio_reg_offset(wa_[0].reg)) - break; - - swap(wa_[1], wa_[0]); - } -} - -static void -wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, - u32 val) -{ - struct i915_wa wa = { - .reg = reg, - .mask = mask, - .val = val, - .read = mask, - }; - - _wa_add(wal, &wa); -} - -static void -wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); -} - -static void -wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - wa_write_masked_or(wal, reg, ~0, val); -} - -static void -wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - wa_write_masked_or(wal, reg, val, val); -} - -static void -ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) -{ - struct i915_wa wa = { - .reg = reg, - .mask = mask, - .val = val, - /* Bonkers HW, skip verifying */ - }; - - _wa_add(wal, &wa); -} - -#define WA_SET_BIT_MASKED(addr, mask) \ - wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) - -#define WA_CLR_BIT_MASKED(addr, mask) \ - wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask)) - -#define WA_SET_FIELD_MASKED(addr, mask, value) \ - wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value))) - -static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine) -{ - struct i915_wa_list *wal = &engine->ctx_wa_list; - - WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); - - /* WaDisableAsyncFlipPerfMode:bdw,chv */ - WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); - - /* WaDisablePartialInstShootdown:bdw,chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Use Force Non-Coherent whenever executing a 3D context. 
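/*
 * The WA_SET_BIT_MASKED()/WA_CLR_BIT_MASKED() helpers above target "masked"
 * registers, whose written dword carries a per-bit write-enable mask in its
 * upper half and the new values in its lower half, so unselected bits keep
 * their current state. A small sketch of that encoding, assuming the usual
 * 16-bit split; the example_* names are illustrative.
 */
#include <assert.h>
#include <stdint.h>

/* High half selects which bits the hardware may change, low half supplies
 * their new values. */
static uint32_t example_masked_field(uint16_t mask, uint16_t value)
{
	assert((value & ~mask) == 0);	/* only write bits we also enable */
	return (uint32_t)mask << 16 | value;
}

static uint32_t example_masked_bit_enable(uint16_t bit)
{
	return example_masked_field(bit, bit);
}

static uint32_t example_masked_bit_disable(uint16_t bit)
{
	return example_masked_field(bit, 0);
}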
This is a - * workaround for for a possible hang in the unlikely event a TLB - * invalidation occurs during a PSD flush. - */ - /* WaForceEnableNonCoherent:bdw,chv */ - /* WaHdcDisableFetchWhenMasked:bdw,chv */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_DONOT_FETCH_MEM_WHEN_MASKED | - HDC_FORCE_NON_COHERENT); - - /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: - * "The Hierarchical Z RAW Stall Optimization allows non-overlapping - * polygons in the same 8x4 pixel/sample area to be processed without - * stalling waiting for the earlier ones to write to Hierarchical Z - * buffer." - * - * This optimization is off by default for BDW and CHV; turn it on. - */ - WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); - - /* Wa4x4STCOptimizationDisable:bdw,chv */ - WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK, - GEN6_WIZ_HASHING_16x4); -} - -static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *wal = &engine->ctx_wa_list; - - gen8_ctx_workarounds_init(engine); - - /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* WaDisableDopClockGating:bdw - * - * Also see the related UCGTCL1 write in broadwell_init_clock_gating() - * to disable EUTC clock gating. - */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - DOP_CLOCK_GATING_DISABLE); - - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - WA_SET_BIT_MASKED(HDC_CHICKEN0, - /* WaForceContextSaveRestoreNonCoherent:bdw */ - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ - (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); -} - -static void chv_ctx_workarounds_init(struct intel_engine_cs *engine) -{ - struct i915_wa_list *wal = &engine->ctx_wa_list; - - gen8_ctx_workarounds_init(engine); - - /* WaDisableThreadStallDopClockGating:chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* Improve HiZ throughput on CHV. */ - WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); -} - -static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *wal = &engine->ctx_wa_list; - - if (HAS_LLC(i915)) { - /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl - * - * Must match Display Engine. See - * WaCompressedResourceDisplayNewHashMode. 
- */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN9_PBE_COMPRESSED_HASH_SELECTION); - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); - } - - /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ - /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - FLOW_CONTROL_ENABLE | - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - if (!IS_COFFEELAKE(i915)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - - /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ - /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_ENABLE_YV12_BUGFIX | - GEN9_ENABLE_GPGPU_PREEMPTION); - - /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ - /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(CACHE_MODE_1, - GEN8_4x4_STC_OPTIMIZATION_DISABLE | - GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); - - /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_CCS_TLB_PREFETCH_ENABLE); - - /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); - - /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are - * both tied to WaForceContextSaveRestoreNonCoherent - * in some hsds for skl. We keep the tie for all gen9. The - * documentation is a bit hazy and so we want to get common behaviour, - * even though there is no clear evidence we would need both on kbl/bxt. - * This area has been source of system hangs so we play it safe - * and mimic the skl regardless of what bspec says. - * - * Use Force Non-Coherent whenever executing a 3D context. This - * is a workaround for a possible hang in the unlikely event - * a TLB invalidation occurs during a PSD flush. - */ - - /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_NON_COHERENT); - - /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ - if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); - - /* - * Supporting preemption with fine-granularity requires changes in the - * batch buffer programming. Since we can't break old userspace, we - * need to set our default preemption level to safe value. Userspace is - * still able to use more fine-grained preemption levels, since in - * WaEnablePreemptionGranularityControlByUMD we're whitelisting the - * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are - * not real HW workarounds, but merely a way to start using preemption - * while maintaining old contract with userspace. 
- */ - - /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ - WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); - - /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, - GEN9_PREEMPT_GPGPU_LEVEL_MASK, - GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); - - /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ - if (IS_GEN9_LP(i915)) - WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); -} - -static void skl_tune_iz_hashing(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *wal = &engine->ctx_wa_list; - u8 vals[3] = { 0, 0, 0 }; - unsigned int i; - - for (i = 0; i < 3; i++) { - u8 ss; - - /* - * Only consider slices where one, and only one, subslice has 7 - * EUs - */ - if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i])) - continue; - - /* - * subslice_7eu[i] != 0 (because of the check above) and - * ss_max == 4 (maximum number of subslices possible per slice) - * - * -> 0 <= ss <= 3; - */ - ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1; - vals[i] = 3 - ss; - } - - if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) - return; - - /* Tune IZ hashing. See intel_device_info_runtime_init() */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN9_IZ_HASHING_MASK(2) | - GEN9_IZ_HASHING_MASK(1) | - GEN9_IZ_HASHING_MASK(0), - GEN9_IZ_HASHING(2, vals[2]) | - GEN9_IZ_HASHING(1, vals[1]) | - GEN9_IZ_HASHING(0, vals[0])); -} - -static void skl_ctx_workarounds_init(struct intel_engine_cs *engine) -{ - gen9_ctx_workarounds_init(engine); - skl_tune_iz_hashing(engine); -} - -static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine) -{ - struct i915_wa_list *wal = &engine->ctx_wa_list; - - gen9_ctx_workarounds_init(engine); - - /* WaDisableThreadStallDopClockGating:bxt */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - STALL_DOP_GATING_DISABLE); - - /* WaToEnableHwFixForPushConstHWBug:bxt */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); -} - -static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *wal = &engine->ctx_wa_list; - - gen9_ctx_workarounds_init(engine); - - /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableSbeCacheDispatchPortSharing:kbl */ - WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); -} - -static void glk_ctx_workarounds_init(struct intel_engine_cs *engine) -{ - struct i915_wa_list *wal = &engine->ctx_wa_list; - - gen9_ctx_workarounds_init(engine); - - /* WaToEnableHwFixForPushConstHWBug:glk */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); -} - -static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine) -{ - struct i915_wa_list *wal = &engine->ctx_wa_list; - - gen9_ctx_workarounds_init(engine); - - /* WaToEnableHwFixForPushConstHWBug:cfl */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableSbeCacheDispatchPortSharing:cfl */ - WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); -} - -static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *wal = &engine->ctx_wa_list; - - /* WaForceContextSaveRestoreNonCoherent:cnl */ - 
WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); - - /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */ - if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0)) - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5); - - /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */ - if (IS_CNL_REVID(i915, 0, CNL_REVID_B0)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE); - - /* WaPushConstantDereferenceHoldDisable:cnl */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); - - /* FtrEnableFastAnisoL1BankingFix:cnl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); - - /* WaDisable3DMidCmdPreemption:cnl */ - WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); - - /* WaDisableGPGPUMidCmdPreemption:cnl */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, - GEN9_PREEMPT_GPGPU_LEVEL_MASK, - GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); - - /* WaDisableEarlyEOT:cnl */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); -} - -static void icl_ctx_workarounds_init(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *wal = &engine->ctx_wa_list; - - /* Wa_1604370585:icl (pre-prod) - * Formerly known as WaPushConstantDereferenceHoldDisable - */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - PUSH_CONSTANT_DEREF_DISABLE); - - /* WaForceEnableNonCoherent:icl - * This is not the same workaround as in early Gen9 platforms, where - * lacking this could cause system hangs, but coherency performance - * overhead is high and only a few compute workloads really need it - * (the register is whitelisted in hardware now, so UMDs can opt in - * for coherency if they have a good reason). 
- */ - WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); - - /* Wa_2006611047:icl (pre-prod) - * Formerly known as WaDisableImprovedTdlClkGating - */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - GEN11_TDL_CLOCK_GATING_FIX_DISABLE); - - /* WaEnableStateCacheRedirectToCS:icl */ - WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1, - GEN11_STATE_CACHE_REDIRECT_TO_CS); - - /* Wa_2006665173:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) - WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, - GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); - - /* WaEnableFloatBlendOptimization:icl */ - wa_write_masked_or(wal, - GEN10_CACHE_MODE_SS, - 0, /* write-only, so skip validation */ - _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); - - /* WaDisableGPGPUMidThreadPreemption:icl */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, - GEN9_PREEMPT_GPGPU_LEVEL_MASK, - GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); -} - -void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *wal = &engine->ctx_wa_list; - - wa_init_start(wal, "context"); - - if (IS_GEN(i915, 11)) - icl_ctx_workarounds_init(engine); - else if (IS_CANNONLAKE(i915)) - cnl_ctx_workarounds_init(engine); - else if (IS_COFFEELAKE(i915)) - cfl_ctx_workarounds_init(engine); - else if (IS_GEMINILAKE(i915)) - glk_ctx_workarounds_init(engine); - else if (IS_KABYLAKE(i915)) - kbl_ctx_workarounds_init(engine); - else if (IS_BROXTON(i915)) - bxt_ctx_workarounds_init(engine); - else if (IS_SKYLAKE(i915)) - skl_ctx_workarounds_init(engine); - else if (IS_CHERRYVIEW(i915)) - chv_ctx_workarounds_init(engine); - else if (IS_BROADWELL(i915)) - bdw_ctx_workarounds_init(engine); - else if (INTEL_GEN(i915) < 8) - return; - else - MISSING_CASE(INTEL_GEN(i915)); - - wa_init_finish(wal); -} - -int intel_engine_emit_ctx_wa(struct i915_request *rq) -{ - struct i915_wa_list *wal = &rq->engine->ctx_wa_list; - struct i915_wa *wa; - unsigned int i; - u32 *cs; - int ret; - - if (wal->count == 0) - return 0; - - ret = rq->engine->emit_flush(rq, EMIT_BARRIER); - if (ret) - return ret; - - cs = intel_ring_begin(rq, (wal->count * 2 + 2)); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(wal->count); - for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { - *cs++ = i915_mmio_reg_offset(wa->reg); - *cs++ = wa->val; - } - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - ret = rq->engine->emit_flush(rq, EMIT_BARRIER); - if (ret) - return ret; - - return 0; -} - -static void -gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - /* WaDisableKillLogic:bxt,skl,kbl */ - if (!IS_COFFEELAKE(i915)) - wa_write_or(wal, - GAM_ECOCHK, - ECOCHK_DIS_TLB); - - if (HAS_LLC(i915)) { - /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl - * - * Must match Display Engine. See - * WaCompressedResourceDisplayNewHashMode. 
- */ - wa_write_or(wal, - MMCD_MISC_CTRL, - MMCD_PCLA | MMCD_HOTSPOT_EN); - } - - /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */ - wa_write_or(wal, - GAM_ECOCHK, - BDW_DISABLE_HDC_INVALIDATION); -} - -static void -skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - gen9_gt_workarounds_init(i915, wal); - - /* WaDisableGafsUnitClkGating:skl */ - wa_write_or(wal, - GEN7_UCGCTL4, - GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); - - /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER)) - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); -} - -static void -bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - gen9_gt_workarounds_init(i915, wal); - - /* WaInPlaceDecompressionHang:bxt */ - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); -} - -static void -kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - gen9_gt_workarounds_init(i915, wal); - - /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_REVID(i915, 0, KBL_REVID_B0)) - wa_write_or(wal, - GAMT_CHKN_BIT_REG, - GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); - - /* WaDisableGafsUnitClkGating:kbl */ - wa_write_or(wal, - GEN7_UCGCTL4, - GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); - - /* WaInPlaceDecompressionHang:kbl */ - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); -} - -static void -glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - gen9_gt_workarounds_init(i915, wal); -} - -static void -cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - gen9_gt_workarounds_init(i915, wal); - - /* WaDisableGafsUnitClkGating:cfl */ - wa_write_or(wal, - GEN7_UCGCTL4, - GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); - - /* WaInPlaceDecompressionHang:cfl */ - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); -} - -static void -wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; - u32 mcr_slice_subslice_mask; - - /* - * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl - * L3Banks could be fused off in single slice scenario. If that is - * the case, we might need to program MCR select to a valid L3Bank - * by default, to make sure we correctly read certain registers - * later on (in the range 0xB100 - 0xB3FF). - * This might be incompatible with - * WaProgramMgsrForCorrectSliceSpecificMmioReads. - * Fortunately, this should not happen in production hardware, so - * we only assert that this is the case (instead of implementing - * something more complex that requires checking the range of every - * MMIO read). 
- */ - if (INTEL_GEN(i915) >= 10 && - is_power_of_2(sseu->slice_mask)) { - /* - * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches - * enabled subslice, no need to redirect MCR packet - */ - u32 slice = fls(sseu->slice_mask); - u32 fuse3 = - intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3); - u8 ss_mask = sseu->subslice_mask[slice]; - - u8 enabled_mask = (ss_mask | ss_mask >> - GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK; - u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK; - - /* - * Production silicon should have matched L3Bank and - * subslice enabled - */ - WARN_ON((enabled_mask & disabled_mask) != enabled_mask); - } - - if (INTEL_GEN(i915) >= 11) - mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK | - GEN11_MCR_SUBSLICE_MASK; - else - mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK | - GEN8_MCR_SUBSLICE_MASK; - /* - * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl - * Before any MMIO read into slice/subslice specific registers, MCR - * packet control register needs to be programmed to point to any - * enabled s/ss pair. Otherwise, incorrect values will be returned. - * This means each subsequent MMIO read will be forwarded to an - * specific s/ss combination, but this is OK since these registers - * are consistent across s/ss in almost all cases. In the rare - * occasions, such as INSTDONE, where this value is dependent - * on s/ss combo, the read should be done with read_subslice_reg. - */ - wa_write_masked_or(wal, - GEN8_MCR_SELECTOR, - mcr_slice_subslice_mask, - intel_calculate_mcr_s_ss_select(i915)); -} - -static void -cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - wa_init_mcr(i915, wal); - - /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */ - if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0)) - wa_write_or(wal, - GAMT_CHKN_BIT_REG, - GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT); - - /* WaInPlaceDecompressionHang:cnl */ - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); -} - -static void -icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - wa_init_mcr(i915, wal); - - /* WaInPlaceDecompressionHang:icl */ - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - /* WaModifyGamTlbPartitioning:icl */ - wa_write_masked_or(wal, - GEN11_GACB_PERF_CTRL, - GEN11_HASH_CTRL_MASK, - GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4); - - /* Wa_1405766107:icl - * Formerly known as WaCL2SFHalfMaxAlloc - */ - wa_write_or(wal, - GEN11_LSN_UNSLCVC, - GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC | - GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC); - - /* Wa_220166154:icl - * Formerly known as WaDisCtxReload - */ - wa_write_or(wal, - GEN8_GAMW_ECO_DEV_RW_IA, - GAMW_ECO_DEV_CTX_RELOAD_DISABLE); - - /* Wa_1405779004:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) - wa_write_or(wal, - SLICE_UNIT_LEVEL_CLKGATE, - MSCUNIT_CLKGATE_DIS); - - /* Wa_1406680159:icl */ - wa_write_or(wal, - SUBSLICE_UNIT_LEVEL_CLKGATE, - GWUNIT_CLKGATE_DIS); - - /* Wa_1406838659:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) - wa_write_or(wal, - INF_UNIT_LEVEL_CLKGATE, - CGPSF_CLKGATE_DIS); - - /* Wa_1406463099:icl - * Formerly known as WaGamTlbPendError - */ - wa_write_or(wal, - GAMT_CHKN_BIT_REG, - GAMT_CHKN_DISABLE_L3_COH_PIPE); -} - -static void -gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - if (IS_GEN(i915, 11)) - icl_gt_workarounds_init(i915, wal); - else if (IS_CANNONLAKE(i915)) - cnl_gt_workarounds_init(i915, wal); 
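/*
 * Descriptive note (values below are illustrative only, not taken from any
 * platform table): the per-platform *_gt_workarounds_init() helpers only
 * record register/mask/value tuples in the i915_wa_list; the workaround
 * registers themselves are written later by wa_list_apply(), and with
 * CONFIG_DRM_I915_DEBUG_GEM enabled each write is immediately read back
 * and checked by wa_verify(), which reports the workaround as lost when
 * (cur ^ wa->val) & wa->read is non-zero.
 *
 * For example, with wa->val = 0x0004 and wa->read = 0x000c, a readback of
 * 0x0005 passes ((0x0005 ^ 0x0004) & 0x000c == 0), while a readback of
 * 0x0009 fails ((0x0009 ^ 0x0004) & 0x000c == 0x000c).
 */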
- else if (IS_COFFEELAKE(i915)) - cfl_gt_workarounds_init(i915, wal); - else if (IS_GEMINILAKE(i915)) - glk_gt_workarounds_init(i915, wal); - else if (IS_KABYLAKE(i915)) - kbl_gt_workarounds_init(i915, wal); - else if (IS_BROXTON(i915)) - bxt_gt_workarounds_init(i915, wal); - else if (IS_SKYLAKE(i915)) - skl_gt_workarounds_init(i915, wal); - else if (INTEL_GEN(i915) <= 8) - return; - else - MISSING_CASE(INTEL_GEN(i915)); -} - -void intel_gt_init_workarounds(struct drm_i915_private *i915) -{ - struct i915_wa_list *wal = &i915->gt_wa_list; - - wa_init_start(wal, "GT"); - gt_init_workarounds(i915, wal); - wa_init_finish(wal); -} - -static enum forcewake_domains -wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) -{ - enum forcewake_domains fw = 0; - struct i915_wa *wa; - unsigned int i; - - for (i = 0, wa = wal->list; i < wal->count; i++, wa++) - fw |= intel_uncore_forcewake_for_reg(uncore, - wa->reg, - FW_REG_READ | - FW_REG_WRITE); - - return fw; -} - -static bool -wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) -{ - if ((cur ^ wa->val) & wa->read) { - DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", - name, from, i915_mmio_reg_offset(wa->reg), - cur, cur & wa->read, - wa->val, wa->mask); - - return false; - } - - return true; -} - -static void -wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) -{ - enum forcewake_domains fw; - unsigned long flags; - struct i915_wa *wa; - unsigned int i; - - if (!wal->count) - return; - - fw = wal_get_fw_for_rmw(uncore, wal); - - spin_lock_irqsave(&uncore->lock, flags); - intel_uncore_forcewake_get__locked(uncore, fw); - - for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { - intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val); - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - wa_verify(wa, - intel_uncore_read_fw(uncore, wa->reg), - wal->name, "application"); - } - - intel_uncore_forcewake_put__locked(uncore, fw); - spin_unlock_irqrestore(&uncore->lock, flags); -} - -void intel_gt_apply_workarounds(struct drm_i915_private *i915) -{ - wa_list_apply(&i915->uncore, &i915->gt_wa_list); -} - -static bool wa_list_verify(struct intel_uncore *uncore, - const struct i915_wa_list *wal, - const char *from) -{ - struct i915_wa *wa; - unsigned int i; - bool ok = true; - - for (i = 0, wa = wal->list; i < wal->count; i++, wa++) - ok &= wa_verify(wa, - intel_uncore_read(uncore, wa->reg), - wal->name, from); - - return ok; -} - -bool intel_gt_verify_workarounds(struct drm_i915_private *i915, - const char *from) -{ - return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from); -} - -static void -whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg) -{ - struct i915_wa wa = { - .reg = reg - }; - - if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS)) - return; - - _wa_add(wal, &wa); -} - -static void gen9_whitelist_build(struct i915_wa_list *w) -{ - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ - whitelist_reg(w, GEN9_CTX_PREEMPT_REG); - - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ - whitelist_reg(w, GEN8_CS_CHICKEN1); - - /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ - whitelist_reg(w, GEN8_HDC_CHICKEN1); -} - -static void skl_whitelist_build(struct i915_wa_list *w) -{ - gen9_whitelist_build(w); - - /* WaDisableLSQCROPERFforOCL:skl */ - whitelist_reg(w, GEN8_L3SQCREG4); -} - -static void bxt_whitelist_build(struct i915_wa_list *w) -{ - gen9_whitelist_build(w); -} - -static void 
kbl_whitelist_build(struct i915_wa_list *w) -{ - gen9_whitelist_build(w); - - /* WaDisableLSQCROPERFforOCL:kbl */ - whitelist_reg(w, GEN8_L3SQCREG4); -} - -static void glk_whitelist_build(struct i915_wa_list *w) -{ - gen9_whitelist_build(w); - - /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */ - whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); -} - -static void cfl_whitelist_build(struct i915_wa_list *w) -{ - gen9_whitelist_build(w); -} - -static void cnl_whitelist_build(struct i915_wa_list *w) -{ - /* WaEnablePreemptionGranularityControlByUMD:cnl */ - whitelist_reg(w, GEN8_CS_CHICKEN1); -} - -static void icl_whitelist_build(struct i915_wa_list *w) -{ - /* WaAllowUMDToModifyHalfSliceChicken7:icl */ - whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7); - - /* WaAllowUMDToModifySamplerMode:icl */ - whitelist_reg(w, GEN10_SAMPLER_MODE); -} - -void intel_engine_init_whitelist(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *w = &engine->whitelist; - - GEM_BUG_ON(engine->id != RCS0); - - wa_init_start(w, "whitelist"); - - if (IS_GEN(i915, 11)) - icl_whitelist_build(w); - else if (IS_CANNONLAKE(i915)) - cnl_whitelist_build(w); - else if (IS_COFFEELAKE(i915)) - cfl_whitelist_build(w); - else if (IS_GEMINILAKE(i915)) - glk_whitelist_build(w); - else if (IS_KABYLAKE(i915)) - kbl_whitelist_build(w); - else if (IS_BROXTON(i915)) - bxt_whitelist_build(w); - else if (IS_SKYLAKE(i915)) - skl_whitelist_build(w); - else if (INTEL_GEN(i915) <= 8) - return; - else - MISSING_CASE(INTEL_GEN(i915)); - - wa_init_finish(w); -} - -void intel_engine_apply_whitelist(struct intel_engine_cs *engine) -{ - const struct i915_wa_list *wal = &engine->whitelist; - struct intel_uncore *uncore = engine->uncore; - const u32 base = engine->mmio_base; - struct i915_wa *wa; - unsigned int i; - - if (!wal->count) - return; - - for (i = 0, wa = wal->list; i < wal->count; i++, wa++) - intel_uncore_write(uncore, - RING_FORCE_TO_NONPRIV(base, i), - i915_mmio_reg_offset(wa->reg)); - - /* And clear the rest just in case of garbage */ - for (; i < RING_MAX_NONPRIV_SLOTS; i++) - intel_uncore_write(uncore, - RING_FORCE_TO_NONPRIV(base, i), - i915_mmio_reg_offset(RING_NOPID(base))); -} - -static void -rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) -{ - struct drm_i915_private *i915 = engine->i915; - - if (IS_GEN(i915, 11)) { - /* This is not an Wa. 
Enable for better image quality */ - wa_masked_en(wal, - _3D_CHICKEN3, - _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); - - /* WaPipelineFlushCoherentLines:icl */ - ignore_wa_write_or(wal, - GEN8_L3SQCREG4, - GEN8_LQSC_FLUSH_COHERENT_LINES, - GEN8_LQSC_FLUSH_COHERENT_LINES); - - /* - * Wa_1405543622:icl - * Formerly known as WaGAPZPriorityScheme - */ - wa_write_or(wal, - GEN8_GARBCNTL, - GEN11_ARBITRATION_PRIO_ORDER_MASK); - - /* - * Wa_1604223664:icl - * Formerly known as WaL3BankAddressHashing - */ - wa_write_masked_or(wal, - GEN8_GARBCNTL, - GEN11_HASH_CTRL_EXCL_MASK, - GEN11_HASH_CTRL_EXCL_BIT0); - wa_write_masked_or(wal, - GEN11_GLBLINVL, - GEN11_BANK_HASH_ADDR_EXCL_MASK, - GEN11_BANK_HASH_ADDR_EXCL_BIT0); - - /* - * Wa_1405733216:icl - * Formerly known as WaDisableCleanEvicts - */ - ignore_wa_write_or(wal, - GEN8_L3SQCREG4, - GEN11_LQSC_CLEAN_EVICT_DISABLE, - GEN11_LQSC_CLEAN_EVICT_DISABLE); - - /* WaForwardProgressSoftReset:icl */ - wa_write_or(wal, - GEN10_SCRATCH_LNCF2, - PMFLUSHDONE_LNICRSDROP | - PMFLUSH_GAPL3UNBLOCK | - PMFLUSHDONE_LNEBLK); - - /* Wa_1406609255:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) - wa_write_or(wal, - GEN7_SARCHKMD, - GEN7_DISABLE_DEMAND_PREFETCH | - GEN7_DISABLE_SAMPLER_PREFETCH); - } - - if (IS_GEN_RANGE(i915, 9, 11)) { - /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */ - wa_masked_en(wal, - GEN7_FF_SLICE_CS_CHICKEN1, - GEN9_FFSC_PERCTX_PREEMPT_CTRL); - } - - if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) { - /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */ - wa_write_or(wal, - GEN8_GARBCNTL, - GEN9_GAPS_TSV_CREDIT_DISABLE); - } - - if (IS_BROXTON(i915)) { - /* WaDisablePooledEuLoadBalancingFix:bxt */ - wa_masked_en(wal, - FF_SLICE_CS_CHICKEN2, - GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); - } - - if (IS_GEN(i915, 9)) { - /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */ - wa_masked_en(wal, - GEN9_CSFE_CHICKEN1_RCS, - GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE); - - /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ - wa_write_or(wal, - BDW_SCRATCH1, - GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); - - /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ - if (IS_GEN9_LP(i915)) - wa_write_masked_or(wal, - GEN8_L3SQCREG1, - L3_PRIO_CREDITS_MASK, - L3_GENERAL_PRIO_CREDITS(62) | - L3_HIGH_PRIO_CREDITS(2)); - - /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN8_LQSC_FLUSH_COHERENT_LINES); - } -} - -static void -xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) -{ - struct drm_i915_private *i915 = engine->i915; - - /* WaKBLVECSSemaphoreWaitPoll:kbl */ - if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) { - wa_write(wal, - RING_SEMA_WAIT_POLL(engine->mmio_base), - 1); - } -} - -static void -engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal) -{ - if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8)) - return; - - if (engine->id == RCS0) - rcs_engine_wa_init(engine, wal); - else - xcs_engine_wa_init(engine, wal); -} - -void intel_engine_init_workarounds(struct intel_engine_cs *engine) -{ - struct i915_wa_list *wal = &engine->wa_list; - - if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8)) - return; - - wa_init_start(wal, engine->name); - engine_init_workarounds(engine, wal); - wa_init_finish(wal); -} - -void intel_engine_apply_workarounds(struct intel_engine_cs *engine) -{ - wa_list_apply(engine->uncore, &engine->wa_list); -} - -static struct i915_vma * -create_scratch(struct i915_address_space *vm, 
int count) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - unsigned int size; - int err; - - size = round_up(count * sizeof(u32), PAGE_SIZE); - obj = i915_gem_object_create_internal(vm->i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; - } - - err = i915_vma_pin(vma, 0, 0, - i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER); - if (err) - goto err_obj; - - return vma; - -err_obj: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - -static int -wa_list_srm(struct i915_request *rq, - const struct i915_wa_list *wal, - struct i915_vma *vma) -{ - const struct i915_wa *wa; - unsigned int i; - u32 srm, *cs; - - srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; - if (INTEL_GEN(rq->i915) >= 8) - srm++; - - cs = intel_ring_begin(rq, 4 * wal->count); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { - *cs++ = srm; - *cs++ = i915_mmio_reg_offset(wa->reg); - *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; - *cs++ = 0; - } - intel_ring_advance(rq, cs); - - return 0; -} - -static int engine_wa_list_verify(struct intel_engine_cs *engine, - const struct i915_wa_list * const wal, - const char *from) -{ - const struct i915_wa *wa; - struct i915_request *rq; - struct i915_vma *vma; - unsigned int i; - u32 *results; - int err; - - if (!wal->count) - return 0; - - vma = create_scratch(&engine->i915->ggtt.vm, wal->count); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - rq = i915_request_alloc(engine, engine->kernel_context->gem_context); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_vma; - } - - err = wa_list_srm(rq, wal, vma); - if (err) - goto err_vma; - - i915_request_add(rq); - if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { - err = -ETIME; - goto err_vma; - } - - results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(results)) { - err = PTR_ERR(results); - goto err_vma; - } - - err = 0; - for (i = 0, wa = wal->list; i < wal->count; i++, wa++) - if (!wa_verify(wa, results[i], wal->name, from)) - err = -ENXIO; - - i915_gem_object_unpin_map(vma->obj); - -err_vma: - i915_vma_unpin(vma); - i915_vma_put(vma); - return err; -} - -int intel_engine_verify_workarounds(struct intel_engine_cs *engine, - const char *from) -{ - return engine_wa_list_verify(engine, &engine->wa_list, from); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_workarounds.c" -#endif diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h deleted file mode 100644 index fdf7ebb90f28..000000000000 --- a/drivers/gpu/drm/i915/intel_workarounds.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2014-2018 Intel Corporation - */ - -#ifndef _I915_WORKAROUNDS_H_ -#define _I915_WORKAROUNDS_H_ - -#include - -#include "intel_workarounds_types.h" - -static inline void intel_wa_list_free(struct i915_wa_list *wal) -{ - kfree(wal->list); - memset(wal, 0, sizeof(*wal)); -} - -void intel_engine_init_ctx_wa(struct intel_engine_cs *engine); -int intel_engine_emit_ctx_wa(struct i915_request *rq); - -void intel_gt_init_workarounds(struct drm_i915_private *i915); -void intel_gt_apply_workarounds(struct drm_i915_private *i915); -bool intel_gt_verify_workarounds(struct drm_i915_private *i915, - const char *from); - -void intel_engine_init_whitelist(struct intel_engine_cs *engine); -void 
intel_engine_apply_whitelist(struct intel_engine_cs *engine); - -void intel_engine_init_workarounds(struct intel_engine_cs *engine); -void intel_engine_apply_workarounds(struct intel_engine_cs *engine); -int intel_engine_verify_workarounds(struct intel_engine_cs *engine, - const char *from); - -#endif diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/intel_workarounds_types.h deleted file mode 100644 index 42ac1fb99572..000000000000 --- a/drivers/gpu/drm/i915/intel_workarounds_types.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2014-2018 Intel Corporation - */ - -#ifndef __INTEL_WORKAROUNDS_TYPES_H__ -#define __INTEL_WORKAROUNDS_TYPES_H__ - -#include - -#include "i915_reg.h" - -struct i915_wa { - i915_reg_t reg; - u32 mask; - u32 val; - u32 read; -}; - -struct i915_wa_list { - const char *name; - struct i915_wa *list; - unsigned int count; - unsigned int wa_count; -}; - -#endif /* __INTEL_WORKAROUNDS_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index e1cb22f03e8e..6f52ca881173 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -24,8 +24,9 @@ #include -#include "../i915_reset.h" -#include "../i915_selftest.h" +#include "gt/intel_reset.h" +#include "i915_selftest.h" + #include "i915_random.h" #include "igt_flush_test.h" #include "igt_live_test.h" diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c index 208a966da8ca..4f31b137c428 100644 --- a/drivers/gpu/drm/i915/selftests/igt_reset.c +++ b/drivers/gpu/drm/i915/selftests/igt_reset.c @@ -6,8 +6,9 @@ #include "igt_reset.h" +#include "gt/intel_engine.h" + #include "../i915_drv.h" -#include "../intel_ringbuffer.h" void igt_global_reset_lock(struct drm_i915_private *i915) { diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.h b/drivers/gpu/drm/i915/selftests/igt_spinner.h index 391777c76dc7..d312e7cdab68 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.h +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.h @@ -9,9 +9,10 @@ #include "../i915_selftest.h" +#include "gt/intel_engine.h" + #include "../i915_drv.h" #include "../i915_request.h" -#include "../intel_ringbuffer.h" #include "../i915_gem_context.h" struct igt_spinner { diff --git a/drivers/gpu/drm/i915/selftests/intel_engine_cs.c b/drivers/gpu/drm/i915/selftests/intel_engine_cs.c deleted file mode 100644 index cfaa6b296835..000000000000 --- a/drivers/gpu/drm/i915/selftests/intel_engine_cs.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * SPDX-License-Identifier: GPL-2.0 - * - * Copyright © 2018 Intel Corporation - */ - -#include "../i915_selftest.h" - -static int intel_mmio_bases_check(void *arg) -{ - int i, j; - - for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { - const struct engine_info *info = &intel_engines[i]; - char name[INTEL_ENGINE_CS_MAX_NAME]; - u8 prev = U8_MAX; - - __sprint_engine_name(name, info); - - for (j = 0; j < MAX_MMIO_BASES; j++) { - u8 gen = info->mmio_bases[j].gen; - u32 base = info->mmio_bases[j].base; - - if (gen >= prev) { - pr_err("%s: %s: mmio base for gen %x " - "is before the one for gen %x\n", - __func__, name, prev, gen); - return -EINVAL; - } - - if (gen == 0) - break; - - if (!base) { - pr_err("%s: %s: invalid mmio base (%x) " - "for gen %x at entry %u\n", - __func__, name, base, gen, j); - return -EINVAL; - } - - prev = gen; - } - - pr_info("%s: min gen supported for %s = %d\n", - 
__func__, name, prev); - } - - return 0; -} - -int intel_engine_cs_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(intel_mmio_bases_check), - }; - - return i915_subtests(tests, NULL); -} diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c deleted file mode 100644 index 2fd33aad8683..000000000000 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ /dev/null @@ -1,1919 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include - -#include "../i915_selftest.h" -#include "i915_random.h" -#include "igt_flush_test.h" -#include "igt_reset.h" -#include "igt_wedge_me.h" - -#include "mock_context.h" -#include "mock_drm.h" - -#define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ - -struct hang { - struct drm_i915_private *i915; - struct drm_i915_gem_object *hws; - struct drm_i915_gem_object *obj; - struct i915_gem_context *ctx; - u32 *seqno; - u32 *batch; -}; - -static int hang_init(struct hang *h, struct drm_i915_private *i915) -{ - void *vaddr; - int err; - - memset(h, 0, sizeof(*h)); - h->i915 = i915; - - h->ctx = kernel_context(i915); - if (IS_ERR(h->ctx)) - return PTR_ERR(h->ctx); - - GEM_BUG_ON(i915_gem_context_is_bannable(h->ctx)); - - h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(h->hws)) { - err = PTR_ERR(h->hws); - goto err_ctx; - } - - h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(h->obj)) { - err = PTR_ERR(h->obj); - goto err_hws; - } - - i915_gem_object_set_cache_coherency(h->hws, I915_CACHE_LLC); - vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_obj; - } - h->seqno = memset(vaddr, 0xff, PAGE_SIZE); - - vaddr = i915_gem_object_pin_map(h->obj, - i915_coherent_map_type(i915)); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_unpin_hws; - } - h->batch = vaddr; - - return 0; - -err_unpin_hws: - i915_gem_object_unpin_map(h->hws); -err_obj: - i915_gem_object_put(h->obj); -err_hws: - i915_gem_object_put(h->hws); -err_ctx: - kernel_context_close(h->ctx); - return err; -} - -static u64 hws_address(const struct i915_vma *hws, - const struct i915_request *rq) -{ - return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context); -} - -static int move_to_active(struct i915_vma *vma, - struct i915_request *rq, - unsigned int 
flags) -{ - int err; - - err = i915_vma_move_to_active(vma, rq, flags); - if (err) - return err; - - if (!i915_gem_object_has_active_reference(vma->obj)) { - i915_gem_object_get(vma->obj); - i915_gem_object_set_active_reference(vma->obj); - } - - return 0; -} - -static struct i915_request * -hang_create_request(struct hang *h, struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = h->i915; - struct i915_address_space *vm = - h->ctx->ppgtt ? &h->ctx->ppgtt->vm : &i915->ggtt.vm; - struct i915_request *rq = NULL; - struct i915_vma *hws, *vma; - unsigned int flags; - u32 *batch; - int err; - - if (i915_gem_object_is_active(h->obj)) { - struct drm_i915_gem_object *obj; - void *vaddr; - - obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - vaddr = i915_gem_object_pin_map(obj, - i915_coherent_map_type(h->i915)); - if (IS_ERR(vaddr)) { - i915_gem_object_put(obj); - return ERR_CAST(vaddr); - } - - i915_gem_object_unpin_map(h->obj); - i915_gem_object_put(h->obj); - - h->obj = obj; - h->batch = vaddr; - } - - vma = i915_vma_instance(h->obj, vm, NULL); - if (IS_ERR(vma)) - return ERR_CAST(vma); - - hws = i915_vma_instance(h->hws, vm, NULL); - if (IS_ERR(hws)) - return ERR_CAST(hws); - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return ERR_PTR(err); - - err = i915_vma_pin(hws, 0, 0, PIN_USER); - if (err) - goto unpin_vma; - - rq = i915_request_alloc(engine, h->ctx); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto unpin_hws; - } - - err = move_to_active(vma, rq, 0); - if (err) - goto cancel_rq; - - err = move_to_active(hws, rq, 0); - if (err) - goto cancel_rq; - - batch = h->batch; - if (INTEL_GEN(i915) >= 8) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(hws_address(hws, rq)); - *batch++ = upper_32_bits(hws_address(hws, rq)); - *batch++ = rq->fence.seqno; - *batch++ = MI_ARB_CHECK; - - memset(batch, 0, 1024); - batch += 1024 / sizeof(*batch); - - *batch++ = MI_ARB_CHECK; - *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; - *batch++ = lower_32_bits(vma->node.start); - *batch++ = upper_32_bits(vma->node.start); - } else if (INTEL_GEN(i915) >= 6) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = 0; - *batch++ = lower_32_bits(hws_address(hws, rq)); - *batch++ = rq->fence.seqno; - *batch++ = MI_ARB_CHECK; - - memset(batch, 0, 1024); - batch += 1024 / sizeof(*batch); - - *batch++ = MI_ARB_CHECK; - *batch++ = MI_BATCH_BUFFER_START | 1 << 8; - *batch++ = lower_32_bits(vma->node.start); - } else if (INTEL_GEN(i915) >= 4) { - *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *batch++ = 0; - *batch++ = lower_32_bits(hws_address(hws, rq)); - *batch++ = rq->fence.seqno; - *batch++ = MI_ARB_CHECK; - - memset(batch, 0, 1024); - batch += 1024 / sizeof(*batch); - - *batch++ = MI_ARB_CHECK; - *batch++ = MI_BATCH_BUFFER_START | 2 << 6; - *batch++ = lower_32_bits(vma->node.start); - } else { - *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *batch++ = lower_32_bits(hws_address(hws, rq)); - *batch++ = rq->fence.seqno; - *batch++ = MI_ARB_CHECK; - - memset(batch, 0, 1024); - batch += 1024 / sizeof(*batch); - - *batch++ = MI_ARB_CHECK; - *batch++ = MI_BATCH_BUFFER_START | 2 << 6; - *batch++ = lower_32_bits(vma->node.start); - } - *batch++ = MI_BATCH_BUFFER_END; /* not reached */ - i915_gem_chipset_flush(h->i915); - - if (rq->engine->emit_init_breadcrumb) { - err = rq->engine->emit_init_breadcrumb(rq); - if (err) - goto cancel_rq; - } - - flags = 0; - if (INTEL_GEN(vm->i915) <= 5) - flags |= I915_DISPATCH_SECURE; - - err = 
rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); - -cancel_rq: - if (err) { - i915_request_skip(rq, err); - i915_request_add(rq); - } -unpin_hws: - i915_vma_unpin(hws); -unpin_vma: - i915_vma_unpin(vma); - return err ? ERR_PTR(err) : rq; -} - -static u32 hws_seqno(const struct hang *h, const struct i915_request *rq) -{ - return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); -} - -static void hang_fini(struct hang *h) -{ - *h->batch = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(h->i915); - - i915_gem_object_unpin_map(h->obj); - i915_gem_object_put(h->obj); - - i915_gem_object_unpin_map(h->hws); - i915_gem_object_put(h->hws); - - kernel_context_close(h->ctx); - - igt_flush_test(h->i915, I915_WAIT_LOCKED); -} - -static bool wait_until_running(struct hang *h, struct i915_request *rq) -{ - return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), - rq->fence.seqno), - 10) && - wait_for(i915_seqno_passed(hws_seqno(h, rq), - rq->fence.seqno), - 1000)); -} - -static int igt_hang_sanitycheck(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct i915_request *rq; - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct hang h; - int err; - - /* Basic check that we can execute our hanging batch */ - - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); - if (err) - goto unlock; - - for_each_engine(engine, i915, id) { - struct igt_wedge_me w; - long timeout; - - if (!intel_engine_can_store_dword(engine)) - continue; - - rq = hang_create_request(&h, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - pr_err("Failed to create request for %s, err=%d\n", - engine->name, err); - goto fini; - } - - i915_request_get(rq); - - *h.batch = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); - - i915_request_add(rq); - - timeout = 0; - igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/) - timeout = i915_request_wait(rq, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (i915_reset_failed(i915)) - timeout = -EIO; - - i915_request_put(rq); - - if (timeout < 0) { - err = timeout; - pr_err("Wait for request failed on %s, err=%d\n", - engine->name, err); - goto fini; - } - } - -fini: - hang_fini(&h); -unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int igt_global_reset(void *arg) -{ - struct drm_i915_private *i915 = arg; - unsigned int reset_count; - int err = 0; - - /* Check that we can issue a global GPU reset */ - - igt_global_reset_lock(i915); - - reset_count = i915_reset_count(&i915->gpu_error); - - i915_reset(i915, ALL_ENGINES, NULL); - - if (i915_reset_count(&i915->gpu_error) == reset_count) { - pr_err("No GPU reset recorded!\n"); - err = -EINVAL; - } - - igt_global_reset_unlock(i915); - - if (i915_reset_failed(i915)) - err = -EIO; - - return err; -} - -static int igt_wedged_reset(void *arg) -{ - struct drm_i915_private *i915 = arg; - intel_wakeref_t wakeref; - - /* Check that we can recover a wedged device with a GPU reset */ - - igt_global_reset_lock(i915); - wakeref = intel_runtime_pm_get(i915); - - i915_gem_set_wedged(i915); - - GEM_BUG_ON(!i915_reset_failed(i915)); - i915_reset(i915, ALL_ENGINES, NULL); - - intel_runtime_pm_put(i915, wakeref); - igt_global_reset_unlock(i915); - - return i915_reset_failed(i915) ? 
-EIO : 0; -} - -static bool wait_for_idle(struct intel_engine_cs *engine) -{ - return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0; -} - -static int igt_reset_nop(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - unsigned int reset_count, count; - enum intel_engine_id id; - intel_wakeref_t wakeref; - struct drm_file *file; - IGT_TIMEOUT(end_time); - int err = 0; - - /* Check that we can reset during non-user portions of requests */ - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); - mutex_unlock(&i915->drm.struct_mutex); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out; - } - - i915_gem_context_clear_bannable(ctx); - wakeref = intel_runtime_pm_get(i915); - reset_count = i915_reset_count(&i915->gpu_error); - count = 0; - do { - mutex_lock(&i915->drm.struct_mutex); - for_each_engine(engine, i915, id) { - int i; - - for (i = 0; i < 16; i++) { - struct i915_request *rq; - - rq = i915_request_alloc(engine, ctx); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - break; - } - - i915_request_add(rq); - } - } - mutex_unlock(&i915->drm.struct_mutex); - - igt_global_reset_lock(i915); - i915_reset(i915, ALL_ENGINES, NULL); - igt_global_reset_unlock(i915); - if (i915_reset_failed(i915)) { - err = -EIO; - break; - } - - if (i915_reset_count(&i915->gpu_error) != - reset_count + ++count) { - pr_err("Full GPU reset not recorded!\n"); - err = -EINVAL; - break; - } - - if (!i915_reset_flush(i915)) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("%s failed to idle after reset\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - err = -EIO; - break; - } - - err = igt_flush_test(i915, 0); - if (err) - break; - } while (time_before(jiffies, end_time)); - pr_info("%s: %d resets\n", __func__, count); - - mutex_lock(&i915->drm.struct_mutex); - err = igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); - - intel_runtime_pm_put(i915, wakeref); - -out: - mock_file_free(i915, file); - if (i915_reset_failed(i915)) - err = -EIO; - return err; -} - -static int igt_reset_nop_engine(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - enum intel_engine_id id; - intel_wakeref_t wakeref; - struct drm_file *file; - int err = 0; - - /* Check that we can engine-reset during non-user portions */ - - if (!intel_has_reset_engine(i915)) - return 0; - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); - mutex_unlock(&i915->drm.struct_mutex); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out; - } - - i915_gem_context_clear_bannable(ctx); - wakeref = intel_runtime_pm_get(i915); - for_each_engine(engine, i915, id) { - unsigned int reset_count, reset_engine_count; - unsigned int count; - IGT_TIMEOUT(end_time); - - reset_count = i915_reset_count(&i915->gpu_error); - reset_engine_count = i915_reset_engine_count(&i915->gpu_error, - engine); - count = 0; - - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - do { - int i; - - if (!wait_for_idle(engine)) { - pr_err("%s failed to idle before reset\n", - engine->name); - err = -EIO; - break; - } - - mutex_lock(&i915->drm.struct_mutex); - for (i = 0; i < 16; i++) { - struct i915_request *rq; - - rq = i915_request_alloc(engine, ctx); - if (IS_ERR(rq)) { - err = 
PTR_ERR(rq); - break; - } - - i915_request_add(rq); - } - mutex_unlock(&i915->drm.struct_mutex); - - err = i915_reset_engine(engine, NULL); - if (err) { - pr_err("i915_reset_engine failed\n"); - break; - } - - if (i915_reset_count(&i915->gpu_error) != reset_count) { - pr_err("Full GPU reset recorded! (engine reset expected)\n"); - err = -EINVAL; - break; - } - - if (i915_reset_engine_count(&i915->gpu_error, engine) != - reset_engine_count + ++count) { - pr_err("%s engine reset not recorded!\n", - engine->name); - err = -EINVAL; - break; - } - - if (!i915_reset_flush(i915)) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("%s failed to idle after reset\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - err = -EIO; - break; - } - } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - pr_info("%s(%s): %d resets\n", __func__, engine->name, count); - - if (err) - break; - - err = igt_flush_test(i915, 0); - if (err) - break; - } - - mutex_lock(&i915->drm.struct_mutex); - err = igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); - - intel_runtime_pm_put(i915, wakeref); -out: - mock_file_free(i915, file); - if (i915_reset_failed(i915)) - err = -EIO; - return err; -} - -static int __igt_reset_engine(struct drm_i915_private *i915, bool active) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct hang h; - int err = 0; - - /* Check that we can issue an engine reset on an idle engine (no-op) */ - - if (!intel_has_reset_engine(i915)) - return 0; - - if (active) { - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); - mutex_unlock(&i915->drm.struct_mutex); - if (err) - return err; - } - - for_each_engine(engine, i915, id) { - unsigned int reset_count, reset_engine_count; - IGT_TIMEOUT(end_time); - - if (active && !intel_engine_can_store_dword(engine)) - continue; - - if (!wait_for_idle(engine)) { - pr_err("%s failed to idle before reset\n", - engine->name); - err = -EIO; - break; - } - - reset_count = i915_reset_count(&i915->gpu_error); - reset_engine_count = i915_reset_engine_count(&i915->gpu_error, - engine); - - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - do { - if (active) { - struct i915_request *rq; - - mutex_lock(&i915->drm.struct_mutex); - rq = hang_create_request(&h, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - mutex_unlock(&i915->drm.struct_mutex); - break; - } - - i915_request_get(rq); - i915_request_add(rq); - mutex_unlock(&i915->drm.struct_mutex); - - if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); - - pr_err("%s: Failed to start request %llx, at %x\n", - __func__, rq->fence.seqno, hws_seqno(&h, rq)); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - i915_request_put(rq); - err = -EIO; - break; - } - - i915_request_put(rq); - } - - err = i915_reset_engine(engine, NULL); - if (err) { - pr_err("i915_reset_engine failed\n"); - break; - } - - if (i915_reset_count(&i915->gpu_error) != reset_count) { - pr_err("Full GPU reset recorded! 
(engine reset expected)\n"); - err = -EINVAL; - break; - } - - if (i915_reset_engine_count(&i915->gpu_error, engine) != - ++reset_engine_count) { - pr_err("%s engine reset not recorded!\n", - engine->name); - err = -EINVAL; - break; - } - - if (!i915_reset_flush(i915)) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("%s failed to idle after reset\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - err = -EIO; - break; - } - } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - - if (err) - break; - - err = igt_flush_test(i915, 0); - if (err) - break; - } - - if (i915_reset_failed(i915)) - err = -EIO; - - if (active) { - mutex_lock(&i915->drm.struct_mutex); - hang_fini(&h); - mutex_unlock(&i915->drm.struct_mutex); - } - - return err; -} - -static int igt_reset_idle_engine(void *arg) -{ - return __igt_reset_engine(arg, false); -} - -static int igt_reset_active_engine(void *arg) -{ - return __igt_reset_engine(arg, true); -} - -struct active_engine { - struct task_struct *task; - struct intel_engine_cs *engine; - unsigned long resets; - unsigned int flags; -}; - -#define TEST_ACTIVE BIT(0) -#define TEST_OTHERS BIT(1) -#define TEST_SELF BIT(2) -#define TEST_PRIORITY BIT(3) - -static int active_request_put(struct i915_request *rq) -{ - int err = 0; - - if (!rq) - return 0; - - if (i915_request_wait(rq, 0, 5 * HZ) < 0) { - GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n", - rq->engine->name, - rq->fence.context, - rq->fence.seqno); - GEM_TRACE_DUMP(); - - i915_gem_set_wedged(rq->i915); - err = -EIO; - } - - i915_request_put(rq); - - return err; -} - -static int active_engine(void *data) -{ - I915_RND_STATE(prng); - struct active_engine *arg = data; - struct intel_engine_cs *engine = arg->engine; - struct i915_request *rq[8] = {}; - struct i915_gem_context *ctx[ARRAY_SIZE(rq)]; - struct drm_file *file; - unsigned long count = 0; - int err = 0; - - file = mock_file(engine->i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - for (count = 0; count < ARRAY_SIZE(ctx); count++) { - mutex_lock(&engine->i915->drm.struct_mutex); - ctx[count] = live_context(engine->i915, file); - mutex_unlock(&engine->i915->drm.struct_mutex); - if (IS_ERR(ctx[count])) { - err = PTR_ERR(ctx[count]); - while (--count) - i915_gem_context_put(ctx[count]); - goto err_file; - } - } - - while (!kthread_should_stop()) { - unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1); - struct i915_request *old = rq[idx]; - struct i915_request *new; - - mutex_lock(&engine->i915->drm.struct_mutex); - new = i915_request_alloc(engine, ctx[idx]); - if (IS_ERR(new)) { - mutex_unlock(&engine->i915->drm.struct_mutex); - err = PTR_ERR(new); - break; - } - - if (arg->flags & TEST_PRIORITY) - ctx[idx]->sched.priority = - i915_prandom_u32_max_state(512, &prng); - - rq[idx] = i915_request_get(new); - i915_request_add(new); - mutex_unlock(&engine->i915->drm.struct_mutex); - - err = active_request_put(old); - if (err) - break; - - cond_resched(); - } - - for (count = 0; count < ARRAY_SIZE(rq); count++) { - int err__ = active_request_put(rq[count]); - - /* Keep the first error */ - if (!err) - err = err__; - } - -err_file: - mock_file_free(engine->i915, file); - return err; -} - -static int __igt_reset_engines(struct drm_i915_private *i915, - const char *test_name, - unsigned int flags) -{ - struct intel_engine_cs *engine, *other; - enum intel_engine_id id, tmp; - struct hang h; - int err = 0; - - /* Check that issuing a reset on 
one engine does not interfere - * with any other engine. - */ - - if (!intel_has_reset_engine(i915)) - return 0; - - if (flags & TEST_ACTIVE) { - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); - mutex_unlock(&i915->drm.struct_mutex); - if (err) - return err; - - if (flags & TEST_PRIORITY) - h.ctx->sched.priority = 1024; - } - - for_each_engine(engine, i915, id) { - struct active_engine threads[I915_NUM_ENGINES] = {}; - unsigned long global = i915_reset_count(&i915->gpu_error); - unsigned long count = 0, reported; - IGT_TIMEOUT(end_time); - - if (flags & TEST_ACTIVE && - !intel_engine_can_store_dword(engine)) - continue; - - if (!wait_for_idle(engine)) { - pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n", - engine->name, test_name); - err = -EIO; - break; - } - - memset(threads, 0, sizeof(threads)); - for_each_engine(other, i915, tmp) { - struct task_struct *tsk; - - threads[tmp].resets = - i915_reset_engine_count(&i915->gpu_error, - other); - - if (!(flags & TEST_OTHERS)) - continue; - - if (other == engine && !(flags & TEST_SELF)) - continue; - - threads[tmp].engine = other; - threads[tmp].flags = flags; - - tsk = kthread_run(active_engine, &threads[tmp], - "igt/%s", other->name); - if (IS_ERR(tsk)) { - err = PTR_ERR(tsk); - goto unwind; - } - - threads[tmp].task = tsk; - get_task_struct(tsk); - } - - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - do { - struct i915_request *rq = NULL; - - if (flags & TEST_ACTIVE) { - mutex_lock(&i915->drm.struct_mutex); - rq = hang_create_request(&h, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - mutex_unlock(&i915->drm.struct_mutex); - break; - } - - i915_request_get(rq); - i915_request_add(rq); - mutex_unlock(&i915->drm.struct_mutex); - - if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); - - pr_err("%s: Failed to start request %llx, at %x\n", - __func__, rq->fence.seqno, hws_seqno(&h, rq)); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - i915_request_put(rq); - err = -EIO; - break; - } - } - - err = i915_reset_engine(engine, NULL); - if (err) { - pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", - engine->name, test_name, err); - break; - } - - count++; - - if (rq) { - if (i915_request_wait(rq, 0, HZ / 5) < 0) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("i915_reset_engine(%s:%s):" - " failed to complete request after reset\n", - engine->name, test_name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - i915_request_put(rq); - - GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); - err = -EIO; - break; - } - - i915_request_put(rq); - } - - if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("i915_reset_engine(%s:%s):" - " failed to idle after reset\n", - engine->name, test_name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - err = -EIO; - break; - } - } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - pr_info("i915_reset_engine(%s:%s): %lu resets\n", - engine->name, test_name, count); - - reported = i915_reset_engine_count(&i915->gpu_error, engine); - reported -= threads[engine->id].resets; - if (reported != count) { - pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", - engine->name, test_name, count, reported); - if (!err) - err = -EINVAL; - } - -unwind: - for_each_engine(other, i915, tmp) { - int ret; - - if (!threads[tmp].task) - continue; - - ret 
= kthread_stop(threads[tmp].task); - if (ret) { - pr_err("kthread for other engine %s failed, err=%d\n", - other->name, ret); - if (!err) - err = ret; - } - put_task_struct(threads[tmp].task); - - if (other != engine && - threads[tmp].resets != - i915_reset_engine_count(&i915->gpu_error, other)) { - pr_err("Innocent engine %s was reset (count=%ld)\n", - other->name, - i915_reset_engine_count(&i915->gpu_error, - other) - - threads[tmp].resets); - if (!err) - err = -EINVAL; - } - } - - if (global != i915_reset_count(&i915->gpu_error)) { - pr_err("Global reset (count=%ld)!\n", - i915_reset_count(&i915->gpu_error) - global); - if (!err) - err = -EINVAL; - } - - if (err) - break; - - err = igt_flush_test(i915, 0); - if (err) - break; - } - - if (i915_reset_failed(i915)) - err = -EIO; - - if (flags & TEST_ACTIVE) { - mutex_lock(&i915->drm.struct_mutex); - hang_fini(&h); - mutex_unlock(&i915->drm.struct_mutex); - } - - return err; -} - -static int igt_reset_engines(void *arg) -{ - static const struct { - const char *name; - unsigned int flags; - } phases[] = { - { "idle", 0 }, - { "active", TEST_ACTIVE }, - { "others-idle", TEST_OTHERS }, - { "others-active", TEST_OTHERS | TEST_ACTIVE }, - { - "others-priority", - TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY - }, - { - "self-priority", - TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF, - }, - { } - }; - struct drm_i915_private *i915 = arg; - typeof(*phases) *p; - int err; - - for (p = phases; p->name; p++) { - if (p->flags & TEST_PRIORITY) { - if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) - continue; - } - - err = __igt_reset_engines(arg, p->name, p->flags); - if (err) - return err; - } - - return 0; -} - -static u32 fake_hangcheck(struct drm_i915_private *i915, - intel_engine_mask_t mask) -{ - u32 count = i915_reset_count(&i915->gpu_error); - - i915_reset(i915, mask, NULL); - - return count; -} - -static int igt_reset_wait(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct i915_request *rq; - unsigned int reset_count; - struct hang h; - long timeout; - int err; - - if (!intel_engine_can_store_dword(i915->engine[RCS0])) - return 0; - - /* Check that we detect a stuck waiter and issue a reset */ - - igt_global_reset_lock(i915); - - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); - if (err) - goto unlock; - - rq = hang_create_request(&h, i915->engine[RCS0]); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto fini; - } - - i915_request_get(rq); - i915_request_add(rq); - - if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); - - pr_err("%s: Failed to start request %llx, at %x\n", - __func__, rq->fence.seqno, hws_seqno(&h, rq)); - intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - - i915_gem_set_wedged(i915); - - err = -EIO; - goto out_rq; - } - - reset_count = fake_hangcheck(i915, ALL_ENGINES); - - timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); - if (timeout < 0) { - pr_err("i915_request_wait failed on a stuck request: err=%ld\n", - timeout); - err = timeout; - goto out_rq; - } - - if (i915_reset_count(&i915->gpu_error) == reset_count) { - pr_err("No GPU reset recorded!\n"); - err = -EINVAL; - goto out_rq; - } - -out_rq: - i915_request_put(rq); -fini: - hang_fini(&h); -unlock: - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_unlock(i915); - - if (i915_reset_failed(i915)) - return -EIO; - - return err; -} - -struct evict_vma { - struct completion completion; - struct i915_vma *vma; -}; - -static int evict_vma(void *data) -{ - struct 
evict_vma *arg = data; - struct i915_address_space *vm = arg->vma->vm; - struct drm_i915_private *i915 = vm->i915; - struct drm_mm_node evict = arg->vma->node; - int err; - - complete(&arg->completion); - - mutex_lock(&i915->drm.struct_mutex); - err = i915_gem_evict_for_node(vm, &evict, 0); - mutex_unlock(&i915->drm.struct_mutex); - - return err; -} - -static int evict_fence(void *data) -{ - struct evict_vma *arg = data; - struct drm_i915_private *i915 = arg->vma->vm->i915; - int err; - - complete(&arg->completion); - - mutex_lock(&i915->drm.struct_mutex); - - /* Mark the fence register as dirty to force the mmio update. */ - err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512); - if (err) { - pr_err("Invalid Y-tiling settings; err:%d\n", err); - goto out_unlock; - } - - err = i915_vma_pin_fence(arg->vma); - if (err) { - pr_err("Unable to pin Y-tiled fence; err:%d\n", err); - goto out_unlock; - } - - i915_vma_unpin_fence(arg->vma); - -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - - return err; -} - -static int __igt_reset_evict_vma(struct drm_i915_private *i915, - struct i915_address_space *vm, - int (*fn)(void *), - unsigned int flags) -{ - struct drm_i915_gem_object *obj; - struct task_struct *tsk = NULL; - struct i915_request *rq; - struct evict_vma arg; - struct hang h; - int err; - - if (!intel_engine_can_store_dword(i915->engine[RCS0])) - return 0; - - /* Check that we can recover an unbind stuck on a hanging request */ - - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); - if (err) - goto unlock; - - obj = i915_gem_object_create_internal(i915, SZ_1M); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto fini; - } - - if (flags & EXEC_OBJECT_NEEDS_FENCE) { - err = i915_gem_object_set_tiling(obj, I915_TILING_X, 512); - if (err) { - pr_err("Invalid X-tiling settings; err:%d\n", err); - goto out_obj; - } - } - - arg.vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(arg.vma)) { - err = PTR_ERR(arg.vma); - goto out_obj; - } - - rq = hang_create_request(&h, i915->engine[RCS0]); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_obj; - } - - err = i915_vma_pin(arg.vma, 0, 0, - i915_vma_is_ggtt(arg.vma) ? 
- PIN_GLOBAL | PIN_MAPPABLE : - PIN_USER); - if (err) { - i915_request_add(rq); - goto out_obj; - } - - if (flags & EXEC_OBJECT_NEEDS_FENCE) { - err = i915_vma_pin_fence(arg.vma); - if (err) { - pr_err("Unable to pin X-tiled fence; err:%d\n", err); - i915_vma_unpin(arg.vma); - i915_request_add(rq); - goto out_obj; - } - } - - err = i915_vma_move_to_active(arg.vma, rq, flags); - - if (flags & EXEC_OBJECT_NEEDS_FENCE) - i915_vma_unpin_fence(arg.vma); - i915_vma_unpin(arg.vma); - - i915_request_get(rq); - i915_request_add(rq); - if (err) - goto out_rq; - - mutex_unlock(&i915->drm.struct_mutex); - - if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); - - pr_err("%s: Failed to start request %llx, at %x\n", - __func__, rq->fence.seqno, hws_seqno(&h, rq)); - intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - - i915_gem_set_wedged(i915); - goto out_reset; - } - - init_completion(&arg.completion); - - tsk = kthread_run(fn, &arg, "igt/evict_vma"); - if (IS_ERR(tsk)) { - err = PTR_ERR(tsk); - tsk = NULL; - goto out_reset; - } - get_task_struct(tsk); - - wait_for_completion(&arg.completion); - - if (wait_for(!list_empty(&rq->fence.cb_list), 10)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); - - pr_err("igt/evict_vma kthread did not wait\n"); - intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - - i915_gem_set_wedged(i915); - goto out_reset; - } - -out_reset: - igt_global_reset_lock(i915); - fake_hangcheck(rq->i915, rq->engine->mask); - igt_global_reset_unlock(i915); - - if (tsk) { - struct igt_wedge_me w; - - /* The reset, even indirectly, should take less than 10ms. */ - igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/) - err = kthread_stop(tsk); - - put_task_struct(tsk); - } - - mutex_lock(&i915->drm.struct_mutex); -out_rq: - i915_request_put(rq); -out_obj: - i915_gem_object_put(obj); -fini: - hang_fini(&h); -unlock: - mutex_unlock(&i915->drm.struct_mutex); - - if (i915_reset_failed(i915)) - return -EIO; - - return err; -} - -static int igt_reset_evict_ggtt(void *arg) -{ - struct drm_i915_private *i915 = arg; - - return __igt_reset_evict_vma(i915, &i915->ggtt.vm, - evict_vma, EXEC_OBJECT_WRITE); -} - -static int igt_reset_evict_ppgtt(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx; - struct drm_file *file; - int err; - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); - mutex_unlock(&i915->drm.struct_mutex); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out; - } - - err = 0; - if (ctx->ppgtt) /* aliasing == global gtt locking, covered above */ - err = __igt_reset_evict_vma(i915, &ctx->ppgtt->vm, - evict_vma, EXEC_OBJECT_WRITE); - -out: - mock_file_free(i915, file); - return err; -} - -static int igt_reset_evict_fence(void *arg) -{ - struct drm_i915_private *i915 = arg; - - return __igt_reset_evict_vma(i915, &i915->ggtt.vm, - evict_fence, EXEC_OBJECT_NEEDS_FENCE); -} - -static int wait_for_others(struct drm_i915_private *i915, - struct intel_engine_cs *exclude) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - if (engine == exclude) - continue; - - if (!wait_for_idle(engine)) - return -EIO; - } - - return 0; -} - -static int igt_reset_queue(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct hang h; - int err; - - /* Check that we replay pending requests 
following a hang */ - - igt_global_reset_lock(i915); - - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); - if (err) - goto unlock; - - for_each_engine(engine, i915, id) { - struct i915_request *prev; - IGT_TIMEOUT(end_time); - unsigned int count; - - if (!intel_engine_can_store_dword(engine)) - continue; - - prev = hang_create_request(&h, engine); - if (IS_ERR(prev)) { - err = PTR_ERR(prev); - goto fini; - } - - i915_request_get(prev); - i915_request_add(prev); - - count = 0; - do { - struct i915_request *rq; - unsigned int reset_count; - - rq = hang_create_request(&h, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto fini; - } - - i915_request_get(rq); - i915_request_add(rq); - - /* - * XXX We don't handle resetting the kernel context - * very well. If we trigger a device reset twice in - * quick succession while the kernel context is - * executing, we may end up skipping the breadcrumb. - * This is really only a problem for the selftest as - * normally there is a large interlude between resets - * (hangcheck), or we focus on resetting just one - * engine and so avoid repeatedly resetting innocents. - */ - err = wait_for_others(i915, engine); - if (err) { - pr_err("%s(%s): Failed to idle other inactive engines after device reset\n", - __func__, engine->name); - i915_request_put(rq); - i915_request_put(prev); - - GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); - goto fini; - } - - if (!wait_until_running(&h, prev)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); - - pr_err("%s(%s): Failed to start request %llx, at %x\n", - __func__, engine->name, - prev->fence.seqno, hws_seqno(&h, prev)); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - i915_request_put(rq); - i915_request_put(prev); - - i915_gem_set_wedged(i915); - - err = -EIO; - goto fini; - } - - reset_count = fake_hangcheck(i915, BIT(id)); - - if (prev->fence.error != -EIO) { - pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", - prev->fence.error); - i915_request_put(rq); - i915_request_put(prev); - err = -EINVAL; - goto fini; - } - - if (rq->fence.error) { - pr_err("Fence error status not zero [%d] after unrelated reset\n", - rq->fence.error); - i915_request_put(rq); - i915_request_put(prev); - err = -EINVAL; - goto fini; - } - - if (i915_reset_count(&i915->gpu_error) == reset_count) { - pr_err("No GPU reset recorded!\n"); - i915_request_put(rq); - i915_request_put(prev); - err = -EINVAL; - goto fini; - } - - i915_request_put(prev); - prev = rq; - count++; - } while (time_before(jiffies, end_time)); - pr_info("%s: Completed %d resets\n", engine->name, count); - - *h.batch = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); - - i915_request_put(prev); - - err = igt_flush_test(i915, I915_WAIT_LOCKED); - if (err) - break; - } - -fini: - hang_fini(&h); -unlock: - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_unlock(i915); - - if (i915_reset_failed(i915)) - return -EIO; - - return err; -} - -static int igt_handle_error(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine = i915->engine[RCS0]; - struct hang h; - struct i915_request *rq; - struct i915_gpu_state *error; - int err; - - /* Check that we can issue a global GPU and engine reset */ - - if (!intel_has_reset_engine(i915)) - return 0; - - if (!engine || !intel_engine_can_store_dword(engine)) - return 0; - - mutex_lock(&i915->drm.struct_mutex); - - err = hang_init(&h, i915); - if (err) - goto err_unlock; - - rq = hang_create_request(&h, engine); - if (IS_ERR(rq)) { - 
err = PTR_ERR(rq); - goto err_fini; - } - - i915_request_get(rq); - i915_request_add(rq); - - if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); - - pr_err("%s: Failed to start request %llx, at %x\n", - __func__, rq->fence.seqno, hws_seqno(&h, rq)); - intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - - i915_gem_set_wedged(i915); - - err = -EIO; - goto err_request; - } - - mutex_unlock(&i915->drm.struct_mutex); - - /* Temporarily disable error capture */ - error = xchg(&i915->gpu_error.first_error, (void *)-1); - - i915_handle_error(i915, engine->mask, 0, NULL); - - xchg(&i915->gpu_error.first_error, error); - - mutex_lock(&i915->drm.struct_mutex); - - if (rq->fence.error != -EIO) { - pr_err("Guilty request not identified!\n"); - err = -EINVAL; - goto err_request; - } - -err_request: - i915_request_put(rq); -err_fini: - hang_fini(&h); -err_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static void __preempt_begin(void) -{ - preempt_disable(); -} - -static void __preempt_end(void) -{ - preempt_enable(); -} - -static void __softirq_begin(void) -{ - local_bh_disable(); -} - -static void __softirq_end(void) -{ - local_bh_enable(); -} - -static void __hardirq_begin(void) -{ - local_irq_disable(); -} - -static void __hardirq_end(void) -{ - local_irq_enable(); -} - -struct atomic_section { - const char *name; - void (*critical_section_begin)(void); - void (*critical_section_end)(void); -}; - -static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, - const struct atomic_section *p, - const char *mode) -{ - struct tasklet_struct * const t = &engine->execlists.tasklet; - int err; - - GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", - engine->name, mode, p->name); - - tasklet_disable_nosync(t); - p->critical_section_begin(); - - err = i915_reset_engine(engine, NULL); - - p->critical_section_end(); - tasklet_enable(t); - - if (err) - pr_err("i915_reset_engine(%s:%s) failed under %s\n", - engine->name, mode, p->name); - - return err; -} - -static int igt_atomic_reset_engine(struct intel_engine_cs *engine, - const struct atomic_section *p) -{ - struct drm_i915_private *i915 = engine->i915; - struct i915_request *rq; - struct hang h; - int err; - - err = __igt_atomic_reset_engine(engine, p, "idle"); - if (err) - return err; - - err = hang_init(&h, i915); - if (err) - return err; - - rq = hang_create_request(&h, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out; - } - - i915_request_get(rq); - i915_request_add(rq); - - if (wait_until_running(&h, rq)) { - err = __igt_atomic_reset_engine(engine, p, "active"); - } else { - pr_err("%s(%s): Failed to start request %llx, at %x\n", - __func__, engine->name, - rq->fence.seqno, hws_seqno(&h, rq)); - i915_gem_set_wedged(i915); - err = -EIO; - } - - if (err == 0) { - struct igt_wedge_me w; - - igt_wedge_on_timeout(&w, i915, HZ / 20 /* 50ms timeout*/) - i915_request_wait(rq, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (i915_reset_failed(i915)) - err = -EIO; - } - - i915_request_put(rq); -out: - hang_fini(&h); - return err; -} - -static void force_reset(struct drm_i915_private *i915) -{ - i915_gem_set_wedged(i915); - i915_reset(i915, 0, NULL); -} - -static int igt_atomic_reset(void *arg) -{ - static const struct atomic_section phases[] = { - { "preempt", __preempt_begin, __preempt_end }, - { "softirq", __softirq_begin, __softirq_end }, - { "hardirq", __hardirq_begin, __hardirq_end }, - { } - }; - struct drm_i915_private *i915 = arg; - intel_wakeref_t wakeref; - 
int err = 0; - - /* Check that the resets are usable from atomic context */ - - igt_global_reset_lock(i915); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - /* Flush any requests before we get started and check basics */ - force_reset(i915); - if (i915_reset_failed(i915)) - goto unlock; - - if (intel_has_gpu_reset(i915)) { - const typeof(*phases) *p; - - for (p = phases; p->name; p++) { - GEM_TRACE("intel_gpu_reset under %s\n", p->name); - - p->critical_section_begin(); - err = intel_gpu_reset(i915, ALL_ENGINES); - p->critical_section_end(); - - if (err) { - pr_err("intel_gpu_reset failed under %s\n", - p->name); - goto out; - } - } - - force_reset(i915); - } - - if (USES_GUC_SUBMISSION(i915)) - goto unlock; - - if (intel_has_reset_engine(i915)) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - const typeof(*phases) *p; - - for (p = phases; p->name; p++) { - err = igt_atomic_reset_engine(engine, p); - if (err) - goto out; - } - } - } - -out: - /* As we poke around the guts, do a full reset before continuing. */ - force_reset(i915); - -unlock: - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_unlock(i915); - - return err; -} - -int intel_hangcheck_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_global_reset), /* attempt to recover GPU first */ - SUBTEST(igt_wedged_reset), - SUBTEST(igt_hang_sanitycheck), - SUBTEST(igt_reset_nop), - SUBTEST(igt_reset_nop_engine), - SUBTEST(igt_reset_idle_engine), - SUBTEST(igt_reset_active_engine), - SUBTEST(igt_reset_engines), - SUBTEST(igt_reset_queue), - SUBTEST(igt_reset_wait), - SUBTEST(igt_reset_evict_ggtt), - SUBTEST(igt_reset_evict_ppgtt), - SUBTEST(igt_reset_evict_fence), - SUBTEST(igt_handle_error), - SUBTEST(igt_atomic_reset), - }; - intel_wakeref_t wakeref; - bool saved_hangcheck; - int err; - - if (!intel_has_gpu_reset(i915)) - return 0; - - if (i915_terminally_wedged(i915)) - return -EIO; /* we're long past hope of a successful reset */ - - wakeref = intel_runtime_pm_get(i915); - saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); - drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */ - - err = i915_subtests(tests, i915); - - mutex_lock(&i915->drm.struct_mutex); - igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); - - i915_modparams.enable_hangcheck = saved_hangcheck; - intel_runtime_pm_put(i915, wakeref); - - return err; -} diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c deleted file mode 100644 index fbee030db940..000000000000 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ /dev/null @@ -1,1326 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation - */ - -#include - -#include "../i915_reset.h" - -#include "../i915_selftest.h" -#include "igt_flush_test.h" -#include "igt_live_test.h" -#include "igt_spinner.h" -#include "i915_random.h" - -#include "mock_context.h" - -static int live_sanitycheck(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - enum intel_engine_id id; - struct igt_spinner spin; - intel_wakeref_t wakeref; - int err = -ENOMEM; - - if (!HAS_LOGICAL_RING_CONTEXTS(i915)) - return 0; - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - if (igt_spinner_init(&spin, i915)) - goto 
err_unlock; - - ctx = kernel_context(i915); - if (!ctx) - goto err_spin; - - for_each_engine(engine, i915, id) { - struct i915_request *rq; - - rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_ctx; - } - - i915_request_add(rq); - if (!igt_wait_for_spinner(&spin, rq)) { - GEM_TRACE("spinner failed to start\n"); - GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); - err = -EIO; - goto err_ctx; - } - - igt_spinner_end(&spin); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { - err = -EIO; - goto err_ctx; - } - } - - err = 0; -err_ctx: - kernel_context_close(ctx); -err_spin: - igt_spinner_fini(&spin); -err_unlock: - igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int live_busywait_preempt(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx_hi, *ctx_lo; - struct intel_engine_cs *engine; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - enum intel_engine_id id; - intel_wakeref_t wakeref; - int err = -ENOMEM; - u32 *map; - - /* - * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can - * preempt the busywaits used to synchronise between rings. - */ - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - ctx_hi = kernel_context(i915); - if (!ctx_hi) - goto err_unlock; - ctx_hi->sched.priority = INT_MAX; - - ctx_lo = kernel_context(i915); - if (!ctx_lo) - goto err_ctx_hi; - ctx_lo->sched.priority = INT_MIN; - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_ctx_lo; - } - - map = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(map)) { - err = PTR_ERR(map); - goto err_obj; - } - - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_map; - } - - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); - if (err) - goto err_map; - - for_each_engine(engine, i915, id) { - struct i915_request *lo, *hi; - struct igt_live_test t; - u32 *cs; - - if (!intel_engine_can_store_dword(engine)) - continue; - - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { - err = -EIO; - goto err_vma; - } - - /* - * We create two requests. The low priority request - * busywaits on a semaphore (inside the ringbuffer where - * is should be preemptible) and the high priority requests - * uses a MI_STORE_DWORD_IMM to update the semaphore value - * allowing the first request to complete. If preemption - * fails, we hang instead. - */ - - lo = i915_request_alloc(engine, ctx_lo); - if (IS_ERR(lo)) { - err = PTR_ERR(lo); - goto err_vma; - } - - cs = intel_ring_begin(lo, 8); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - i915_request_add(lo); - goto err_vma; - } - - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = i915_ggtt_offset(vma); - *cs++ = 0; - *cs++ = 1; - - /* XXX Do we need a flush + invalidate here? 
*/ - - *cs++ = MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_EQ_SDD; - *cs++ = 0; - *cs++ = i915_ggtt_offset(vma); - *cs++ = 0; - - intel_ring_advance(lo, cs); - i915_request_add(lo); - - if (wait_for(READ_ONCE(*map), 10)) { - err = -ETIMEDOUT; - goto err_vma; - } - - /* Low priority request should be busywaiting now */ - if (i915_request_wait(lo, I915_WAIT_LOCKED, 1) != -ETIME) { - pr_err("%s: Busywaiting request did not!\n", - engine->name); - err = -EIO; - goto err_vma; - } - - hi = i915_request_alloc(engine, ctx_hi); - if (IS_ERR(hi)) { - err = PTR_ERR(hi); - goto err_vma; - } - - cs = intel_ring_begin(hi, 4); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - i915_request_add(hi); - goto err_vma; - } - - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = i915_ggtt_offset(vma); - *cs++ = 0; - *cs++ = 0; - - intel_ring_advance(hi, cs); - i915_request_add(hi); - - if (i915_request_wait(lo, I915_WAIT_LOCKED, HZ / 5) < 0) { - struct drm_printer p = drm_info_printer(i915->drm.dev); - - pr_err("%s: Failed to preempt semaphore busywait!\n", - engine->name); - - intel_engine_dump(engine, &p, "%s\n", engine->name); - GEM_TRACE_DUMP(); - - i915_gem_set_wedged(i915); - err = -EIO; - goto err_vma; - } - GEM_BUG_ON(READ_ONCE(*map)); - - if (igt_live_test_end(&t)) { - err = -EIO; - goto err_vma; - } - } - - err = 0; -err_vma: - i915_vma_unpin(vma); -err_map: - i915_gem_object_unpin_map(obj); -err_obj: - i915_gem_object_put(obj); -err_ctx_lo: - kernel_context_close(ctx_lo); -err_ctx_hi: - kernel_context_close(ctx_hi); -err_unlock: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int live_preempt(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx_hi, *ctx_lo; - struct igt_spinner spin_hi, spin_lo; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - int err = -ENOMEM; - - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) - return 0; - - if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) - pr_err("Logical preemption supported, but not exposed\n"); - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - if (igt_spinner_init(&spin_hi, i915)) - goto err_unlock; - - if (igt_spinner_init(&spin_lo, i915)) - goto err_spin_hi; - - ctx_hi = kernel_context(i915); - if (!ctx_hi) - goto err_spin_lo; - ctx_hi->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - - ctx_lo = kernel_context(i915); - if (!ctx_lo) - goto err_ctx_hi; - ctx_lo->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - - for_each_engine(engine, i915, id) { - struct igt_live_test t; - struct i915_request *rq; - - if (!intel_engine_has_preemption(engine)) - continue; - - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { - err = -EIO; - goto err_ctx_lo; - } - - rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - i915_request_add(rq); - if (!igt_wait_for_spinner(&spin_lo, rq)) { - GEM_TRACE("lo spinner failed to start\n"); - GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); - err = -EIO; - goto err_ctx_lo; - } - - rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) { - igt_spinner_end(&spin_lo); - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - i915_request_add(rq); - if (!igt_wait_for_spinner(&spin_hi, rq)) { - 
GEM_TRACE("hi spinner failed to start\n"); - GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); - err = -EIO; - goto err_ctx_lo; - } - - igt_spinner_end(&spin_hi); - igt_spinner_end(&spin_lo); - - if (igt_live_test_end(&t)) { - err = -EIO; - goto err_ctx_lo; - } - } - - err = 0; -err_ctx_lo: - kernel_context_close(ctx_lo); -err_ctx_hi: - kernel_context_close(ctx_hi); -err_spin_lo: - igt_spinner_fini(&spin_lo); -err_spin_hi: - igt_spinner_fini(&spin_hi); -err_unlock: - igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int live_late_preempt(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx_hi, *ctx_lo; - struct igt_spinner spin_hi, spin_lo; - struct intel_engine_cs *engine; - struct i915_sched_attr attr = {}; - enum intel_engine_id id; - intel_wakeref_t wakeref; - int err = -ENOMEM; - - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) - return 0; - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - if (igt_spinner_init(&spin_hi, i915)) - goto err_unlock; - - if (igt_spinner_init(&spin_lo, i915)) - goto err_spin_hi; - - ctx_hi = kernel_context(i915); - if (!ctx_hi) - goto err_spin_lo; - - ctx_lo = kernel_context(i915); - if (!ctx_lo) - goto err_ctx_hi; - - for_each_engine(engine, i915, id) { - struct igt_live_test t; - struct i915_request *rq; - - if (!intel_engine_has_preemption(engine)) - continue; - - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { - err = -EIO; - goto err_ctx_lo; - } - - rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - i915_request_add(rq); - if (!igt_wait_for_spinner(&spin_lo, rq)) { - pr_err("First context failed to start\n"); - goto err_wedged; - } - - rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine, - MI_NOOP); - if (IS_ERR(rq)) { - igt_spinner_end(&spin_lo); - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - i915_request_add(rq); - if (igt_wait_for_spinner(&spin_hi, rq)) { - pr_err("Second context overtook first?\n"); - goto err_wedged; - } - - attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); - engine->schedule(rq, &attr); - - if (!igt_wait_for_spinner(&spin_hi, rq)) { - pr_err("High priority context failed to preempt the low priority context\n"); - GEM_TRACE_DUMP(); - goto err_wedged; - } - - igt_spinner_end(&spin_hi); - igt_spinner_end(&spin_lo); - - if (igt_live_test_end(&t)) { - err = -EIO; - goto err_ctx_lo; - } - } - - err = 0; -err_ctx_lo: - kernel_context_close(ctx_lo); -err_ctx_hi: - kernel_context_close(ctx_hi); -err_spin_lo: - igt_spinner_fini(&spin_lo); -err_spin_hi: - igt_spinner_fini(&spin_hi); -err_unlock: - igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; - -err_wedged: - igt_spinner_end(&spin_hi); - igt_spinner_end(&spin_lo); - i915_gem_set_wedged(i915); - err = -EIO; - goto err_ctx_lo; -} - -struct preempt_client { - struct igt_spinner spin; - struct i915_gem_context *ctx; -}; - -static int preempt_client_init(struct drm_i915_private *i915, - struct preempt_client *c) -{ - c->ctx = kernel_context(i915); - if (!c->ctx) - return -ENOMEM; - - if (igt_spinner_init(&c->spin, i915)) - goto err_ctx; - - return 0; - -err_ctx: - kernel_context_close(c->ctx); - return -ENOMEM; -} - -static void preempt_client_fini(struct preempt_client *c) -{ - igt_spinner_fini(&c->spin); - kernel_context_close(c->ctx); -} - 
-static int live_suppress_self_preempt(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) - }; - struct preempt_client a, b; - enum intel_engine_id id; - intel_wakeref_t wakeref; - int err = -ENOMEM; - - /* - * Verify that if a preemption request does not cause a change in - * the current execution order, the preempt-to-idle injection is - * skipped and that we do not accidentally apply it after the CS - * completion event. - */ - - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) - return 0; - - if (USES_GUC_SUBMISSION(i915)) - return 0; /* presume black blox */ - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - if (preempt_client_init(i915, &a)) - goto err_unlock; - if (preempt_client_init(i915, &b)) - goto err_client_a; - - for_each_engine(engine, i915, id) { - struct i915_request *rq_a, *rq_b; - int depth; - - if (!intel_engine_has_preemption(engine)) - continue; - - engine->execlists.preempt_hang.count = 0; - - rq_a = igt_spinner_create_request(&a.spin, - a.ctx, engine, - MI_NOOP); - if (IS_ERR(rq_a)) { - err = PTR_ERR(rq_a); - goto err_client_b; - } - - i915_request_add(rq_a); - if (!igt_wait_for_spinner(&a.spin, rq_a)) { - pr_err("First client failed to start\n"); - goto err_wedged; - } - - for (depth = 0; depth < 8; depth++) { - rq_b = igt_spinner_create_request(&b.spin, - b.ctx, engine, - MI_NOOP); - if (IS_ERR(rq_b)) { - err = PTR_ERR(rq_b); - goto err_client_b; - } - i915_request_add(rq_b); - - GEM_BUG_ON(i915_request_completed(rq_a)); - engine->schedule(rq_a, &attr); - igt_spinner_end(&a.spin); - - if (!igt_wait_for_spinner(&b.spin, rq_b)) { - pr_err("Second client failed to start\n"); - goto err_wedged; - } - - swap(a, b); - rq_a = rq_b; - } - igt_spinner_end(&a.spin); - - if (engine->execlists.preempt_hang.count) { - pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n", - engine->execlists.preempt_hang.count, - depth); - err = -EINVAL; - goto err_client_b; - } - - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - goto err_wedged; - } - - err = 0; -err_client_b: - preempt_client_fini(&b); -err_client_a: - preempt_client_fini(&a); -err_unlock: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; - -err_wedged: - igt_spinner_end(&b.spin); - igt_spinner_end(&a.spin); - i915_gem_set_wedged(i915); - err = -EIO; - goto err_client_b; -} - -static int __i915_sw_fence_call -dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) -{ - return NOTIFY_DONE; -} - -static struct i915_request *dummy_request(struct intel_engine_cs *engine) -{ - struct i915_request *rq; - - rq = kzalloc(sizeof(*rq), GFP_KERNEL); - if (!rq) - return NULL; - - INIT_LIST_HEAD(&rq->active_list); - rq->engine = engine; - - i915_sched_node_init(&rq->sched); - - /* mark this request as permanently incomplete */ - rq->fence.seqno = 1; - BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */ - rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1; - GEM_BUG_ON(i915_request_completed(rq)); - - i915_sw_fence_init(&rq->submit, dummy_notify); - i915_sw_fence_commit(&rq->submit); - - return rq; -} - -static void dummy_request_free(struct i915_request *dummy) -{ - i915_request_mark_complete(dummy); - i915_sched_node_fini(&dummy->sched); - i915_sw_fence_fini(&dummy->submit); - - dma_fence_free(&dummy->fence); -} - -static int 
live_suppress_wait_preempt(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct preempt_client client[4]; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - int err = -ENOMEM; - int i; - - /* - * Waiters are given a little priority nudge, but not enough - * to actually cause any preemption. Double check that we do - * not needlessly generate preempt-to-idle cycles. - */ - - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) - return 0; - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - if (preempt_client_init(i915, &client[0])) /* ELSP[0] */ - goto err_unlock; - if (preempt_client_init(i915, &client[1])) /* ELSP[1] */ - goto err_client_0; - if (preempt_client_init(i915, &client[2])) /* head of queue */ - goto err_client_1; - if (preempt_client_init(i915, &client[3])) /* bystander */ - goto err_client_2; - - for_each_engine(engine, i915, id) { - int depth; - - if (!intel_engine_has_preemption(engine)) - continue; - - if (!engine->emit_init_breadcrumb) - continue; - - for (depth = 0; depth < ARRAY_SIZE(client); depth++) { - struct i915_request *rq[ARRAY_SIZE(client)]; - struct i915_request *dummy; - - engine->execlists.preempt_hang.count = 0; - - dummy = dummy_request(engine); - if (!dummy) - goto err_client_3; - - for (i = 0; i < ARRAY_SIZE(client); i++) { - rq[i] = igt_spinner_create_request(&client[i].spin, - client[i].ctx, engine, - MI_NOOP); - if (IS_ERR(rq[i])) { - err = PTR_ERR(rq[i]); - goto err_wedged; - } - - /* Disable NEWCLIENT promotion */ - __i915_active_request_set(&rq[i]->timeline->last_request, - dummy); - i915_request_add(rq[i]); - } - - dummy_request_free(dummy); - - GEM_BUG_ON(i915_request_completed(rq[0])); - if (!igt_wait_for_spinner(&client[0].spin, rq[0])) { - pr_err("%s: First client failed to start\n", - engine->name); - goto err_wedged; - } - GEM_BUG_ON(!i915_request_started(rq[0])); - - if (i915_request_wait(rq[depth], - I915_WAIT_LOCKED | - I915_WAIT_PRIORITY, - 1) != -ETIME) { - pr_err("%s: Waiter depth:%d completed!\n", - engine->name, depth); - goto err_wedged; - } - - for (i = 0; i < ARRAY_SIZE(client); i++) - igt_spinner_end(&client[i].spin); - - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - goto err_wedged; - - if (engine->execlists.preempt_hang.count) { - pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n", - engine->name, - engine->execlists.preempt_hang.count, - depth); - err = -EINVAL; - goto err_client_3; - } - } - } - - err = 0; -err_client_3: - preempt_client_fini(&client[3]); -err_client_2: - preempt_client_fini(&client[2]); -err_client_1: - preempt_client_fini(&client[1]); -err_client_0: - preempt_client_fini(&client[0]); -err_unlock: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; - -err_wedged: - for (i = 0; i < ARRAY_SIZE(client); i++) - igt_spinner_end(&client[i].spin); - i915_gem_set_wedged(i915); - err = -EIO; - goto err_client_3; -} - -static int live_chain_preempt(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct preempt_client hi, lo; - enum intel_engine_id id; - intel_wakeref_t wakeref; - int err = -ENOMEM; - - /* - * Build a chain AB...BA between two contexts (A, B) and request - * preemption of the last request. It should then complete before - * the previously submitted spinner in B. 
- */ - - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) - return 0; - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - if (preempt_client_init(i915, &hi)) - goto err_unlock; - - if (preempt_client_init(i915, &lo)) - goto err_client_hi; - - for_each_engine(engine, i915, id) { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), - }; - struct igt_live_test t; - struct i915_request *rq; - int ring_size, count, i; - - if (!intel_engine_has_preemption(engine)) - continue; - - rq = igt_spinner_create_request(&lo.spin, - lo.ctx, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) - goto err_wedged; - i915_request_add(rq); - - ring_size = rq->wa_tail - rq->head; - if (ring_size < 0) - ring_size += rq->ring->size; - ring_size = rq->ring->size / ring_size; - pr_debug("%s(%s): Using maximum of %d requests\n", - __func__, engine->name, ring_size); - - igt_spinner_end(&lo.spin); - if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 2) < 0) { - pr_err("Timed out waiting to flush %s\n", engine->name); - goto err_wedged; - } - - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { - err = -EIO; - goto err_wedged; - } - - for_each_prime_number_from(count, 1, ring_size) { - rq = igt_spinner_create_request(&hi.spin, - hi.ctx, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) - goto err_wedged; - i915_request_add(rq); - if (!igt_wait_for_spinner(&hi.spin, rq)) - goto err_wedged; - - rq = igt_spinner_create_request(&lo.spin, - lo.ctx, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) - goto err_wedged; - i915_request_add(rq); - - for (i = 0; i < count; i++) { - rq = i915_request_alloc(engine, lo.ctx); - if (IS_ERR(rq)) - goto err_wedged; - i915_request_add(rq); - } - - rq = i915_request_alloc(engine, hi.ctx); - if (IS_ERR(rq)) - goto err_wedged; - i915_request_add(rq); - engine->schedule(rq, &attr); - - igt_spinner_end(&hi.spin); - if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("Failed to preempt over chain of %d\n", - count); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - goto err_wedged; - } - igt_spinner_end(&lo.spin); - - rq = i915_request_alloc(engine, lo.ctx); - if (IS_ERR(rq)) - goto err_wedged; - i915_request_add(rq); - if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("Failed to flush low priority chain of %d requests\n", - count); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - goto err_wedged; - } - } - - if (igt_live_test_end(&t)) { - err = -EIO; - goto err_wedged; - } - } - - err = 0; -err_client_lo: - preempt_client_fini(&lo); -err_client_hi: - preempt_client_fini(&hi); -err_unlock: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; - -err_wedged: - igt_spinner_end(&hi.spin); - igt_spinner_end(&lo.spin); - i915_gem_set_wedged(i915); - err = -EIO; - goto err_client_lo; -} - -static int live_preempt_hang(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx_hi, *ctx_lo; - struct igt_spinner spin_hi, spin_lo; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - int err = -ENOMEM; - - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) - return 0; - - if (!intel_has_reset_engine(i915)) - return 0; - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - if (igt_spinner_init(&spin_hi, i915)) - 
goto err_unlock; - - if (igt_spinner_init(&spin_lo, i915)) - goto err_spin_hi; - - ctx_hi = kernel_context(i915); - if (!ctx_hi) - goto err_spin_lo; - ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; - - ctx_lo = kernel_context(i915); - if (!ctx_lo) - goto err_ctx_hi; - ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; - - for_each_engine(engine, i915, id) { - struct i915_request *rq; - - if (!intel_engine_has_preemption(engine)) - continue; - - rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - i915_request_add(rq); - if (!igt_wait_for_spinner(&spin_lo, rq)) { - GEM_TRACE("lo spinner failed to start\n"); - GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); - err = -EIO; - goto err_ctx_lo; - } - - rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) { - igt_spinner_end(&spin_lo); - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - init_completion(&engine->execlists.preempt_hang.completion); - engine->execlists.preempt_hang.inject_hang = true; - - i915_request_add(rq); - - if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion, - HZ / 10)) { - pr_err("Preemption did not occur within timeout!"); - GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); - err = -EIO; - goto err_ctx_lo; - } - - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - i915_reset_engine(engine, NULL); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - - engine->execlists.preempt_hang.inject_hang = false; - - if (!igt_wait_for_spinner(&spin_hi, rq)) { - GEM_TRACE("hi spinner failed to start\n"); - GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); - err = -EIO; - goto err_ctx_lo; - } - - igt_spinner_end(&spin_hi); - igt_spinner_end(&spin_lo); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { - err = -EIO; - goto err_ctx_lo; - } - } - - err = 0; -err_ctx_lo: - kernel_context_close(ctx_lo); -err_ctx_hi: - kernel_context_close(ctx_hi); -err_spin_lo: - igt_spinner_fini(&spin_lo); -err_spin_hi: - igt_spinner_fini(&spin_hi); -err_unlock: - igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int random_range(struct rnd_state *rnd, int min, int max) -{ - return i915_prandom_u32_max_state(max - min, rnd) + min; -} - -static int random_priority(struct rnd_state *rnd) -{ - return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); -} - -struct preempt_smoke { - struct drm_i915_private *i915; - struct i915_gem_context **contexts; - struct intel_engine_cs *engine; - struct drm_i915_gem_object *batch; - unsigned int ncontext; - struct rnd_state prng; - unsigned long count; -}; - -static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) -{ - return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, - &smoke->prng)]; -} - -static int smoke_submit(struct preempt_smoke *smoke, - struct i915_gem_context *ctx, int prio, - struct drm_i915_gem_object *batch) -{ - struct i915_request *rq; - struct i915_vma *vma = NULL; - int err = 0; - - if (batch) { - vma = i915_vma_instance(batch, &ctx->ppgtt->vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return err; - } - - ctx->sched.priority = prio; - - rq = i915_request_alloc(smoke->engine, ctx); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto unpin; - } - - if (vma) { - err = rq->engine->emit_bb_start(rq, - vma->node.start, - PAGE_SIZE, 0); - if 
(!err) - err = i915_vma_move_to_active(vma, rq, 0); - } - - i915_request_add(rq); - -unpin: - if (vma) - i915_vma_unpin(vma); - - return err; -} - -static int smoke_crescendo_thread(void *arg) -{ - struct preempt_smoke *smoke = arg; - IGT_TIMEOUT(end_time); - unsigned long count; - - count = 0; - do { - struct i915_gem_context *ctx = smoke_context(smoke); - int err; - - mutex_lock(&smoke->i915->drm.struct_mutex); - err = smoke_submit(smoke, - ctx, count % I915_PRIORITY_MAX, - smoke->batch); - mutex_unlock(&smoke->i915->drm.struct_mutex); - if (err) - return err; - - count++; - } while (!__igt_timeout(end_time, NULL)); - - smoke->count = count; - return 0; -} - -static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) -#define BATCH BIT(0) -{ - struct task_struct *tsk[I915_NUM_ENGINES] = {}; - struct preempt_smoke arg[I915_NUM_ENGINES]; - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned long count; - int err = 0; - - mutex_unlock(&smoke->i915->drm.struct_mutex); - - for_each_engine(engine, smoke->i915, id) { - arg[id] = *smoke; - arg[id].engine = engine; - if (!(flags & BATCH)) - arg[id].batch = NULL; - arg[id].count = 0; - - tsk[id] = kthread_run(smoke_crescendo_thread, &arg, - "igt/smoke:%d", id); - if (IS_ERR(tsk[id])) { - err = PTR_ERR(tsk[id]); - break; - } - get_task_struct(tsk[id]); - } - - count = 0; - for_each_engine(engine, smoke->i915, id) { - int status; - - if (IS_ERR_OR_NULL(tsk[id])) - continue; - - status = kthread_stop(tsk[id]); - if (status && !err) - err = status; - - count += arg[id].count; - - put_task_struct(tsk[id]); - } - - mutex_lock(&smoke->i915->drm.struct_mutex); - - pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", - count, flags, - RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); - return 0; -} - -static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) -{ - enum intel_engine_id id; - IGT_TIMEOUT(end_time); - unsigned long count; - - count = 0; - do { - for_each_engine(smoke->engine, smoke->i915, id) { - struct i915_gem_context *ctx = smoke_context(smoke); - int err; - - err = smoke_submit(smoke, - ctx, random_priority(&smoke->prng), - flags & BATCH ? 
smoke->batch : NULL); - if (err) - return err; - - count++; - } - } while (!__igt_timeout(end_time, NULL)); - - pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", - count, flags, - RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); - return 0; -} - -static int live_preempt_smoke(void *arg) -{ - struct preempt_smoke smoke = { - .i915 = arg, - .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), - .ncontext = 1024, - }; - const unsigned int phase[] = { 0, BATCH }; - intel_wakeref_t wakeref; - struct igt_live_test t; - int err = -ENOMEM; - u32 *cs; - int n; - - if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915)) - return 0; - - smoke.contexts = kmalloc_array(smoke.ncontext, - sizeof(*smoke.contexts), - GFP_KERNEL); - if (!smoke.contexts) - return -ENOMEM; - - mutex_lock(&smoke.i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(smoke.i915); - - smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE); - if (IS_ERR(smoke.batch)) { - err = PTR_ERR(smoke.batch); - goto err_unlock; - } - - cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto err_batch; - } - for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) - cs[n] = MI_ARB_CHECK; - cs[n] = MI_BATCH_BUFFER_END; - i915_gem_object_flush_map(smoke.batch); - i915_gem_object_unpin_map(smoke.batch); - - if (igt_live_test_begin(&t, smoke.i915, __func__, "all")) { - err = -EIO; - goto err_batch; - } - - for (n = 0; n < smoke.ncontext; n++) { - smoke.contexts[n] = kernel_context(smoke.i915); - if (!smoke.contexts[n]) - goto err_ctx; - } - - for (n = 0; n < ARRAY_SIZE(phase); n++) { - err = smoke_crescendo(&smoke, phase[n]); - if (err) - goto err_ctx; - - err = smoke_random(&smoke, phase[n]); - if (err) - goto err_ctx; - } - -err_ctx: - if (igt_live_test_end(&t)) - err = -EIO; - - for (n = 0; n < smoke.ncontext; n++) { - if (!smoke.contexts[n]) - break; - kernel_context_close(smoke.contexts[n]); - } - -err_batch: - i915_gem_object_put(smoke.batch); -err_unlock: - intel_runtime_pm_put(smoke.i915, wakeref); - mutex_unlock(&smoke.i915->drm.struct_mutex); - kfree(smoke.contexts); - - return err; -} - -int intel_execlists_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(live_sanitycheck), - SUBTEST(live_busywait_preempt), - SUBTEST(live_preempt), - SUBTEST(live_late_preempt), - SUBTEST(live_suppress_self_preempt), - SUBTEST(live_suppress_wait_preempt), - SUBTEST(live_chain_preempt), - SUBTEST(live_preempt_hang), - SUBTEST(live_preempt_smoke), - }; - - if (!HAS_EXECLISTS(i915)) - return 0; - - if (i915_terminally_wedged(i915)) - return 0; - - return i915_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c deleted file mode 100644 index aa841e4d3031..000000000000 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ /dev/null @@ -1,1172 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation - */ - -#include "../i915_selftest.h" -#include "../i915_reset.h" - -#include "igt_flush_test.h" -#include "igt_reset.h" -#include "igt_spinner.h" -#include "igt_wedge_me.h" -#include "mock_context.h" -#include "mock_drm.h" - -static const struct wo_register { - enum intel_platform platform; - u32 reg; -} wo_registers[] = { - { INTEL_GEMINILAKE, 0x731c } -}; - -#define REF_NAME_MAX (INTEL_ENGINE_CS_MAX_NAME + 4) -struct wa_lists { - struct i915_wa_list gt_wa_list; - struct { - char 
name[REF_NAME_MAX]; - struct i915_wa_list wa_list; - } engine[I915_NUM_ENGINES]; -}; - -static void -reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - memset(lists, 0, sizeof(*lists)); - - wa_init_start(&lists->gt_wa_list, "GT_REF"); - gt_init_workarounds(i915, &lists->gt_wa_list); - wa_init_finish(&lists->gt_wa_list); - - for_each_engine(engine, i915, id) { - struct i915_wa_list *wal = &lists->engine[id].wa_list; - char *name = lists->engine[id].name; - - snprintf(name, REF_NAME_MAX, "%s_REF", engine->name); - - wa_init_start(wal, name); - engine_init_workarounds(engine, wal); - wa_init_finish(wal); - } -} - -static void -reference_lists_fini(struct drm_i915_private *i915, struct wa_lists *lists) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) - intel_wa_list_free(&lists->engine[id].wa_list); - - intel_wa_list_free(&lists->gt_wa_list); -} - -static struct drm_i915_gem_object * -read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) -{ - const u32 base = engine->mmio_base; - struct drm_i915_gem_object *result; - intel_wakeref_t wakeref; - struct i915_request *rq; - struct i915_vma *vma; - u32 srm, *cs; - int err; - int i; - - result = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); - if (IS_ERR(result)) - return result; - - i915_gem_object_set_cache_coherency(result, I915_CACHE_LLC); - - cs = i915_gem_object_pin_map(result, I915_MAP_WB); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto err_obj; - } - memset(cs, 0xc5, PAGE_SIZE); - i915_gem_object_flush_map(result); - i915_gem_object_unpin_map(result); - - vma = i915_vma_instance(result, &engine->i915->ggtt.vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; - } - - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); - if (err) - goto err_obj; - - rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(engine->i915, wakeref) - rq = i915_request_alloc(engine, ctx); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_pin; - } - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (err) - goto err_req; - - srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; - if (INTEL_GEN(ctx->i915) >= 8) - srm++; - - cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto err_req; - } - - for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { - *cs++ = srm; - *cs++ = i915_mmio_reg_offset(RING_FORCE_TO_NONPRIV(base, i)); - *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; - *cs++ = 0; - } - intel_ring_advance(rq, cs); - - i915_gem_object_get(result); - i915_gem_object_set_active_reference(result); - - i915_request_add(rq); - i915_vma_unpin(vma); - - return result; - -err_req: - i915_request_add(rq); -err_pin: - i915_vma_unpin(vma); -err_obj: - i915_gem_object_put(result); - return ERR_PTR(err); -} - -static u32 -get_whitelist_reg(const struct intel_engine_cs *engine, unsigned int i) -{ - i915_reg_t reg = i < engine->whitelist.count ? 
- engine->whitelist.list[i].reg : - RING_NOPID(engine->mmio_base); - - return i915_mmio_reg_offset(reg); -} - -static void -print_results(const struct intel_engine_cs *engine, const u32 *results) -{ - unsigned int i; - - for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { - u32 expected = get_whitelist_reg(engine, i); - u32 actual = results[i]; - - pr_info("RING_NONPRIV[%d]: expected 0x%08x, found 0x%08x\n", - i, expected, actual); - } -} - -static int check_whitelist(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - struct drm_i915_gem_object *results; - struct igt_wedge_me wedge; - u32 *vaddr; - int err; - int i; - - results = read_nonprivs(ctx, engine); - if (IS_ERR(results)) - return PTR_ERR(results); - - err = 0; - igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */ - err = i915_gem_object_set_to_cpu_domain(results, false); - if (i915_terminally_wedged(ctx->i915)) - err = -EIO; - if (err) - goto out_put; - - vaddr = i915_gem_object_pin_map(results, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto out_put; - } - - for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { - u32 expected = get_whitelist_reg(engine, i); - u32 actual = vaddr[i]; - - if (expected != actual) { - print_results(engine, vaddr); - pr_err("Invalid RING_NONPRIV[%d], expected 0x%08x, found 0x%08x\n", - i, expected, actual); - - err = -EINVAL; - break; - } - } - - i915_gem_object_unpin_map(results); -out_put: - i915_gem_object_put(results); - return err; -} - -static int do_device_reset(struct intel_engine_cs *engine) -{ - i915_reset(engine->i915, engine->mask, "live_workarounds"); - return 0; -} - -static int do_engine_reset(struct intel_engine_cs *engine) -{ - return i915_reset_engine(engine, "live_workarounds"); -} - -static int -switch_to_scratch_context(struct intel_engine_cs *engine, - struct igt_spinner *spin) -{ - struct i915_gem_context *ctx; - struct i915_request *rq; - intel_wakeref_t wakeref; - int err = 0; - - ctx = kernel_context(engine->i915); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - GEM_BUG_ON(i915_gem_context_is_bannable(ctx)); - - rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(engine->i915, wakeref) - rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP); - - kernel_context_close(ctx); - - if (IS_ERR(rq)) { - spin = NULL; - err = PTR_ERR(rq); - goto err; - } - - i915_request_add(rq); - - if (spin && !igt_wait_for_spinner(spin, rq)) { - pr_err("Spinner failed to start\n"); - err = -ETIMEDOUT; - } - -err: - if (err && spin) - igt_spinner_end(spin); - - return err; -} - -static int check_whitelist_across_reset(struct intel_engine_cs *engine, - int (*reset)(struct intel_engine_cs *), - const char *name) -{ - struct drm_i915_private *i915 = engine->i915; - struct i915_gem_context *ctx; - struct igt_spinner spin; - intel_wakeref_t wakeref; - int err; - - pr_info("Checking %d whitelisted registers (RING_NONPRIV) [%s]\n", - engine->whitelist.count, name); - - err = igt_spinner_init(&spin, i915); - if (err) - return err; - - ctx = kernel_context(i915); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - err = check_whitelist(ctx, engine); - if (err) { - pr_err("Invalid whitelist *before* %s reset!\n", name); - goto out; - } - - err = switch_to_scratch_context(engine, &spin); - if (err) - goto out; - - with_intel_runtime_pm(i915, wakeref) - err = reset(engine); - - igt_spinner_end(&spin); - igt_spinner_fini(&spin); - - if (err) { - pr_err("%s reset failed\n", name); - goto out; - } - - err = check_whitelist(ctx, engine); - if (err) { - pr_err("Whitelist not 
preserved in context across %s reset!\n", - name); - goto out; - } - - kernel_context_close(ctx); - - ctx = kernel_context(i915); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - err = check_whitelist(ctx, engine); - if (err) { - pr_err("Invalid whitelist *after* %s reset in fresh context!\n", - name); - goto out; - } - -out: - kernel_context_close(ctx); - return err; -} - -static struct i915_vma *create_batch(struct i915_gem_context *ctx) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int err; - - obj = i915_gem_object_create_internal(ctx->i915, 16 * PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err_obj; - - err = i915_gem_object_set_to_wc_domain(obj, true); - if (err) - goto err_obj; - - return vma; - -err_obj: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - -static u32 reg_write(u32 old, u32 new, u32 rsvd) -{ - if (rsvd == 0x0000ffff) { - old &= ~(new >> 16); - old |= new & (new >> 16); - } else { - old &= ~rsvd; - old |= new & rsvd; - } - - return old; -} - -static bool wo_register(struct intel_engine_cs *engine, u32 reg) -{ - enum intel_platform platform = INTEL_INFO(engine->i915)->platform; - int i; - - for (i = 0; i < ARRAY_SIZE(wo_registers); i++) { - if (wo_registers[i].platform == platform && - wo_registers[i].reg == reg) - return true; - } - - return false; -} - -static int check_dirty_whitelist(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - const u32 values[] = { - 0x00000000, - 0x01010101, - 0x10100101, - 0x03030303, - 0x30300303, - 0x05050505, - 0x50500505, - 0x0f0f0f0f, - 0xf00ff00f, - 0x10101010, - 0xf0f01010, - 0x30303030, - 0xa0a03030, - 0x50505050, - 0xc0c05050, - 0xf0f0f0f0, - 0x11111111, - 0x33333333, - 0x55555555, - 0x0000ffff, - 0x00ff00ff, - 0xff0000ff, - 0xffff00ff, - 0xffffffff, - }; - struct i915_vma *scratch; - struct i915_vma *batch; - int err = 0, i, v; - u32 *cs, *results; - - scratch = create_scratch(&ctx->ppgtt->vm, 2 * ARRAY_SIZE(values) + 1); - if (IS_ERR(scratch)) - return PTR_ERR(scratch); - - batch = create_batch(ctx); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_scratch; - } - - for (i = 0; i < engine->whitelist.count; i++) { - u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg); - u64 addr = scratch->node.start; - struct i915_request *rq; - u32 srm, lrm, rsvd; - u32 expect; - int idx; - - if (wo_register(engine, reg)) - continue; - - srm = MI_STORE_REGISTER_MEM; - lrm = MI_LOAD_REGISTER_MEM; - if (INTEL_GEN(ctx->i915) >= 8) - lrm++, srm++; - - pr_debug("%s: Writing garbage to %x\n", - engine->name, reg); - - cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto out_batch; - } - - /* SRM original */ - *cs++ = srm; - *cs++ = reg; - *cs++ = lower_32_bits(addr); - *cs++ = upper_32_bits(addr); - - idx = 1; - for (v = 0; v < ARRAY_SIZE(values); v++) { - /* LRI garbage */ - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = reg; - *cs++ = values[v]; - - /* SRM result */ - *cs++ = srm; - *cs++ = reg; - *cs++ = lower_32_bits(addr + sizeof(u32) * idx); - *cs++ = upper_32_bits(addr + sizeof(u32) * idx); - idx++; - } - for (v = 0; v < ARRAY_SIZE(values); v++) { - /* LRI garbage */ - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = reg; - *cs++ = ~values[v]; - - /* SRM result */ - *cs++ = srm; - *cs++ = reg; - *cs++ = lower_32_bits(addr + sizeof(u32) * idx); - *cs++ = 
upper_32_bits(addr + sizeof(u32) * idx); - idx++; - } - GEM_BUG_ON(idx * sizeof(u32) > scratch->size); - - /* LRM original -- don't leave garbage in the context! */ - *cs++ = lrm; - *cs++ = reg; - *cs++ = lower_32_bits(addr); - *cs++ = upper_32_bits(addr); - - *cs++ = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(batch->obj); - i915_gem_object_unpin_map(batch->obj); - i915_gem_chipset_flush(ctx->i915); - - rq = i915_request_alloc(engine, ctx); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_batch; - } - - if (engine->emit_init_breadcrumb) { /* Be nice if we hang */ - err = engine->emit_init_breadcrumb(rq); - if (err) - goto err_request; - } - - err = engine->emit_bb_start(rq, - batch->node.start, PAGE_SIZE, - 0); - if (err) - goto err_request; - -err_request: - i915_request_add(rq); - if (err) - goto out_batch; - - if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { - pr_err("%s: Futzing %x timedout; cancelling test\n", - engine->name, reg); - i915_gem_set_wedged(ctx->i915); - err = -EIO; - goto out_batch; - } - - results = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); - if (IS_ERR(results)) { - err = PTR_ERR(results); - goto out_batch; - } - - GEM_BUG_ON(values[ARRAY_SIZE(values) - 1] != 0xffffffff); - rsvd = results[ARRAY_SIZE(values)]; /* detect write masking */ - if (!rsvd) { - pr_err("%s: Unable to write to whitelisted register %x\n", - engine->name, reg); - err = -EINVAL; - goto out_unpin; - } - - expect = results[0]; - idx = 1; - for (v = 0; v < ARRAY_SIZE(values); v++) { - expect = reg_write(expect, values[v], rsvd); - if (results[idx] != expect) - err++; - idx++; - } - for (v = 0; v < ARRAY_SIZE(values); v++) { - expect = reg_write(expect, ~values[v], rsvd); - if (results[idx] != expect) - err++; - idx++; - } - if (err) { - pr_err("%s: %d mismatch between values written to whitelisted register [%x], and values read back!\n", - engine->name, err, reg); - - pr_info("%s: Whitelisted register: %x, original value %08x, rsvd %08x\n", - engine->name, reg, results[0], rsvd); - - expect = results[0]; - idx = 1; - for (v = 0; v < ARRAY_SIZE(values); v++) { - u32 w = values[v]; - - expect = reg_write(expect, w, rsvd); - pr_info("Wrote %08x, read %08x, expect %08x\n", - w, results[idx], expect); - idx++; - } - for (v = 0; v < ARRAY_SIZE(values); v++) { - u32 w = ~values[v]; - - expect = reg_write(expect, w, rsvd); - pr_info("Wrote %08x, read %08x, expect %08x\n", - w, results[idx], expect); - idx++; - } - - err = -EINVAL; - } -out_unpin: - i915_gem_object_unpin_map(scratch->obj); - if (err) - break; - } - - if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED)) - err = -EIO; -out_batch: - i915_vma_unpin_and_release(&batch, 0); -out_scratch: - i915_vma_unpin_and_release(&scratch, 0); - return err; -} - -static int live_dirty_whitelist(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - enum intel_engine_id id; - intel_wakeref_t wakeref; - struct drm_file *file; - int err = 0; - - /* Can the user write to the whitelisted registers? 
*/ - - if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */ - return 0; - - wakeref = intel_runtime_pm_get(i915); - - mutex_unlock(&i915->drm.struct_mutex); - file = mock_file(i915); - mutex_lock(&i915->drm.struct_mutex); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto out_rpm; - } - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_file; - } - - for_each_engine(engine, i915, id) { - if (engine->whitelist.count == 0) - continue; - - err = check_dirty_whitelist(ctx, engine); - if (err) - goto out_file; - } - -out_file: - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); - mutex_lock(&i915->drm.struct_mutex); -out_rpm: - intel_runtime_pm_put(i915, wakeref); - return err; -} - -static int live_reset_whitelist(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine = i915->engine[RCS0]; - int err = 0; - - /* If we reset the gpu, we should not lose the RING_NONPRIV */ - - if (!engine || engine->whitelist.count == 0) - return 0; - - igt_global_reset_lock(i915); - - if (intel_has_reset_engine(i915)) { - err = check_whitelist_across_reset(engine, - do_engine_reset, - "engine"); - if (err) - goto out; - } - - if (intel_has_gpu_reset(i915)) { - err = check_whitelist_across_reset(engine, - do_device_reset, - "device"); - if (err) - goto out; - } - -out: - igt_global_reset_unlock(i915); - return err; -} - -static int read_whitelisted_registers(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct i915_vma *results) -{ - intel_wakeref_t wakeref; - struct i915_request *rq; - int i, err = 0; - u32 srm, *cs; - - rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(engine->i915, wakeref) - rq = i915_request_alloc(engine, ctx); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - srm = MI_STORE_REGISTER_MEM; - if (INTEL_GEN(ctx->i915) >= 8) - srm++; - - cs = intel_ring_begin(rq, 4 * engine->whitelist.count); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto err_req; - } - - for (i = 0; i < engine->whitelist.count; i++) { - u64 offset = results->node.start + sizeof(u32) * i; - - *cs++ = srm; - *cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg); - *cs++ = lower_32_bits(offset); - *cs++ = upper_32_bits(offset); - } - intel_ring_advance(rq, cs); - -err_req: - i915_request_add(rq); - - if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) - err = -EIO; - - return err; -} - -static int scrub_whitelisted_registers(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - intel_wakeref_t wakeref; - struct i915_request *rq; - struct i915_vma *batch; - int i, err = 0; - u32 *cs; - - batch = create_batch(ctx); - if (IS_ERR(batch)) - return PTR_ERR(batch); - - cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto err_batch; - } - - *cs++ = MI_LOAD_REGISTER_IMM(engine->whitelist.count); - for (i = 0; i < engine->whitelist.count; i++) { - *cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg); - *cs++ = 0xffffffff; - } - *cs++ = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(batch->obj); - i915_gem_chipset_flush(ctx->i915); - - rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(engine->i915, wakeref) - rq = i915_request_alloc(engine, ctx); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - if (engine->emit_init_breadcrumb) { /* Be nice if we hang */ - err = engine->emit_init_breadcrumb(rq); - if (err) - goto err_request; - } - - /* Perform the writes from an unprivileged "user" batch */ - err = engine->emit_bb_start(rq, 
batch->node.start, 0, 0); - -err_request: - i915_request_add(rq); - if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) - err = -EIO; - -err_unpin: - i915_gem_object_unpin_map(batch->obj); -err_batch: - i915_vma_unpin_and_release(&batch, 0); - return err; -} - -struct regmask { - i915_reg_t reg; - unsigned long gen_mask; -}; - -static bool find_reg(struct drm_i915_private *i915, - i915_reg_t reg, - const struct regmask *tbl, - unsigned long count) -{ - u32 offset = i915_mmio_reg_offset(reg); - - while (count--) { - if (INTEL_INFO(i915)->gen_mask & tbl->gen_mask && - i915_mmio_reg_offset(tbl->reg) == offset) - return true; - tbl++; - } - - return false; -} - -static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg) -{ - /* Alas, we must pardon some whitelists. Mistakes already made */ - static const struct regmask pardon[] = { - { GEN9_CTX_PREEMPT_REG, INTEL_GEN_MASK(9, 9) }, - { GEN8_L3SQCREG4, INTEL_GEN_MASK(9, 9) }, - }; - - return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon)); -} - -static bool result_eq(struct intel_engine_cs *engine, - u32 a, u32 b, i915_reg_t reg) -{ - if (a != b && !pardon_reg(engine->i915, reg)) { - pr_err("Whitelisted register 0x%4x not context saved: A=%08x, B=%08x\n", - i915_mmio_reg_offset(reg), a, b); - return false; - } - - return true; -} - -static bool writeonly_reg(struct drm_i915_private *i915, i915_reg_t reg) -{ - /* Some registers do not seem to behave and our writes unreadable */ - static const struct regmask wo[] = { - { GEN9_SLICE_COMMON_ECO_CHICKEN1, INTEL_GEN_MASK(9, 9) }, - }; - - return find_reg(i915, reg, wo, ARRAY_SIZE(wo)); -} - -static bool result_neq(struct intel_engine_cs *engine, - u32 a, u32 b, i915_reg_t reg) -{ - if (a == b && !writeonly_reg(engine->i915, reg)) { - pr_err("Whitelist register 0x%4x:%08x was unwritable\n", - i915_mmio_reg_offset(reg), a); - return false; - } - - return true; -} - -static int -check_whitelisted_registers(struct intel_engine_cs *engine, - struct i915_vma *A, - struct i915_vma *B, - bool (*fn)(struct intel_engine_cs *engine, - u32 a, u32 b, - i915_reg_t reg)) -{ - u32 *a, *b; - int i, err; - - a = i915_gem_object_pin_map(A->obj, I915_MAP_WB); - if (IS_ERR(a)) - return PTR_ERR(a); - - b = i915_gem_object_pin_map(B->obj, I915_MAP_WB); - if (IS_ERR(b)) { - err = PTR_ERR(b); - goto err_a; - } - - err = 0; - for (i = 0; i < engine->whitelist.count; i++) { - if (!fn(engine, a[i], b[i], engine->whitelist.list[i].reg)) - err = -EINVAL; - } - - i915_gem_object_unpin_map(B->obj); -err_a: - i915_gem_object_unpin_map(A->obj); - return err; -} - -static int live_isolated_whitelist(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct { - struct i915_gem_context *ctx; - struct i915_vma *scratch[2]; - } client[2] = {}; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int i, err = 0; - - /* - * Check that a write into a whitelist register works, but - * invisible to a second context. 
- */ - - if (!intel_engines_has_context_isolation(i915)) - return 0; - - if (!i915->kernel_context->ppgtt) - return 0; - - for (i = 0; i < ARRAY_SIZE(client); i++) { - struct i915_gem_context *c; - - c = kernel_context(i915); - if (IS_ERR(c)) { - err = PTR_ERR(c); - goto err; - } - - client[i].scratch[0] = create_scratch(&c->ppgtt->vm, 1024); - if (IS_ERR(client[i].scratch[0])) { - err = PTR_ERR(client[i].scratch[0]); - kernel_context_close(c); - goto err; - } - - client[i].scratch[1] = create_scratch(&c->ppgtt->vm, 1024); - if (IS_ERR(client[i].scratch[1])) { - err = PTR_ERR(client[i].scratch[1]); - i915_vma_unpin_and_release(&client[i].scratch[0], 0); - kernel_context_close(c); - goto err; - } - - client[i].ctx = c; - } - - for_each_engine(engine, i915, id) { - if (!engine->whitelist.count) - continue; - - /* Read default values */ - err = read_whitelisted_registers(client[0].ctx, engine, - client[0].scratch[0]); - if (err) - goto err; - - /* Try to overwrite registers (should only affect ctx0) */ - err = scrub_whitelisted_registers(client[0].ctx, engine); - if (err) - goto err; - - /* Read values from ctx1, we expect these to be defaults */ - err = read_whitelisted_registers(client[1].ctx, engine, - client[1].scratch[0]); - if (err) - goto err; - - /* Verify that both reads return the same default values */ - err = check_whitelisted_registers(engine, - client[0].scratch[0], - client[1].scratch[0], - result_eq); - if (err) - goto err; - - /* Read back the updated values in ctx0 */ - err = read_whitelisted_registers(client[0].ctx, engine, - client[0].scratch[1]); - if (err) - goto err; - - /* User should be granted privilege to overwhite regs */ - err = check_whitelisted_registers(engine, - client[0].scratch[0], - client[0].scratch[1], - result_neq); - if (err) - goto err; - } - -err: - for (i = 0; i < ARRAY_SIZE(client); i++) { - if (!client[i].ctx) - break; - - i915_vma_unpin_and_release(&client[i].scratch[1], 0); - i915_vma_unpin_and_release(&client[i].scratch[0], 0); - kernel_context_close(client[i].ctx); - } - - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - - return err; -} - -static bool verify_gt_engine_wa(struct drm_i915_private *i915, - struct wa_lists *lists, const char *str) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - bool ok = true; - - ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str); - - for_each_engine(engine, i915, id) { - ok &= engine_wa_list_verify(engine, - &lists->engine[id].wa_list, - str) == 0; - } - - return ok; -} - -static int -live_gpu_reset_gt_engine_workarounds(void *arg) -{ - struct drm_i915_private *i915 = arg; - intel_wakeref_t wakeref; - struct wa_lists lists; - bool ok; - - if (!intel_has_gpu_reset(i915)) - return 0; - - pr_info("Verifying after GPU reset...\n"); - - igt_global_reset_lock(i915); - wakeref = intel_runtime_pm_get(i915); - - reference_lists_init(i915, &lists); - - ok = verify_gt_engine_wa(i915, &lists, "before reset"); - if (!ok) - goto out; - - i915_reset(i915, ALL_ENGINES, "live_workarounds"); - - ok = verify_gt_engine_wa(i915, &lists, "after reset"); - -out: - reference_lists_fini(i915, &lists); - intel_runtime_pm_put(i915, wakeref); - igt_global_reset_unlock(i915); - - return ok ? 
0 : -ESRCH; -} - -static int -live_engine_reset_gt_engine_workarounds(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - struct igt_spinner spin; - enum intel_engine_id id; - struct i915_request *rq; - intel_wakeref_t wakeref; - struct wa_lists lists; - int ret = 0; - - if (!intel_has_reset_engine(i915)) - return 0; - - ctx = kernel_context(i915); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - igt_global_reset_lock(i915); - wakeref = intel_runtime_pm_get(i915); - - reference_lists_init(i915, &lists); - - for_each_engine(engine, i915, id) { - bool ok; - - pr_info("Verifying after %s reset...\n", engine->name); - - ok = verify_gt_engine_wa(i915, &lists, "before reset"); - if (!ok) { - ret = -ESRCH; - goto err; - } - - i915_reset_engine(engine, "live_workarounds"); - - ok = verify_gt_engine_wa(i915, &lists, "after idle reset"); - if (!ok) { - ret = -ESRCH; - goto err; - } - - ret = igt_spinner_init(&spin, i915); - if (ret) - goto err; - - rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP); - if (IS_ERR(rq)) { - ret = PTR_ERR(rq); - igt_spinner_fini(&spin); - goto err; - } - - i915_request_add(rq); - - if (!igt_wait_for_spinner(&spin, rq)) { - pr_err("Spinner failed to start\n"); - igt_spinner_fini(&spin); - ret = -ETIMEDOUT; - goto err; - } - - i915_reset_engine(engine, "live_workarounds"); - - igt_spinner_end(&spin); - igt_spinner_fini(&spin); - - ok = verify_gt_engine_wa(i915, &lists, "after busy reset"); - if (!ok) { - ret = -ESRCH; - goto err; - } - } - -err: - reference_lists_fini(i915, &lists); - intel_runtime_pm_put(i915, wakeref); - igt_global_reset_unlock(i915); - kernel_context_close(ctx); - - igt_flush_test(i915, I915_WAIT_LOCKED); - - return ret; -} - -int intel_workarounds_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(live_dirty_whitelist), - SUBTEST(live_reset_whitelist), - SUBTEST(live_isolated_whitelist), - SUBTEST(live_gpu_reset_gt_engine_workarounds), - SUBTEST(live_engine_reset_gt_engine_workarounds), - }; - int err; - - if (i915_terminally_wedged(i915)) - return 0; - - mutex_lock(&i915->drm.struct_mutex); - err = i915_subtests(tests, i915); - mutex_unlock(&i915->drm.struct_mutex); - - return err; -} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c deleted file mode 100644 index 61a8206ed677..000000000000 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "mock_engine.h" -#include "mock_request.h" - -struct mock_ring { - struct intel_ring base; - struct i915_timeline timeline; -}; - -static void mock_timeline_pin(struct i915_timeline *tl) -{ - tl->pin_count++; -} - -static void mock_timeline_unpin(struct i915_timeline *tl) -{ - GEM_BUG_ON(!tl->pin_count); - tl->pin_count--; -} - -static struct intel_ring *mock_ring(struct intel_engine_cs *engine) -{ - const unsigned long sz = PAGE_SIZE / 2; - struct mock_ring *ring; - - ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); - if (!ring) - return NULL; - - if (i915_timeline_init(engine->i915, &ring->timeline, NULL)) { - kfree(ring); - return NULL; - } - - kref_init(&ring->base.ref); - ring->base.size = sz; - ring->base.effective_size = sz; - ring->base.vaddr = (void *)(ring + 1); - ring->base.timeline = &ring->timeline; - - INIT_LIST_HEAD(&ring->base.request_list); - intel_ring_update_space(&ring->base); - - return &ring->base; -} - -static void mock_ring_free(struct intel_ring *base) -{ - struct mock_ring *ring = container_of(base, typeof(*ring), base); - - i915_timeline_fini(&ring->timeline); - kfree(ring); -} - -static struct i915_request *first_request(struct mock_engine *engine) -{ - return list_first_entry_or_null(&engine->hw_queue, - struct i915_request, - mock.link); -} - -static void advance(struct i915_request *request) -{ - list_del_init(&request->mock.link); - i915_request_mark_complete(request); - GEM_BUG_ON(!i915_request_completed(request)); - - intel_engine_queue_breadcrumbs(request->engine); -} - -static void hw_delay_complete(struct timer_list *t) -{ - struct mock_engine *engine = from_timer(engine, t, hw_delay); - struct i915_request *request; - unsigned long flags; - - spin_lock_irqsave(&engine->hw_lock, flags); - - /* Timer fired, first request is complete */ - request = first_request(engine); - if (request) - advance(request); - - /* - * Also immediately signal any subsequent 0-delay requests, but - * requeue the timer for the next delayed request. 
- */ - while ((request = first_request(engine))) { - if (request->mock.delay) { - mod_timer(&engine->hw_delay, - jiffies + request->mock.delay); - break; - } - - advance(request); - } - - spin_unlock_irqrestore(&engine->hw_lock, flags); -} - -static void mock_context_unpin(struct intel_context *ce) -{ - mock_timeline_unpin(ce->ring->timeline); -} - -static void mock_context_destroy(struct kref *ref) -{ - struct intel_context *ce = container_of(ref, typeof(*ce), ref); - - GEM_BUG_ON(intel_context_is_pinned(ce)); - - if (ce->ring) - mock_ring_free(ce->ring); - - intel_context_free(ce); -} - -static int mock_context_pin(struct intel_context *ce) -{ - if (!ce->ring) { - ce->ring = mock_ring(ce->engine); - if (!ce->ring) - return -ENOMEM; - } - - mock_timeline_pin(ce->ring->timeline); - return 0; -} - -static const struct intel_context_ops mock_context_ops = { - .pin = mock_context_pin, - .unpin = mock_context_unpin, - - .destroy = mock_context_destroy, -}; - -static int mock_request_alloc(struct i915_request *request) -{ - INIT_LIST_HEAD(&request->mock.link); - request->mock.delay = 0; - - return 0; -} - -static int mock_emit_flush(struct i915_request *request, - unsigned int flags) -{ - return 0; -} - -static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs) -{ - return cs; -} - -static void mock_submit_request(struct i915_request *request) -{ - struct mock_engine *engine = - container_of(request->engine, typeof(*engine), base); - unsigned long flags; - - i915_request_submit(request); - - spin_lock_irqsave(&engine->hw_lock, flags); - list_add_tail(&request->mock.link, &engine->hw_queue); - if (list_is_first(&request->mock.link, &engine->hw_queue)) { - if (request->mock.delay) - mod_timer(&engine->hw_delay, - jiffies + request->mock.delay); - else - advance(request); - } - spin_unlock_irqrestore(&engine->hw_lock, flags); -} - -static void mock_reset_prepare(struct intel_engine_cs *engine) -{ -} - -static void mock_reset(struct intel_engine_cs *engine, bool stalled) -{ - GEM_BUG_ON(stalled); -} - -static void mock_reset_finish(struct intel_engine_cs *engine) -{ -} - -static void mock_cancel_requests(struct intel_engine_cs *engine) -{ - struct i915_request *request; - unsigned long flags; - - spin_lock_irqsave(&engine->timeline.lock, flags); - - /* Mark all submitted requests as skipped. 
*/ - list_for_each_entry(request, &engine->timeline.requests, sched.link) { - if (!i915_request_signaled(request)) - dma_fence_set_error(&request->fence, -EIO); - - i915_request_mark_complete(request); - } - - spin_unlock_irqrestore(&engine->timeline.lock, flags); -} - -struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, - const char *name, - int id) -{ - struct mock_engine *engine; - - GEM_BUG_ON(id >= I915_NUM_ENGINES); - - engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); - if (!engine) - return NULL; - - /* minimal engine setup for requests */ - engine->base.i915 = i915; - snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); - engine->base.id = id; - engine->base.mask = BIT(id); - engine->base.status_page.addr = (void *)(engine + 1); - - engine->base.cops = &mock_context_ops; - engine->base.request_alloc = mock_request_alloc; - engine->base.emit_flush = mock_emit_flush; - engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb; - engine->base.submit_request = mock_submit_request; - - engine->base.reset.prepare = mock_reset_prepare; - engine->base.reset.reset = mock_reset; - engine->base.reset.finish = mock_reset_finish; - engine->base.cancel_requests = mock_cancel_requests; - - if (i915_timeline_init(i915, &engine->base.timeline, NULL)) - goto err_free; - i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE); - - intel_engine_init_breadcrumbs(&engine->base); - - /* fake hw queue */ - spin_lock_init(&engine->hw_lock); - timer_setup(&engine->hw_delay, hw_delay_complete, 0); - INIT_LIST_HEAD(&engine->hw_queue); - - if (pin_context(i915->kernel_context, &engine->base, - &engine->base.kernel_context)) - goto err_breadcrumbs; - - return &engine->base; - -err_breadcrumbs: - intel_engine_fini_breadcrumbs(&engine->base); - i915_timeline_fini(&engine->base.timeline); -err_free: - kfree(engine); - return NULL; -} - -void mock_engine_flush(struct intel_engine_cs *engine) -{ - struct mock_engine *mock = - container_of(engine, typeof(*mock), base); - struct i915_request *request, *rn; - - del_timer_sync(&mock->hw_delay); - - spin_lock_irq(&mock->hw_lock); - list_for_each_entry_safe(request, rn, &mock->hw_queue, mock.link) - advance(request); - spin_unlock_irq(&mock->hw_lock); -} - -void mock_engine_reset(struct intel_engine_cs *engine) -{ -} - -void mock_engine_free(struct intel_engine_cs *engine) -{ - struct mock_engine *mock = - container_of(engine, typeof(*mock), base); - struct intel_context *ce; - - GEM_BUG_ON(timer_pending(&mock->hw_delay)); - - ce = fetch_and_zero(&engine->last_retired_context); - if (ce) - intel_context_unpin(ce); - - intel_context_unpin(engine->kernel_context); - - intel_engine_fini_breadcrumbs(engine); - i915_timeline_fini(&engine->timeline); - - kfree(engine); -} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h deleted file mode 100644 index b9cc3a245f16..000000000000 --- a/drivers/gpu/drm/i915/selftests/mock_engine.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above 
copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef __MOCK_ENGINE_H__ -#define __MOCK_ENGINE_H__ - -#include -#include -#include - -#include "../intel_ringbuffer.h" - -struct mock_engine { - struct intel_engine_cs base; - - spinlock_t hw_lock; - struct list_head hw_queue; - struct timer_list hw_delay; -}; - -struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, - const char *name, - int id); -void mock_engine_flush(struct intel_engine_cs *engine); -void mock_engine_reset(struct intel_engine_cs *engine); -void mock_engine_free(struct intel_engine_cs *engine); - -#endif /* !__MOCK_ENGINE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 60bbf8b4df40..f444ee5add27 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -25,7 +25,8 @@ #include #include -#include "mock_engine.h" +#include "gt/mock_engine.h" + #include "mock_context.h" #include "mock_request.h" #include "mock_gem_device.h" diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index d1a7c9608712..f739ba63057f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -22,7 +22,8 @@ * */ -#include "mock_engine.h" +#include "gt/mock_engine.h" + #include "mock_request.h" struct i915_request * -- cgit From d91e657876a96af4f00cc374e26a7a9e8c40d6de Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 21:07:13 +0100 Subject: drm/i915: Introduce struct intel_wakeref For controlling runtime pm of the GT and engines, we would like to have a callback to do extra work the first time we wake up and the last time we drop the wakeref. This first/last access needs serialisation and so we encompass a mutex with the regular intel_wakeref_t tracker. v2: Drop the _once naming and report the errors. 
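
For orientation, a minimal usage sketch of the new API follows. Only the intel_wakeref_* calls are taken from this patch; the engine_pm wrapper, the park/unpark callbacks and their bodies are hypothetical placeholders, used purely to show the calling convention.

#include "i915_drv.h"
#include "intel_wakeref.h"

/* Hypothetical wrapper used only for illustration. */
struct engine_pm {
	struct intel_wakeref wakeref;
};

/* Runs once, for the 0 -> 1 transition, under wf->mutex. */
static int engine_unpark(struct intel_wakeref *wf)
{
	/* e.g. power up and restore state; a failure unwinds the get */
	return 0;
}

/* Runs once, when the final reference is dropped, under wf->mutex. */
static int engine_park(struct intel_wakeref *wf)
{
	/* e.g. flush outstanding work before releasing runtime pm */
	return 0;
}

static void engine_pm_init(struct engine_pm *pm)
{
	intel_wakeref_init(&pm->wakeref);
}

static int engine_pm_get(struct drm_i915_private *i915, struct engine_pm *pm)
{
	return intel_wakeref_get(i915, &pm->wakeref, engine_unpark);
}

static int engine_pm_put(struct drm_i915_private *i915, struct engine_pm *pm)
{
	return intel_wakeref_put(i915, &pm->wakeref, engine_park);
}

The atomic counter keeps the uncontended get/put fast paths lock-free; wf->mutex is taken only for the first get and the last put, which is exactly where the extra first/last work (and its possible failure) is serialised. intel_wakeref_active() can then be used to query whether the reference is currently held.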
Signed-off-by: Chris Wilson Cc; Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190424200717.1686-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/Makefile.header-test | 3 +- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/intel_wakeref.c | 61 ++++++++++++++ drivers/gpu/drm/i915/intel_wakeref.h | 133 ++++++++++++++++++++++++++++++ 5 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_wakeref.c create mode 100644 drivers/gpu/drm/i915/intel_wakeref.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 40130cf5c003..233bad5e361f 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -50,6 +50,7 @@ i915-y += i915_drv.o \ intel_device_info.o \ intel_pm.o \ intel_runtime_pm.o \ + intel_wakeref.o \ intel_uncore.o # core library code diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index 96a5d90629ec..e6b3e7588860 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -31,7 +31,8 @@ header_test := \ intel_psr.h \ intel_sdvo.h \ intel_sprite.h \ - intel_tv.h + intel_tv.h \ + intel_wakeref.h quiet_cmd_header_test = HDRTEST $@ cmd_header_test = echo "\#include \"$( $@ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d37832ffb471..437e394d9fa6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -74,6 +74,7 @@ #include "intel_opregion.h" #include "intel_uc.h" #include "intel_uncore.h" +#include "intel_wakeref.h" #include "intel_wopcm.h" #include "i915_gem.h" @@ -134,8 +135,6 @@ bool i915_error_injected(void); __i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \ fmt, ##__VA_ARGS__) -typedef depot_stack_handle_t intel_wakeref_t; - enum hpd_pin { HPD_NONE = 0, HPD_TV = HPD_NONE, /* TV is known to be unreliable */ diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c new file mode 100644 index 000000000000..1f94bc4ff9e4 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -0,0 +1,61 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "intel_drv.h" +#include "intel_wakeref.h" + +int __intel_wakeref_get_first(struct drm_i915_private *i915, + struct intel_wakeref *wf, + int (*fn)(struct intel_wakeref *wf)) +{ + /* + * Treat get/put as different subclasses, as we may need to run + * the put callback from under the shrinker and do not want to + * cross-contanimate that callback with any extra work performed + * upon acquiring the wakeref. 
+ */ + mutex_lock_nested(&wf->mutex, SINGLE_DEPTH_NESTING); + if (!atomic_read(&wf->count)) { + int err; + + wf->wakeref = intel_runtime_pm_get(i915); + + err = fn(wf); + if (unlikely(err)) { + intel_runtime_pm_put(i915, wf->wakeref); + mutex_unlock(&wf->mutex); + return err; + } + + smp_mb__before_atomic(); /* release wf->count */ + } + atomic_inc(&wf->count); + mutex_unlock(&wf->mutex); + + return 0; +} + +int __intel_wakeref_put_last(struct drm_i915_private *i915, + struct intel_wakeref *wf, + int (*fn)(struct intel_wakeref *wf)) +{ + int err; + + err = fn(wf); + if (likely(!err)) + intel_runtime_pm_put(i915, wf->wakeref); + else + atomic_inc(&wf->count); + mutex_unlock(&wf->mutex); + + return err; +} + +void __intel_wakeref_init(struct intel_wakeref *wf, struct lock_class_key *key) +{ + __mutex_init(&wf->mutex, "wakeref", key); + atomic_set(&wf->count, 0); +} diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h new file mode 100644 index 000000000000..a979d638344b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -0,0 +1,133 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_WAKEREF_H +#define INTEL_WAKEREF_H + +#include +#include +#include + +struct drm_i915_private; + +typedef depot_stack_handle_t intel_wakeref_t; + +struct intel_wakeref { + atomic_t count; + struct mutex mutex; + intel_wakeref_t wakeref; +}; + +void __intel_wakeref_init(struct intel_wakeref *wf, + struct lock_class_key *key); +#define intel_wakeref_init(wf) do { \ + static struct lock_class_key __key; \ + \ + __intel_wakeref_init((wf), &__key); \ +} while (0) + +int __intel_wakeref_get_first(struct drm_i915_private *i915, + struct intel_wakeref *wf, + int (*fn)(struct intel_wakeref *wf)); +int __intel_wakeref_put_last(struct drm_i915_private *i915, + struct intel_wakeref *wf, + int (*fn)(struct intel_wakeref *wf)); + +/** + * intel_wakeref_get: Acquire the wakeref + * @i915: the drm_i915_private device + * @wf: the wakeref + * @fn: callback for acquired the wakeref, called only on first acquire. + * + * Acquire a hold on the wakeref. The first user to do so, will acquire + * the runtime pm wakeref and then call the @fn underneath the wakeref + * mutex. + * + * Note that @fn is allowed to fail, in which case the runtime-pm wakeref + * will be released and the acquisition unwound, and an error reported. + * + * Returns: 0 if the wakeref was acquired successfully, or a negative error + * code otherwise. + */ +static inline int +intel_wakeref_get(struct drm_i915_private *i915, + struct intel_wakeref *wf, + int (*fn)(struct intel_wakeref *wf)) +{ + if (unlikely(!atomic_inc_not_zero(&wf->count))) + return __intel_wakeref_get_first(i915, wf, fn); + + return 0; +} + +/** + * intel_wakeref_put: Release the wakeref + * @i915: the drm_i915_private device + * @wf: the wakeref + * @fn: callback for releasing the wakeref, called only on final release. + * + * Release our hold on the wakeref. When there are no more users, + * the runtime pm wakeref will be released after the @fn callback is called + * underneath the wakeref mutex. + * + * Note that @fn is allowed to fail, in which case the runtime-pm wakeref + * is retained and an error reported. + * + * Returns: 0 if the wakeref was released successfully, or a negative error + * code otherwise. 
+ */ +static inline int +intel_wakeref_put(struct drm_i915_private *i915, + struct intel_wakeref *wf, + int (*fn)(struct intel_wakeref *wf)) +{ + if (atomic_dec_and_mutex_lock(&wf->count, &wf->mutex)) + return __intel_wakeref_put_last(i915, wf, fn); + + return 0; +} + +/** + * intel_wakeref_lock: Lock the wakeref (mutex) + * @wf: the wakeref + * + * Locks the wakeref to prevent it being acquired or released. New users + * can still adjust the counter, but the wakeref itself (and callback) + * cannot be acquired or released. + */ +static inline void +intel_wakeref_lock(struct intel_wakeref *wf) + __acquires(wf->mutex) +{ + mutex_lock(&wf->mutex); +} + +/** + * intel_wakeref_unlock: Unlock the wakeref + * @wf: the wakeref + * + * Releases a previously acquired intel_wakeref_lock(). + */ +static inline void +intel_wakeref_unlock(struct intel_wakeref *wf) + __releases(wf->mutex) +{ + mutex_unlock(&wf->mutex); +} + +/** + * intel_wakeref_active: Query whether the wakeref is currently held + * @wf: the wakeref + * + * Returns: true if the wakeref is currently held. + */ +static inline bool +intel_wakeref_active(struct intel_wakeref *wf) +{ + return atomic_read(&wf->count); +} + +#endif /* INTEL_WAKEREF_H */ -- cgit From 23c3c3d04fa7fcc60c91f1368cc5652a6774626b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 21:07:14 +0100 Subject: drm/i915: Pull the GEM powermangement coupling into its own file Split out the powermanagement portion (GT wakeref, suspend/resume) of GEM from i915_gem.c into its own file. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190424200717.1686-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/Makefile.header-test | 1 + drivers/gpu/drm/i915/i915_debugfs.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 12 +- drivers/gpu/drm/i915/i915_gem.c | 363 +-------------------- drivers/gpu/drm/i915/i915_gem_pm.c | 365 ++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_pm.h | 28 ++ drivers/gpu/drm/i915/selftests/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 8 +- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 10 +- 10 files changed, 418 insertions(+), 376 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_pm.c create mode 100644 drivers/gpu/drm/i915/i915_gem_pm.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 233bad5e361f..858642c7bc40 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -97,6 +97,7 @@ i915-y += \ i915_gem_internal.o \ i915_gem.o \ i915_gem_object.o \ + i915_gem_pm.o \ i915_gem_render_state.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index e6b3e7588860..702e3a7ade4c 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -5,6 +5,7 @@ header_test := \ i915_active_types.h \ i915_gem_context_types.h \ + i915_gem_pm.h \ i915_priolist_types.h \ i915_scheduler_types.h \ i915_timeline_types.h \ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b3fbd9e361ae..f77263d42253 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3942,8 +3942,8 @@ i915_drop_caches_set(void *data, u64 val) if (val & DROP_IDLE) { do { if (READ_ONCE(i915->gt.active_requests)) - flush_delayed_work(&i915->gt.retire_work); - 
drain_delayed_work(&i915->gt.idle_work); + flush_delayed_work(&i915->gem.retire_work); + drain_delayed_work(&i915->gem.idle_work); } while (READ_ONCE(i915->gt.awake)); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 437e394d9fa6..45e027f45e62 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2020,6 +2020,12 @@ struct drm_i915_private { */ intel_wakeref_t awake; + ktime_t last_init_time; + + struct i915_vma *scratch; + } gt; + + struct { /** * We leave the user IRQ off as much as possible, * but this means that requests will finish and never @@ -2037,11 +2043,7 @@ struct drm_i915_private { * off the idle_work. */ struct delayed_work idle_work; - - ktime_t last_init_time; - - struct i915_vma *scratch; - } gt; + } gem; /* For i945gm vblank irq vs. C3 workaround */ struct { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 21adeb340357..7f833c97138e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,7 +46,7 @@ #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gemfs.h" -#include "i915_globals.h" +#include "i915_gem_pm.h" #include "i915_trace.h" #include "i915_vgpu.h" @@ -103,105 +103,6 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, spin_unlock(&dev_priv->mm.object_stat_lock); } -static void __i915_gem_park(struct drm_i915_private *i915) -{ - intel_wakeref_t wakeref; - - GEM_TRACE("\n"); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(i915->gt.active_requests); - GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); - - if (!i915->gt.awake) - return; - - /* - * Be paranoid and flush a concurrent interrupt to make sure - * we don't reactivate any irq tasklets after parking. - * - * FIXME: Note that even though we have waited for execlists to be idle, - * there may still be an in-flight interrupt even though the CSB - * is now empty. synchronize_irq() makes sure that a residual interrupt - * is completed before we continue, but it doesn't prevent the HW from - * raising a spurious interrupt later. To complete the shield we should - * coordinate disabling the CS irq with flushing the interrupts. - */ - synchronize_irq(i915->drm.irq); - - intel_engines_park(i915); - i915_timelines_park(i915); - - i915_pmu_gt_parked(i915); - i915_vma_parked(i915); - - wakeref = fetch_and_zero(&i915->gt.awake); - GEM_BUG_ON(!wakeref); - - if (INTEL_GEN(i915) >= 6) - gen6_rps_idle(i915); - - intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); - - i915_globals_park(); -} - -void i915_gem_park(struct drm_i915_private *i915) -{ - GEM_TRACE("\n"); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(i915->gt.active_requests); - - if (!i915->gt.awake) - return; - - /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ - mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100)); -} - -void i915_gem_unpark(struct drm_i915_private *i915) -{ - GEM_TRACE("\n"); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(!i915->gt.active_requests); - assert_rpm_wakelock_held(i915); - - if (i915->gt.awake) - return; - - /* - * It seems that the DMC likes to transition between the DC states a lot - * when there are no connected displays (no active power domains) during - * command submission. - * - * This activity has negative impact on the performance of the chip with - * huge latencies observed in the interrupt handler and elsewhere. 
- * - * Work around it by grabbing a GT IRQ power domain whilst there is any - * GT activity, preventing any DC state transitions. - */ - i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); - GEM_BUG_ON(!i915->gt.awake); - - i915_globals_unpark(); - - intel_enable_gt_powersave(i915); - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); - i915_pmu_gt_unparked(i915); - - intel_engines_unpark(i915); - - i915_queue_hangcheck(i915); - - queue_delayed_work(i915->wq, - &i915->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -2088,7 +1989,7 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) if (!err) break; - } while (flush_delayed_work(&dev_priv->gt.retire_work)); + } while (flush_delayed_work(&dev_priv->gem.retire_work)); return err; } @@ -2848,132 +2749,6 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } -static void -i915_gem_retire_work_handler(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), gt.retire_work.work); - struct drm_device *dev = &dev_priv->drm; - - /* Come back later if the device is busy... */ - if (mutex_trylock(&dev->struct_mutex)) { - i915_retire_requests(dev_priv); - mutex_unlock(&dev->struct_mutex); - } - - /* - * Keep the retire handler running until we are finally idle. - * We do not need to do this test under locking as in the worst-case - * we queue the retire worker once too often. - */ - if (READ_ONCE(dev_priv->gt.awake)) - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - -static bool switch_to_kernel_context_sync(struct drm_i915_private *i915, - unsigned long mask) -{ - bool result = true; - - /* - * Even if we fail to switch, give whatever is running a small chance - * to save itself before we report the failure. Yes, this may be a - * false positive due to e.g. ENOMEM, caveat emptor! - */ - if (i915_gem_switch_to_kernel_context(i915, mask)) - result = false; - - if (i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - I915_GEM_IDLE_TIMEOUT)) - result = false; - - if (!result) { - if (i915_modparams.reset) { /* XXX hide warning from gem_eio */ - dev_err(i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); - } - - /* Forcibly cancel outstanding work and leave the gpu quiet. */ - i915_gem_set_wedged(i915); - } - - i915_retire_requests(i915); /* ensure we flush after wedging */ - return result; -} - -static bool load_power_context(struct drm_i915_private *i915) -{ - /* Force loading the kernel context on all engines */ - if (!switch_to_kernel_context_sync(i915, ALL_ENGINES)) - return false; - - /* - * Immediately park the GPU so that we enable powersaving and - * treat it as idle. The next time we issue a request, we will - * unpark and start using the engine->pinned_default_state, otherwise - * it is in limbo and an early reset may fail. 
- */ - __i915_gem_park(i915); - - return true; -} - -static void -i915_gem_idle_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gt.idle_work.work); - bool rearm_hangcheck; - - if (!READ_ONCE(i915->gt.awake)) - return; - - if (READ_ONCE(i915->gt.active_requests)) - return; - - rearm_hangcheck = - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); - - if (!mutex_trylock(&i915->drm.struct_mutex)) { - /* Currently busy, come back later */ - mod_delayed_work(i915->wq, - &i915->gt.idle_work, - msecs_to_jiffies(50)); - goto out_rearm; - } - - /* - * Flush out the last user context, leaving only the pinned - * kernel context resident. Should anything unfortunate happen - * while we are idle (such as the GPU being power cycled), no users - * will be harmed. - */ - if (!work_pending(&i915->gt.idle_work.work) && - !i915->gt.active_requests) { - ++i915->gt.active_requests; /* don't requeue idle */ - - switch_to_kernel_context_sync(i915, i915->gt.active_engines); - - if (!--i915->gt.active_requests) { - __i915_gem_park(i915); - rearm_hangcheck = false; - } - } - - mutex_unlock(&i915->drm.struct_mutex); - -out_rearm: - if (rearm_hangcheck) { - GEM_BUG_ON(!i915->gt.awake); - i915_queue_hangcheck(i915); - } -} - void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) { struct drm_i915_private *i915 = to_i915(gem->dev); @@ -4389,133 +4164,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915) mutex_unlock(&i915->drm.struct_mutex); } -void i915_gem_suspend(struct drm_i915_private *i915) -{ - intel_wakeref_t wakeref; - - GEM_TRACE("\n"); - - wakeref = intel_runtime_pm_get(i915); - - flush_workqueue(i915->wq); - - mutex_lock(&i915->drm.struct_mutex); - - /* - * We have to flush all the executing contexts to main memory so - * that they can saved in the hibernation image. To ensure the last - * context image is coherent, we have to switch away from it. That - * leaves the i915->kernel_context still active when - * we actually suspend, and its image in memory may not match the GPU - * state. Fortunately, the kernel_context is disposable and we do - * not rely on its state. - */ - switch_to_kernel_context_sync(i915, i915->gt.active_engines); - - mutex_unlock(&i915->drm.struct_mutex); - i915_reset_flush(i915); - - drain_delayed_work(&i915->gt.retire_work); - - /* - * As the idle_work is rearming if it detects a race, play safe and - * repeat the flush until it is definitely idle. - */ - drain_delayed_work(&i915->gt.idle_work); - - /* - * Assert that we successfully flushed all the work and - * reset the GPU back to its idle, low power state. - */ - GEM_BUG_ON(i915->gt.awake); - - intel_uc_suspend(i915); - - intel_runtime_pm_put(i915, wakeref); -} - -void i915_gem_suspend_late(struct drm_i915_private *i915) -{ - struct drm_i915_gem_object *obj; - struct list_head *phases[] = { - &i915->mm.unbound_list, - &i915->mm.bound_list, - NULL - }, **phase; - - /* - * Neither the BIOS, ourselves or any other kernel - * expects the system to be in execlists mode on startup, - * so we need to reset the GPU back to legacy mode. And the only - * known way to disable logical contexts is through a GPU reset. - * - * So in order to leave the system in a known default configuration, - * always reset the GPU upon unload and suspend. Afterwards we then - * clean up the GEM state tracking, flushing off the requests and - * leaving the system in a known idle state. 
- * - * Note that is of the upmost importance that the GPU is idle and - * all stray writes are flushed *before* we dismantle the backing - * storage for the pinned objects. - * - * However, since we are uncertain that resetting the GPU on older - * machines is a good idea, we don't - just in case it leaves the - * machine in an unusable condition. - */ - - mutex_lock(&i915->drm.struct_mutex); - for (phase = phases; *phase; phase++) { - list_for_each_entry(obj, *phase, mm.link) - WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); - } - mutex_unlock(&i915->drm.struct_mutex); - - intel_uc_sanitize(i915); - i915_gem_sanitize(i915); -} - -void i915_gem_resume(struct drm_i915_private *i915) -{ - GEM_TRACE("\n"); - - WARN_ON(i915->gt.awake); - - mutex_lock(&i915->drm.struct_mutex); - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - - i915_gem_restore_gtt_mappings(i915); - i915_gem_restore_fences(i915); - - /* - * As we didn't flush the kernel context before suspend, we cannot - * guarantee that the context image is complete. So let's just reset - * it and start again. - */ - intel_gt_resume(i915); - - if (i915_gem_init_hw(i915)) - goto err_wedged; - - intel_uc_resume(i915); - - /* Always reload a context for powersaving. */ - if (!load_power_context(i915)) - goto err_wedged; - -out_unlock: - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - mutex_unlock(&i915->drm.struct_mutex); - return; - -err_wedged: - if (!i915_reset_failed(i915)) { - dev_err(i915->drm.dev, - "Failed to re-initialize GPU, declaring it wedged!\n"); - i915_gem_set_wedged(i915); - } - goto out_unlock; -} - void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) { if (INTEL_GEN(dev_priv) < 5 || @@ -4698,7 +4346,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) } /* Flush the default context image to memory, and enable powersaving. */ - if (!load_power_context(i915)) { + if (!i915_gem_load_power_context(i915)) { err = -EIO; goto err_active; } @@ -5113,11 +4761,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&dev_priv->gt.closed_vma); i915_gem_init__mm(dev_priv); + i915_gem_init__pm(dev_priv); - INIT_DELAYED_WORK(&dev_priv->gt.retire_work, - i915_gem_retire_work_handler); - INIT_DELAYED_WORK(&dev_priv->gt.idle_work, - i915_gem_idle_work_handler); init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); mutex_init(&dev_priv->gpu_error.wedge_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c new file mode 100644 index 000000000000..9fb0e8d567a2 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_pm.c @@ -0,0 +1,365 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_gem_pm.h" +#include "i915_globals.h" +#include "intel_pm.h" + +static void __i915_gem_park(struct drm_i915_private *i915) +{ + intel_wakeref_t wakeref; + + GEM_TRACE("\n"); + + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(i915->gt.active_requests); + GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); + + if (!i915->gt.awake) + return; + + /* + * Be paranoid and flush a concurrent interrupt to make sure + * we don't reactivate any irq tasklets after parking. + * + * FIXME: Note that even though we have waited for execlists to be idle, + * there may still be an in-flight interrupt even though the CSB + * is now empty. 
synchronize_irq() makes sure that a residual interrupt + * is completed before we continue, but it doesn't prevent the HW from + * raising a spurious interrupt later. To complete the shield we should + * coordinate disabling the CS irq with flushing the interrupts. + */ + synchronize_irq(i915->drm.irq); + + intel_engines_park(i915); + i915_timelines_park(i915); + + i915_pmu_gt_parked(i915); + i915_vma_parked(i915); + + wakeref = fetch_and_zero(&i915->gt.awake); + GEM_BUG_ON(!wakeref); + + if (INTEL_GEN(i915) >= 6) + gen6_rps_idle(i915); + + intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); + + i915_globals_park(); +} + +static bool switch_to_kernel_context_sync(struct drm_i915_private *i915, + unsigned long mask) +{ + bool result = true; + + /* + * Even if we fail to switch, give whatever is running a small chance + * to save itself before we report the failure. Yes, this may be a + * false positive due to e.g. ENOMEM, caveat emptor! + */ + if (i915_gem_switch_to_kernel_context(i915, mask)) + result = false; + + if (i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED | + I915_WAIT_FOR_IDLE_BOOST, + I915_GEM_IDLE_TIMEOUT)) + result = false; + + if (!result) { + if (i915_modparams.reset) { /* XXX hide warning from gem_eio */ + dev_err(i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + GEM_TRACE_DUMP(); + } + + /* Forcibly cancel outstanding work and leave the gpu quiet. */ + i915_gem_set_wedged(i915); + } + + i915_retire_requests(i915); /* ensure we flush after wedging */ + return result; +} + +static void idle_work_handler(struct work_struct *work) +{ + struct drm_i915_private *i915 = + container_of(work, typeof(*i915), gem.idle_work.work); + bool rearm_hangcheck; + + if (!READ_ONCE(i915->gt.awake)) + return; + + if (READ_ONCE(i915->gt.active_requests)) + return; + + rearm_hangcheck = + cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + + if (!mutex_trylock(&i915->drm.struct_mutex)) { + /* Currently busy, come back later */ + mod_delayed_work(i915->wq, + &i915->gem.idle_work, + msecs_to_jiffies(50)); + goto out_rearm; + } + + /* + * Flush out the last user context, leaving only the pinned + * kernel context resident. Should anything unfortunate happen + * while we are idle (such as the GPU being power cycled), no users + * will be harmed. + */ + if (!work_pending(&i915->gem.idle_work.work) && + !i915->gt.active_requests) { + ++i915->gt.active_requests; /* don't requeue idle */ + + switch_to_kernel_context_sync(i915, i915->gt.active_engines); + + if (!--i915->gt.active_requests) { + __i915_gem_park(i915); + rearm_hangcheck = false; + } + } + + mutex_unlock(&i915->drm.struct_mutex); + +out_rearm: + if (rearm_hangcheck) { + GEM_BUG_ON(!i915->gt.awake); + i915_queue_hangcheck(i915); + } +} + +static void retire_work_handler(struct work_struct *work) +{ + struct drm_i915_private *i915 = + container_of(work, typeof(*i915), gem.retire_work.work); + + /* Come back later if the device is busy... */ + if (mutex_trylock(&i915->drm.struct_mutex)) { + i915_retire_requests(i915); + mutex_unlock(&i915->drm.struct_mutex); + } + + /* + * Keep the retire handler running until we are finally idle. + * We do not need to do this test under locking as in the worst-case + * we queue the retire worker once too often. 
+ */ + if (READ_ONCE(i915->gt.awake)) + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); +} + +void i915_gem_park(struct drm_i915_private *i915) +{ + GEM_TRACE("\n"); + + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(i915->gt.active_requests); + + if (!i915->gt.awake) + return; + + /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ + mod_delayed_work(i915->wq, &i915->gem.idle_work, msecs_to_jiffies(100)); +} + +void i915_gem_unpark(struct drm_i915_private *i915) +{ + GEM_TRACE("\n"); + + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(!i915->gt.active_requests); + assert_rpm_wakelock_held(i915); + + if (i915->gt.awake) + return; + + /* + * It seems that the DMC likes to transition between the DC states a lot + * when there are no connected displays (no active power domains) during + * command submission. + * + * This activity has negative impact on the performance of the chip with + * huge latencies observed in the interrupt handler and elsewhere. + * + * Work around it by grabbing a GT IRQ power domain whilst there is any + * GT activity, preventing any DC state transitions. + */ + i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + GEM_BUG_ON(!i915->gt.awake); + + i915_globals_unpark(); + + intel_enable_gt_powersave(i915); + i915_update_gfx_val(i915); + if (INTEL_GEN(i915) >= 6) + gen6_rps_busy(i915); + i915_pmu_gt_unparked(i915); + + intel_engines_unpark(i915); + + i915_queue_hangcheck(i915); + + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); +} + +bool i915_gem_load_power_context(struct drm_i915_private *i915) +{ + /* Force loading the kernel context on all engines */ + if (!switch_to_kernel_context_sync(i915, ALL_ENGINES)) + return false; + + /* + * Immediately park the GPU so that we enable powersaving and + * treat it as idle. The next time we issue a request, we will + * unpark and start using the engine->pinned_default_state, otherwise + * it is in limbo and an early reset may fail. + */ + __i915_gem_park(i915); + + return true; +} + +void i915_gem_suspend(struct drm_i915_private *i915) +{ + intel_wakeref_t wakeref; + + GEM_TRACE("\n"); + + wakeref = intel_runtime_pm_get(i915); + + mutex_lock(&i915->drm.struct_mutex); + + /* + * We have to flush all the executing contexts to main memory so + * that they can saved in the hibernation image. To ensure the last + * context image is coherent, we have to switch away from it. That + * leaves the i915->kernel_context still active when + * we actually suspend, and its image in memory may not match the GPU + * state. Fortunately, the kernel_context is disposable and we do + * not rely on its state. + */ + switch_to_kernel_context_sync(i915, i915->gt.active_engines); + + mutex_unlock(&i915->drm.struct_mutex); + i915_reset_flush(i915); + + drain_delayed_work(&i915->gem.retire_work); + + /* + * As the idle_work is rearming if it detects a race, play safe and + * repeat the flush until it is definitely idle. + */ + drain_delayed_work(&i915->gem.idle_work); + + flush_workqueue(i915->wq); + + /* + * Assert that we successfully flushed all the work and + * reset the GPU back to its idle, low power state. 
+ */ + GEM_BUG_ON(i915->gt.awake); + + intel_uc_suspend(i915); + + intel_runtime_pm_put(i915, wakeref); +} + +void i915_gem_suspend_late(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct list_head *phases[] = { + &i915->mm.unbound_list, + &i915->mm.bound_list, + NULL + }, **phase; + + /* + * Neither the BIOS, ourselves or any other kernel + * expects the system to be in execlists mode on startup, + * so we need to reset the GPU back to legacy mode. And the only + * known way to disable logical contexts is through a GPU reset. + * + * So in order to leave the system in a known default configuration, + * always reset the GPU upon unload and suspend. Afterwards we then + * clean up the GEM state tracking, flushing off the requests and + * leaving the system in a known idle state. + * + * Note that is of the upmost importance that the GPU is idle and + * all stray writes are flushed *before* we dismantle the backing + * storage for the pinned objects. + * + * However, since we are uncertain that resetting the GPU on older + * machines is a good idea, we don't - just in case it leaves the + * machine in an unusable condition. + */ + + mutex_lock(&i915->drm.struct_mutex); + for (phase = phases; *phase; phase++) { + list_for_each_entry(obj, *phase, mm.link) + WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); + } + mutex_unlock(&i915->drm.struct_mutex); + + intel_uc_sanitize(i915); + i915_gem_sanitize(i915); +} + +void i915_gem_resume(struct drm_i915_private *i915) +{ + GEM_TRACE("\n"); + + WARN_ON(i915->gt.awake); + + mutex_lock(&i915->drm.struct_mutex); + intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); + + i915_gem_restore_gtt_mappings(i915); + i915_gem_restore_fences(i915); + + /* + * As we didn't flush the kernel context before suspend, we cannot + * guarantee that the context image is complete. So let's just reset + * it and start again. + */ + intel_gt_resume(i915); + + if (i915_gem_init_hw(i915)) + goto err_wedged; + + intel_uc_resume(i915); + + /* Always reload a context for powersaving. 
*/ + if (!i915_gem_load_power_context(i915)) + goto err_wedged; + +out_unlock: + intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + mutex_unlock(&i915->drm.struct_mutex); + return; + +err_wedged: + if (!i915_reset_failed(i915)) { + dev_err(i915->drm.dev, + "Failed to re-initialize GPU, declaring it wedged!\n"); + i915_gem_set_wedged(i915); + } + goto out_unlock; +} + +void i915_gem_init__pm(struct drm_i915_private *i915) +{ + INIT_DELAYED_WORK(&i915->gem.idle_work, idle_work_handler); + INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); +} diff --git a/drivers/gpu/drm/i915/i915_gem_pm.h b/drivers/gpu/drm/i915/i915_gem_pm.h new file mode 100644 index 000000000000..52f65e3f06b5 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_pm.h @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_PM_H__ +#define __I915_GEM_PM_H__ + +#include + +struct drm_i915_private; +struct work_struct; + +void i915_gem_init__pm(struct drm_i915_private *i915); + +bool i915_gem_load_power_context(struct drm_i915_private *i915); +void i915_gem_resume(struct drm_i915_private *i915); + +void i915_gem_unpark(struct drm_i915_private *i915); +void i915_gem_park(struct drm_i915_private *i915); + +void i915_gem_idle_work_handler(struct work_struct *work); + +void i915_gem_suspend(struct drm_i915_private *i915); +void i915_gem_suspend_late(struct drm_i915_private *i915); + +#endif /* __I915_GEM_PM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 6f52ca881173..9d646fa1b74e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -1658,7 +1658,7 @@ static int __igt_switch_to_kernel_context(struct drm_i915_private *i915, /* XXX Bonus points for proving we are the kernel context! 
*/ mutex_unlock(&i915->drm.struct_mutex); - drain_delayed_work(&i915->gt.idle_work); + drain_delayed_work(&i915->gem.idle_work); mutex_lock(&i915->drm.struct_mutex); }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 971148fbe6f5..12fc53c694a6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -514,8 +514,8 @@ static void disable_retire_worker(struct drm_i915_private *i915) } mutex_unlock(&i915->drm.struct_mutex); - cancel_delayed_work_sync(&i915->gt.retire_work); - cancel_delayed_work_sync(&i915->gt.idle_work); + cancel_delayed_work_sync(&i915->gem.retire_work); + cancel_delayed_work_sync(&i915->gem.idle_work); } static int igt_mmap_offset_exhaustion(void *arg) @@ -617,9 +617,9 @@ out: out_park: mutex_lock(&i915->drm.struct_mutex); if (--i915->gt.active_requests) - queue_delayed_work(i915->wq, &i915->gt.retire_work, 0); + queue_delayed_work(i915->wq, &i915->gem.retire_work, 0); else - queue_delayed_work(i915->wq, &i915->gt.idle_work, 0); + queue_delayed_work(i915->wq, &i915->gem.idle_work, 0); mutex_unlock(&i915->drm.struct_mutex); i915_gem_shrinker_register(i915); return err;
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index f444ee5add27..fb677b4019a0 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -59,8 +59,8 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); - drain_delayed_work(&i915->gt.retire_work); - drain_delayed_work(&i915->gt.idle_work); + drain_delayed_work(&i915->gem.retire_work); + drain_delayed_work(&i915->gem.idle_work); i915_gem_drain_workqueue(i915); mutex_lock(&i915->drm.struct_mutex); @@ -111,7 +111,7 @@ static void mock_retire_work_handler(struct work_struct *work) static void mock_idle_work_handler(struct work_struct *work) { struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gt.idle_work.work); + container_of(work, typeof(*i915), gem.idle_work.work); i915->gt.active_engines = 0; } @@ -197,8 +197,8 @@ struct drm_i915_private *mock_gem_device(void) mock_init_contexts(i915); - INIT_DELAYED_WORK(&i915->gt.retire_work, mock_retire_work_handler); - INIT_DELAYED_WORK(&i915->gt.idle_work, mock_idle_work_handler); + INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler); + INIT_DELAYED_WORK(&i915->gem.idle_work, mock_idle_work_handler); i915->gt.awake = true;
-- cgit From 6eee33e87f6d1f6263162ce0874c1ef503eff041 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 21:07:15 +0100 Subject: drm/i915: Introduce context->enter() and context->exit()
We wish to start segregating the power management into different control domains, both with respect to the hardware and the user interface. The first step is that at the lowest level flow of requests, we want to process a context event (and not a global GEM operation). In this patch, we introduce the context callbacks that in future patches will be redirected to per-engine interfaces leading to global operations as required. The intent is that this will be guarded by the timeline->mutex, except that retiring has not quite finished transitioning over from being guarded by struct_mutex. So at the moment it is protected by struct_mutex with a reminder to switch.
v2: Rename default handlers to intel_context_enter_engine.
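To make the refcounting concrete, here is a stand-alone sketch of the transition counting being introduced; the types and names are invented for illustration and are not the driver's own. The enter()/exit() hooks fire only on the 0 -> 1 and 1 -> 0 edges of active_count, which in the driver is notionally guarded by timeline->mutex.

/* Minimal sketch, assuming simplified types; compile with any C compiler. */
#include <assert.h>
#include <stdio.h>

struct ctx;

struct ctx_ops {
        void (*enter)(struct ctx *ce);
        void (*exit)(struct ctx *ce);
};

struct ctx {
        unsigned int active_count;      /* guarded by the timeline lock */
        const struct ctx_ops *ops;
};

static void ctx_enter(struct ctx *ce)
{
        if (!ce->active_count++)        /* first user: context becomes active */
                ce->ops->enter(ce);
}

static void ctx_exit(struct ctx *ce)
{
        assert(ce->active_count);
        if (!--ce->active_count)        /* last user: context may idle */
                ce->ops->exit(ce);
}

static void on_enter(struct ctx *ce) { (void)ce; puts("context active"); }
static void on_exit(struct ctx *ce) { (void)ce; puts("context idle"); }

int main(void)
{
        static const struct ctx_ops ops = { on_enter, on_exit };
        struct ctx ce = { 0, &ops };

        ctx_enter(&ce);         /* prints "context active" */
        ctx_enter(&ce);         /* nested use: no callback */
        ctx_exit(&ce);
        ctx_exit(&ce);          /* prints "context idle" */
        return 0;
}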
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190424200717.1686-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_context.c | 17 +++++++++++++++++ drivers/gpu/drm/i915/gt/intel_context.h | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_context_types.h | 5 +++++ drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++ drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 3 +++ drivers/gpu/drm/i915/gt/mock_engine.c | 3 +++ drivers/gpu/drm/i915/i915_request.c | 22 ++++------------------ 7 files changed, 56 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index ebd1e5919a4a..4410e20e8e13 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -266,3 +266,20 @@ int __init i915_global_context_init(void) i915_global_register(&global.base); return 0; } + +void intel_context_enter_engine(struct intel_context *ce) +{ + struct drm_i915_private *i915 = ce->gem_context->i915; + + if (!i915->gt.active_requests++) + i915_gem_unpark(i915); +} + +void intel_context_exit_engine(struct intel_context *ce) +{ + struct drm_i915_private *i915 = ce->gem_context->i915; + + GEM_BUG_ON(!i915->gt.active_requests); + if (!--i915->gt.active_requests) + i915_gem_park(i915); +} diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index ebc861b1a49e..b732cf99efcb 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -73,6 +73,27 @@ static inline void __intel_context_pin(struct intel_context *ce) void intel_context_unpin(struct intel_context *ce); +void intel_context_enter_engine(struct intel_context *ce); +void intel_context_exit_engine(struct intel_context *ce); + +static inline void intel_context_enter(struct intel_context *ce) +{ + if (!ce->active_count++) + ce->ops->enter(ce); +} + +static inline void intel_context_mark_active(struct intel_context *ce) +{ + ++ce->active_count; +} + +static inline void intel_context_exit(struct intel_context *ce) +{ + GEM_BUG_ON(!ce->active_count); + if (!--ce->active_count) + ce->ops->exit(ce); +} + static inline struct intel_context *intel_context_get(struct intel_context *ce) { kref_get(&ce->ref); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 9ec4f787c908..f02d27734e3b 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -25,6 +25,9 @@ struct intel_context_ops { int (*pin)(struct intel_context *ce); void (*unpin)(struct intel_context *ce); + void (*enter)(struct intel_context *ce); + void (*exit)(struct intel_context *ce); + void (*reset)(struct intel_context *ce); void (*destroy)(struct kref *kref); }; @@ -46,6 +49,8 @@ struct intel_context { u32 *lrc_reg_state; u64 lrc_desc; + unsigned int active_count; /* notionally protected by timeline->mutex */ + atomic_t pin_count; struct mutex pin_mutex; /* guards pinning and associated on-gpuing */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 5cadf8f6a23d..edec7f183688 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1315,6 +1315,9 @@ static const struct intel_context_ops execlists_context_ops = { .pin = execlists_context_pin, .unpin = execlists_context_unpin, + .enter = intel_context_enter_engine, + .exit = intel_context_exit_engine, 
+ .reset = execlists_context_reset, .destroy = execlists_context_destroy, }; diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index ac84a383748e..5404fe382691 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1510,6 +1510,9 @@ static const struct intel_context_ops ring_context_ops = { .pin = ring_context_pin, .unpin = ring_context_unpin, + .enter = intel_context_enter_engine, + .exit = intel_context_exit_engine, + .reset = ring_context_reset, .destroy = ring_context_destroy, }; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 414afd2f27fe..bcfeb0c67997 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -157,6 +157,9 @@ static const struct intel_context_ops mock_context_ops = { .pin = mock_context_pin, .unpin = mock_context_unpin, + .enter = intel_context_enter_engine, + .exit = intel_context_exit_engine, + .destroy = mock_context_destroy, }; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 64ca8b3ea12f..9a2665ee012a 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -131,19 +131,6 @@ i915_request_remove_from_client(struct i915_request *request) spin_unlock(&file_priv->mm.lock); } -static void reserve_gt(struct drm_i915_private *i915) -{ - if (!i915->gt.active_requests++) - i915_gem_unpark(i915); -} - -static void unreserve_gt(struct drm_i915_private *i915) -{ - GEM_BUG_ON(!i915->gt.active_requests); - if (!--i915->gt.active_requests) - i915_gem_park(i915); -} - static void advance_ring(struct i915_request *request) { struct intel_ring *ring = request->ring; @@ -301,11 +288,10 @@ static void i915_request_retire(struct i915_request *request) i915_request_remove_from_client(request); - intel_context_unpin(request->hw_context); - __retire_engine_upto(request->engine, request); - unreserve_gt(request->i915); + intel_context_exit(request->hw_context); + intel_context_unpin(request->hw_context); i915_sched_node_fini(&request->sched); i915_request_put(request); @@ -659,8 +645,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (IS_ERR(ce)) return ERR_CAST(ce); - reserve_gt(i915); mutex_lock(&ce->ring->timeline->mutex); + intel_context_enter(ce); /* Move our oldest request to the slab-cache (if not in use!) */ rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); @@ -791,8 +777,8 @@ err_unwind: err_free: kmem_cache_free(global.slab_requests, rq); err_unreserve: + intel_context_exit(ce); mutex_unlock(&ce->ring->timeline->mutex); - unreserve_gt(i915); intel_context_unpin(ce); return ERR_PTR(ret); } -- cgit From 2ccdf6a1c3f7ff51d721ee7a5bed96e03da77205 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 21:07:16 +0100 Subject: drm/i915: Pass intel_context to i915_request_create() Start acquiring the logical intel_context and using that as our primary means for request allocation. This is the initial step to allow us to avoid requiring struct_mutex for request allocation along the perma-pinned kernel context, but it also provides a foundation for breaking up the complex request allocation to handle different scenarios inside execbuf. For the purpose of emitting a request from inside retirement (see the next patch for engine power management), we also need to lift control over the timeline mutex to the caller. 
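As a rough user-space model of that calling convention (assumed, simplified names; a pthread mutex stands in for timeline->mutex, so this is an illustration rather than the driver code): the create helper returns with the context's timeline lock held, and adding the request commits it and drops the lock, so nothing depends on a device-global mutex.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct timeline {
        pthread_mutex_t mutex;
        unsigned int seqno;
};

struct context {
        struct timeline *tl;
};

struct request {
        struct context *ce;
        unsigned int seqno;
};

/* Caller must already hold ce->tl->mutex. */
static struct request *request_create_locked(struct context *ce)
{
        struct request *rq = malloc(sizeof(*rq));

        if (!rq)
                return NULL;
        rq->ce = ce;
        rq->seqno = ++ce->tl->seqno;    /* the lock orders seqno allocation */
        return rq;
}

/* Convenience wrapper: on success the timeline lock is still held. */
static struct request *request_create(struct context *ce)
{
        struct request *rq;

        pthread_mutex_lock(&ce->tl->mutex);
        rq = request_create_locked(ce);
        if (!rq)
                pthread_mutex_unlock(&ce->tl->mutex);
        return rq;
}

/* Commit the request and release the timeline lock taken at creation. */
static void request_add(struct request *rq)
{
        struct timeline *tl = rq->ce->tl;

        printf("queued request %u\n", rq->seqno);
        pthread_mutex_unlock(&tl->mutex);
        free(rq);
}

int main(void)
{
        struct timeline tl = { PTHREAD_MUTEX_INITIALIZER, 0 };
        struct context ce = { &tl };
        struct request *rq = request_create(&ce);

        if (rq)
                request_add(rq);
        return 0;
}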
v2: Note that the request carries the active reference upon construction. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190424200717.1686-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_context.h | 12 + drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 3 - drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 4 +- drivers/gpu/drm/i915/i915_perf.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 247 ++++++++++++--------- drivers/gpu/drm/i915/i915_request.h | 7 + drivers/gpu/drm/i915/intel_overlay.c | 5 +- drivers/gpu/drm/i915/selftests/i915_active.c | 2 +- .../gpu/drm/i915/selftests/i915_gem_coherency.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 2 +- drivers/gpu/drm/i915/selftests/i915_request.c | 9 +- drivers/gpu/drm/i915/selftests/i915_timeline.c | 4 +- 14 files changed, 177 insertions(+), 126 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index b732cf99efcb..60379eb37949 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -105,4 +105,16 @@ static inline void intel_context_put(struct intel_context *ce) kref_put(&ce->ref, ce->ops->destroy); } +static inline void intel_context_timeline_lock(struct intel_context *ce) + __acquires(&ce->ring->timeline->mutex) +{ + mutex_lock(&ce->ring->timeline->mutex); +} + +static inline void intel_context_timeline_unlock(struct intel_context *ce) + __releases(&ce->ring->timeline->mutex) +{ + mutex_unlock(&ce->ring->timeline->mutex); +} + #endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 7db498567843..cdf184403d46 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -783,7 +783,7 @@ static void restart_work(struct work_struct *work) if (!intel_engine_is_idle(engine)) continue; - rq = i915_request_alloc(engine, i915->kernel_context); + rq = i915_request_create(engine->kernel_context); if (!IS_ERR(rq)) i915_request_add(rq); } diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 5404fe382691..f89541274d44 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1772,7 +1772,6 @@ static int switch_context(struct i915_request *rq) u32 hw_flags = 0; int ret, i; - lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); if (ppgtt) { @@ -1902,8 +1901,6 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) struct i915_request *target; long timeout; - lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex); - if (intel_ring_update_space(ring) >= bytes) return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index f46ed0e2f07c..364696221fd7 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1356,7 +1356,7 @@ static int engine_wa_list_verify(struct intel_engine_cs *engine, if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_request_alloc(engine, engine->kernel_context->gem_context); + rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_vma; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c 
index 37dff694456c..3eb1a664b5fa 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -942,7 +942,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, struct intel_ring *ring; struct i915_request *rq; - rq = i915_request_alloc(engine, i915->kernel_context); + rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -1188,7 +1188,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) /* Submitting requests etc needs the hw awake. */ wakeref = intel_runtime_pm_get(i915); - rq = i915_request_alloc(ce->engine, i915->kernel_context); + rq = i915_request_create(ce->engine->kernel_context); if (IS_ERR(rq)) { ret = PTR_ERR(rq); goto out_put; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a87f790335c1..328a740e72cb 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1762,7 +1762,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, * Apply the configuration by doing one context restore of the edited * context image. */ - rq = i915_request_alloc(engine, dev_priv->kernel_context); + rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) return PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9a2665ee012a..705c125bafc6 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -581,7 +581,7 @@ static void ring_retire_requests(struct intel_ring *ring) } static noinline struct i915_request * -i915_request_alloc_slow(struct intel_context *ce) +request_alloc_slow(struct intel_context *ce, gfp_t gfp) { struct intel_ring *ring = ce->ring; struct i915_request *rq; @@ -589,6 +589,9 @@ i915_request_alloc_slow(struct intel_context *ce) if (list_empty(&ring->request_list)) goto out; + if (!gfpflags_allow_blocking(gfp)) + goto out; + /* Ratelimit ourselves to prevent oom from malicious clients */ rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link); cond_synchronize_rcu(rq->rcustate); @@ -597,62 +600,21 @@ i915_request_alloc_slow(struct intel_context *ce) ring_retire_requests(ring); out: - return kmem_cache_alloc(global.slab_requests, GFP_KERNEL); + return kmem_cache_alloc(global.slab_requests, gfp); } -/** - * i915_request_alloc - allocate a request structure - * - * @engine: engine that we wish to issue the request on. - * @ctx: context that the request will be associated with. - * - * Returns a pointer to the allocated request if successful, - * or an error code if not. - */ struct i915_request * -i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) +__i915_request_create(struct intel_context *ce, gfp_t gfp) { - struct drm_i915_private *i915 = engine->i915; - struct intel_context *ce; - struct i915_timeline *tl; + struct i915_timeline *tl = ce->ring->timeline; struct i915_request *rq; u32 seqno; int ret; - lockdep_assert_held(&i915->drm.struct_mutex); - - /* - * Preempt contexts are reserved for exclusive use to inject a - * preemption context switch. They are never to be used for any trivial - * request! - */ - GEM_BUG_ON(ctx == i915->preempt_context); - - /* - * ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged. 
- */ - ret = i915_terminally_wedged(i915); - if (ret) - return ERR_PTR(ret); + might_sleep_if(gfpflags_allow_blocking(gfp)); - /* - * Pinning the contexts may generate requests in order to acquire - * GGTT space, so do this first before we reserve a seqno for - * ourselves. - */ - ce = intel_context_pin(ctx, engine); - if (IS_ERR(ce)) - return ERR_CAST(ce); - - mutex_lock(&ce->ring->timeline->mutex); - intel_context_enter(ce); - - /* Move our oldest request to the slab-cache (if not in use!) */ - rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); - if (!list_is_last(&rq->ring_link, &ce->ring->request_list) && - i915_request_completed(rq)) - i915_request_retire(rq); + /* Check that the caller provided an already pinned context */ + __intel_context_pin(ce); /* * Beware: Dragons be flying overhead. @@ -684,30 +646,26 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * Do not use kmem_cache_zalloc() here! */ rq = kmem_cache_alloc(global.slab_requests, - GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(!rq)) { - rq = i915_request_alloc_slow(ce); + rq = request_alloc_slow(ce, gfp); if (!rq) { ret = -ENOMEM; goto err_unreserve; } } - INIT_LIST_HEAD(&rq->active_list); - INIT_LIST_HEAD(&rq->execute_cb); - - tl = ce->ring->timeline; ret = i915_timeline_get_seqno(tl, rq, &seqno); if (ret) goto err_free; - rq->i915 = i915; - rq->engine = engine; - rq->gem_context = ctx; + rq->i915 = ce->engine->i915; rq->hw_context = ce; + rq->gem_context = ce->gem_context; + rq->engine = ce->engine; rq->ring = ce->ring; rq->timeline = tl; - GEM_BUG_ON(rq->timeline == &engine->timeline); + GEM_BUG_ON(rq->timeline == &ce->engine->timeline); rq->hwsp_seqno = tl->hwsp_seqno; rq->hwsp_cacheline = tl->hwsp_cacheline; rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ @@ -728,6 +686,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->capture_list = NULL; rq->waitboost = false; + INIT_LIST_HEAD(&rq->active_list); + INIT_LIST_HEAD(&rq->execute_cb); + /* * Reserve space in the ring buffer for all the commands required to * eventually emit this request. This is to guarantee that the @@ -740,7 +701,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * around inside i915_request_add() there is sufficient space at * the beginning of the ring as well. 
*/ - rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32); + rq->reserved_space = + 2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32); /* * Record the position of the start of the request so that @@ -750,20 +712,16 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) */ rq->head = rq->ring->emit; - ret = engine->request_alloc(rq); + ret = rq->engine->request_alloc(rq); if (ret) goto err_unwind; - /* Keep a second pin for the dual retirement along engine and ring */ - __intel_context_pin(ce); - rq->infix = rq->ring->emit; /* end of header; start of user payload */ - /* Check that we didn't interrupt ourselves with a new request */ - lockdep_assert_held(&rq->timeline->mutex); - GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); - rq->cookie = lockdep_pin_lock(&rq->timeline->mutex); + /* Keep a second pin for the dual retirement along engine and ring */ + __intel_context_pin(ce); + intel_context_mark_active(ce); return rq; err_unwind: @@ -777,12 +735,86 @@ err_unwind: err_free: kmem_cache_free(global.slab_requests, rq); err_unreserve: - intel_context_exit(ce); - mutex_unlock(&ce->ring->timeline->mutex); intel_context_unpin(ce); return ERR_PTR(ret); } +struct i915_request * +i915_request_create(struct intel_context *ce) +{ + struct i915_request *rq; + + intel_context_timeline_lock(ce); + + /* Move our oldest request to the slab-cache (if not in use!) */ + rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); + if (!list_is_last(&rq->ring_link, &ce->ring->request_list) && + i915_request_completed(rq)) + i915_request_retire(rq); + + intel_context_enter(ce); + rq = __i915_request_create(ce, GFP_KERNEL); + intel_context_exit(ce); /* active reference transferred to request */ + if (IS_ERR(rq)) + goto err_unlock; + + /* Check that we do not interrupt ourselves with a new request */ + rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex); + + return rq; + +err_unlock: + intel_context_timeline_unlock(ce); + return rq; +} + +/** + * i915_request_alloc - allocate a request structure + * + * @engine: engine that we wish to issue the request on. + * @ctx: context that the request will be associated with. + * + * Returns a pointer to the allocated request if successful, + * or an error code if not. + */ +struct i915_request * +i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) +{ + struct drm_i915_private *i915 = engine->i915; + struct intel_context *ce; + struct i915_request *rq; + int ret; + + /* + * Preempt contexts are reserved for exclusive use to inject a + * preemption context switch. They are never to be used for any trivial + * request! + */ + GEM_BUG_ON(ctx == i915->preempt_context); + + /* + * ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged. + */ + ret = i915_terminally_wedged(i915); + if (ret) + return ERR_PTR(ret); + + /* + * Pinning the contexts may generate requests in order to acquire + * GGTT space, so do this first before we reserve a seqno for + * ourselves. + */ + ce = intel_context_pin(ctx, engine); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + rq = i915_request_create(ce); + intel_context_unpin(ce); + + return rq; +} + static int emit_semaphore_wait(struct i915_request *to, struct i915_request *from, @@ -1043,8 +1075,7 @@ __i915_request_add_to_timeline(struct i915_request *rq) * precludes optimising to use semaphores serialisation of a single * timeline across engines. 
*/ - prev = i915_active_request_raw(&timeline->last_request, - &rq->i915->drm.struct_mutex); + prev = rcu_dereference_protected(timeline->last_request.request, 1); if (prev && !i915_request_completed(prev)) { if (is_power_of_2(prev->engine->mask | rq->engine->mask)) i915_sw_fence_await_sw_fence(&rq->submit, @@ -1065,6 +1096,11 @@ __i915_request_add_to_timeline(struct i915_request *rq) list_add_tail(&rq->link, &timeline->requests); spin_unlock_irq(&timeline->lock); + /* + * Make sure that no request gazumped us - if it was allocated after + * our i915_request_alloc() and called __i915_request_add() before + * us, the timeline will hold its seqno which is later than ours. + */ GEM_BUG_ON(timeline->seqno != rq->fence.seqno); __i915_active_request_set(&timeline->last_request, rq); @@ -1076,36 +1112,23 @@ __i915_request_add_to_timeline(struct i915_request *rq) * request is not being tracked for completion but the work itself is * going to happen on the hardware. This would be a Bad Thing(tm). */ -void i915_request_add(struct i915_request *request) +struct i915_request *__i915_request_commit(struct i915_request *rq) { - struct intel_engine_cs *engine = request->engine; - struct i915_timeline *timeline = request->timeline; - struct intel_ring *ring = request->ring; + struct intel_engine_cs *engine = rq->engine; + struct intel_ring *ring = rq->ring; struct i915_request *prev; u32 *cs; GEM_TRACE("%s fence %llx:%lld\n", - engine->name, request->fence.context, request->fence.seqno); - - lockdep_assert_held(&request->timeline->mutex); - lockdep_unpin_lock(&request->timeline->mutex, request->cookie); - - trace_i915_request_add(request); - - /* - * Make sure that no request gazumped us - if it was allocated after - * our i915_request_alloc() and called __i915_request_add() before - * us, the timeline will hold its seqno which is later than ours. - */ - GEM_BUG_ON(timeline->seqno != request->fence.seqno); + engine->name, rq->fence.context, rq->fence.seqno); /* * To ensure that this call will not fail, space for its emissions * should already have been reserved in the ring buffer. Let the ring * know that it is time to use that space up. */ - GEM_BUG_ON(request->reserved_space > request->ring->space); - request->reserved_space = 0; + GEM_BUG_ON(rq->reserved_space > ring->space); + rq->reserved_space = 0; /* * Record the position of the start of the breadcrumb so that @@ -1113,17 +1136,17 @@ void i915_request_add(struct i915_request *request) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. 
*/ - cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw); + cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw); GEM_BUG_ON(IS_ERR(cs)); - request->postfix = intel_ring_offset(request, cs); + rq->postfix = intel_ring_offset(rq, cs); - prev = __i915_request_add_to_timeline(request); + prev = __i915_request_add_to_timeline(rq); - list_add_tail(&request->ring_link, &ring->request_list); - if (list_is_first(&request->ring_link, &ring->request_list)) - list_add(&ring->active_link, &request->i915->gt.active_rings); - request->i915->gt.active_engines |= request->engine->mask; - request->emitted_jiffies = jiffies; + list_add_tail(&rq->ring_link, &ring->request_list); + if (list_is_first(&rq->ring_link, &ring->request_list)) + list_add(&ring->active_link, &rq->i915->gt.active_rings); + rq->i915->gt.active_engines |= rq->engine->mask; + rq->emitted_jiffies = jiffies; /* * Let the backend know a new request has arrived that may need @@ -1137,10 +1160,10 @@ void i915_request_add(struct i915_request *request) * run at the earliest possible convenience. */ local_bh_disable(); - i915_sw_fence_commit(&request->semaphore); + i915_sw_fence_commit(&rq->semaphore); rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) { - struct i915_sched_attr attr = request->gem_context->sched; + struct i915_sched_attr attr = rq->gem_context->sched; /* * Boost actual workloads past semaphores! @@ -1154,7 +1177,7 @@ void i915_request_add(struct i915_request *request) * far in the distance past over useful work, we keep a history * of any semaphore use along our dependency chain. */ - if (!(request->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) + if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) attr.priority |= I915_PRIORITY_NOSEMAPHORE; /* @@ -1163,15 +1186,29 @@ void i915_request_add(struct i915_request *request) * Allow interactive/synchronous clients to jump ahead of * the bulk clients. 
(FQ_CODEL) */ - if (list_empty(&request->sched.signalers_list)) + if (list_empty(&rq->sched.signalers_list)) attr.priority |= I915_PRIORITY_NEWCLIENT; - engine->schedule(request, &attr); + engine->schedule(rq, &attr); } rcu_read_unlock(); - i915_sw_fence_commit(&request->submit); + i915_sw_fence_commit(&rq->submit); local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ + return prev; +} + +void i915_request_add(struct i915_request *rq) +{ + struct i915_request *prev; + + lockdep_assert_held(&rq->timeline->mutex); + lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie); + + trace_i915_request_add(rq); + + prev = __i915_request_commit(rq); + /* * In typical scenarios, we do not expect the previous request on * the timeline to be still tracked by timeline->last_request if it @@ -1192,7 +1229,7 @@ void i915_request_add(struct i915_request *request) if (prev && i915_request_completed(prev)) i915_request_retire_upto(prev); - mutex_unlock(&request->timeline->mutex); + mutex_unlock(&rq->timeline->mutex); } static unsigned long local_clock_us(unsigned int *cpu) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index a982664618c2..36f13b74ec58 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -239,6 +239,13 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence) return fence->ops == &i915_fence_ops; } +struct i915_request * __must_check +__i915_request_create(struct intel_context *ce, gfp_t gfp); +struct i915_request * __must_check +i915_request_create(struct intel_context *ce); + +struct i915_request *__i915_request_commit(struct i915_request *request); + struct i915_request * __must_check i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index eb317759b5d3..5c496b11ab5c 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -235,10 +235,9 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay, static struct i915_request *alloc_request(struct intel_overlay *overlay) { - struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = dev_priv->engine[RCS0]; + struct intel_engine_cs *engine = overlay->i915->engine[RCS0]; - return i915_request_alloc(engine, dev_priv->kernel_context); + return i915_request_create(engine->kernel_context); } /* overlay needs to be disable in OCMD reg */ diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 27d8f853111b..eee838dc0634 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -46,7 +46,7 @@ static int __live_active_setup(struct drm_i915_private *i915, for_each_engine(engine, i915, id) { struct i915_request *rq; - rq = i915_request_alloc(engine, i915->kernel_context); + rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) { err = PTR_ERR(rq); break; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index e43630b40fce..046a38743152 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -202,7 +202,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context); + rq = 
i915_request_create(i915->engine[RCS0]->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 12fc53c694a6..12203d665a4e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -468,7 +468,7 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) if (err) return err; - rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context); + rq = i915_request_create(i915->engine[RCS0]->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index e6ffe2240126..098d7b3aa131 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -551,8 +551,7 @@ static int live_nop_request(void *arg) times[1] = ktime_get_raw(); for (n = 0; n < prime; n++) { - request = i915_request_alloc(engine, - i915->kernel_context); + request = i915_request_create(engine->kernel_context); if (IS_ERR(request)) { err = PTR_ERR(request); goto out_unlock; @@ -649,7 +648,7 @@ empty_request(struct intel_engine_cs *engine, struct i915_request *request; int err; - request = i915_request_alloc(engine, engine->i915->kernel_context); + request = i915_request_create(engine->kernel_context); if (IS_ERR(request)) return request; @@ -853,7 +852,7 @@ static int live_all_engines(void *arg) } for_each_engine(engine, i915, id) { - request[id] = i915_request_alloc(engine, i915->kernel_context); + request[id] = i915_request_create(engine->kernel_context); if (IS_ERR(request[id])) { err = PTR_ERR(request[id]); pr_err("%s: Request allocation failed with err=%d\n", @@ -962,7 +961,7 @@ static int live_sequential_engines(void *arg) goto out_unlock; } - request[id] = i915_request_alloc(engine, i915->kernel_context); + request[id] = i915_request_create(engine->kernel_context); if (IS_ERR(request[id])) { err = PTR_ERR(request[id]); pr_err("%s: Request allocation failed for %s with err=%d\n", diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index bd96afcadfe7..ff9ebe50fae8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -454,7 +454,7 @@ tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) goto out; } - rq = i915_request_alloc(engine, engine->i915->kernel_context); + rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) goto out_unpin; @@ -678,7 +678,7 @@ static int live_hwsp_wrap(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - rq = i915_request_alloc(engine, i915->kernel_context); + rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out; -- cgit From 79ffac8599c4d8aa84d313920d3d86d7361c252b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Apr 2019 21:07:17 +0100 Subject: drm/i915: Invert the GEM wakeref hierarchy In the current scheme, on submitting a request we take a single global GEM wakeref, which trickles down to wake up all GT power domains. This is undesirable as we would like to be able to localise our power management to the available power domains and to remove the global GEM operations from the heart of the driver. (The intent there is to push global GEM decisions to the boundary as used by the GEM user interface.) 
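The hierarchy described here can be pictured with a small stand-alone model (invented names, no locking or error handling; not the driver code): each request pins its engine, and the first busy engine pins the GT, so chip-level power state follows engine usage automatically.

#include <stdio.h>

struct gt {
        unsigned int wakeref;
};

struct engine {
        struct gt *gt;
        unsigned int wakeref;
        const char *name;
};

static void gt_get(struct gt *gt)
{
        if (!gt->wakeref++)
                puts("GT: power up");
}

static void gt_put(struct gt *gt)
{
        if (!--gt->wakeref)
                puts("GT: power down");
}

static void engine_get(struct engine *e)
{
        if (!e->wakeref++) {            /* engine becomes busy */
                gt_get(e->gt);          /* propagate up the hierarchy */
                printf("%s: unpark\n", e->name);
        }
}

static void engine_put(struct engine *e)
{
        if (!--e->wakeref) {            /* last request retired */
                printf("%s: park\n", e->name);
                gt_put(e->gt);
        }
}

int main(void)
{
        struct gt gt = { 0 };
        struct engine rcs = { &gt, 0, "rcs0" };
        struct engine bcs = { &gt, 0, "bcs0" };

        engine_get(&rcs);       /* first busy engine powers up the GT */
        engine_get(&bcs);
        engine_put(&rcs);       /* GT stays up: bcs0 is still busy */
        engine_put(&bcs);       /* last engine idles, GT powers down */
        return 0;
}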
Now during request construction, each request is responsible via its logical context to acquire a wakeref on each power domain it intends to utilize. Currently, each request takes a wakeref on the engine(s) and the engines themselves take a chipset wakeref. This gives us a transition on each engine which we can extend if we want to insert more power management control (such as soft rc6). The global GEM operations that currently require a struct_mutex are reduced to listening to pm events from the chipset GT wakeref. As we reduce the struct_mutex requirement, these listeners should evaporate. Perhaps the biggest immediate change is that this removes the struct_mutex requirement around GT power management, allowing us greater flexibility in request construction. Another important knock-on effect is that by tracking engine usage, we can insert a switch back to the kernel context on that engine immediately, avoiding any extra delay or inserting global synchronisation barriers. This makes tracking when an engine and its associated contexts are idle much easier -- important for when we forgo our assumed execution ordering and need idle barriers to unpin used contexts. In the process, it means we remove a large chunk of code whose only purpose was to switch back to the kernel context.
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Imre Deak Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190424200717.1686-5-chris@chris-wilson.co.uk ---
drivers/gpu/drm/i915/Makefile | 2 + drivers/gpu/drm/i915/gt/intel_context.c | 18 +- drivers/gpu/drm/i915/gt/intel_engine.h | 9 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 142 +----------- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 147 ++++++++++++ drivers/gpu/drm/i915/gt/intel_engine_pm.h | 20 ++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 7 +- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 143 ++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_pm.h | 27 +++ drivers/gpu/drm/i915/gt/intel_hangcheck.c | 7 + drivers/gpu/drm/i915/gt/intel_lrc.c | 6 +- drivers/gpu/drm/i915/gt/intel_reset.c | 101 +-------- drivers/gpu/drm/i915/gt/intel_reset.h | 1 - drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 13 +- drivers/gpu/drm/i915/gt/mock_engine.c | 3 + drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 49 +--- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 5 +- drivers/gpu/drm/i915/i915_debugfs.c | 16 +- drivers/gpu/drm/i915/i915_drv.c | 5 +- drivers/gpu/drm/i915/i915_drv.h | 8 +- drivers/gpu/drm/i915/i915_gem.c | 41 ++-- drivers/gpu/drm/i915/i915_gem.h | 3 - drivers/gpu/drm/i915/i915_gem_context.c | 85 +------ drivers/gpu/drm/i915/i915_gem_context.h | 4 - drivers/gpu/drm/i915/i915_gem_evict.c | 47 +--- drivers/gpu/drm/i915/i915_gem_pm.c | 264 +++++++--------------- drivers/gpu/drm/i915/i915_gem_pm.h | 3 - drivers/gpu/drm/i915/i915_gpu_error.h | 4 - drivers/gpu/drm/i915/i915_request.c | 10 +- drivers/gpu/drm/i915/i915_request.h | 2 +- drivers/gpu/drm/i915/intel_uc.c | 22 +- drivers/gpu/drm/i915/intel_uc.h | 2 +- drivers/gpu/drm/i915/selftests/i915_gem.c | 16 +- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 114 +--------- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 29 ++- drivers/gpu/drm/i915/selftests/igt_flush_test.c | 32 +-- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 15 +- 37 files changed, 592 insertions(+), 830 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pm.c create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pm.h create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_pm.c create mode 100644
drivers/gpu/drm/i915/gt/intel_gt_pm.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 858642c7bc40..dd8d923aa1c6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -71,6 +71,8 @@ gt-y += \ gt/intel_breadcrumbs.o \ gt/intel_context.o \ gt/intel_engine_cs.o \ + gt/intel_engine_pm.o \ + gt/intel_gt_pm.o \ gt/intel_hangcheck.o \ gt/intel_lrc.o \ gt/intel_reset.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 4410e20e8e13..298e463ad082 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -10,6 +10,7 @@ #include "intel_context.h" #include "intel_engine.h" +#include "intel_engine_pm.h" static struct i915_global_context { struct i915_global base; @@ -162,7 +163,11 @@ intel_context_pin(struct i915_gem_context *ctx, return ERR_PTR(-EINTR); if (likely(!atomic_read(&ce->pin_count))) { - err = ce->ops->pin(ce); + intel_wakeref_t wakeref; + + err = 0; + with_intel_runtime_pm(ce->engine->i915, wakeref) + err = ce->ops->pin(ce); if (err) goto err; @@ -269,17 +274,10 @@ int __init i915_global_context_init(void) void intel_context_enter_engine(struct intel_context *ce) { - struct drm_i915_private *i915 = ce->gem_context->i915; - - if (!i915->gt.active_requests++) - i915_gem_unpark(i915); + intel_engine_pm_get(ce->engine); } void intel_context_exit_engine(struct intel_context *ce) { - struct drm_i915_private *i915 = ce->gem_context->i915; - - GEM_BUG_ON(!i915->gt.active_requests); - if (!--i915->gt.active_requests) - i915_gem_park(i915); + intel_engine_pm_put(ce->engine); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 72c7c337ace9..a228dc1774d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -382,6 +382,8 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine); void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); +void intel_engine_init_execlists(struct intel_engine_cs *engine); + void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); @@ -458,19 +460,14 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine, { if (engine->reset.reset) engine->reset.reset(engine, stalled); + engine->serial++; /* contexts lost */ } -void intel_engines_sanitize(struct drm_i915_private *i915, bool force); -void intel_gt_resume(struct drm_i915_private *i915); - bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); void intel_engine_lost_context(struct intel_engine_cs *engine); -void intel_engines_park(struct drm_i915_private *i915); -void intel_engines_unpark(struct drm_i915_private *i915); - void intel_engines_reset_default_submission(struct drm_i915_private *i915); unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 79ac56748b90..cbebe812b317 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -27,6 +27,7 @@ #include "i915_drv.h" #include "intel_engine.h" +#include "intel_engine_pm.h" #include "intel_lrc.h" #include "intel_reset.h" @@ -451,7 +452,7 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) 
i915_gem_batch_pool_init(&engine->batch_pool, engine); } -static void intel_engine_init_execlist(struct intel_engine_cs *engine) +void intel_engine_init_execlists(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -584,10 +585,11 @@ int intel_engine_setup_common(struct intel_engine_cs *engine) i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); intel_engine_init_breadcrumbs(engine); - intel_engine_init_execlist(engine); + intel_engine_init_execlists(engine); intel_engine_init_hangcheck(engine); intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); + intel_engine_init__pm(engine); /* Use the whole device by default */ engine->sseu = @@ -758,30 +760,6 @@ err_unpin: return ret; } -void intel_gt_resume(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* - * After resume, we may need to poke into the pinned kernel - * contexts to paper over any damage caused by the sudden suspend. - * Only the kernel contexts should remain pinned over suspend, - * allowing us to fixup the user contexts on their first pin. - */ - for_each_engine(engine, i915, id) { - struct intel_context *ce; - - ce = engine->kernel_context; - if (ce) - ce->ops->reset(ce); - - ce = engine->preempt_context; - if (ce) - ce->ops->reset(ce); - } -} - /** * intel_engines_cleanup_common - cleans up the engine state created by * the common initiailizers. @@ -1128,117 +1106,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915) engine->set_default_submission(engine); } -static bool reset_engines(struct drm_i915_private *i915) -{ - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) - return false; - - return intel_gpu_reset(i915, ALL_ENGINES) == 0; -} - -/** - * intel_engines_sanitize: called after the GPU has lost power - * @i915: the i915 device - * @force: ignore a failed reset and sanitize engine state anyway - * - * Anytime we reset the GPU, either with an explicit GPU reset or through a - * PCI power cycle, the GPU loses state and we must reset our state tracking - * to match. Note that calling intel_engines_sanitize() if the GPU has not - * been reset results in much confusion! - */ -void intel_engines_sanitize(struct drm_i915_private *i915, bool force) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - GEM_TRACE("\n"); - - if (!reset_engines(i915) && !force) - return; - - for_each_engine(engine, i915, id) - intel_engine_reset(engine, false); -} - -/** - * intel_engines_park: called when the GT is transitioning from busy->idle - * @i915: the i915 device - * - * The GT is now idle and about to go to sleep (maybe never to wake again?). - * Time for us to tidy and put away our toys (release resources back to the - * system). - */ -void intel_engines_park(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - /* Flush the residual irq tasklets first. */ - intel_engine_disarm_breadcrumbs(engine); - tasklet_kill(&engine->execlists.tasklet); - - /* - * We are committed now to parking the engines, make sure there - * will be no more interrupts arriving later and the engines - * are truly idle. 
- */ - if (wait_for(intel_engine_is_idle(engine), 10)) { - struct drm_printer p = drm_debug_printer(__func__); - - dev_err(i915->drm.dev, - "%s is not idle before parking\n", - engine->name); - intel_engine_dump(engine, &p, NULL); - } - - /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); - - if (engine->park) - engine->park(engine); - - if (engine->pinned_default_state) { - i915_gem_object_unpin_map(engine->default_state); - engine->pinned_default_state = NULL; - } - - i915_gem_batch_pool_fini(&engine->batch_pool); - engine->execlists.no_priolist = false; - } - - i915->gt.active_engines = 0; -} - -/** - * intel_engines_unpark: called when the GT is transitioning from idle->busy - * @i915: the i915 device - * - * The GT was idle and now about to fire up with some new user requests. - */ -void intel_engines_unpark(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - void *map; - - /* Pin the default state for fast resets from atomic context. */ - map = NULL; - if (engine->default_state) - map = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); - if (!IS_ERR_OR_NULL(map)) - engine->pinned_default_state = map; - - if (engine->unpark) - engine->unpark(engine); - - intel_engine_init_hangcheck(engine); - } -} - /** * intel_engine_lost_context: called when the GPU is reset into unknown state * @engine: the engine @@ -1523,6 +1390,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_reset_failed(engine->i915)) drm_printf(m, "*** WEDGED ***\n"); + drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count)); drm_printf(m, "\tHangcheck %x:%x [%d ms]\n", engine->hangcheck.last_seqno, engine->hangcheck.next_seqno, diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c new file mode 100644 index 000000000000..3976aea3c1d1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -0,0 +1,147 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" + +#include "intel_engine.h" +#include "intel_engine_pm.h" +#include "intel_gt_pm.h" + +static int intel_engine_unpark(struct intel_wakeref *wf) +{ + struct intel_engine_cs *engine = + container_of(wf, typeof(*engine), wakeref); + void *map; + + GEM_TRACE("%s\n", engine->name); + + intel_gt_pm_get(engine->i915); + + /* Pin the default state for fast resets from atomic context. */ + map = NULL; + if (engine->default_state) + map = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (!IS_ERR_OR_NULL(map)) + engine->pinned_default_state = map; + + if (engine->unpark) + engine->unpark(engine); + + intel_engine_init_hangcheck(engine); + return 0; +} + +void intel_engine_pm_get(struct intel_engine_cs *engine) +{ + intel_wakeref_get(engine->i915, &engine->wakeref, intel_engine_unpark); +} + +static bool switch_to_kernel_context(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + /* Already inside the kernel context, safe to power down. */ + if (engine->wakeref_serial == engine->serial) + return true; + + /* GPU is pointing to the void, as good as in the kernel context. */ + if (i915_reset_failed(engine->i915)) + return true; + + /* + * Note, we do this without taking the timeline->mutex. We cannot + * as we may be called while retiring the kernel context and so + * already underneath the timeline->mutex. 
Instead we rely on the + * exclusive property of the intel_engine_park that prevents anyone + * else from creating a request on this engine. This also requires + * that the ring is empty and we avoid any waits while constructing + * the context, as they assume protection by the timeline->mutex. + * This should hold true as we can only park the engine after + * retiring the last request, thus all rings should be empty and + * all timelines idle. + */ + rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); + if (IS_ERR(rq)) + /* Context switch failed, hope for the best! Maybe reset? */ + return true; + + /* Check again on the next retirement. */ + engine->wakeref_serial = engine->serial + 1; + __i915_request_commit(rq); + + return false; +} + +static int intel_engine_park(struct intel_wakeref *wf) +{ + struct intel_engine_cs *engine = + container_of(wf, typeof(*engine), wakeref); + + /* + * If one and only one request is completed between pm events, + * we know that we are inside the kernel context and it is + * safe to power down. (We are paranoid in case that runtime + * suspend causes corruption to the active context image, and + * want to avoid that impacting userspace.) + */ + if (!switch_to_kernel_context(engine)) + return -EBUSY; + + GEM_TRACE("%s\n", engine->name); + + intel_engine_disarm_breadcrumbs(engine); + + /* Must be reset upon idling, or we may miss the busy wakeup. */ + GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); + + if (engine->park) + engine->park(engine); + + if (engine->pinned_default_state) { + i915_gem_object_unpin_map(engine->default_state); + engine->pinned_default_state = NULL; + } + + engine->execlists.no_priolist = false; + + intel_gt_pm_put(engine->i915); + return 0; +} + +void intel_engine_pm_put(struct intel_engine_cs *engine) +{ + intel_wakeref_put(engine->i915, &engine->wakeref, intel_engine_park); +} + +void intel_engine_init__pm(struct intel_engine_cs *engine) +{ + intel_wakeref_init(&engine->wakeref); +} + +int intel_engines_resume(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + intel_gt_pm_get(i915); + for_each_engine(engine, i915, id) { + intel_engine_pm_get(engine); + engine->serial++; /* kernel context lost */ + err = engine->resume(engine); + intel_engine_pm_put(engine); + if (err) { + dev_err(i915->drm.dev, + "Failed to restart %s (%d)\n", + engine->name, err); + break; + } + } + intel_gt_pm_put(i915); + + return err; +} diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h new file mode 100644 index 000000000000..143ac90ba117 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -0,0 +1,20 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_ENGINE_PM_H +#define INTEL_ENGINE_PM_H + +struct drm_i915_private; +struct intel_engine_cs; + +void intel_engine_pm_get(struct intel_engine_cs *engine); +void intel_engine_pm_put(struct intel_engine_cs *engine); + +void intel_engine_init__pm(struct intel_engine_cs *engine); + +int intel_engines_resume(struct drm_i915_private *i915); + +#endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 3adf58da6d2c..d972c339309c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -20,6 +20,7 @@ #include "i915_selftest.h" #include "i915_timeline_types.h" #include "intel_sseu.h" 
+#include "intel_wakeref.h" #include "intel_workarounds_types.h" #define I915_MAX_SLICES 3 @@ -287,6 +288,10 @@ struct intel_engine_cs { struct intel_context *kernel_context; /* pinned */ struct intel_context *preempt_context; /* pinned; optional */ + unsigned long serial; + + unsigned long wakeref_serial; + struct intel_wakeref wakeref; struct drm_i915_gem_object *default_state; void *pinned_default_state; @@ -359,7 +364,7 @@ struct intel_engine_cs { void (*irq_enable)(struct intel_engine_cs *engine); void (*irq_disable)(struct intel_engine_cs *engine); - int (*init_hw)(struct intel_engine_cs *engine); + int (*resume)(struct intel_engine_cs *engine); struct { void (*prepare)(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c new file mode 100644 index 000000000000..ae7155f0e063 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -0,0 +1,143 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt_pm.h" +#include "intel_pm.h" +#include "intel_wakeref.h" + +static void pm_notify(struct drm_i915_private *i915, int state) +{ + blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915); +} + +static int intel_gt_unpark(struct intel_wakeref *wf) +{ + struct drm_i915_private *i915 = + container_of(wf, typeof(*i915), gt.wakeref); + + GEM_TRACE("\n"); + + /* + * It seems that the DMC likes to transition between the DC states a lot + * when there are no connected displays (no active power domains) during + * command submission. + * + * This activity has negative impact on the performance of the chip with + * huge latencies observed in the interrupt handler and elsewhere. + * + * Work around it by grabbing a GT IRQ power domain whilst there is any + * GT activity, preventing any DC state transitions. 
+ */ + i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + GEM_BUG_ON(!i915->gt.awake); + + intel_enable_gt_powersave(i915); + + i915_update_gfx_val(i915); + if (INTEL_GEN(i915) >= 6) + gen6_rps_busy(i915); + + i915_pmu_gt_unparked(i915); + + i915_queue_hangcheck(i915); + + pm_notify(i915, INTEL_GT_UNPARK); + + return 0; +} + +void intel_gt_pm_get(struct drm_i915_private *i915) +{ + intel_wakeref_get(i915, &i915->gt.wakeref, intel_gt_unpark); +} + +static int intel_gt_park(struct intel_wakeref *wf) +{ + struct drm_i915_private *i915 = + container_of(wf, typeof(*i915), gt.wakeref); + intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake); + + GEM_TRACE("\n"); + + pm_notify(i915, INTEL_GT_PARK); + + i915_pmu_gt_parked(i915); + if (INTEL_GEN(i915) >= 6) + gen6_rps_idle(i915); + + GEM_BUG_ON(!wakeref); + intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); + + return 0; +} + +void intel_gt_pm_put(struct drm_i915_private *i915) +{ + intel_wakeref_put(i915, &i915->gt.wakeref, intel_gt_park); +} + +void intel_gt_pm_init(struct drm_i915_private *i915) +{ + intel_wakeref_init(&i915->gt.wakeref); + BLOCKING_INIT_NOTIFIER_HEAD(&i915->gt.pm_notifications); +} + +static bool reset_engines(struct drm_i915_private *i915) +{ + if (INTEL_INFO(i915)->gpu_reset_clobbers_display) + return false; + + return intel_gpu_reset(i915, ALL_ENGINES) == 0; +} + +/** + * intel_gt_sanitize: called after the GPU has lost power + * @i915: the i915 device + * @force: ignore a failed reset and sanitize engine state anyway + * + * Anytime we reset the GPU, either with an explicit GPU reset or through a + * PCI power cycle, the GPU loses state and we must reset our state tracking + * to match. Note that calling intel_gt_sanitize() if the GPU has not + * been reset results in much confusion! + */ +void intel_gt_sanitize(struct drm_i915_private *i915, bool force) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + GEM_TRACE("\n"); + + if (!reset_engines(i915) && !force) + return; + + for_each_engine(engine, i915, id) + intel_engine_reset(engine, false); +} + +void intel_gt_resume(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * After resume, we may need to poke into the pinned kernel + * contexts to paper over any damage caused by the sudden suspend. + * Only the kernel contexts should remain pinned over suspend, + * allowing us to fixup the user contexts on their first pin. 
+ */ + for_each_engine(engine, i915, id) { + struct intel_context *ce; + + ce = engine->kernel_context; + if (ce) + ce->ops->reset(ce); + + ce = engine->preempt_context; + if (ce) + ce->ops->reset(ce); + } +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h new file mode 100644 index 000000000000..7dd1130a19a4 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_PM_H +#define INTEL_GT_PM_H + +#include + +struct drm_i915_private; + +enum { + INTEL_GT_UNPARK, + INTEL_GT_PARK, +}; + +void intel_gt_pm_get(struct drm_i915_private *i915); +void intel_gt_pm_put(struct drm_i915_private *i915); + +void intel_gt_pm_init(struct drm_i915_private *i915); + +void intel_gt_sanitize(struct drm_i915_private *i915, bool force); +void intel_gt_resume(struct drm_i915_private *i915); + +#endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c index 3053a706a561..e5eaa06fe74d 100644 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -256,6 +256,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) struct intel_engine_cs *engine; enum intel_engine_id id; unsigned int hung = 0, stuck = 0, wedged = 0; + intel_wakeref_t wakeref; if (!i915_modparams.enable_hangcheck) return; @@ -266,6 +267,10 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (i915_terminally_wedged(dev_priv)) return; + wakeref = intel_runtime_pm_get_if_in_use(dev_priv); + if (!wakeref) + return; + /* As enabling the GPU requires fairly extensive mmio access, * periodically arm the mmio checker to see if we are triggering * any invalid access. @@ -313,6 +318,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (hung) hangcheck_declare_hang(dev_priv, hung, stuck); + intel_runtime_pm_put(dev_priv, wakeref); + /* Reset timer in case GPU hangs without another request being added */ i915_queue_hangcheck(dev_priv); } diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index edec7f183688..d17c08e26935 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1789,7 +1789,7 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine) return unexpected; } -static int gen8_init_common_ring(struct intel_engine_cs *engine) +static int execlists_resume(struct intel_engine_cs *engine) { intel_engine_apply_workarounds(engine); intel_engine_apply_whitelist(engine); @@ -1822,7 +1822,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) * completed the reset in i915_gem_reset_finish(). If a request * is completed by one engine, it may then queue a request * to a second via its execlists->tasklet *just* as we are - * calling engine->init_hw() and also writing the ELSP. + * calling engine->resume() and also writing the ELSP. * Turning off the execlists->tasklet until the reset is over * prevents the race. */ @@ -2391,7 +2391,7 @@ static void logical_ring_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overriden by each engine. 
*/ - engine->init_hw = gen8_init_common_ring; + engine->resume = execlists_resume; engine->reset.prepare = execlists_reset_prepare; engine->reset.reset = execlists_reset; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index cdf184403d46..3424d28650af 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -9,6 +9,8 @@ #include "i915_drv.h" #include "i915_gpu_error.h" +#include "intel_engine_pm.h" +#include "intel_gt_pm.h" #include "intel_reset.h" #include "intel_guc.h" @@ -680,6 +682,7 @@ static void reset_prepare_engine(struct intel_engine_cs *engine) * written to the powercontext is undefined and so we may lose * GPU state upon resume, i.e. fail to restart after a reset. */ + intel_engine_pm_get(engine); intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); engine->reset.prepare(engine); } @@ -715,6 +718,7 @@ static void reset_prepare(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; + intel_gt_pm_get(i915); for_each_engine(engine, i915, id) reset_prepare_engine(engine); @@ -752,48 +756,10 @@ static int gt_reset(struct drm_i915_private *i915, static void reset_finish_engine(struct intel_engine_cs *engine) { engine->reset.finish(engine); + intel_engine_pm_put(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); } -struct i915_gpu_restart { - struct work_struct work; - struct drm_i915_private *i915; -}; - -static void restart_work(struct work_struct *work) -{ - struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work); - struct drm_i915_private *i915 = arg->i915; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - - wakeref = intel_runtime_pm_get(i915); - mutex_lock(&i915->drm.struct_mutex); - WRITE_ONCE(i915->gpu_error.restart, NULL); - - for_each_engine(engine, i915, id) { - struct i915_request *rq; - - /* - * Ostensibily, we always want a context loaded for powersaving, - * so if the engine is idle after the reset, send a request - * to load our scratch kernel_context. - */ - if (!intel_engine_is_idle(engine)) - continue; - - rq = i915_request_create(engine->kernel_context); - if (!IS_ERR(rq)) - i915_request_add(rq); - } - - mutex_unlock(&i915->drm.struct_mutex); - intel_runtime_pm_put(i915, wakeref); - - kfree(arg); -} - static void reset_finish(struct drm_i915_private *i915) { struct intel_engine_cs *engine; @@ -803,29 +769,7 @@ static void reset_finish(struct drm_i915_private *i915) reset_finish_engine(engine); intel_engine_signal_breadcrumbs(engine); } -} - -static void reset_restart(struct drm_i915_private *i915) -{ - struct i915_gpu_restart *arg; - - /* - * Following the reset, ensure that we always reload context for - * powersaving, and to correct engine->last_retired_context. Since - * this requires us to submit a request, queue a worker to do that - * task for us to evade any locking here. - */ - if (READ_ONCE(i915->gpu_error.restart)) - return; - - arg = kmalloc(sizeof(*arg), GFP_KERNEL); - if (arg) { - arg->i915 = i915; - INIT_WORK(&arg->work, restart_work); - - WRITE_ONCE(i915->gpu_error.restart, arg); - queue_work(i915->wq, &arg->work); - } + intel_gt_pm_put(i915); } static void nop_submit_request(struct i915_request *request) @@ -886,6 +830,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) * in nop_submit_request. 
*/ synchronize_rcu_expedited(); + set_bit(I915_WEDGED, &error->flags); /* Mark all executing requests as skipped */ for_each_engine(engine, i915, id) @@ -893,9 +838,6 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) reset_finish(i915); - smp_mb__before_atomic(); - set_bit(I915_WEDGED, &error->flags); - GEM_TRACE("end\n"); } @@ -953,7 +895,7 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) } mutex_unlock(&i915->gt.timelines.mutex); - intel_engines_sanitize(i915, false); + intel_gt_sanitize(i915, false); /* * Undo nop_submit_request. We prevent all new i915 requests from @@ -1031,7 +973,6 @@ void i915_reset(struct drm_i915_private *i915, GEM_TRACE("flags=%lx\n", error->flags); might_sleep(); - assert_rpm_wakelock_held(i915); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); /* Clear any previous failed attempts at recovery. Time to try again. */ @@ -1084,8 +1025,6 @@ void i915_reset(struct drm_i915_private *i915, finish: reset_finish(i915); - if (!__i915_wedged(error)) - reset_restart(i915); return; taint: @@ -1134,6 +1073,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); + if (!intel_wakeref_active(&engine->wakeref)) + return 0; + reset_prepare_engine(engine); if (msg) @@ -1165,7 +1107,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) * have been reset to their default values. Follow the init_ring * process to program RING_MODE, HWSP and re-enable submission. */ - ret = engine->init_hw(engine); + ret = engine->resume(engine); if (ret) goto out; @@ -1422,25 +1364,6 @@ int i915_terminally_wedged(struct drm_i915_private *i915) return __i915_wedged(error) ? 
-EIO : 0; } -bool i915_reset_flush(struct drm_i915_private *i915) -{ - int err; - - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); - - flush_workqueue(i915->wq); - GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart)); - - mutex_lock(&i915->drm.struct_mutex); - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - MAX_SCHEDULE_TIMEOUT); - mutex_unlock(&i915->drm.struct_mutex); - - return !err; -} - static void i915_wedge_me(struct work_struct *work) { struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 8e662bb43a9b..b52efaab4941 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -34,7 +34,6 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *reason); void i915_reset_request(struct i915_request *rq, bool guilty); -bool i915_reset_flush(struct drm_i915_private *i915); int __must_check i915_reset_trylock(struct drm_i915_private *i915); void i915_reset_unlock(struct drm_i915_private *i915, int tag); diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index f89541274d44..b791da2711e0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -637,12 +637,15 @@ static bool stop_ring(struct intel_engine_cs *engine) return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0; } -static int init_ring_common(struct intel_engine_cs *engine) +static int xcs_resume(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; struct intel_ring *ring = engine->buffer; int ret = 0; + GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n", + engine->name, ring->head, ring->tail); + intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); if (!stop_ring(engine)) { @@ -827,7 +830,7 @@ static int intel_rcs_ctx_init(struct i915_request *rq) return 0; } -static int init_render_ring(struct intel_engine_cs *engine) +static int rcs_resume(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -869,7 +872,7 @@ static int init_render_ring(struct intel_engine_cs *engine) if (IS_GEN_RANGE(dev_priv, 6, 7)) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - return init_ring_common(engine); + return xcs_resume(engine); } static void cancel_requests(struct intel_engine_cs *engine) @@ -2201,7 +2204,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, intel_ring_init_irq(dev_priv, engine); - engine->init_hw = init_ring_common; + engine->resume = xcs_resume; engine->reset.prepare = reset_prepare; engine->reset.reset = reset_ring; engine->reset.finish = reset_finish; @@ -2263,7 +2266,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (IS_HASWELL(dev_priv)) engine->emit_bb_start = hsw_emit_bb_start; - engine->init_hw = init_render_ring; + engine->resume = rcs_resume; ret = intel_init_ring_buffer(engine); if (ret) diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index bcfeb0c67997..a97a0ab35703 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -24,6 +24,7 @@ #include "i915_drv.h" #include "intel_context.h" +#include "intel_engine_pm.h" #include "mock_engine.h" #include "selftests/mock_request.h" @@ -268,6 +269,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, 
i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE); intel_engine_init_breadcrumbs(&engine->base); + intel_engine_init_execlists(&engine->base); + intel_engine_init__pm(&engine->base); /* fake hw queue */ spin_lock_init(&engine->hw_lock); diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index acd33aa46068..9dece55a091c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -24,6 +24,8 @@ #include +#include "intel_engine_pm.h" + #include "i915_selftest.h" #include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" @@ -479,19 +481,6 @@ static int igt_reset_nop(void *arg) break; } - if (!i915_reset_flush(i915)) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("%s failed to idle after reset\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - err = -EIO; - break; - } - err = igt_flush_test(i915, 0); if (err) break; @@ -594,19 +583,6 @@ static int igt_reset_nop_engine(void *arg) err = -EINVAL; break; } - - if (!i915_reset_flush(i915)) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("%s failed to idle after reset\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - err = -EIO; - break; - } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); pr_info("%s(%s): %d resets\n", __func__, engine->name, count); @@ -669,6 +645,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine); + intel_engine_pm_get(engine); set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { if (active) { @@ -721,21 +698,9 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) err = -EINVAL; break; } - - if (!i915_reset_flush(i915)) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("%s failed to idle after reset\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - err = -EIO; - break; - } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + intel_engine_pm_put(engine); if (err) break; @@ -942,6 +907,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, get_task_struct(tsk); } + intel_engine_pm_get(engine); set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { struct i915_request *rq = NULL; @@ -1018,6 +984,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + intel_engine_pm_put(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); @@ -1069,7 +1036,9 @@ unwind: if (err) break; - err = igt_flush_test(i915, 0); + mutex_lock(&i915->drm.struct_mutex); + err = igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); if (err) break; } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index e61e47421ed2..6b9e9b6d82f7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -71,7 +71,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { const u32 base = engine->mmio_base; struct drm_i915_gem_object *result; - intel_wakeref_t wakeref; struct i915_request *rq; struct i915_vma *vma; u32 
srm, *cs; @@ -103,9 +102,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) if (err) goto err_obj; - rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(engine->i915, wakeref) - rq = i915_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, ctx); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_pin; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f77263d42253..6972f9b6ae83 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2041,8 +2041,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) } seq_printf(m, "RPS enabled? %d\n", rps->enabled); - seq_printf(m, "GPU busy? %s [%d requests]\n", - yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); + seq_printf(m, "GPU busy? %s\n", yesno(dev_priv->gt.awake)); seq_printf(m, "Boosts outstanding? %d\n", atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); @@ -2061,9 +2060,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); - if (INTEL_GEN(dev_priv) >= 6 && - rps->enabled && - dev_priv->gt.active_requests) { + if (INTEL_GEN(dev_priv) >= 6 && rps->enabled && dev_priv->gt.awake) { u32 rpup, rpupei; u32 rpdown, rpdownei; @@ -3093,9 +3090,9 @@ static int i915_engine_info(struct seq_file *m, void *unused) wakeref = intel_runtime_pm_get(dev_priv); - seq_printf(m, "GT awake? %s\n", yesno(dev_priv->gt.awake)); - seq_printf(m, "Global active requests: %d\n", - dev_priv->gt.active_requests); + seq_printf(m, "GT awake? %s [%d]\n", + yesno(dev_priv->gt.awake), + atomic_read(&dev_priv->gt.wakeref.count)); seq_printf(m, "CS timestamp frequency: %u kHz\n", RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz); @@ -3941,8 +3938,7 @@ i915_drop_caches_set(void *data, u64 val) if (val & DROP_IDLE) { do { - if (READ_ONCE(i915->gt.active_requests)) - flush_delayed_work(&i915->gem.retire_work); + flush_delayed_work(&i915->gem.retire_work); drain_delayed_work(&i915->gem.idle_work); } while (READ_ONCE(i915->gt.awake)); } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ac416d2c02ca..824409ffd03f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -47,8 +47,9 @@ #include #include -#include "gt/intel_workarounds.h" +#include "gt/intel_gt_pm.h" #include "gt/intel_reset.h" +#include "gt/intel_workarounds.h" #include "i915_drv.h" #include "i915_pmu.h" @@ -2323,7 +2324,7 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_power_domains_resume(dev_priv); - intel_engines_sanitize(dev_priv, true); + intel_gt_sanitize(dev_priv, true); enable_rpm_wakeref_asserts(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 45e027f45e62..5c77bf5b735b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2006,10 +2006,10 @@ struct drm_i915_private { struct list_head hwsp_free_list; } timelines; - intel_engine_mask_t active_engines; struct list_head active_rings; struct list_head closed_vma; - u32 active_requests; + + struct intel_wakeref wakeref; /** * Is the GPU currently considered idle, or busy executing @@ -2020,12 +2020,16 @@ struct drm_i915_private { */ intel_wakeref_t awake; + struct blocking_notifier_head pm_notifications; + ktime_t last_init_time; struct i915_vma *scratch; } gt; struct { + struct notifier_block pm_notifier; + /** * We leave the user IRQ off as much as 
possible, * but this means that requests will finish and never diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7f833c97138e..54f27cabae2a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -39,6 +39,8 @@ #include #include +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt_pm.h" #include "gt/intel_mocs.h" #include "gt/intel_reset.h" #include "gt/intel_workarounds.h" @@ -2888,9 +2890,6 @@ wait_for_timelines(struct drm_i915_private *i915, struct i915_gt_timelines *gt = &i915->gt.timelines; struct i915_timeline *tl; - if (!READ_ONCE(i915->gt.active_requests)) - return timeout; - mutex_lock(>->mutex); list_for_each_entry(tl, >->active_list, link) { struct i915_request *rq; @@ -2930,9 +2929,10 @@ wait_for_timelines(struct drm_i915_private *i915, int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags, long timeout) { - GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", + GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n", flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", - timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); + timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "", + yesno(i915->gt.awake)); /* If the device is asleep, we have no requests outstanding */ if (!READ_ONCE(i915->gt.awake)) @@ -4154,7 +4154,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915) * it may impact the display and we are uncertain about the stability * of the reset, so this could be applied to even earlier gen. */ - intel_engines_sanitize(i915, false); + intel_gt_sanitize(i915, false); intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); intel_runtime_pm_put(i915, wakeref); @@ -4212,27 +4212,6 @@ static void init_unused_rings(struct drm_i915_private *dev_priv) } } -static int __i915_gem_restart_engines(void *data) -{ - struct drm_i915_private *i915 = data; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err; - - for_each_engine(engine, i915, id) { - err = engine->init_hw(engine); - if (err) { - DRM_ERROR("Failed to restart %s (%d)\n", - engine->name, err); - return err; - } - } - - intel_engines_set_scheduler_caps(i915); - - return 0; -} - int i915_gem_init_hw(struct drm_i915_private *dev_priv) { int ret; @@ -4291,12 +4270,13 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) intel_mocs_init_l3cc_table(dev_priv); /* Only when the HW is re-initialised, can we replay the requests */ - ret = __i915_gem_restart_engines(dev_priv); + ret = intel_engines_resume(dev_priv); if (ret) goto cleanup_uc; intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); + intel_engines_set_scheduler_caps(dev_priv); return 0; cleanup_uc: @@ -4602,6 +4582,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) err_init_hw: mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_set_wedged(dev_priv); i915_gem_suspend(dev_priv); i915_gem_suspend_late(dev_priv); @@ -4663,6 +4644,8 @@ err_uc_misc: void i915_gem_fini(struct drm_i915_private *dev_priv) { + GEM_BUG_ON(dev_priv->gt.awake); + i915_gem_suspend_late(dev_priv); intel_disable_gt_powersave(dev_priv); @@ -4757,6 +4740,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) { int err; + intel_gt_pm_init(dev_priv); + INIT_LIST_HEAD(&dev_priv->gt.active_rings); INIT_LIST_HEAD(&dev_priv->gt.closed_vma); diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 9074eb1e843f..67f8a4a807a0 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -75,9 +75,6 @@ struct 
drm_i915_private; #define I915_GEM_IDLE_TIMEOUT (HZ / 5) -void i915_gem_park(struct drm_i915_private *i915); -void i915_gem_unpark(struct drm_i915_private *i915); - static inline void __tasklet_disable_sync_once(struct tasklet_struct *t) { if (!atomic_fetch_inc(&t->count)) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3eb1a664b5fa..76ed74e75d82 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -824,26 +824,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, return 0; } -static struct i915_request * -last_request_on_engine(struct i915_timeline *timeline, - struct intel_engine_cs *engine) -{ - struct i915_request *rq; - - GEM_BUG_ON(timeline == &engine->timeline); - - rq = i915_active_request_raw(&timeline->last_request, - &engine->i915->drm.struct_mutex); - if (rq && rq->engine->mask & engine->mask) { - GEM_TRACE("last request on engine %s: %llx:%llu\n", - engine->name, rq->fence.context, rq->fence.seqno); - GEM_BUG_ON(rq->timeline != timeline); - return rq; - } - - return NULL; -} - struct context_barrier_task { struct i915_active base; void (*task)(void *data); @@ -871,7 +851,6 @@ static int context_barrier_task(struct i915_gem_context *ctx, struct drm_i915_private *i915 = ctx->i915; struct context_barrier_task *cb; struct intel_context *ce, *next; - intel_wakeref_t wakeref; int err = 0; lockdep_assert_held(&i915->drm.struct_mutex); @@ -884,7 +863,6 @@ static int context_barrier_task(struct i915_gem_context *ctx, i915_active_init(i915, &cb->base, cb_retire); i915_active_acquire(&cb->base); - wakeref = intel_runtime_pm_get(i915); rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) { struct intel_engine_cs *engine = ce->engine; struct i915_request *rq; @@ -914,7 +892,6 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (err) break; } - intel_runtime_pm_put(i915, wakeref); cb->task = err ? NULL : task; /* caller needs to unwind instead */ cb->data = data; @@ -924,54 +901,6 @@ static int context_barrier_task(struct i915_gem_context *ctx, return err; } -int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, - intel_engine_mask_t mask) -{ - struct intel_engine_cs *engine; - - GEM_TRACE("awake?=%s\n", yesno(i915->gt.awake)); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(!i915->kernel_context); - - /* Inoperable, so presume the GPU is safely pointing into the void! 
*/ - if (i915_terminally_wedged(i915)) - return 0; - - for_each_engine_masked(engine, i915, mask, mask) { - struct intel_ring *ring; - struct i915_request *rq; - - rq = i915_request_create(engine->kernel_context); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - /* Queue this switch after all other activity */ - list_for_each_entry(ring, &i915->gt.active_rings, active_link) { - struct i915_request *prev; - - prev = last_request_on_engine(ring->timeline, engine); - if (!prev) - continue; - - if (prev->gem_context == i915->kernel_context) - continue; - - GEM_TRACE("add barrier on %s for %llx:%lld\n", - engine->name, - prev->fence.context, - prev->fence.seqno); - i915_sw_fence_await_sw_fence_gfp(&rq->submit, - &prev->submit, - I915_FENCE_GFP); - } - - i915_request_add(rq); - } - - return 0; -} - static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1169,9 +1098,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, static int gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) { - struct drm_i915_private *i915 = ce->engine->i915; struct i915_request *rq; - intel_wakeref_t wakeref; int ret; lockdep_assert_held(&ce->pin_mutex); @@ -1185,14 +1112,9 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) if (!intel_context_is_pinned(ce)) return 0; - /* Submitting requests etc needs the hw awake. */ - wakeref = intel_runtime_pm_get(i915); - rq = i915_request_create(ce->engine->kernel_context); - if (IS_ERR(rq)) { - ret = PTR_ERR(rq); - goto out_put; - } + if (IS_ERR(rq)) + return PTR_ERR(rq); /* Queue this switch after all other activity by this context. */ ret = i915_active_request_set(&ce->ring->timeline->last_request, rq); @@ -1216,9 +1138,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) out_add: i915_request_add(rq); -out_put: - intel_runtime_pm_put(i915, wakeref); - return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index cec278ab04e2..5a8e080499fb 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -141,10 +141,6 @@ int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file); void i915_gem_context_close(struct drm_file *file); -int i915_switch_context(struct i915_request *rq); -int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask); - void i915_gem_context_release(struct kref *ctx_ref); struct i915_gem_context * i915_gem_context_create_gvt(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 060f5903544a..0bdb3e072ba5 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -36,15 +36,8 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl { bool fail_if_busy:1; } igt_evict_ctl;) -static bool ggtt_is_idle(struct drm_i915_private *i915) -{ - return !i915->gt.active_requests; -} - static int ggtt_flush(struct drm_i915_private *i915) { - int err; - /* * Not everything in the GGTT is tracked via vma (otherwise we * could evict as required with minimal stalling) so we are forced @@ -52,19 +45,10 @@ static int ggtt_flush(struct drm_i915_private *i915) * the hopes that we can then remove contexts and the like only * bound by their active reference. 
*/ - err = i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines); - if (err) - return err; - - err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (err) - return err; - - GEM_BUG_ON(!ggtt_is_idle(i915)); - return 0; + return i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); } static bool @@ -222,24 +206,17 @@ search_again: * us a termination condition, when the last retired context is * the kernel's there is no more we can evict. */ - if (!ggtt_is_idle(dev_priv)) { - if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) - return -EBUSY; + if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) + return -EBUSY; - ret = ggtt_flush(dev_priv); - if (ret) - return ret; + ret = ggtt_flush(dev_priv); + if (ret) + return ret; - cond_resched(); - goto search_again; - } + cond_resched(); - /* - * If we still have pending pageflip completions, drop - * back to userspace to give our workqueues time to - * acquire our locks and unpin the old scanouts. - */ - return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC; + flags |= PIN_NONBLOCK; + goto search_again; found: /* drm_mm doesn't allow any other other operations while diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c index 9fb0e8d567a2..3554d55dae35 100644 --- a/drivers/gpu/drm/i915/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/i915_gem_pm.c @@ -4,136 +4,63 @@ * Copyright © 2019 Intel Corporation */ +#include "gt/intel_gt_pm.h" + #include "i915_drv.h" #include "i915_gem_pm.h" #include "i915_globals.h" -#include "intel_pm.h" -static void __i915_gem_park(struct drm_i915_private *i915) +static void i915_gem_park(struct drm_i915_private *i915) { - intel_wakeref_t wakeref; - - GEM_TRACE("\n"); + struct intel_engine_cs *engine; + enum intel_engine_id id; lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(i915->gt.active_requests); - GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); - - if (!i915->gt.awake) - return; - - /* - * Be paranoid and flush a concurrent interrupt to make sure - * we don't reactivate any irq tasklets after parking. - * - * FIXME: Note that even though we have waited for execlists to be idle, - * there may still be an in-flight interrupt even though the CSB - * is now empty. synchronize_irq() makes sure that a residual interrupt - * is completed before we continue, but it doesn't prevent the HW from - * raising a spurious interrupt later. To complete the shield we should - * coordinate disabling the CS irq with flushing the interrupts. - */ - synchronize_irq(i915->drm.irq); - - intel_engines_park(i915); - i915_timelines_park(i915); - - i915_pmu_gt_parked(i915); - i915_vma_parked(i915); - - wakeref = fetch_and_zero(&i915->gt.awake); - GEM_BUG_ON(!wakeref); - - if (INTEL_GEN(i915) >= 6) - gen6_rps_idle(i915); - - intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); - - i915_globals_park(); -} - -static bool switch_to_kernel_context_sync(struct drm_i915_private *i915, - unsigned long mask) -{ - bool result = true; - - /* - * Even if we fail to switch, give whatever is running a small chance - * to save itself before we report the failure. Yes, this may be a - * false positive due to e.g. ENOMEM, caveat emptor! 
- */ - if (i915_gem_switch_to_kernel_context(i915, mask)) - result = false; - if (i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - I915_GEM_IDLE_TIMEOUT)) - result = false; + for_each_engine(engine, i915, id) { + /* + * We are committed now to parking the engines, make sure there + * will be no more interrupts arriving later and the engines + * are truly idle. + */ + if (wait_for(intel_engine_is_idle(engine), 10)) { + struct drm_printer p = drm_debug_printer(__func__); - if (!result) { - if (i915_modparams.reset) { /* XXX hide warning from gem_eio */ dev_err(i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); + "%s is not idle before parking\n", + engine->name); + intel_engine_dump(engine, &p, NULL); } + tasklet_kill(&engine->execlists.tasklet); - /* Forcibly cancel outstanding work and leave the gpu quiet. */ - i915_gem_set_wedged(i915); + i915_gem_batch_pool_fini(&engine->batch_pool); } - i915_retire_requests(i915); /* ensure we flush after wedging */ - return result; + i915_timelines_park(i915); + i915_vma_parked(i915); + + i915_globals_park(); } static void idle_work_handler(struct work_struct *work) { struct drm_i915_private *i915 = container_of(work, typeof(*i915), gem.idle_work.work); - bool rearm_hangcheck; - - if (!READ_ONCE(i915->gt.awake)) - return; - - if (READ_ONCE(i915->gt.active_requests)) - return; - - rearm_hangcheck = - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); if (!mutex_trylock(&i915->drm.struct_mutex)) { /* Currently busy, come back later */ mod_delayed_work(i915->wq, &i915->gem.idle_work, msecs_to_jiffies(50)); - goto out_rearm; + return; } - /* - * Flush out the last user context, leaving only the pinned - * kernel context resident. Should anything unfortunate happen - * while we are idle (such as the GPU being power cycled), no users - * will be harmed. - */ - if (!work_pending(&i915->gem.idle_work.work) && - !i915->gt.active_requests) { - ++i915->gt.active_requests; /* don't requeue idle */ - - switch_to_kernel_context_sync(i915, i915->gt.active_engines); - - if (!--i915->gt.active_requests) { - __i915_gem_park(i915); - rearm_hangcheck = false; - } - } + intel_wakeref_lock(&i915->gt.wakeref); + if (!intel_wakeref_active(&i915->gt.wakeref)) + i915_gem_park(i915); + intel_wakeref_unlock(&i915->gt.wakeref); mutex_unlock(&i915->drm.struct_mutex); - -out_rearm: - if (rearm_hangcheck) { - GEM_BUG_ON(!i915->gt.awake); - i915_queue_hangcheck(i915); - } } static void retire_work_handler(struct work_struct *work) @@ -147,97 +74,76 @@ static void retire_work_handler(struct work_struct *work) mutex_unlock(&i915->drm.struct_mutex); } - /* - * Keep the retire handler running until we are finally idle. - * We do not need to do this test under locking as in the worst-case - * we queue the retire worker once too often. 
- */ - if (READ_ONCE(i915->gt.awake)) + if (intel_wakeref_active(&i915->gt.wakeref)) queue_delayed_work(i915->wq, &i915->gem.retire_work, round_jiffies_up_relative(HZ)); } -void i915_gem_park(struct drm_i915_private *i915) +static int pm_notifier(struct notifier_block *nb, + unsigned long action, + void *data) { - GEM_TRACE("\n"); + struct drm_i915_private *i915 = + container_of(nb, typeof(*i915), gem.pm_notifier); - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(i915->gt.active_requests); + switch (action) { + case INTEL_GT_UNPARK: + i915_globals_unpark(); + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); + break; - if (!i915->gt.awake) - return; + case INTEL_GT_PARK: + mod_delayed_work(i915->wq, + &i915->gem.idle_work, + msecs_to_jiffies(100)); + break; + } - /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ - mod_delayed_work(i915->wq, &i915->gem.idle_work, msecs_to_jiffies(100)); + return NOTIFY_OK; } -void i915_gem_unpark(struct drm_i915_private *i915) +static bool switch_to_kernel_context_sync(struct drm_i915_private *i915) { - GEM_TRACE("\n"); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(!i915->gt.active_requests); - assert_rpm_wakelock_held(i915); - - if (i915->gt.awake) - return; - - /* - * It seems that the DMC likes to transition between the DC states a lot - * when there are no connected displays (no active power domains) during - * command submission. - * - * This activity has negative impact on the performance of the chip with - * huge latencies observed in the interrupt handler and elsewhere. - * - * Work around it by grabbing a GT IRQ power domain whilst there is any - * GT activity, preventing any DC state transitions. - */ - i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); - GEM_BUG_ON(!i915->gt.awake); - - i915_globals_unpark(); - - intel_enable_gt_powersave(i915); - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); - i915_pmu_gt_unparked(i915); - - intel_engines_unpark(i915); + bool result = true; - i915_queue_hangcheck(i915); + do { + if (i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED | + I915_WAIT_FOR_IDLE_BOOST, + I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* XXX hide warning from gem_eio */ + if (i915_modparams.reset) { + dev_err(i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + GEM_TRACE_DUMP(); + } + + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. + */ + i915_gem_set_wedged(i915); + result = false; + } + } while (i915_retire_requests(i915) && result); - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); + GEM_BUG_ON(i915->gt.awake); + return result; } bool i915_gem_load_power_context(struct drm_i915_private *i915) { - /* Force loading the kernel context on all engines */ - if (!switch_to_kernel_context_sync(i915, ALL_ENGINES)) - return false; - - /* - * Immediately park the GPU so that we enable powersaving and - * treat it as idle. The next time we issue a request, we will - * unpark and start using the engine->pinned_default_state, otherwise - * it is in limbo and an early reset may fail. 
- */ - __i915_gem_park(i915); - - return true; + return switch_to_kernel_context_sync(i915); } void i915_gem_suspend(struct drm_i915_private *i915) { - intel_wakeref_t wakeref; - GEM_TRACE("\n"); - wakeref = intel_runtime_pm_get(i915); + flush_workqueue(i915->wq); mutex_lock(&i915->drm.struct_mutex); @@ -250,10 +156,16 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - switch_to_kernel_context_sync(i915, i915->gt.active_engines); + switch_to_kernel_context_sync(i915); mutex_unlock(&i915->drm.struct_mutex); - i915_reset_flush(i915); + + /* + * Assert that we successfully flushed all the work and + * reset the GPU back to its idle, low power state. + */ + GEM_BUG_ON(i915->gt.awake); + cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); drain_delayed_work(&i915->gem.retire_work); @@ -263,17 +175,9 @@ void i915_gem_suspend(struct drm_i915_private *i915) */ drain_delayed_work(&i915->gem.idle_work); - flush_workqueue(i915->wq); - - /* - * Assert that we successfully flushed all the work and - * reset the GPU back to its idle, low power state. - */ - GEM_BUG_ON(i915->gt.awake); + i915_gem_drain_freed_objects(i915); intel_uc_suspend(i915); - - intel_runtime_pm_put(i915, wakeref); } void i915_gem_suspend_late(struct drm_i915_private *i915) @@ -362,4 +266,8 @@ void i915_gem_init__pm(struct drm_i915_private *i915) { INIT_DELAYED_WORK(&i915->gem.idle_work, idle_work_handler); INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); + + i915->gem.pm_notifier.notifier_call = pm_notifier; + blocking_notifier_chain_register(&i915->gt.pm_notifications, + &i915->gem.pm_notifier); } diff --git a/drivers/gpu/drm/i915/i915_gem_pm.h b/drivers/gpu/drm/i915/i915_gem_pm.h index 52f65e3f06b5..6f7d5d11ac3b 100644 --- a/drivers/gpu/drm/i915/i915_gem_pm.h +++ b/drivers/gpu/drm/i915/i915_gem_pm.h @@ -17,9 +17,6 @@ void i915_gem_init__pm(struct drm_i915_private *i915); bool i915_gem_load_power_context(struct drm_i915_private *i915); void i915_gem_resume(struct drm_i915_private *i915); -void i915_gem_unpark(struct drm_i915_private *i915); -void i915_gem_park(struct drm_i915_private *i915); - void i915_gem_idle_work_handler(struct work_struct *work); void i915_gem_suspend(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index b419d0f59275..2ecd0c6a1c94 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -179,8 +179,6 @@ struct i915_gpu_state { struct scatterlist *sgl, *fit; }; -struct i915_gpu_restart; - struct i915_gpu_error { /* For hangcheck timer */ #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ @@ -241,8 +239,6 @@ struct i915_gpu_error { wait_queue_head_t reset_queue; struct srcu_struct reset_backoff_srcu; - - struct i915_gpu_restart *restart; }; struct drm_i915_error_state_buf { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 705c125bafc6..11c484e679b6 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -430,6 +430,8 @@ void __i915_request_submit(struct i915_request *request) /* Transfer from per-context onto the global per-engine timeline */ move_to_timeline(request, &engine->timeline); + engine->serial++; + trace_i915_request_execute(request); } @@ -1145,7 +1147,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) list_add_tail(&rq->ring_link, &ring->request_list); if 
(list_is_first(&rq->ring_link, &ring->request_list)) list_add(&ring->active_link, &rq->i915->gt.active_rings); - rq->i915->gt.active_engines |= rq->engine->mask; rq->emitted_jiffies = jiffies; /* @@ -1440,21 +1441,20 @@ out: return timeout; } -void i915_retire_requests(struct drm_i915_private *i915) +bool i915_retire_requests(struct drm_i915_private *i915) { struct intel_ring *ring, *tmp; lockdep_assert_held(&i915->drm.struct_mutex); - if (!i915->gt.active_requests) - return; - list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link) { intel_ring_get(ring); /* last rq holds reference! */ ring_retire_requests(ring); intel_ring_put(ring); } + + return !list_empty(&i915->gt.active_rings); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 36f13b74ec58..1eee7416af31 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -425,6 +425,6 @@ static inline void i915_request_mark_complete(struct i915_request *rq) rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */ } -void i915_retire_requests(struct drm_i915_private *i915); +bool i915_retire_requests(struct drm_i915_private *i915); #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 30a8e376d19f..01ea36e3150c 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -481,26 +481,22 @@ void intel_uc_reset_prepare(struct drm_i915_private *i915) intel_uc_sanitize(i915); } -int intel_uc_suspend(struct drm_i915_private *i915) +void intel_uc_suspend(struct drm_i915_private *i915) { struct intel_guc *guc = &i915->guc; + intel_wakeref_t wakeref; int err; - if (!USES_GUC(i915)) - return 0; - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return 0; - - err = intel_guc_suspend(guc); - if (err) { - DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); - return err; - } + return; - guc_disable_communication(guc); + with_intel_runtime_pm(i915, wakeref) { + err = intel_guc_suspend(guc); + if (err) + DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); - return 0; + guc_disable_communication(guc); + } } int intel_uc_resume(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index c14729786652..c92436b1f1c5 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -39,7 +39,7 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv); int intel_uc_init(struct drm_i915_private *dev_priv); void intel_uc_fini(struct drm_i915_private *dev_priv); void intel_uc_reset_prepare(struct drm_i915_private *i915); -int intel_uc_suspend(struct drm_i915_private *dev_priv); +void intel_uc_suspend(struct drm_i915_private *i915); int intel_uc_resume(struct drm_i915_private *dev_priv); static inline bool intel_uc_is_using_guc(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 6fd70d326468..0342de369d3e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -16,26 +16,18 @@ static int switch_to_context(struct drm_i915_private *i915, { struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; - int err = 0; - - wakeref = intel_runtime_pm_get(i915); for_each_engine(engine, i915, id) { struct i915_request *rq; rq = i915_request_alloc(engine, ctx); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - 
break; - } + if (IS_ERR(rq)) + return PTR_ERR(rq); i915_request_add(rq); } - intel_runtime_pm_put(i915, wakeref); - - return err; + return 0; } static void trash_stolen(struct drm_i915_private *i915) @@ -120,7 +112,7 @@ static void pm_resume(struct drm_i915_private *i915) * that runtime-pm just works. */ with_intel_runtime_pm(i915, wakeref) { - intel_engines_sanitize(i915, false); + intel_gt_sanitize(i915, false); i915_gem_sanitize(i915); i915_gem_resume(i915); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 9d646fa1b74e..71d896bbade2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -1608,113 +1608,6 @@ __engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines) return "none"; } -static int __igt_switch_to_kernel_context(struct drm_i915_private *i915, - struct i915_gem_context *ctx, - intel_engine_mask_t engines) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - int pass; - - GEM_TRACE("Testing %s\n", __engine_name(i915, engines)); - for (pass = 0; pass < 4; pass++) { /* Once busy; once idle; repeat */ - bool from_idle = pass & 1; - int err; - - if (!from_idle) { - for_each_engine_masked(engine, i915, engines, tmp) { - struct i915_request *rq; - - rq = i915_request_alloc(engine, ctx); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - i915_request_add(rq); - } - } - - err = i915_gem_switch_to_kernel_context(i915, - i915->gt.active_engines); - if (err) - return err; - - if (!from_idle) { - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (err) - return err; - } - - if (i915->gt.active_requests) { - pr_err("%d active requests remain after switching to kernel context, pass %d (%s) on %s engine%s\n", - i915->gt.active_requests, - pass, from_idle ? "idle" : "busy", - __engine_name(i915, engines), - is_power_of_2(engines) ? "" : "s"); - return -EINVAL; - } - - /* XXX Bonus points for proving we are the kernel context! */ - - mutex_unlock(&i915->drm.struct_mutex); - drain_delayed_work(&i915->gem.idle_work); - mutex_lock(&i915->drm.struct_mutex); - } - - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - return -EIO; - - return 0; -} - -static int igt_switch_to_kernel_context(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - enum intel_engine_id id; - intel_wakeref_t wakeref; - int err; - - /* - * A core premise of switching to the kernel context is that - * if an engine is already idling in the kernel context, we - * do not emit another request and wake it up. The other being - * that we do indeed end up idling in the kernel context. 
- */ - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - ctx = kernel_context(i915); - if (IS_ERR(ctx)) { - mutex_unlock(&i915->drm.struct_mutex); - return PTR_ERR(ctx); - } - - /* First check idling each individual engine */ - for_each_engine(engine, i915, id) { - err = __igt_switch_to_kernel_context(i915, ctx, BIT(id)); - if (err) - goto out_unlock; - } - - /* Now en masse */ - err = __igt_switch_to_kernel_context(i915, ctx, ALL_ENGINES); - if (err) - goto out_unlock; - -out_unlock: - GEM_TRACE_DUMP_ON(err); - - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - - kernel_context_close(ctx); - return err; -} - static void mock_barrier_task(void *data) { unsigned int *counter = data; @@ -1729,7 +1622,6 @@ static int mock_context_barrier(void *arg) struct drm_i915_private *i915 = arg; struct i915_gem_context *ctx; struct i915_request *rq; - intel_wakeref_t wakeref; unsigned int counter; int err; @@ -1772,9 +1664,7 @@ static int mock_context_barrier(void *arg) goto out; } - rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(i915, wakeref) - rq = i915_request_alloc(i915->engine[RCS0], ctx); + rq = i915_request_alloc(i915->engine[RCS0], ctx); if (IS_ERR(rq)) { pr_err("Request allocation failed!\n"); goto out; @@ -1824,7 +1714,6 @@ unlock: int i915_gem_context_mock_selftests(void) { static const struct i915_subtest tests[] = { - SUBTEST(igt_switch_to_kernel_context), SUBTEST(mock_context_barrier), }; struct drm_i915_private *i915; @@ -1843,7 +1732,6 @@ int i915_gem_context_mock_selftests(void) int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) { static const struct i915_subtest tests[] = { - SUBTEST(igt_switch_to_kernel_context), SUBTEST(live_nop_switch), SUBTEST(igt_ctx_exec), SUBTEST(igt_ctx_readonly), diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 12203d665a4e..088b2aa05dcd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -24,6 +24,7 @@ #include "../i915_selftest.h" +#include "igt_flush_test.h" #include "mock_gem_device.h" #include "huge_gem_object.h" @@ -505,19 +506,23 @@ static void disable_retire_worker(struct drm_i915_private *i915) { i915_gem_shrinker_unregister(i915); - mutex_lock(&i915->drm.struct_mutex); - if (!i915->gt.active_requests++) { - intel_wakeref_t wakeref; - - with_intel_runtime_pm(i915, wakeref) - i915_gem_unpark(i915); - } - mutex_unlock(&i915->drm.struct_mutex); + intel_gt_pm_get(i915); cancel_delayed_work_sync(&i915->gem.retire_work); cancel_delayed_work_sync(&i915->gem.idle_work); } +static void restore_retire_worker(struct drm_i915_private *i915) +{ + intel_gt_pm_put(i915); + + mutex_lock(&i915->drm.struct_mutex); + igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + + i915_gem_shrinker_register(i915); +} + static int igt_mmap_offset_exhaustion(void *arg) { struct drm_i915_private *i915 = arg; @@ -615,13 +620,7 @@ static int igt_mmap_offset_exhaustion(void *arg) out: drm_mm_remove_node(&resv); out_park: - mutex_lock(&i915->drm.struct_mutex); - if (--i915->gt.active_requests) - queue_delayed_work(i915->wq, &i915->gem.retire_work, 0); - else - queue_delayed_work(i915->wq, &i915->gem.idle_work, 0); - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_shrinker_register(i915); + restore_retire_worker(i915); return err; err_obj: i915_gem_object_put(obj); diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c 
b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index 94aee4071a66..e42f3c58536a 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -11,23 +11,29 @@ int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) { + int ret = i915_terminally_wedged(i915) ? -EIO : 0; + int repeat = !!(flags & I915_WAIT_LOCKED); + cond_resched(); - if (flags & I915_WAIT_LOCKED && - i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines)) { - pr_err("Failed to switch back to kernel context; declaring wedged\n"); - i915_gem_set_wedged(i915); - } + do { + if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) { + pr_err("%pS timed out, cancelling all further testing.\n", + __builtin_return_address(0)); - if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) { - pr_err("%pS timed out, cancelling all further testing.\n", - __builtin_return_address(0)); + GEM_TRACE("%pS timed out.\n", + __builtin_return_address(0)); + GEM_TRACE_DUMP(); - GEM_TRACE("%pS timed out.\n", __builtin_return_address(0)); - GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + repeat = 0; + ret = -EIO; + } - i915_gem_set_wedged(i915); - } + /* Ensure we also flush after wedging. */ + if (flags & I915_WAIT_LOCKED) + i915_retire_requests(i915); + } while (repeat--); - return i915_terminally_wedged(i915); + return ret; } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index fb677b4019a0..c072424c6b7c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -41,11 +41,10 @@ void mock_device_flush(struct drm_i915_private *i915) lockdep_assert_held(&i915->drm.struct_mutex); - for_each_engine(engine, i915, id) - mock_engine_flush(engine); - - i915_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests); + do { + for_each_engine(engine, i915, id) + mock_engine_flush(engine); + } while (i915_retire_requests(i915)); } static void mock_device_release(struct drm_device *dev) @@ -110,10 +109,6 @@ static void mock_retire_work_handler(struct work_struct *work) static void mock_idle_work_handler(struct work_struct *work) { - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gem.idle_work.work); - - i915->gt.active_engines = 0; } static int pm_domain_resume(struct device *dev) @@ -185,6 +180,8 @@ struct drm_i915_private *mock_gem_device(void) mock_uncore_init(&i915->uncore); i915_gem_init__mm(i915); + intel_gt_pm_init(i915); + atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */ init_waitqueue_head(&i915->gpu_error.wait_queue); init_waitqueue_head(&i915->gpu_error.reset_queue); -- cgit From 8f2a1057d6ec217aefb8bf0de6996294452a2577 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 25 Apr 2019 06:01:43 +0100 Subject: drm/i915: Explicitly pin the logical context for execbuf In order to separate the reservation phase of building a request from its emission phase, we need to pull some of the request alloc activities from deep inside i915_request to the surface, GEM_EXECBUFFER. v2: Be frivolous, use a local drm_i915_private. 
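To illustrate the split this enables, a simplified sketch of the new call order (condensed from the diff below; error paths trimmed, field names as in the patch), pinning the logical context once up front and only then building the request against it:

static int sketch_execbuf_flow(struct i915_execbuffer *eb,
			       struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;

	/* Reservation phase: pin the logical context state once, up front. */
	ce = intel_context_pin(eb->gem_context, engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);
	eb->context = ce;

	/* Emission phase: build the request directly on the pinned context. */
	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		intel_context_unpin(ce);
		return PTR_ERR(rq);
	}

	/* ... emit relocations, the batch and fences here ... */

	i915_request_add(rq);
	intel_context_unpin(ce);
	return 0;
}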
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190425050143.811-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 108 ++++++++++++++++++----------- drivers/gpu/drm/i915/i915_request.c | 9 --- 2 files changed, 69 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 3d672c9edb94..794af8edc6a2 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -34,6 +34,8 @@ #include #include +#include "gt/intel_gt_pm.h" + #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_trace.h" @@ -236,7 +238,8 @@ struct i915_execbuffer { unsigned int *flags; struct intel_engine_cs *engine; /** engine to queue the request to */ - struct i915_gem_context *ctx; /** context for building the request */ + struct intel_context *context; /* logical state for the request */ + struct i915_gem_context *gem_context; /** caller's context */ struct i915_address_space *vm; /** GTT and vma for the request */ struct i915_request *request; /** our request to build */ @@ -738,7 +741,7 @@ static int eb_select_context(struct i915_execbuffer *eb) if (unlikely(!ctx)) return -ENOENT; - eb->ctx = ctx; + eb->gem_context = ctx; if (ctx->ppgtt) { eb->vm = &ctx->ppgtt->vm; eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; @@ -784,7 +787,6 @@ static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring) static int eb_wait_for_ring(const struct i915_execbuffer *eb) { - const struct intel_context *ce; struct i915_request *rq; int ret = 0; @@ -794,11 +796,7 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb) * keeping all of their resources pinned. */ - ce = intel_context_lookup(eb->ctx, eb->engine); - if (!ce || !ce->ring) /* first use, assume empty! 
*/ - return 0; - - rq = __eb_wait_for_ring(ce->ring); + rq = __eb_wait_for_ring(eb->context->ring); if (rq) { mutex_unlock(&eb->i915->drm.struct_mutex); @@ -817,15 +815,15 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb) static int eb_lookup_vmas(struct i915_execbuffer *eb) { - struct radix_tree_root *handles_vma = &eb->ctx->handles_vma; + struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma; struct drm_i915_gem_object *obj; unsigned int i, batch; int err; - if (unlikely(i915_gem_context_is_closed(eb->ctx))) + if (unlikely(i915_gem_context_is_closed(eb->gem_context))) return -ENOENT; - if (unlikely(i915_gem_context_is_banned(eb->ctx))) + if (unlikely(i915_gem_context_is_banned(eb->gem_context))) return -EIO; INIT_LIST_HEAD(&eb->relocs); @@ -870,8 +868,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) if (!vma->open_count++) i915_vma_reopen(vma); list_add(&lut->obj_link, &obj->lut_list); - list_add(&lut->ctx_link, &eb->ctx->handles_list); - lut->ctx = eb->ctx; + list_add(&lut->ctx_link, &eb->gem_context->handles_list); + lut->ctx = eb->gem_context; lut->handle = handle; add_vma: @@ -1227,7 +1225,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_unmap; - rq = i915_request_alloc(eb->engine, eb->ctx); + rq = i915_request_create(eb->context); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; @@ -2088,31 +2086,65 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { [I915_EXEC_VEBOX] = VECS0 }; -static struct intel_engine_cs * -eb_select_engine(struct drm_i915_private *dev_priv, +static int eb_pin_context(struct i915_execbuffer *eb, + struct intel_engine_cs *engine) +{ + struct intel_context *ce; + int err; + + /* + * ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged. + */ + err = i915_terminally_wedged(eb->i915); + if (err) + return err; + + /* + * Pinning the contexts may generate requests in order to acquire + * GGTT space, so do this first before we reserve a seqno for + * ourselves. 
+ */ + ce = intel_context_pin(eb->gem_context, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + eb->engine = engine; + eb->context = ce; + return 0; +} + +static void eb_unpin_context(struct i915_execbuffer *eb) +{ + intel_context_unpin(eb->context); +} + +static int +eb_select_engine(struct i915_execbuffer *eb, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args) { + struct drm_i915_private *i915 = eb->i915; unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; struct intel_engine_cs *engine; if (user_ring_id > I915_USER_RINGS) { DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); - return NULL; + return -EINVAL; } if ((user_ring_id != I915_EXEC_BSD) && ((args->flags & I915_EXEC_BSD_MASK) != 0)) { DRM_DEBUG("execbuf with non bsd ring but with invalid " "bsd dispatch flags: %d\n", (int)(args->flags)); - return NULL; + return -EINVAL; } - if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) { + if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) { unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { - bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file); + bsd_idx = gen8_dispatch_bsd_engine(i915, file); } else if (bsd_idx >= I915_EXEC_BSD_RING1 && bsd_idx <= I915_EXEC_BSD_RING2) { bsd_idx >>= I915_EXEC_BSD_SHIFT; @@ -2120,20 +2152,20 @@ eb_select_engine(struct drm_i915_private *dev_priv, } else { DRM_DEBUG("execbuf with unknown bsd ring: %u\n", bsd_idx); - return NULL; + return -EINVAL; } - engine = dev_priv->engine[_VCS(bsd_idx)]; + engine = i915->engine[_VCS(bsd_idx)]; } else { - engine = dev_priv->engine[user_ring_map[user_ring_id]]; + engine = i915->engine[user_ring_map[user_ring_id]]; } if (!engine) { DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id); - return NULL; + return -EINVAL; } - return engine; + return eb_pin_context(eb, engine); } static void @@ -2275,7 +2307,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct sync_file *out_fence = NULL; - intel_wakeref_t wakeref; int out_fence_fd = -1; int err; @@ -2335,12 +2366,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_destroy; - eb.engine = eb_select_engine(eb.i915, file, args); - if (!eb.engine) { - err = -EINVAL; - goto err_engine; - } - /* * Take a local wakeref for preparing to dispatch the execbuf as * we expect to access the hardware fairly frequently in the @@ -2348,16 +2373,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, * wakeref that we hold until the GPU has been idle for at least * 100ms. */ - wakeref = intel_runtime_pm_get(eb.i915); + intel_gt_pm_get(eb.i915); err = i915_mutex_lock_interruptible(dev); if (err) goto err_rpm; - err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ + err = eb_select_engine(&eb, file, args); if (unlikely(err)) goto err_unlock; + err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ + if (unlikely(err)) + goto err_engine; + err = eb_relocate(&eb); if (err) { /* @@ -2441,7 +2470,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, GEM_BUG_ON(eb.reloc_cache.rq); /* Allocate a request for this batch buffer nice and early. 
*/ - eb.request = i915_request_alloc(eb.engine, eb.ctx); + eb.request = i915_request_create(eb.context); if (IS_ERR(eb.request)) { err = PTR_ERR(eb.request); goto err_batch_unpin; @@ -2479,8 +2508,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb); err_request: - i915_request_add(eb.request); add_to_client(eb.request, file); + i915_request_add(eb.request); if (fences) signal_fence_array(&eb, fences); @@ -2502,12 +2531,13 @@ err_batch_unpin: err_vma: if (eb.exec) eb_release_vmas(&eb); +err_engine: + eb_unpin_context(&eb); err_unlock: mutex_unlock(&dev->struct_mutex); err_rpm: - intel_runtime_pm_put(eb.i915, wakeref); -err_engine: - i915_gem_context_put(eb.ctx); + intel_gt_pm_put(eb.i915); + i915_gem_context_put(eb.gem_context); err_destroy: eb_destroy(&eb); err_out_fence: diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 11c484e679b6..5869c37a35e1 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -785,7 +785,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) struct drm_i915_private *i915 = engine->i915; struct intel_context *ce; struct i915_request *rq; - int ret; /* * Preempt contexts are reserved for exclusive use to inject a @@ -794,14 +793,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) */ GEM_BUG_ON(ctx == i915->preempt_context); - /* - * ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged. - */ - ret = i915_terminally_wedged(i915); - if (ret) - return ERR_PTR(ret); - /* * Pinning the contexts may generate requests in order to acquire * GGTT space, so do this first before we reserve a seqno for -- cgit From 1f2b4a7edbc3b89c27f661b81a699095d922467c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 25 Apr 2019 06:43:33 +0100 Subject: drm/i915: Allow multiple user handles to the same VM It was noted that we made the same mistake for VM_ID as for object handles, whereby we ensured that we only allocated a single handle for one ppgtt. This has the unfortunate consequence for userspace that they need to reference count the handles to avoid destroying an active ID. If we allow multiple handles to the same ppgtt, userspace can freely unreference any handle they own without fear of destroying the same handle in use elsewhere. 
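As a reference point for the semantics this change adopts, here is a minimal userspace-style model (not driver code, and none of these names are part of the i915 uAPI): every lookup hands out a fresh handle that owns its own reference on the shared VM, so destroying one handle never invalidates another.

/* Standalone sketch of per-handle referencing; hypothetical names. */
#include <stdio.h>
#include <stdlib.h>

struct vm {
    int refcount;               /* shared VM object */
};

#define MAX_HANDLES 16
static struct vm *handles[MAX_HANDLES];   /* handle -> VM, 0 reserved */

/* Each successful lookup allocates a new handle holding its own reference. */
static int vm_get_handle(struct vm *vm)
{
    for (int id = 1; id < MAX_HANDLES; id++) {
        if (!handles[id]) {
            handles[id] = vm;
            vm->refcount++;
            return id;
        }
    }
    return -1;
}

/* Destroying a handle only drops that handle's reference. */
static void vm_destroy_handle(int id)
{
    struct vm *vm = handles[id];

    if (!vm)
        return;
    handles[id] = NULL;
    if (--vm->refcount == 0)
        free(vm);
}

int main(void)
{
    struct vm *vm = calloc(1, sizeof(*vm));
    int a = vm_get_handle(vm);  /* first handle to the VM */
    int b = vm_get_handle(vm);  /* second handle to the same VM */

    vm_destroy_handle(a);       /* handle b remains perfectly valid */
    printf("refcount after destroying a: %d\n", vm->refcount);
    vm_destroy_handle(b);       /* last reference gone, VM freed */
    return 0;
}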
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190425054333.27299-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 26 ++++++++------------------ drivers/gpu/drm/i915/i915_gem_gtt.h | 2 -- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 76ed74e75d82..05496ea7a123 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -772,8 +772,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, if (err < 0) goto err_unlock; - GEM_BUG_ON(err == 0); /* reserved for default/unassigned ppgtt */ - ppgtt->user_handle = err; + GEM_BUG_ON(err == 0); /* reserved for invalid/unassigned ppgtt */ mutex_unlock(&file_priv->vm_idr_lock); @@ -811,10 +810,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, return err; ppgtt = idr_remove(&file_priv->vm_idr, id); - if (ppgtt) { - GEM_BUG_ON(ppgtt->user_handle != id); - ppgtt->user_handle = 0; - } mutex_unlock(&file_priv->vm_idr_lock); if (!ppgtt) @@ -925,18 +920,15 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, if (ret) goto err_put; - if (!ppgtt->user_handle) { - ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL); - GEM_BUG_ON(!ret); - if (ret < 0) - goto err_unlock; + ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL); + GEM_BUG_ON(!ret); + if (ret < 0) + goto err_unlock; - ppgtt->user_handle = ret; - i915_ppgtt_get(ppgtt); - } + i915_ppgtt_get(ppgtt); args->size = 0; - args->value = ppgtt->user_handle; + args->value = ret; ret = 0; err_unlock: @@ -1027,10 +1019,8 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, return err; ppgtt = idr_find(&file_priv->vm_idr, args->value); - if (ppgtt) { - GEM_BUG_ON(ppgtt->user_handle != args->value); + if (ppgtt) i915_ppgtt_get(ppgtt); - } mutex_unlock(&file_priv->vm_idr_lock); if (!ppgtt) return -ENOENT; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f85b75db1f98..2fafa04c45ec 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -397,8 +397,6 @@ struct i915_hw_ppgtt { struct i915_page_directory_pointer pdp; /* GEN8+ */ struct i915_page_directory pd; /* GEN6-7 */ }; - - u32 user_handle; }; struct gen6_hw_ppgtt { -- cgit From a75d035fedbdecf83f86767aa2e4d05c8c4ffd95 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Apr 2019 09:17:18 +0100 Subject: drm/i915: Disable preemption and sleeping while using the punit sideband MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While we talk to the punit over its sideband, we need to prevent the cpu from sleeping in order to prevent a potential machine hang. Note that by itself, it appears that pm_qos_update_request (via intel_idle) doesn't provide a sufficient barrier to ensure that all core are indeed awake (out of Cstate) and that the package is awake. To do so, we need to supplement the pm_qos with a manual ping on_each_cpu. v2: Restrict the heavy-weight wakeup to just the ISOF_PORT_PUNIT, there is insufficient evidence to implicate a wider problem atm. Similarly, restrict the w/a to Valleyview, as Cherryview doesn't have an angry cadre of users. 
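Concretely, the heavy-weight wakeup amounts to the following two steps, condensed here from the __vlv_punit_get()/__vlv_punit_put() helpers added in the hunk below (an illustrative framing only, not an extra helper introduced by the patch):

    /* Before talking to the punit: forbid deep C-states ... */
    pm_qos_update_request(&i915->sb_qos, 0);
    /* ... and fire a no-op IPI so every core is provably awake. */
    on_each_cpu(ping, NULL, 1);

    /* ... punit sideband transaction ... */

    /* Afterwards: drop the latency bound, idle states allowed again. */
    pm_qos_update_request(&i915->sb_qos, PM_QOS_DEFAULT_VALUE);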
The working theory, courtesy of Ville and Hans, is the issue lies within the power delivery and so is likely to be unit and board specific and occurs when both the unit/fw require extra power at the same time as the cpu package is changing its own power state. References: https://bugzilla.kernel.org/show_bug.cgi?id=109051 References: https://bugs.freedesktop.org/show_bug.cgi?id=102657 References: https://bugzilla.kernel.org/show_bug.cgi?id=195255 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Hans de Goede Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190426081725.31217-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 6 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_sideband.c | 203 ++++++++++++++++++++++------------ 3 files changed, 139 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 824409ffd03f..aacc8dd6ecfd 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -886,6 +886,9 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->backlight_lock); mutex_init(&dev_priv->sb_lock); + pm_qos_add_request(&dev_priv->sb_qos, + PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE); + mutex_init(&dev_priv->av_mutex); mutex_init(&dev_priv->wm.wm_mutex); mutex_init(&dev_priv->pps_mutex); @@ -945,6 +948,9 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) i915_gem_cleanup_early(dev_priv); i915_workqueues_cleanup(dev_priv); i915_engines_cleanup(dev_priv); + + pm_qos_remove_request(&dev_priv->sb_qos); + mutex_destroy(&dev_priv->sb_lock); } /** diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5c77bf5b735b..662dbd2e3245 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1561,6 +1561,7 @@ struct drm_i915_private { /* Sideband mailbox protection */ struct mutex sb_lock; + struct pm_qos_request sb_qos; /** Cached value of IMR to avoid reads in updating the bitfield */ union { diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 57de41b1f989..fc8913461622 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -22,6 +22,8 @@ * */ +#include + #include "i915_drv.h" #include "intel_drv.h" @@ -39,19 +41,50 @@ /* Private register write, double-word addressing, non-posted */ #define SB_CRWRDA_NP 0x07 -static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, - u32 port, u32 opcode, u32 addr, u32 *val) +static void ping(void *info) { - u32 cmd, be = 0xf, bar = 0; - bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP); +} - cmd = (devfn << IOSF_DEVFN_SHIFT) | (opcode << IOSF_OPCODE_SHIFT) | - (port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) | - (bar << IOSF_BAR_SHIFT); +static void __vlv_punit_get(struct drm_i915_private *i915) +{ + iosf_mbi_punit_acquire(); - WARN_ON(!mutex_is_locked(&dev_priv->sb_lock)); + /* + * Prevent the cpu from sleeping while we use this sideband, otherwise + * the punit may cause a machine hang. The issue appears to be isolated + * with changing the power state of the CPU package while changing + * the power state via the punit, and we have only observed it + * reliably on 4-core Baytail systems suggesting the issue is in the + * power delivery mechanism and likely to be be board/function + * specific. 
Hence we presume the workaround needs only be applied + * to the Valleyview P-unit and not all sideband communications. + */ + if (IS_VALLEYVIEW(i915)) { + pm_qos_update_request(&i915->sb_qos, 0); + on_each_cpu(ping, NULL, 1); + } +} - if (intel_wait_for_register(&dev_priv->uncore, +static void __vlv_punit_put(struct drm_i915_private *i915) +{ + if (IS_VALLEYVIEW(i915)) + pm_qos_update_request(&i915->sb_qos, PM_QOS_DEFAULT_VALUE); + + iosf_mbi_punit_release(); +} + +static int vlv_sideband_rw(struct drm_i915_private *i915, + u32 devfn, u32 port, u32 opcode, + u32 addr, u32 *val) +{ + struct intel_uncore *uncore = &i915->uncore; + const bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP); + int err; + + lockdep_assert_held(&i915->sb_lock); + + /* Flush the previous comms, just in case it failed last time. */ + if (intel_wait_for_register(uncore, VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0, 5)) { DRM_DEBUG_DRIVER("IOSF sideband idle wait (%s) timed out\n", @@ -59,131 +92,156 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, return -EAGAIN; } - I915_WRITE(VLV_IOSF_ADDR, addr); - I915_WRITE(VLV_IOSF_DATA, is_read ? 0 : *val); - I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd); - - if (intel_wait_for_register(&dev_priv->uncore, - VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0, - 5)) { + preempt_disable(); + + intel_uncore_write_fw(uncore, VLV_IOSF_ADDR, addr); + intel_uncore_write_fw(uncore, VLV_IOSF_DATA, is_read ? 0 : *val); + intel_uncore_write_fw(uncore, VLV_IOSF_DOORBELL_REQ, + (devfn << IOSF_DEVFN_SHIFT) | + (opcode << IOSF_OPCODE_SHIFT) | + (port << IOSF_PORT_SHIFT) | + (0xf << IOSF_BYTE_ENABLES_SHIFT) | + (0 << IOSF_BAR_SHIFT) | + IOSF_SB_BUSY); + + if (__intel_wait_for_register_fw(uncore, + VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0, + 10000, 0, NULL) == 0) { + if (is_read) + *val = intel_uncore_read_fw(uncore, VLV_IOSF_DATA); + err = 0; + } else { DRM_DEBUG_DRIVER("IOSF sideband finish wait (%s) timed out\n", is_read ? 
"read" : "write"); - return -ETIMEDOUT; + err = -ETIMEDOUT; } - if (is_read) - *val = I915_READ(VLV_IOSF_DATA); + preempt_enable(); - return 0; + return err; } -u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr) +u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); + WARN_ON(!mutex_is_locked(&i915->pcu_lock)); + + mutex_lock(&i915->sb_lock); + __vlv_punit_get(i915); - mutex_lock(&dev_priv->sb_lock); - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, SB_CRRDDA_NP, addr, &val); - mutex_unlock(&dev_priv->sb_lock); + + __vlv_punit_put(i915); + mutex_unlock(&i915->sb_lock); return val; } -int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) +int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val) { int err; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); + WARN_ON(!mutex_is_locked(&i915->pcu_lock)); - mutex_lock(&dev_priv->sb_lock); - err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, + mutex_lock(&i915->sb_lock); + __vlv_punit_get(i915); + + err = vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, SB_CRWRDA_NP, addr, &val); - mutex_unlock(&dev_priv->sb_lock); + + __vlv_punit_put(i915); + mutex_unlock(&i915->sb_lock); return err; } -u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg) +u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg) { u32 val = 0; - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_BUNIT, + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_BUNIT, SB_CRRDDA_NP, reg, &val); return val; } -void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val) +void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val) { - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_BUNIT, + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_BUNIT, SB_CRWRDA_NP, reg, &val); } -u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr) +u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); + WARN_ON(!mutex_is_locked(&i915->pcu_lock)); - mutex_lock(&dev_priv->sb_lock); - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC, + mutex_lock(&i915->sb_lock); + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_NC, SB_CRRDDA_NP, addr, &val); - mutex_unlock(&dev_priv->sb_lock); + mutex_unlock(&i915->sb_lock); return val; } -u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg) +u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg) { u32 val = 0; - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), port, + + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), port, SB_CRRDDA_NP, reg, &val); + return val; } -void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, +void vlv_iosf_sb_write(struct drm_i915_private *i915, u8 port, u32 reg, u32 val) { - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), port, + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), port, SB_CRWRDA_NP, reg, &val); } -u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg) +u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg) { u32 val = 0; - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCK, + + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_CCK, SB_CRRDDA_NP, reg, &val); + return val; } -void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val) +void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val) { - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCK, + 
vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_CCK, SB_CRWRDA_NP, reg, &val); } -u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg) +u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg) { u32 val = 0; - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCU, + + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_CCU, SB_CRRDDA_NP, reg, &val); + return val; } -void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val) +void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val) { - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCU, + vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_CCU, SB_CRWRDA_NP, reg, &val); } -u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg) +u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg) { + int port = i915->dpio_phy_iosf_port[DPIO_PHY(pipe)]; u32 val = 0; - vlv_sideband_rw(dev_priv, DPIO_DEVFN, DPIO_PHY_IOSF_PORT(DPIO_PHY(pipe)), - SB_MRD_NP, reg, &val); + vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MRD_NP, reg, &val); /* * FIXME: There might be some registers where all 1's is a valid value, @@ -195,10 +253,27 @@ u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg) return val; } -void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val) +void vlv_dpio_write(struct drm_i915_private *i915, + enum pipe pipe, int reg, u32 val) +{ + int port = i915->dpio_phy_iosf_port[DPIO_PHY(pipe)]; + + vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MWR_NP, reg, &val); +} + +u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg) { - vlv_sideband_rw(dev_priv, DPIO_DEVFN, DPIO_PHY_IOSF_PORT(DPIO_PHY(pipe)), - SB_MWR_NP, reg, &val); + u32 val = 0; + + vlv_sideband_rw(i915, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRRDDA_NP, + reg, &val); + return val; +} + +void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val) +{ + vlv_sideband_rw(i915, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRWRDA_NP, + reg, &val); } /* SBI access */ @@ -279,17 +354,3 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, return; } } - -u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg) -{ - u32 val = 0; - vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRRDDA_NP, - reg, &val); - return val; -} - -void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val) -{ - vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRWRDA_NP, - reg, &val); -} -- cgit From 221c78623ea5a1b94e1d53443ccf78fae0bc5982 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Apr 2019 09:17:19 +0100 Subject: drm/i915: Lift acquiring the vlv punit magic to a common sb-get MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we now employ a very heavy pm_qos around the punit access, we want to minimise the number of synchronous requests by performing one for the whole punit sequence rather than around individual accesses. The sideband lock is used for this, so push the pm_qos into the sideband lock acquisition and release, moving it from the lowlevel punit rw routine to the callers. In the first step, we move the punit magic into the common sideband lock so that we can acquire a bunch of ports simultaneously, and if need be extend the workaround protection later. 
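As a usage sketch of the caller pattern this patch converts to (compare vlv_set_cdclk below): a sequence needing several sideband ports takes them all in a single get/put pair, with sb_lock and, when VLV_IOSF_SB_PUNIT is in the mask, the punit wake magic handled underneath. The function is hypothetical and the read/write-back is a placeholder for real programming.

/* Illustrative caller only; register values elided. */
static void example_cck_bunit_sequence(struct drm_i915_private *dev_priv)
{
    u32 val;

    /* One acquisition covers both ports (and sb_lock underneath). */
    vlv_iosf_sb_get(dev_priv,
                    BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT));

    val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL);
    vlv_cck_write(dev_priv, CCK_DISPLAY_CLOCK_CONTROL, val);

    val = vlv_bunit_read(dev_priv, BUNIT_REG_BISOC);
    vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val);

    vlv_iosf_sb_put(dev_priv,
                    BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT));
}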
Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190426081725.31217-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 124 ++++++++++++++++++++++++++++---- drivers/gpu/drm/i915/intel_cdclk.c | 6 +- drivers/gpu/drm/i915/intel_display.c | 37 +++++----- drivers/gpu/drm/i915/intel_dp.c | 4 +- drivers/gpu/drm/i915/intel_dpio_phy.c | 37 +++++----- drivers/gpu/drm/i915/intel_dsi_vbt.c | 8 +-- drivers/gpu/drm/i915/intel_hdmi.c | 4 +- drivers/gpu/drm/i915/intel_pm.c | 4 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 8 +-- drivers/gpu/drm/i915/intel_sideband.c | 45 +++++++----- drivers/gpu/drm/i915/vlv_dsi.c | 8 +-- drivers/gpu/drm/i915/vlv_dsi_pll.c | 14 ++-- 12 files changed, 206 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 662dbd2e3245..b2215fb7f562 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3442,25 +3442,119 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_base_ms); /* intel_sideband.c */ -u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr); -int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val); -u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr); -u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg); -void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val); -u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg); -void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); -u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg); -void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); -u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg); -void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); -u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg); -void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val); + +enum { + VLV_IOSF_SB_BUNIT, + VLV_IOSF_SB_CCK, + VLV_IOSF_SB_CCU, + VLV_IOSF_SB_DPIO, + VLV_IOSF_SB_FLISDSI, + VLV_IOSF_SB_GPIO, + VLV_IOSF_SB_NC, + VLV_IOSF_SB_PUNIT, +}; + +void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports); +u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg); +void vlv_iosf_sb_write(struct drm_i915_private *i915, + u8 port, u32 reg, u32 val); +void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports); + +static inline void vlv_bunit_get(struct drm_i915_private *i915) +{ + vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_BUNIT)); +} + +u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg); +void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val); + +static inline void vlv_bunit_put(struct drm_i915_private *i915) +{ + vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_BUNIT)); +} + +static inline void vlv_cck_get(struct drm_i915_private *i915) +{ + vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_CCK)); +} + +u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg); +void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val); + +static inline void vlv_cck_put(struct drm_i915_private *i915) +{ + vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_CCK)); +} + +static inline void vlv_ccu_get(struct drm_i915_private *i915) +{ + vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_CCU)); +} + +u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg); +void vlv_ccu_write(struct 
drm_i915_private *i915, u32 reg, u32 val); + +static inline void vlv_ccu_put(struct drm_i915_private *i915) +{ + vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_CCU)); +} + +static inline void vlv_dpio_get(struct drm_i915_private *i915) +{ + vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_DPIO)); +} + +u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg); +void vlv_dpio_write(struct drm_i915_private *i915, + enum pipe pipe, int reg, u32 val); + +static inline void vlv_dpio_put(struct drm_i915_private *i915) +{ + vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_DPIO)); +} + +static inline void vlv_flisdsi_get(struct drm_i915_private *i915) +{ + vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_FLISDSI)); +} + +u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg); +void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val); + +static inline void vlv_flisdsi_put(struct drm_i915_private *i915) +{ + vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_FLISDSI)); +} + +static inline void vlv_nc_get(struct drm_i915_private *i915) +{ + vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_NC)); +} + +u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr); + +static inline void vlv_nc_put(struct drm_i915_private *i915) +{ + vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_NC)); +} + +static inline void vlv_punit_get(struct drm_i915_private *i915) +{ + vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_PUNIT)); +} + +u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr); +int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val); + +static inline void vlv_punit_put(struct drm_i915_private *i915) +{ + vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_PUNIT)); +} + u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, enum intel_sbi_destination destination); void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, enum intel_sbi_destination destination); -u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg); -void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); /* intel_dpio_phy.c */ void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port, diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index ae40a8679314..5845d0a37599 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -557,7 +557,8 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, } mutex_unlock(&dev_priv->pcu_lock); - mutex_lock(&dev_priv->sb_lock); + vlv_iosf_sb_get(dev_priv, + BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT)); if (cdclk == 400000) { u32 divider; @@ -591,7 +592,8 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, val |= 3000 / 250; /* 3.0 usec */ vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); - mutex_unlock(&dev_priv->sb_lock); + vlv_iosf_sb_put(dev_priv, + BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT)); intel_update_cdclk(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c3d1d38ccf4d..297efab33b44 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -152,10 +152,10 @@ int vlv_get_hpll_vco(struct drm_i915_private *dev_priv) int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; /* Obtain SKU information */ - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); hpll_freq = vlv_cck_read(dev_priv, CCK_FUSE_REG) & CCK_FUSE_HPLL_FREQ_MASK; - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); return vco_freq[hpll_freq] * 1000; } @@ -166,9 +166,9 @@ int vlv_get_cck_clock(struct 
drm_i915_private *dev_priv, u32 val; int divider; - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); val = vlv_cck_read(dev_priv, reg); - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); divider = val & CCK_FREQUENCY_VALUES; @@ -1093,9 +1093,9 @@ void assert_dsi_pll(struct drm_i915_private *dev_priv, bool state) u32 val; bool cur_state; - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); val = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL); - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); cur_state = val & DSI_PLL_VCO_EN; I915_STATE_WARN(cur_state != state, @@ -1405,14 +1405,14 @@ static void _chv_enable_pll(struct intel_crtc *crtc, enum dpio_channel port = vlv_pipe_to_channel(pipe); u32 tmp; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Enable back the 10bit clock to display controller */ tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)); tmp |= DPIO_DCLKP_EN; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), tmp); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); /* * Need to wait > 100ns between dclkp clock enable bit and PLL enable. @@ -1569,14 +1569,14 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) I915_WRITE(DPLL(pipe), val); POSTING_READ(DPLL(pipe)); - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Disable 10bit clock to display controller */ val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)); val &= ~DPIO_DCLKP_EN; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), val); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } void vlv_wait_port_ready(struct drm_i915_private *dev_priv, @@ -7276,7 +7276,7 @@ static void vlv_prepare_pll(struct intel_crtc *crtc, if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0) return; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); bestn = pipe_config->dpll.n; bestm1 = pipe_config->dpll.m1; @@ -7353,7 +7353,8 @@ static void vlv_prepare_pll(struct intel_crtc *crtc, vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW7(pipe), coreclk); vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW11(pipe), 0x87871000); - mutex_unlock(&dev_priv->sb_lock); + + vlv_dpio_put(dev_priv); } static void chv_prepare_pll(struct intel_crtc *crtc, @@ -7386,7 +7387,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc, dpio_val = 0; loopfilter = 0; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* p1 and p2 divider */ vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW13(port), @@ -7458,7 +7459,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc, vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)) | DPIO_AFC_RECAL); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } /** @@ -8084,9 +8085,9 @@ static void vlv_crtc_clock_get(struct intel_crtc *crtc, if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0) return; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); mdiv = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW3(pipe)); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); clock.m1 = (mdiv >> DPIO_M1DIV_SHIFT) & 7; clock.m2 = mdiv & DPIO_M2DIV_MASK; @@ -8195,13 +8196,13 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0) return; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); cmn_dw13 = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW13(port)); pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port)); pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port)); pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port)); pll_dw3 
= vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port)); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0; clock.m2 = (pll_dw0 & 0xff) << 22; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 560274d1c50b..4fc25dcc97d4 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3148,12 +3148,12 @@ static void chv_post_disable_dp(struct intel_encoder *encoder, intel_dp_link_down(encoder, old_crtc_state); - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Assert data lane reset */ chv_data_lane_soft_reset(encoder, old_crtc_state, true); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } static void diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index ab4ac7158b79..c784f3daaf51 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -648,7 +648,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, u32 val; int i; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Clear calc init */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch)); @@ -729,8 +729,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val); } - mutex_unlock(&dev_priv->sb_lock); - + vlv_dpio_put(dev_priv); } void chv_data_lane_soft_reset(struct intel_encoder *encoder, @@ -800,7 +799,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder, chv_phy_powergate_lanes(encoder, true, lane_mask); - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Assert data lane reset */ chv_data_lane_soft_reset(encoder, crtc_state, true); @@ -855,7 +854,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder, val |= CHV_CMN_USEDCLKCHANNEL; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW19(ch), val); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, @@ -870,7 +869,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, int data, i, stagger; u32 val; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* allow hardware to manage TX FIFO reset source */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch)); @@ -935,7 +934,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, /* Deassert data lane reset */ chv_data_lane_soft_reset(encoder, crtc_state, false); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } void chv_phy_release_cl2_override(struct intel_encoder *encoder) @@ -956,7 +955,7 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder, enum pipe pipe = to_intel_crtc(old_crtc_state->base.crtc)->pipe; u32 val; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* disable left/right clock distribution */ if (pipe != PIPE_B) { @@ -969,7 +968,7 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val); } - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); /* * Leave the power down bit cleared for at least one @@ -993,7 +992,8 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, enum dpio_channel port = vlv_dport_to_channel(dport); enum pipe pipe = intel_crtc->pipe; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); + vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x00000000); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW4(port), demph_reg_value); vlv_dpio_write(dev_priv, pipe, 
VLV_TX_DW2(port), @@ -1006,7 +1006,8 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW11(port), 0x00030000); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW9(port), preemph_reg_value); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), DPIO_TX_OCALINIT_EN); - mutex_unlock(&dev_priv->sb_lock); + + vlv_dpio_put(dev_priv); } void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, @@ -1019,7 +1020,8 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, enum pipe pipe = crtc->pipe; /* Program Tx lane resets to default */ - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); + vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); @@ -1033,7 +1035,8 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW12(port), 0x00750f00); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW11(port), 0x00001500); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW14(port), 0x40400000); - mutex_unlock(&dev_priv->sb_lock); + + vlv_dpio_put(dev_priv); } void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, @@ -1047,7 +1050,7 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, enum pipe pipe = crtc->pipe; u32 val; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Enable clock channels for this port */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port)); @@ -1063,7 +1066,7 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW14(port), 0x00760018); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } void vlv_phy_reset_lanes(struct intel_encoder *encoder, @@ -1075,8 +1078,8 @@ void vlv_phy_reset_lanes(struct intel_encoder *encoder, enum dpio_channel port = vlv_dport_to_channel(dport); enum pipe pipe = crtc->pipe; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), 0x00000000); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(port), 0x00e00060); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 3074448446bc..2304488f2d35 100644 --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -248,7 +248,7 @@ static void vlv_exec_gpio(struct drm_i915_private *dev_priv, pconf0 = VLV_GPIO_PCONF0(map->base_offset); padval = VLV_GPIO_PAD_VAL(map->base_offset); - mutex_lock(&dev_priv->sb_lock); + vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_GPIO)); if (!map->init) { /* FIXME: remove constant below */ vlv_iosf_sb_write(dev_priv, port, pconf0, 0x2000CC00); @@ -257,7 +257,7 @@ static void vlv_exec_gpio(struct drm_i915_private *dev_priv, tmp = 0x4 | value; vlv_iosf_sb_write(dev_priv, port, padval, tmp); - mutex_unlock(&dev_priv->sb_lock); + vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO)); } static void chv_exec_gpio(struct drm_i915_private *dev_priv, @@ -303,12 +303,12 @@ static void chv_exec_gpio(struct drm_i915_private *dev_priv, cfg0 = CHV_GPIO_PAD_CFG0(family_num, gpio_index); cfg1 = CHV_GPIO_PAD_CFG1(family_num, gpio_index); - mutex_lock(&dev_priv->sb_lock); + vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_GPIO)); vlv_iosf_sb_write(dev_priv, port, cfg1, 0); vlv_iosf_sb_write(dev_priv, port, cfg0, CHV_GPIO_GPIOEN | CHV_GPIO_GPIOCFG_GPO | CHV_GPIO_GPIOTXSTATE(value)); - mutex_unlock(&dev_priv->sb_lock); + vlv_iosf_sb_put(dev_priv, 
BIT(VLV_IOSF_SB_GPIO)); } static void bxt_exec_gpio(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index e1005d7b75fd..8b72365f9309 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -2630,12 +2630,12 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Assert data lane reset */ chv_data_lane_soft_reset(encoder, old_crtc_state, true); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } static void chv_hdmi_pre_enable(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7aa9a8c12b54..6c2f416b95a6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7803,9 +7803,9 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) vlv_init_gpll_ref_freq(dev_priv); - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); val = vlv_cck_read(dev_priv, CCK_FUSE_REG); - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); switch ((val >> 2) & 0x7) { case 3: diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index d4f4262d0fee..9c1294c29566 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1569,7 +1569,7 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, 1)) DRM_ERROR("Display PHY %d is not power up\n", phy); - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Enable dynamic power down */ tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW28); @@ -1592,7 +1592,7 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, tmp); } - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); dev_priv->chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(phy); I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control); @@ -1655,9 +1655,9 @@ static void assert_chv_phy_powergate(struct drm_i915_private *dev_priv, enum dpi else reg = _CHV_CMN_DW6_CH1; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); val = vlv_dpio_read(dev_priv, pipe, reg); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); /* * This assumes !override is only used when the port is disabled. diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index fc8913461622..b2fc605e2e29 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -73,6 +73,22 @@ static void __vlv_punit_put(struct drm_i915_private *i915) iosf_mbi_punit_release(); } +void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports) +{ + if (ports & BIT(VLV_IOSF_SB_PUNIT)) + __vlv_punit_get(i915); + + mutex_lock(&i915->sb_lock); +} + +void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports) +{ + mutex_unlock(&i915->sb_lock); + + if (ports & BIT(VLV_IOSF_SB_PUNIT)) + __vlv_punit_put(i915); +} + static int vlv_sideband_rw(struct drm_i915_private *i915, u32 devfn, u32 port, u32 opcode, u32 addr, u32 *val) @@ -82,6 +98,8 @@ static int vlv_sideband_rw(struct drm_i915_private *i915, int err; lockdep_assert_held(&i915->sb_lock); + if (port == IOSF_PORT_PUNIT) + iosf_mbi_assert_punit_acquired(); /* Flush the previous comms, just in case it failed last time. 
*/ if (intel_wait_for_register(uncore, @@ -125,16 +143,14 @@ u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&i915->pcu_lock)); + lockdep_assert_held(&i915->pcu_lock); - mutex_lock(&i915->sb_lock); - __vlv_punit_get(i915); + vlv_punit_get(i915); vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, SB_CRRDDA_NP, addr, &val); - __vlv_punit_put(i915); - mutex_unlock(&i915->sb_lock); + vlv_punit_put(i915); return val; } @@ -143,16 +159,14 @@ int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val) { int err; - WARN_ON(!mutex_is_locked(&i915->pcu_lock)); + lockdep_assert_held(&i915->pcu_lock); - mutex_lock(&i915->sb_lock); - __vlv_punit_get(i915); + vlv_punit_get(i915); err = vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, SB_CRWRDA_NP, addr, &val); - __vlv_punit_put(i915); - mutex_unlock(&i915->sb_lock); + vlv_punit_put(i915); return err; } @@ -177,12 +191,10 @@ u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&i915->pcu_lock)); - - mutex_lock(&i915->sb_lock); + vlv_nc_get(i915); vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_NC, SB_CRRDDA_NP, addr, &val); - mutex_unlock(&i915->sb_lock); + vlv_nc_put(i915); return val; } @@ -281,7 +293,8 @@ u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, enum intel_sbi_destination destination) { u32 value = 0; - WARN_ON(!mutex_is_locked(&dev_priv->sb_lock)); + + lockdep_assert_held(&dev_priv->sb_lock); if (intel_wait_for_register(&dev_priv->uncore, SBI_CTL_STAT, SBI_BUSY, 0, @@ -321,7 +334,7 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, { u32 tmp; - WARN_ON(!mutex_is_locked(&dev_priv->sb_lock)); + lockdep_assert_held(&dev_priv->sb_lock); if (intel_wait_for_register(&dev_priv->uncore, SBI_CTL_STAT, SBI_BUSY, 0, diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c index e0b1ec821960..dc1839bfde3e 100644 --- a/drivers/gpu/drm/i915/vlv_dsi.c +++ b/drivers/gpu/drm/i915/vlv_dsi.c @@ -248,7 +248,7 @@ static int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 cmd, bool hs, static void band_gap_reset(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->sb_lock); + vlv_flisdsi_get(dev_priv); vlv_flisdsi_write(dev_priv, 0x08, 0x0001); vlv_flisdsi_write(dev_priv, 0x0F, 0x0005); @@ -257,7 +257,7 @@ static void band_gap_reset(struct drm_i915_private *dev_priv) vlv_flisdsi_write(dev_priv, 0x0F, 0x0000); vlv_flisdsi_write(dev_priv, 0x08, 0x0000); - mutex_unlock(&dev_priv->sb_lock); + vlv_flisdsi_put(dev_priv); } static int bdw_get_pipemisc_bpp(struct intel_crtc *crtc) @@ -515,11 +515,11 @@ static void vlv_dsi_device_ready(struct intel_encoder *encoder) DRM_DEBUG_KMS("\n"); - mutex_lock(&dev_priv->sb_lock); + vlv_flisdsi_get(dev_priv); /* program rcomp for compliance, reduce from 50 ohms to 45 ohms * needed everytime after power gate */ vlv_flisdsi_write(dev_priv, 0x04, 0x0004); - mutex_unlock(&dev_priv->sb_lock); + vlv_flisdsi_put(dev_priv); /* bandgap reset is needed after everytime we do power gate */ band_gap_reset(dev_priv); diff --git a/drivers/gpu/drm/i915/vlv_dsi_pll.c b/drivers/gpu/drm/i915/vlv_dsi_pll.c index 5e7b1fb2db5d..25b811174f5c 100644 --- a/drivers/gpu/drm/i915/vlv_dsi_pll.c +++ b/drivers/gpu/drm/i915/vlv_dsi_pll.c @@ -149,7 +149,7 @@ void vlv_dsi_pll_enable(struct intel_encoder *encoder, DRM_DEBUG_KMS("\n"); - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_CONTROL, 0); vlv_cck_write(dev_priv, 
CCK_REG_DSI_PLL_DIVIDER, config->dsi_pll.div); @@ -166,11 +166,11 @@ void vlv_dsi_pll_enable(struct intel_encoder *encoder, if (wait_for(vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL) & DSI_PLL_LOCK, 20)) { - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); DRM_ERROR("DSI PLL lock failed\n"); return; } - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); DRM_DEBUG_KMS("DSI PLL locked\n"); } @@ -182,14 +182,14 @@ void vlv_dsi_pll_disable(struct intel_encoder *encoder) DRM_DEBUG_KMS("\n"); - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); tmp = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL); tmp &= ~DSI_PLL_VCO_EN; tmp |= DSI_PLL_LDO_GATE; vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_CONTROL, tmp); - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); } bool bxt_dsi_pll_is_enabled(struct drm_i915_private *dev_priv) @@ -266,10 +266,10 @@ u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, DRM_DEBUG_KMS("\n"); - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); pll_ctl = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL); pll_div = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_DIVIDER); - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); config->dsi_pll.ctrl = pll_ctl & ~DSI_PLL_LOCK; config->dsi_pll.div = pll_div; -- cgit From 337fa6e04d40216e9f462b23b86d9e62f93c3d48 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Apr 2019 09:17:20 +0100 Subject: drm/i915: Lift sideband locking for vlv_punit_(read|write) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lift the sideband acquisition for vlv_punit_read and vlv_punit_write into their callers, so that we can lock the sideband once for a sequence of operations, rather than perform the heavyweight acquisition on each request. 
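In caller terms the change looks roughly like this (hypothetical caller, register names taken from the hunks below): rather than each vlv_punit_read()/vlv_punit_write() paying for the sideband lock and punit wakeup on its own, the caller brackets the whole sequence once.

    /* Before: every access acquired and released the sideband itself. */
    val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
    vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM,
                    val | DSP_MAXFIFO_PM5_ENABLE);

    /* After: one acquisition covers the whole read-modify-write. */
    vlv_punit_get(dev_priv);
    val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
    vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM,
                    val | DSP_MAXFIFO_PM5_ENABLE);
    vlv_punit_put(dev_priv);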
Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190426081725.31217-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++++ drivers/gpu/drm/i915/i915_sysfs.c | 14 +++++----- drivers/gpu/drm/i915/intel_cdclk.c | 23 +++++++++++++---- drivers/gpu/drm/i915/intel_display.c | 16 +++++++----- drivers/gpu/drm/i915/intel_pm.c | 46 +++++++++++++++++++++++++++------ drivers/gpu/drm/i915/intel_runtime_pm.c | 10 +++++++ drivers/gpu/drm/i915/intel_sideband.c | 18 ++----------- 7 files changed, 89 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6972f9b6ae83..f8472db460bb 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1057,7 +1057,10 @@ static int i915_frequency_info(struct seq_file *m, void *unused) yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE)); + vlv_punit_get(dev_priv); freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(dev_priv); + seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq); @@ -2030,8 +2033,10 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) with_intel_runtime_pm_if_in_use(dev_priv, wakeref) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); act_freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(dev_priv); act_freq = (act_freq >> 8) & 0xff; mutex_unlock(&dev_priv->pcu_lock); } else { diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 41313005af42..bfabb3de4808 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -259,25 +259,25 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); intel_wakeref_t wakeref; - int ret; + u32 freq; wakeref = intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->pcu_lock); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - u32 freq; + vlv_punit_get(dev_priv); freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - ret = intel_gpu_freq(dev_priv, (freq >> 8) & 0xff); + vlv_punit_put(dev_priv); + + freq = (freq >> 8) & 0xff; } else { - ret = intel_gpu_freq(dev_priv, - intel_get_cagf(dev_priv, - I915_READ(GEN6_RPSTAT1))); + freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); } mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv, wakeref); - return snprintf(buf, PAGE_SIZE, "%d\n", ret); + return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, freq)); } static ssize_t gt_cur_freq_mhz_show(struct device *kdev, diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 5845d0a37599..9dd22203a7e8 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -464,13 +464,19 @@ static void vlv_get_cdclk(struct drm_i915_private *dev_priv, { u32 val; + mutex_lock(&dev_priv->pcu_lock); + vlv_iosf_sb_get(dev_priv, + BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT)); + cdclk_state->vco = vlv_get_hpll_vco(dev_priv); cdclk_state->cdclk = vlv_get_cck_clock(dev_priv, "cdclk", CCK_DISPLAY_CLOCK_CONTROL, cdclk_state->vco); - mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); + + vlv_iosf_sb_put(dev_priv, + BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT)); mutex_unlock(&dev_priv->pcu_lock); if 
(IS_VALLEYVIEW(dev_priv)) @@ -545,6 +551,11 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, */ wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); + vlv_iosf_sb_get(dev_priv, + BIT(VLV_IOSF_SB_CCK) | + BIT(VLV_IOSF_SB_BUNIT) | + BIT(VLV_IOSF_SB_PUNIT)); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); val &= ~DSPFREQGUAR_MASK; @@ -557,9 +568,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, } mutex_unlock(&dev_priv->pcu_lock); - vlv_iosf_sb_get(dev_priv, - BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT)); - if (cdclk == 400000) { u32 divider; @@ -593,7 +601,9 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); vlv_iosf_sb_put(dev_priv, - BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT)); + BIT(VLV_IOSF_SB_CCK) | + BIT(VLV_IOSF_SB_BUNIT) | + BIT(VLV_IOSF_SB_PUNIT)); intel_update_cdclk(dev_priv); @@ -630,6 +640,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); val &= ~DSPFREQGUAR_MASK_CHV; val |= (cmd << DSPFREQGUAR_SHIFT_CHV); @@ -639,6 +650,8 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, 50)) { DRM_ERROR("timed out waiting for CDclk change\n"); } + + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 297efab33b44..8eef0e732ac9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -152,10 +152,8 @@ int vlv_get_hpll_vco(struct drm_i915_private *dev_priv) int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; /* Obtain SKU information */ - vlv_cck_get(dev_priv); hpll_freq = vlv_cck_read(dev_priv, CCK_FUSE_REG) & CCK_FUSE_HPLL_FREQ_MASK; - vlv_cck_put(dev_priv); return vco_freq[hpll_freq] * 1000; } @@ -166,10 +164,7 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv, u32 val; int divider; - vlv_cck_get(dev_priv); val = vlv_cck_read(dev_priv, reg); - vlv_cck_put(dev_priv); - divider = val & CCK_FREQUENCY_VALUES; WARN((val & CCK_FREQUENCY_STATUS) != @@ -182,11 +177,18 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv, int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, const char *name, u32 reg) { + int hpll; + + vlv_cck_get(dev_priv); + if (dev_priv->hpll_freq == 0) dev_priv->hpll_freq = vlv_get_hpll_vco(dev_priv); - return vlv_get_cck_clock(dev_priv, name, reg, - dev_priv->hpll_freq); + hpll = vlv_get_cck_clock(dev_priv, name, reg, dev_priv->hpll_freq); + + vlv_cck_put(dev_priv); + + return hpll; } static void intel_update_czclk(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6c2f416b95a6..9db39ea9bd83 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -318,6 +318,7 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) u32 val; mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); if (enable) @@ -332,6 +333,7 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) DRM_ERROR("timed out waiting for Punit DDR DVFS request\n"); + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); } @@ -340,6 
+342,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) u32 val; mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); if (enable) @@ -348,6 +351,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) val &= ~DSP_MAXFIFO_PM5_ENABLE; vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM, val); + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); } @@ -6140,6 +6144,7 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv) if (IS_CHERRYVIEW(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); if (val & DSP_MAXFIFO_PM5_ENABLE) @@ -6169,6 +6174,7 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv) wm->level = VLV_WM_LEVEL_DDR_DVFS; } + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); } @@ -6743,7 +6749,9 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); if (val != dev_priv->gt_pm.rps.cur_freq) { + vlv_punit_get(dev_priv); err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); + vlv_punit_put(dev_priv); if (err) return err; @@ -7755,6 +7763,11 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) valleyview_setup_pctx(dev_priv); + vlv_iosf_sb_get(dev_priv, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + vlv_init_gpll_ref_freq(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); @@ -7792,6 +7805,11 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", intel_gpu_freq(dev_priv, rps->min_freq), rps->min_freq); + + vlv_iosf_sb_put(dev_priv, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); } static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) @@ -7801,11 +7819,14 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) cherryview_setup_pctx(dev_priv); + vlv_iosf_sb_get(dev_priv, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + vlv_init_gpll_ref_freq(dev_priv); - vlv_cck_get(dev_priv); val = vlv_cck_read(dev_priv, CCK_FUSE_REG); - vlv_cck_put(dev_priv); switch ((val >> 2) & 0x7) { case 3: @@ -7838,6 +7859,11 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) intel_gpu_freq(dev_priv, rps->min_freq), rps->min_freq); + vlv_iosf_sb_put(dev_priv, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | rps->min_freq) & 1, "Odd GPU freq values\n"); @@ -7925,13 +7951,15 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) GEN6_RP_DOWN_IDLE_AVG); /* Setting Fixed Bias */ - val = VLV_OVERRIDE_EN | - VLV_SOC_TDP_EN | - CHV_BIAS_CPU_50_SOC_50; + vlv_punit_get(dev_priv); + + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(dev_priv); + /* RPS code assumes GPLL is used */ WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); @@ -8008,14 +8036,16 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_CONT); + vlv_punit_get(dev_priv); + /* Setting Fixed Bias */ - val = VLV_OVERRIDE_EN | - VLV_SOC_TDP_EN | - VLV_BIAS_CPU_125_SOC_875; + val = 
VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(dev_priv); + /* RPS code assumes GPLL is used */ WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 9c1294c29566..ac8bc5baef40 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1212,6 +1212,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, PUNIT_PWRGT_PWR_GATE(pw_idx); mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); #define COND \ ((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state) @@ -1232,6 +1233,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, #undef COND out: + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); } @@ -1260,6 +1262,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, ctrl = PUNIT_PWRGT_PWR_ON(pw_idx); mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask; /* @@ -1278,6 +1281,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask; WARN_ON(ctrl != state); + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); return enabled; @@ -1765,6 +1769,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, u32 state, ctrl; mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); state = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSS_MASK(pipe); /* @@ -1781,6 +1786,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSC_MASK(pipe); WARN_ON(ctrl << 16 != state); + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); return enabled; @@ -1797,6 +1803,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, state = enable ? 
DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); #define COND \ ((vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSS_MASK(pipe)) == state) @@ -1817,6 +1824,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, #undef COND out: + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); } @@ -4012,7 +4020,9 @@ static bool vlv_punit_is_power_gated(struct drm_i915_private *dev_priv, u32 reg0 bool ret; mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); ret = (vlv_punit_read(dev_priv, reg0) & SSPM0_SSC_MASK) == SSPM0_SSC_PWR_GATE; + vlv_punit_put(dev_priv); mutex_unlock(&dev_priv->pcu_lock); return ret; diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index b2fc605e2e29..7c33925f52f9 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -145,30 +145,18 @@ u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) lockdep_assert_held(&i915->pcu_lock); - vlv_punit_get(i915); - vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, SB_CRRDDA_NP, addr, &val); - vlv_punit_put(i915); - return val; } int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val) { - int err; - lockdep_assert_held(&i915->pcu_lock); - vlv_punit_get(i915); - - err = vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, - SB_CRWRDA_NP, addr, &val); - - vlv_punit_put(i915); - - return err; + return vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, + SB_CRWRDA_NP, addr, &val); } u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg) @@ -191,10 +179,8 @@ u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr) { u32 val = 0; - vlv_nc_get(i915); vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_NC, SB_CRRDDA_NP, addr, &val); - vlv_nc_put(i915); return val; } -- cgit From ebb5eb7d731cc39e29661e0eb9dfe61242817663 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Apr 2019 09:17:21 +0100 Subject: drm/i915: Replace pcu_lock with sb_lock We now have two locks for sideband access: the general sb_lock, covering sideband access across all generations, and a specific lock covering sideband access via the punit on vlv/chv. After lifting the sb_lock around the punit into the callers, the pcu_lock is now redundant and can be separated from its other use of regulating RPS (essentially giving RPS a lock all of its own). v2: Extract a couple of minor bug fixes.
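For illustration, the locking convention that results from this change can be sketched as follows. This is a minimal, hypothetical example assembled only from helpers that appear in the diffs below (vlv_punit_get()/vlv_punit_put(), vlv_punit_read(), intel_set_rps() and the new rps->lock); the two wrapper functions are illustrative names, not code added by the patch, and they omit details such as runtime-pm wakerefs.

/*
 * Sketch: punit sideband access is serialized by the vlv_punit_get()/put()
 * pair taken by the caller (which, per the commit message, now carries
 * sb_lock), with no pcu_lock anywhere in the path.
 */
static u32 example_read_gpu_freq_sts(struct drm_i915_private *i915)
{
	u32 val;

	vlv_punit_get(i915);
	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
	vlv_punit_put(i915);

	return val;
}

/*
 * Sketch: RPS tuning nests under the new, dedicated rps->lock instead of
 * the old pcu_lock; intel_set_rps() now asserts this lock is held.
 */
static int example_request_freq(struct drm_i915_private *i915, u8 freq)
{
	struct intel_rps *rps = &i915->gt_pm.rps;
	int err;

	mutex_lock(&rps->lock);
	err = intel_set_rps(i915, freq);
	mutex_unlock(&rps->lock);

	return err;
}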
Signed-off-by: Chris Wilson Reviewed-by: Sagar Arun Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20190426081725.31217-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 25 +------ drivers/gpu/drm/i915/i915_drv.h | 10 +-- drivers/gpu/drm/i915/i915_irq.c | 4 +- drivers/gpu/drm/i915/i915_sysfs.c | 32 +++----- drivers/gpu/drm/i915/intel_cdclk.c | 28 ------- drivers/gpu/drm/i915/intel_display.c | 6 -- drivers/gpu/drm/i915/intel_hdcp.c | 2 - drivers/gpu/drm/i915/intel_pm.c | 128 +++++++++++++++----------------- drivers/gpu/drm/i915/intel_runtime_pm.c | 10 --- drivers/gpu/drm/i915/intel_sideband.c | 4 - 10 files changed, 81 insertions(+), 168 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f8472db460bb..9545556898e6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1046,8 +1046,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { u32 rpmodectl, freq_sts; - mutex_lock(&dev_priv->pcu_lock); - rpmodectl = I915_READ(GEN6_RP_CONTROL); seq_printf(m, "Video Turbo Mode: %s\n", yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); @@ -1082,7 +1080,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "efficient (RPe) frequency: %d MHz\n", intel_gpu_freq(dev_priv, rps->efficient_freq)); - mutex_unlock(&dev_priv->pcu_lock); } else if (INTEL_GEN(dev_priv) >= 6) { u32 rp_state_limits; u32 gt_perf_status; @@ -1487,12 +1484,9 @@ static int gen6_drpc_info(struct seq_file *m) gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS); } - if (INTEL_GEN(dev_priv) <= 7) { - mutex_lock(&dev_priv->pcu_lock); + if (INTEL_GEN(dev_priv) <= 7) sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); - mutex_unlock(&dev_priv->pcu_lock); - } seq_printf(m, "RC1e Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); @@ -1756,17 +1750,10 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) unsigned int max_gpu_freq, min_gpu_freq; intel_wakeref_t wakeref; int gpu_freq, ia_freq; - int ret; if (!HAS_LLC(dev_priv)) return -ENODEV; - wakeref = intel_runtime_pm_get(dev_priv); - - ret = mutex_lock_interruptible(&dev_priv->pcu_lock); - if (ret) - goto out; - min_gpu_freq = rps->min_freq; max_gpu_freq = rps->max_freq; if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { @@ -1777,6 +1764,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); + wakeref = intel_runtime_pm_get(dev_priv); for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { ia_freq = gpu_freq; sandybridge_pcode_read(dev_priv, @@ -1790,12 +1778,9 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); } - - mutex_unlock(&dev_priv->pcu_lock); - -out: intel_runtime_pm_put(dev_priv, wakeref); - return ret; + + return 0; } static int i915_opregion(struct seq_file *m, void *unused) @@ -2032,13 +2017,11 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) with_intel_runtime_pm_if_in_use(dev_priv, wakeref) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); act_freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(dev_priv); act_freq = (act_freq >> 8) & 0xff; - mutex_unlock(&dev_priv->pcu_lock); } else { act_freq = 
intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b2215fb7f562..ca1b35d8faca 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -648,6 +648,8 @@ struct intel_rps_ei { }; struct intel_rps { + struct mutex lock; /* protects enabling and the worker */ + /* * work, interrupts_enabled and pm_iir are protected by * dev_priv->irq_lock @@ -1710,14 +1712,6 @@ struct drm_i915_private { */ u32 edram_size_mb; - /* - * Protects RPS/RC6 register access and PCU communication. - * Must be taken after struct_mutex if nested. Note that - * this lock may be held for long periods of time when - * talking to hw - so only take it when talking to hw! - */ - struct mutex pcu_lock; - /* gen6+ GT PM state */ struct intel_gen6_power_mgmt gt_pm; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b92cfd69134b..15f5415a0aa2 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1301,7 +1301,7 @@ static void gen6_pm_rps_work(struct work_struct *work) if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) goto out; - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir); @@ -1367,7 +1367,7 @@ static void gen6_pm_rps_work(struct work_struct *work) rps->last_adj = 0; } - mutex_unlock(&dev_priv->pcu_lock); + mutex_unlock(&rps->lock); out: /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index bfabb3de4808..0952d6a70e1f 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -263,7 +263,6 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, wakeref = intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->pcu_lock); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_punit_get(dev_priv); freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); @@ -273,7 +272,6 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, } else { freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); } - mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv, wakeref); @@ -318,12 +316,12 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, if (val < rps->min_freq || val > rps->max_freq) return -EINVAL; - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); if (val != rps->boost_freq) { rps->boost_freq = val; boost = atomic_read(&rps->num_waiters); } - mutex_unlock(&dev_priv->pcu_lock); + mutex_unlock(&rps->lock); if (boost) schedule_work(&rps->work); @@ -364,17 +362,14 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, return ret; wakeref = intel_runtime_pm_get(dev_priv); - - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); val = intel_freq_opcode(dev_priv, val); - if (val < rps->min_freq || val > rps->max_freq || val < rps->min_freq_softlimit) { - mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv, wakeref); - return -EINVAL; + ret = -EINVAL; + goto unlock; } if (val > rps->rp0_freq) @@ -392,8 +387,8 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, * frequency request may be unchanged. 
*/ ret = intel_set_rps(dev_priv, val); - mutex_unlock(&dev_priv->pcu_lock); - +unlock: + mutex_unlock(&rps->lock); intel_runtime_pm_put(dev_priv, wakeref); return ret ?: count; @@ -423,17 +418,14 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, return ret; wakeref = intel_runtime_pm_get(dev_priv); - - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); val = intel_freq_opcode(dev_priv, val); - if (val < rps->min_freq || val > rps->max_freq || val > rps->max_freq_softlimit) { - mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv, wakeref); - return -EINVAL; + ret = -EINVAL; + goto unlock; } rps->min_freq_softlimit = val; @@ -447,8 +439,8 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, * frequency request may be unchanged. */ ret = intel_set_rps(dev_priv, val); - mutex_unlock(&dev_priv->pcu_lock); - +unlock: + mutex_unlock(&rps->lock); intel_runtime_pm_put(dev_priv, wakeref); return ret ?: count; diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 9dd22203a7e8..2bc5d3227a24 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -464,7 +464,6 @@ static void vlv_get_cdclk(struct drm_i915_private *dev_priv, { u32 val; - mutex_lock(&dev_priv->pcu_lock); vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT)); @@ -477,7 +476,6 @@ static void vlv_get_cdclk(struct drm_i915_private *dev_priv, vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT)); - mutex_unlock(&dev_priv->pcu_lock); if (IS_VALLEYVIEW(dev_priv)) cdclk_state->voltage_level = (val & DSPFREQGUAR_MASK) >> @@ -556,7 +554,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, BIT(VLV_IOSF_SB_BUNIT) | BIT(VLV_IOSF_SB_PUNIT)); - mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); val &= ~DSPFREQGUAR_MASK; val |= (cmd << DSPFREQGUAR_SHIFT); @@ -566,7 +563,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, 50)) { DRM_ERROR("timed out waiting for CDclk change\n"); } - mutex_unlock(&dev_priv->pcu_lock); if (cdclk == 400000) { u32 divider; @@ -639,7 +635,6 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, */ wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); val &= ~DSPFREQGUAR_MASK_CHV; @@ -652,7 +647,6 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, } vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); @@ -731,10 +725,8 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, "trying to change cdclk frequency with cdclk not enabled\n")) return; - mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("failed to inform pcode about cdclk change\n"); return; @@ -783,10 +775,8 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) DRM_ERROR("Switching back to LCPLL failed\n"); - mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, cdclk_state->voltage_level); - mutex_unlock(&dev_priv->pcu_lock); I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); @@ -1025,12 +1015,10 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, */ WARN_ON_ONCE(IS_SKYLAKE(dev_priv) && vco == 8640000); - mutex_lock(&dev_priv->pcu_lock); ret = 
skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -1094,10 +1082,8 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, POSTING_READ(CDCLK_CTL); /* inform PCU of the change */ - mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, cdclk_state->voltage_level); - mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); } @@ -1394,12 +1380,9 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, * requires us to wait up to 150usec, but that leads to timeouts; * the 2ms used here is based on experiment. */ - mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write_timeout(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, 0x80000000, 150, 2); - mutex_unlock(&dev_priv->pcu_lock); - if (ret) { DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", ret, cdclk); @@ -1429,7 +1412,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, if (pipe != INVALID_PIPE) intel_wait_for_vblank(dev_priv, pipe); - mutex_lock(&dev_priv->pcu_lock); /* * The timeout isn't specified, the 2ms used here is based on * experiment. @@ -1439,8 +1421,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, ret = sandybridge_pcode_write_timeout(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, cdclk_state->voltage_level, 150, 2); - mutex_unlock(&dev_priv->pcu_lock); - if (ret) { DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", ret, cdclk); @@ -1663,12 +1643,10 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, u32 val, divider; int ret; - mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -1707,10 +1685,8 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, intel_wait_for_vblank(dev_priv, pipe); /* inform PCU of the change */ - mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, cdclk_state->voltage_level); - mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); @@ -1849,12 +1825,10 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv, unsigned int vco = cdclk_state->vco; int ret; - mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -1876,10 +1850,8 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv, I915_WRITE(CDCLK_CTL, ICL_CDCLK_CD2X_PIPE_NONE | skl_cdclk_decimal(cdclk)); - mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, cdclk_state->voltage_level); - mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8eef0e732ac9..3cd8273b7186 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5331,10 +5331,8 @@ void hsw_enable_ips(const struct intel_crtc_state *crtc_state) WARN_ON(!(crtc_state->active_planes & ~BIT(PLANE_CURSOR))); if 
(IS_BROADWELL(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, IPS_ENABLE | IPS_PCODE_CONTROL)); - mutex_unlock(&dev_priv->pcu_lock); /* Quoting Art Runyan: "its not safe to expect any particular * value in IPS_CTL bit 31 after enabling IPS through the * mailbox." Moreover, the mailbox may return a bogus state, @@ -5364,9 +5362,7 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state) return; if (IS_BROADWELL(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0)); - mutex_unlock(&dev_priv->pcu_lock); /* * Wait for PCODE to finish disabling IPS. The BSpec specified * 42ms timeout value leads to occasional timeouts so use 100ms @@ -9506,11 +9502,9 @@ static u32 hsw_read_dcomp(struct drm_i915_private *dev_priv) static void hsw_write_dcomp(struct drm_i915_private *dev_priv, u32 val) { if (IS_HASWELL(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP, val)) DRM_DEBUG_KMS("Failed to write to D_COMP\n"); - mutex_unlock(&dev_priv->pcu_lock); } else { I915_WRITE(D_COMP_BDW, val); POSTING_READ(D_COMP_BDW); diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/intel_hdcp.c index 99b007169c49..2476e867981d 100644 --- a/drivers/gpu/drm/i915/intel_hdcp.c +++ b/drivers/gpu/drm/i915/intel_hdcp.c @@ -213,10 +213,8 @@ static int intel_hdcp_load_keys(struct drm_i915_private *dev_priv) * from other platforms. So GEN9_BC uses the GT Driver Mailbox i/f. */ if (IS_GEN9_BC(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, SKL_PCODE_LOAD_HDCP_KEYS, 1); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to initiate HDCP key load (%d)\n", ret); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 9db39ea9bd83..5d4a793a1988 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -317,7 +317,6 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) { u32 val; - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); @@ -334,14 +333,12 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) DRM_ERROR("timed out waiting for Punit DDR DVFS request\n"); vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); } static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) { u32 val; - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); @@ -352,7 +349,6 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM, val); vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); } #define FW_WM(value, plane) \ @@ -2821,11 +2817,9 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, /* read the first set of memory latencies[0:3] */ val = 0; /* data0 to be programmed to 0 for first set */ - mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_READ_MEM_LATENCY, &val); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("SKL Mailbox read error = %d\n", ret); @@ -2842,11 +2836,9 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, /* read the second set of memory latencies[4:7] */ val = 1; /* data0 to be programmed to 1 for second set */ - mutex_lock(&dev_priv->pcu_lock); ret = 
sandybridge_pcode_read(dev_priv, GEN9_PCODE_READ_MEM_LATENCY, &val); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("SKL Mailbox read error = %d\n", ret); return; @@ -3681,13 +3673,10 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) return 0; DRM_DEBUG_KMS("Enabling SAGV\n"); - mutex_lock(&dev_priv->pcu_lock); - ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_ENABLE); /* We don't need to wait for SAGV when enabling */ - mutex_unlock(&dev_priv->pcu_lock); /* * Some skl systems, pre-release machines in particular, @@ -3718,15 +3707,11 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) return 0; DRM_DEBUG_KMS("Disabling SAGV\n"); - mutex_lock(&dev_priv->pcu_lock); - /* bspec says to keep retrying for at least 1 ms */ ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_DISABLE, GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED, 1); - mutex_unlock(&dev_priv->pcu_lock); - /* * Some skl systems, pre-release machines in particular, * don't actually have SAGV. @@ -6143,7 +6128,6 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv) wm->level = VLV_WM_LEVEL_PM2; if (IS_CHERRYVIEW(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); @@ -6175,7 +6159,6 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv) } vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); } for_each_intel_crtc(&dev_priv->drm, crtc) { @@ -6804,7 +6787,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); if (rps->enabled) { u8 freq; @@ -6827,7 +6810,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) rps->max_freq_softlimit))) DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); } - mutex_unlock(&dev_priv->pcu_lock); + mutex_unlock(&rps->lock); } void gen6_rps_idle(struct drm_i915_private *dev_priv) @@ -6841,7 +6824,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) */ gen6_disable_rps_interrupts(dev_priv); - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); if (rps->enabled) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_set_rps_idle(dev_priv); @@ -6851,7 +6834,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } - mutex_unlock(&dev_priv->pcu_lock); + mutex_unlock(&rps->lock); } void gen6_rps_boost(struct i915_request *rq) @@ -6891,7 +6874,7 @@ int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) struct intel_rps *rps = &dev_priv->gt_pm.rps; int err; - lockdep_assert_held(&dev_priv->pcu_lock); + lockdep_assert_held(&rps->lock); GEM_BUG_ON(val > rps->max_freq); GEM_BUG_ON(val < rps->min_freq); @@ -7464,7 +7447,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) unsigned int max_gpu_freq, min_gpu_freq; struct cpufreq_policy *policy; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); + lockdep_assert_held(&rps->lock); if (rps->max_freq <= rps->min_freq) return; @@ -8549,8 +8532,6 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) pm_runtime_get(&dev_priv->drm.pdev->dev); } - mutex_lock(&dev_priv->pcu_lock); - /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) cherryview_init_gt_powersave(dev_priv); @@ -8581,8 +8562,6 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) rps->boost_freq = rps->max_freq; rps->idle_freq = rps->min_freq; rps->cur_freq = rps->idle_freq; - - 
mutex_unlock(&dev_priv->pcu_lock); } void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) @@ -8608,7 +8587,7 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) { - lockdep_assert_held(&i915->pcu_lock); + lockdep_assert_held(&i915->gt_pm.rps.lock); if (!i915->gt_pm.llc_pstate.enabled) return; @@ -8620,7 +8599,7 @@ static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) static void intel_disable_rc6(struct drm_i915_private *dev_priv) { - lockdep_assert_held(&dev_priv->pcu_lock); + lockdep_assert_held(&dev_priv->gt_pm.rps.lock); if (!dev_priv->gt_pm.rc6.enabled) return; @@ -8639,7 +8618,7 @@ static void intel_disable_rc6(struct drm_i915_private *dev_priv) static void intel_disable_rps(struct drm_i915_private *dev_priv) { - lockdep_assert_held(&dev_priv->pcu_lock); + lockdep_assert_held(&dev_priv->gt_pm.rps.lock); if (!dev_priv->gt_pm.rps.enabled) return; @@ -8660,19 +8639,19 @@ static void intel_disable_rps(struct drm_i915_private *dev_priv) void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&dev_priv->gt_pm.rps.lock); intel_disable_rc6(dev_priv); intel_disable_rps(dev_priv); if (HAS_LLC(dev_priv)) intel_disable_llc_pstate(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); + mutex_unlock(&dev_priv->gt_pm.rps.lock); } static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) { - lockdep_assert_held(&i915->pcu_lock); + lockdep_assert_held(&i915->gt_pm.rps.lock); if (i915->gt_pm.llc_pstate.enabled) return; @@ -8684,7 +8663,7 @@ static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) static void intel_enable_rc6(struct drm_i915_private *dev_priv) { - lockdep_assert_held(&dev_priv->pcu_lock); + lockdep_assert_held(&dev_priv->gt_pm.rps.lock); if (dev_priv->gt_pm.rc6.enabled) return; @@ -8709,7 +8688,7 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; - lockdep_assert_held(&dev_priv->pcu_lock); + lockdep_assert_held(&rps->lock); if (rps->enabled) return; @@ -8744,7 +8723,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) if (intel_vgpu_active(dev_priv)) return; - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&dev_priv->gt_pm.rps.lock); if (HAS_RC6(dev_priv)) intel_enable_rc6(dev_priv); @@ -8753,7 +8732,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) if (HAS_LLC(dev_priv)) intel_enable_llc_pstate(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); + mutex_unlock(&dev_priv->gt_pm.rps.lock); } static void ibx_init_clock_gating(struct drm_i915_private *dev_priv) @@ -9769,22 +9748,20 @@ static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv) } } -int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) +static int +__sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) { int status; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); + lockdep_assert_held(&dev_priv->sb_lock); /* GEN6_PCODE_* are outside of the forcewake domain, we can * use te fw I915_READ variants to reduce the amount of work * required when reading/writing. 
*/ - if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { - DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n", - mbox, __builtin_return_address(0)); + if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) return -EAGAIN; - } I915_WRITE_FW(GEN6_PCODE_DATA, *val); I915_WRITE_FW(GEN6_PCODE_DATA1, 0); @@ -9792,11 +9769,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val if (__intel_wait_for_register_fw(&dev_priv->uncore, GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, - 500, 0, NULL)) { - DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n", - mbox, __builtin_return_address(0)); + 500, 0, NULL)) return -ETIMEDOUT; - } *val = I915_READ_FW(GEN6_PCODE_DATA); I915_WRITE_FW(GEN6_PCODE_DATA, 0); @@ -9806,33 +9780,40 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val else status = gen6_check_mailbox_status(dev_priv); + return status; +} + +int +sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) +{ + int status; + + mutex_lock(&dev_priv->sb_lock); + status = __sandybridge_pcode_read(dev_priv, mbox, val); + mutex_unlock(&dev_priv->sb_lock); + if (status) { DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n", mbox, __builtin_return_address(0), status); - return status; } - return 0; + return status; } -int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, - u32 mbox, u32 val, - int fast_timeout_us, int slow_timeout_ms) +static int __sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, + u32 mbox, u32 val, + int fast_timeout_us, + int slow_timeout_ms) { int status; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); - /* GEN6_PCODE_* are outside of the forcewake domain, we can * use te fw I915_READ variants to reduce the amount of work * required when reading/writing. 
*/ - if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { - DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n", - val, mbox, __builtin_return_address(0)); + if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) return -EAGAIN; - } I915_WRITE_FW(GEN6_PCODE_DATA, val); I915_WRITE_FW(GEN6_PCODE_DATA1, 0); @@ -9841,11 +9822,8 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, if (__intel_wait_for_register_fw(&dev_priv->uncore, GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, fast_timeout_us, slow_timeout_ms, - NULL)) { - DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n", - val, mbox, __builtin_return_address(0)); + NULL)) return -ETIMEDOUT; - } I915_WRITE_FW(GEN6_PCODE_DATA, 0); @@ -9854,13 +9832,28 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, else status = gen6_check_mailbox_status(dev_priv); + return status; +} + +int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, + u32 mbox, u32 val, + int fast_timeout_us, + int slow_timeout_ms) +{ + int status; + + mutex_lock(&dev_priv->sb_lock); + status = __sandybridge_pcode_write_timeout(dev_priv, mbox, val, + fast_timeout_us, + slow_timeout_ms); + mutex_unlock(&dev_priv->sb_lock); + if (status) { DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n", val, mbox, __builtin_return_address(0), status); - return status; } - return 0; + return status; } static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox, @@ -9869,7 +9862,7 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox, { u32 val = request; - *status = sandybridge_pcode_read(dev_priv, mbox, &val); + *status = __sandybridge_pcode_read(dev_priv, mbox, &val); return *status || ((val & reply_mask) == reply); } @@ -9899,7 +9892,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, u32 status; int ret; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); + mutex_lock(&dev_priv->sb_lock); #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \ &status) @@ -9935,6 +9928,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, preempt_enable(); out: + mutex_unlock(&dev_priv->sb_lock); return ret ? ret : status; #undef COND } @@ -10004,7 +9998,7 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) void intel_pm_setup(struct drm_i915_private *dev_priv) { - mutex_init(&dev_priv->pcu_lock); + mutex_init(&dev_priv->gt_pm.rps.lock); mutex_init(&dev_priv->gt_pm.rps.power.mutex); atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index ac8bc5baef40..a80ff35f6c81 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1211,7 +1211,6 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, state = enable ? 
PUNIT_PWRGT_PWR_ON(pw_idx) : PUNIT_PWRGT_PWR_GATE(pw_idx); - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); #define COND \ @@ -1234,7 +1233,6 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, out: vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); } static void vlv_power_well_enable(struct drm_i915_private *dev_priv, @@ -1261,7 +1259,6 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, mask = PUNIT_PWRGT_MASK(pw_idx); ctrl = PUNIT_PWRGT_PWR_ON(pw_idx); - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask; @@ -1282,7 +1279,6 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, WARN_ON(ctrl != state); vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); return enabled; } @@ -1768,7 +1764,6 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, bool enabled; u32 state, ctrl; - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); state = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSS_MASK(pipe); @@ -1787,7 +1782,6 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, WARN_ON(ctrl << 16 != state); vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); return enabled; } @@ -1802,7 +1796,6 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); #define COND \ @@ -1825,7 +1818,6 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, out: vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); } static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, @@ -4019,11 +4011,9 @@ static bool vlv_punit_is_power_gated(struct drm_i915_private *dev_priv, u32 reg0 { bool ret; - mutex_lock(&dev_priv->pcu_lock); vlv_punit_get(dev_priv); ret = (vlv_punit_read(dev_priv, reg0) & SSPM0_SSC_MASK) == SSPM0_SSC_PWR_GATE; vlv_punit_put(dev_priv); - mutex_unlock(&dev_priv->pcu_lock); return ret; } diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 7c33925f52f9..457b8cad5494 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -143,8 +143,6 @@ u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) { u32 val = 0; - lockdep_assert_held(&i915->pcu_lock); - vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, SB_CRRDDA_NP, addr, &val); @@ -153,8 +151,6 @@ u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val) { - lockdep_assert_held(&i915->pcu_lock); - return vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, SB_CRWRDA_NP, addr, &val); } -- cgit From 56c5098ffcf8e655ac4e8f0634e44f1cea988590 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Apr 2019 09:17:22 +0100 Subject: drm/i915: Separate sideband declarations to intel_sideband.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the sideband declarations out of the ginormous i915_drv.h Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190426081725.31217-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile.header-test | 1 + drivers/gpu/drm/i915/i915_debugfs.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 120 --------------------------- drivers/gpu/drm/i915/i915_sysfs.c | 2 + 
drivers/gpu/drm/i915/intel_cdclk.c | 1 + drivers/gpu/drm/i915/intel_display.c | 1 + drivers/gpu/drm/i915/intel_dp.c | 2 + drivers/gpu/drm/i915/intel_dpio_phy.c | 1 + drivers/gpu/drm/i915/intel_dsi_vbt.c | 13 ++- drivers/gpu/drm/i915/intel_hdmi.c | 1 + drivers/gpu/drm/i915/intel_pm.c | 1 + drivers/gpu/drm/i915/intel_runtime_pm.c | 1 + drivers/gpu/drm/i915/intel_sideband.c | 2 + drivers/gpu/drm/i915/intel_sideband.h | 130 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/vlv_dsi.c | 2 +- drivers/gpu/drm/i915/vlv_dsi_pll.c | 4 +- 16 files changed, 157 insertions(+), 126 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_sideband.h diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index 702e3a7ade4c..325071da0ff7 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -30,6 +30,7 @@ header_test := \ intel_pipe_crc.h \ intel_pm.h \ intel_psr.h \ + intel_sideband.h \ intel_sdvo.h \ intel_sprite.h \ intel_tv.h \ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9545556898e6..28e69e5ae9b3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -42,6 +42,7 @@ #include "intel_hdmi.h" #include "intel_pm.h" #include "intel_psr.h" +#include "intel_sideband.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ca1b35d8faca..c18b28271bfd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -541,11 +541,6 @@ enum intel_pch { PCH_ICP, /* Ice Lake PCH */ }; -enum intel_sbi_destination { - SBI_ICLK, - SBI_MPHY, -}; - #define QUIRK_LVDS_SSC_DISABLE (1<<1) #define QUIRK_INVERT_BRIGHTNESS (1<<2) #define QUIRK_BACKLIGHT_PRESENT (1<<3) @@ -3435,121 +3430,6 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox, int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_base_ms); -/* intel_sideband.c */ - -enum { - VLV_IOSF_SB_BUNIT, - VLV_IOSF_SB_CCK, - VLV_IOSF_SB_CCU, - VLV_IOSF_SB_DPIO, - VLV_IOSF_SB_FLISDSI, - VLV_IOSF_SB_GPIO, - VLV_IOSF_SB_NC, - VLV_IOSF_SB_PUNIT, -}; - -void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports); -u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg); -void vlv_iosf_sb_write(struct drm_i915_private *i915, - u8 port, u32 reg, u32 val); -void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports); - -static inline void vlv_bunit_get(struct drm_i915_private *i915) -{ - vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_BUNIT)); -} - -u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg); -void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val); - -static inline void vlv_bunit_put(struct drm_i915_private *i915) -{ - vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_BUNIT)); -} - -static inline void vlv_cck_get(struct drm_i915_private *i915) -{ - vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_CCK)); -} - -u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg); -void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val); - -static inline void vlv_cck_put(struct drm_i915_private *i915) -{ - vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_CCK)); -} - -static inline void vlv_ccu_get(struct drm_i915_private *i915) -{ - vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_CCU)); -} - -u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg); -void 
vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val); - -static inline void vlv_ccu_put(struct drm_i915_private *i915) -{ - vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_CCU)); -} - -static inline void vlv_dpio_get(struct drm_i915_private *i915) -{ - vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_DPIO)); -} - -u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg); -void vlv_dpio_write(struct drm_i915_private *i915, - enum pipe pipe, int reg, u32 val); - -static inline void vlv_dpio_put(struct drm_i915_private *i915) -{ - vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_DPIO)); -} - -static inline void vlv_flisdsi_get(struct drm_i915_private *i915) -{ - vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_FLISDSI)); -} - -u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg); -void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val); - -static inline void vlv_flisdsi_put(struct drm_i915_private *i915) -{ - vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_FLISDSI)); -} - -static inline void vlv_nc_get(struct drm_i915_private *i915) -{ - vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_NC)); -} - -u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr); - -static inline void vlv_nc_put(struct drm_i915_private *i915) -{ - vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_NC)); -} - -static inline void vlv_punit_get(struct drm_i915_private *i915) -{ - vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_PUNIT)); -} - -u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr); -int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val); - -static inline void vlv_punit_put(struct drm_i915_private *i915) -{ - vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_PUNIT)); -} - -u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, - enum intel_sbi_destination destination); -void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, - enum intel_sbi_destination destination); - /* intel_dpio_phy.c */ void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port, enum dpio_phy *phy, enum dpio_channel *ch); diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 0952d6a70e1f..9bb3a15e4683 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -29,7 +29,9 @@ #include #include #include + #include "intel_drv.h" +#include "intel_sideband.h" #include "i915_drv.h" static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 2bc5d3227a24..cf9c916e8d49 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -23,6 +23,7 @@ #include "intel_cdclk.h" #include "intel_drv.h" +#include "intel_sideband.h" /** * DOC: CDCLK / RAWCLK diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3cd8273b7186..2e2ed2c2b482 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -66,6 +66,7 @@ #include "intel_pm.h" #include "intel_psr.h" #include "intel_sdvo.h" +#include "intel_sideband.h" #include "intel_sprite.h" #include "intel_tv.h" diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 4fc25dcc97d4..08d92570f17f 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -31,6 +31,7 @@ #include #include #include + #include #include @@ -53,6 +54,7 @@ #include "intel_lvds.h" #include "intel_panel.h" #include "intel_psr.h" +#include "intel_sideband.h" #define DP_DPRX_ESI_LEN 14 diff 
--git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index c784f3daaf51..d80887b5e234 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -23,6 +23,7 @@ #include "intel_dp.h" #include "intel_drv.h" +#include "intel_sideband.h" /** * DOC: DPIO diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 2304488f2d35..fbed9064ac7e 100644 --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -24,18 +24,23 @@ * */ -#include -#include -#include #include #include #include -#include