diff options
author | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2022-02-23 14:19:43 -0500 |
---|---|---|
committer | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2022-02-23 15:03:51 -0500 |
commit | 30424ebae8df0f786835e7a31ad790fa00764f35 (patch) | |
tree | ef8d835947f4bb8bb608ae2841c34f0d6cb93030 /drivers/gpu/drm/i915/gt | |
parent | 6d8ebef53c2ccea482ed77769b7b5cbb453b7715 (diff) | |
parent | 154cfae6158141b18d65abb0db679bb51a8294e7 (diff) |
Merge tag 'drm-intel-gt-next-2022-02-17' of git://anongit.freedesktop.org/drm/drm-intel into drm-intel-next
UAPI Changes:
- Weak parallel submission support for execlists
Minimal implementation of the parallel submission support for
execlists backend that was previously only implemented for GuC.
Support one sibling non-virtual engine.
Core Changes:
- Two backmerges of drm/drm-next for header file renames/changes and
i915_regs reorganization
Driver Changes:
- Add new DG2 subplatform: DG2-G12 (Matt R)
- Add new DG2 workarounds (Matt R, Ram, Bruce)
- Handle pre-programmed WOPCM registers for DG2+ (Daniele)
- Update guc shim control programming on XeHP SDV+ (Daniele)
- Add RPL-S C0/D0 stepping information (Anusha)
- Improve GuC ADS initialization to work on ARM64 on dGFX (Lucas)
- Fix KMD and GuC race on accessing PMU busyness (Umesh)
- Use PM timestamp instead of RING TIMESTAMP for reference in PMU with GuC (Umesh)
- Report error on invalid reset notification from GuC (John)
- Avoid WARN splat by holding RPM wakelock during PXP unbind (Juston)
- Fixes to parallel submission implementation (Matt B.)
- Improve GuC loading status check/error reports (John)
- Tweak TTM LRU priority hint selection (Matt A.)
- Align the plane_vma to min_page_size of stolen mem (Ram)
- Introduce vma resources and implement async unbinding (Thomas)
- Use struct vma_resource instead of struct vma_snapshot (Thomas)
- Return some TTM accel move errors instead of trying memcpy move (Thomas)
- Fix a race between vma / object destruction and unbinding (Thomas)
- Remove short-term pins from execbuf (Maarten)
- Update to GuC version 69.0.3 (John, Michal Wa.)
- Improvements to GT reset paths in GuC backend (Matt B.)
- Use shrinker_release_pages instead of writeback in shmem object hooks (Matt A., Tvrtko)
- Use trylock instead of blocking lock when freeing GEM objects (Maarten)
- Allocate intel_engine_coredump_alloc with ALLOW_FAIL (Matt B.)
- Fixes to object unmapping and purging (Matt A)
- Check for wedged device in GuC backend (John)
- Avoid lockdep splat by locking dpt_obj around set_cache_level (Maarten)
- Allow dead vm to unbind vma's without lock (Maarten)
- s/engine->i915/i915/ for DG2 engine workarounds (Matt R)
- Use to_gt() helper for GGTT accesses (Michal Wi.)
- Selftest improvements (Matt B., Thomas, Ram)
- Coding style and compiler warning fixes (Matt B., Jasmine, Andi, Colin, Gustavo, Dan)
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Yg4i2aCZvvee5Eai@jlahtine-mobl.ger.corp.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Fixed conflicts while applying, using the fixups/drm-intel-gt-next.patch
from drm-rerere's 1f2b1742abdd ("2022y-02m-23d-16h-07m-57s UTC: drm-tip
rerere cache update")]
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
37 files changed, 1071 insertions, 463 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 0bd0d611e0c8..871fe7bda0e0 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -108,17 +108,17 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, } static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory * const pd = ppgtt->pd; - unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE; + unsigned int first_entry = vma_res->start / I915_GTT_PAGE_SIZE; unsigned int act_pt = first_entry / GEN6_PTES; unsigned int act_pte = first_entry % GEN6_PTES; const u32 pte_encode = vm->pte_encode(0, cache_level, flags); - struct sgt_dma iter = sgt_dma(vma); + struct sgt_dma iter = sgt_dma(vma_res); gen6_pte_t *vaddr; GEM_BUG_ON(!pd->entry[act_pt]); @@ -144,7 +144,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, } } while (1); - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; } static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end) @@ -275,13 +275,13 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) static void pd_vma_bind(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 unused) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - struct gen6_ppgtt *ppgtt = vma->private; - u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE; + struct gen6_ppgtt *ppgtt = vma_res->private; + u32 ggtt_offset = vma_res->start / I915_GTT_PAGE_SIZE; ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10; ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; @@ -289,9 +289,10 @@ static void pd_vma_bind(struct i915_address_space *vm, gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total); } -static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) +static void pd_vma_unbind(struct i915_address_space *vm, + struct i915_vma_resource *vma_res) { - struct gen6_ppgtt *ppgtt = vma->private; + struct gen6_ppgtt *ppgtt = vma_res->private; struct i915_page_directory * const pd = ppgtt->base.pd; struct i915_page_table *pt; unsigned int pde; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index b012c50f7ce7..c43e724afa9f 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -453,20 +453,21 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, return idx; } -static void gen8_ppgtt_insert_huge(struct i915_vma *vma, +static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, struct sgt_dma *iter, enum i915_cache_level cache_level, u32 flags) { const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); unsigned int rem = sg_dma_len(iter->sg); - u64 start = vma->node.start; + u64 start = vma_res->start; - GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm)); + GEM_BUG_ON(!i915_vm_is_4lvl(vm)); do { struct i915_page_directory * const pdp = - gen8_pdp_for_page_address(vma->vm, start); + gen8_pdp_for_page_address(vm, start); struct i915_page_directory * const pd = i915_pd_entry(pdp, __gen8_pte_index(start, 2)); gen8_pte_t encode = pte_encode; @@ -475,7 +476,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, gen8_pte_t *vaddr; u16 index; - if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M && + if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M && IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && rem >= I915_GTT_PAGE_SIZE_2M && !__gen8_pte_index(start, 0)) { @@ -492,7 +493,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, page_size = I915_GTT_PAGE_SIZE; if (!index && - vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && + vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K && IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)) @@ -541,9 +542,9 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, */ if (maybe_64K != -1 && (index == I915_PDES || - (i915_vm_has_scratch_64K(vma->vm) && - !iter->sg && IS_ALIGNED(vma->node.start + - vma->node.size, + (i915_vm_has_scratch_64K(vm) && + !iter->sg && IS_ALIGNED(vma_res->start + + vma_res->node_size, I915_GTT_PAGE_SIZE_2M)))) { vaddr = px_vaddr(pd); vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; @@ -559,10 +560,10 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, * instead - which we detect as missing results during * selftests. */ - if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { + if (I915_SELFTEST_ONLY(vm->scrub_64K)) { u16 i; - encode = vma->vm->scratch[0]->encode; + encode = vm->scratch[0]->encode; vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K)); for (i = 1; i < index; i += 16) @@ -572,22 +573,22 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, } } - vma->page_sizes.gtt |= page_size; + vma_res->page_sizes_gtt |= page_size; } while (iter->sg && sg_dma_len(iter->sg)); } static void gen8_ppgtt_insert(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags) { struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); - struct sgt_dma iter = sgt_dma(vma); + struct sgt_dma iter = sgt_dma(vma_res); - if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { - gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags); + if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) { + gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags); } else { - u64 idx = vma->node.start >> GEN8_PTE_SHIFT; + u64 idx = vma_res->start >> GEN8_PTE_SHIFT; do { struct i915_page_directory * const pdp = @@ -597,7 +598,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm, cache_level, flags); } while (idx); - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; } } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index ba083d800a08..5d0ec7c49b6a 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) __i915_active_acquire(&ce->active); - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || + intel_context_is_parallel(ce)) return 0; /* Preallocate tracking nodes */ @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, * Callers responsibility to validate that this function is used * correctly but we use GEM_BUG_ON here ensure that they do. */ - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); GEM_BUG_ON(intel_context_is_pinned(parent)); GEM_BUG_ON(intel_context_is_child(parent)); GEM_BUG_ON(intel_context_is_pinned(child)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 959e9300ac9e..e53008b4dd05 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1700,18 +1700,15 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, static void print_request_ring(struct drm_printer *m, struct i915_request *rq) { - struct i915_vma_snapshot *vsnap = &rq->batch_snapshot; + struct i915_vma_resource *vma_res = rq->batch_res; void *ring; int size; - if (!i915_vma_snapshot_present(vsnap)) - vsnap = NULL; - drm_printf(m, "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n", rq->head, rq->postfix, rq->tail, - vsnap ? upper_32_bits(vsnap->gtt_offset) : ~0u, - vsnap ? lower_32_bits(vsnap->gtt_offset) : ~0u); + vma_res ? upper_32_bits(vma_res->start) : ~0u, + vma_res ? lower_32_bits(vma_res->start) : ~0u); size = rq->tail - rq->head; if (rq->tail < rq->head) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 4a9ef688fac2..961d795220a3 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2601,6 +2601,43 @@ static void execlists_context_cancel_request(struct intel_context *ce, current->comm); } +static struct intel_context * +execlists_create_parallel(struct intel_engine_cs **engines, + unsigned int num_siblings, + unsigned int width) +{ + struct intel_context *parent = NULL, *ce, *err; + int i; + + GEM_BUG_ON(num_siblings != 1); + + for (i = 0; i < width; ++i) { + ce = intel_context_create(engines[i]); + if (IS_ERR(ce)) { + err = ce; + goto unwind; + } + + if (i == 0) + parent = ce; + else + intel_context_bind_parent_child(parent, ce); + } + + parent->parallel.fence_context = dma_fence_context_alloc(1); + + intel_context_set_nopreempt(parent); + for_each_child(parent, ce) + intel_context_set_nopreempt(ce); + + return parent; + +unwind: + if (parent) + intel_context_put(parent); + return err; +} + static const struct intel_context_ops execlists_context_ops = { .flags = COPS_HAS_INFLIGHT, @@ -2619,6 +2656,7 @@ static const struct intel_context_ops execlists_context_ops = { .reset = lrc_reset, .destroy = lrc_destroy, + .create_parallel = execlists_create_parallel, .create_virtual = execlists_create_virtual, }; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index d2922f64d1c8..8850d4e0f9cc 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -87,7 +87,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915) * beyond the end of the batch buffer, across the page boundary, * and beyond the end of the GTT if we do not provide a guard. */ - ret = ggtt_init_hw(&i915->ggtt); + ret = ggtt_init_hw(to_gt(i915)->ggtt); if (ret) return ret; @@ -130,22 +130,51 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm) drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt); +retry: + i915_gem_drain_freed_objects(vm->i915); + mutex_lock(&vm->mutex); /* Skip rewriting PTE on VMA unbind. */ open = atomic_xchg(&vm->open, 0); list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { + struct drm_i915_gem_object *obj = vma->obj; + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - i915_vma_wait_for_bind(vma); - if (i915_vma_is_pinned(vma)) + if (i915_vma_is_pinned(vma) || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) continue; + /* unlikely to race when GPU is idle, so no worry about slowpath.. */ + if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) { + /* + * No dead objects should appear here, GPU should be + * completely idle, and userspace suspended + */ + i915_gem_object_get(obj); + + atomic_set(&vm->open, open); + mutex_unlock(&vm->mutex); + + i915_gem_object_lock(obj, NULL); + open = i915_vma_unbind(vma); + i915_gem_object_unlock(obj); + + GEM_WARN_ON(open); + + i915_gem_object_put(obj); + goto retry; + } + if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { - __i915_vma_evict(vma); + i915_vma_wait_for_bind(vma); + + __i915_vma_evict(vma, false); drm_mm_remove_node(&vma->node); } + + i915_gem_object_unlock(obj); } vm->clear_range(vm, 0, vm->total); @@ -236,7 +265,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, } static void gen8_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level level, u32 flags) { @@ -253,10 +282,10 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, */ gte = (gen8_pte_t __iomem *)ggtt->gsm; - gte += vma->node.start / I915_GTT_PAGE_SIZE; - end = gte + vma->node.size / I915_GTT_PAGE_SIZE; + gte += vma_res->start / I915_GTT_PAGE_SIZE; + end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE; - for_each_sgt_daddr(addr, iter, vma->pages) + for_each_sgt_daddr(addr, iter, vma_res->bi.pages) gen8_set_pte(gte++, pte_encode | addr); GEM_BUG_ON(gte > end); @@ -293,7 +322,7 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm, * through the GMADR mapped BAR (i915->mm.gtt->gtt). */ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level level, u32 flags) { @@ -304,10 +333,10 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, dma_addr_t addr; gte = (gen6_pte_t __iomem *)ggtt->gsm; - gte += vma->node.start / I915_GTT_PAGE_SIZE; - end = gte + vma->node.size / I915_GTT_PAGE_SIZE; + gte += vma_res->start / I915_GTT_PAGE_SIZE; + end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE; - for_each_sgt_daddr(addr, iter, vma->pages) + for_each_sgt_daddr(addr, iter, vma_res->bi.pages) iowrite32(vm->pte_encode(addr, level, flags), gte++); GEM_BUG_ON(gte > end); @@ -390,7 +419,7 @@ static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, struct insert_entries { struct i915_address_space *vm; - struct i915_vma *vma; + struct i915_vma_resource *vma_res; enum i915_cache_level level; u32 flags; }; @@ -399,18 +428,18 @@ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) { struct insert_entries *arg = _arg; - gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags); + gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags); bxt_vtd_ggtt_wa(arg->vm); return 0; } static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level level, u32 flags) { - struct insert_entries arg = { vm, vma, level, flags }; + struct insert_entries arg = { vm, vma_res, level, flags }; stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); } @@ -449,14 +478,14 @@ static void i915_ggtt_insert_page(struct i915_address_space *vm, } static void i915_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 unused) { unsigned int flags = (cache_level == I915_CACHE_NONE) ? AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT, + intel_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT, flags); } @@ -468,30 +497,32 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm, static void ggtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags) { - struct drm_i915_gem_object *obj = vma->obj; u32 pte_flags; - if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK)) + if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK)) return; + vma_res->bound_flags |= flags; + /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ pte_flags = 0; - if (i915_gem_object_is_readonly(obj)) + if (vma_res->bi.readonly) pte_flags |= PTE_READ_ONLY; - if (i915_gem_object_is_lmem(obj)) + if (vma_res->bi.lmem) pte_flags |= PTE_LM; - vm->insert_entries(vm, vma, cache_level, pte_flags); - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + vm->insert_entries(vm, vma_res, cache_level, pte_flags); + vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; } -static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma) +static void ggtt_unbind_vma(struct i915_address_space *vm, + struct i915_vma_resource *vma_res) { - vm->clear_range(vm, vma->node.start, vma->size); + vm->clear_range(vm, vma_res->start, vma_res->vma_size); } static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) @@ -505,7 +536,7 @@ static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); size = ggtt->vm.total - GUC_GGTT_TOP; - ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size, + ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size, GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, PIN_NOEVICT); if (ret) @@ -624,7 +655,7 @@ err: static void aliasing_gtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags) { @@ -632,25 +663,27 @@ static void aliasing_gtt_bind_vma(struct i915_address_space *vm, /* Currently applicable only to VLV */ pte_flags = 0; - if (i915_gem_object_is_readonly(vma->obj)) + if (vma_res->bi.readonly) pte_flags |= PTE_READ_ONLY; if (flags & I915_VMA_LOCAL_BIND) ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm, - stash, vma, cache_level, flags); + stash, vma_res, cache_level, flags); if (flags & I915_VMA_GLOBAL_BIND) - vm->insert_entries(vm, vma, cache_level, pte_flags); + vm->insert_entries(vm, vma_res, cache_level, pte_flags); + + vma_res->bound_flags |= flags; } static void aliasing_gtt_unbind_vma(struct i915_address_space *vm, - struct i915_vma *vma) + struct i915_vma_resource *vma_res) { - if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) - vm->clear_range(vm, vma->node.start, vma->size); + if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND) + vm->clear_range(vm, vma_res->start, vma_res->vma_size); - if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) - ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma); + if (vma_res->bound_flags & I915_VMA_LOCAL_BIND) + ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma_res); } static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) @@ -723,14 +756,14 @@ int i915_init_ggtt(struct drm_i915_private *i915) { int ret; - ret = init_ggtt(&i915->ggtt); + ret = init_ggtt(to_gt(i915)->ggtt); if (ret) return ret; if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) { - ret = init_aliasing_ppgtt(&i915->ggtt); + ret = init_aliasing_ppgtt(to_gt(i915)->ggtt); if (ret) - cleanup_init_ggtt(&i915->ggtt); + cleanup_init_ggtt(to_gt(i915)->ggtt); } return 0; @@ -743,11 +776,21 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) atomic_set(&ggtt->vm.open, 0); flush_workqueue(ggtt->vm.i915->wq); + i915_gem_drain_freed_objects(ggtt->vm.i915); mutex_lock(&ggtt->vm.mutex); - list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { + struct drm_i915_gem_object *obj = vma->obj; + bool trylock; + + trylock = i915_gem_object_trylock(obj, NULL); + WARN_ON(!trylock); + WARN_ON(__i915_vma_unbind(vma)); + if (trylock) + i915_gem_object_unlock(obj); + } if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); @@ -773,7 +816,7 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) */ void i915_ggtt_driver_release(struct drm_i915_private *i915) { - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; fini_aliasing_ppgtt(ggtt); @@ -788,7 +831,7 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915) */ void i915_ggtt_driver_late_release(struct drm_i915_private *i915) { - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1); dma_resv_fini(&ggtt->vm._resv); @@ -1209,7 +1252,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) { int ret; - ret = ggtt_probe_hw(&i915->ggtt, to_gt(i915)); + ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915)); if (ret) return ret; @@ -1281,7 +1324,7 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm) atomic_read(&vma->flags) & I915_VMA_BIND_MASK; GEM_BUG_ON(!was_bound); - vma->ops->bind_vma(vm, NULL, vma, + vma->ops->bind_vma(vm, NULL, vma->resource, obj ? obj->cache_level : 0, was_bound); if (obj) { /* only used during resume => exclusive access */ diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index f294753bc947..76880fb8fc19 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -428,7 +428,6 @@ int i915_vma_pin_fence(struct i915_vma *vma) * must keep the device awake whilst using the fence. */ assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm); - GEM_BUG_ON(!i915_vma_is_pinned(vma)); GEM_BUG_ON(!i915_vma_is_ggtt(vma)); err = mutex_lock_interruptible(&vma->vm->mutex); @@ -731,8 +730,8 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) swizzle_y = I915_BIT_6_SWIZZLE_NONE; } - i915->ggtt.bit_6_swizzle_x = swizzle_x; - i915->ggtt.bit_6_swizzle_y = swizzle_y; + to_gt(i915)->ggtt->bit_6_swizzle_x = swizzle_x; + to_gt(i915)->ggtt->bit_6_swizzle_y = swizzle_y; } /* @@ -899,7 +898,7 @@ void intel_gt_init_swizzling(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; if (GRAPHICS_VER(i915) < 5 || - i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) + to_gt(i915)->ggtt->bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) return; intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 545a2b1f1834..e8403fa53909 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -3,6 +3,7 @@ * Copyright © 2019 Intel Corporation */ +#include <drm/drm_managed.h> #include <drm/intel-gtt.h> #include "gem/i915_gem_internal.h" @@ -90,9 +91,11 @@ int intel_gt_probe_lmem(struct intel_gt *gt) return 0; } -void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) +int intel_gt_assign_ggtt(struct intel_gt *gt) { - gt->ggtt = ggtt; + gt->ggtt = drmm_kzalloc(>->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL); + + return gt->ggtt ? 0 : -ENOMEM; } static const struct intel_mmio_range icl_l3bank_steering_table[] = { diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index a913fb6ffec3..2dad46c3eff2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -36,7 +36,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); -void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt); +int intel_gt_assign_ggtt(struct intel_gt *gt); int intel_gt_probe_lmem(struct intel_gt *gt); int intel_gt_init_mmio(struct intel_gt *gt); int __must_check intel_gt_init_hw(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index e8143fa4b5a8..18d158d77aba 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -322,6 +322,9 @@ #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) +#define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910) +#define XEHPSDV_CCS_BASE_SHIFT 8 + #define GAMTARBMODE _MMIO(0x4a08) #define ARB_MODE_BWGTLB_DISABLE (1 << 9) #define ARB_MODE_SWIZZLE_BDW (1 << 1) @@ -1042,6 +1045,7 @@ #define GEN9_ROW_CHICKEN4 _MMIO(0xe48c) #define GEN12_DISABLE_GRF_CLEAR REG_BIT(13) +#define XEHP_DIS_BBL_SYSPIPE REG_BIT(11) #define GEN12_DISABLE_TDL_PUSH REG_BIT(9) #define GEN11_DIS_PICK_2ND_EU REG_BIT(7) #define GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 8506db9983da..49a8fb63e6e5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -163,6 +163,9 @@ static void __i915_vm_release(struct work_struct *work) struct i915_address_space *vm = container_of(work, struct i915_address_space, release_work); + /* Synchronize async unbinds. */ + i915_vma_resource_bind_dep_sync_all(vm); + vm->cleanup(vm); i915_address_space_fini(vm); @@ -191,6 +194,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) if (!kref_read(&vm->resv_ref)) kref_init(&vm->resv_ref); + vm->pending_unbind = RB_ROOT_CACHED; INIT_WORK(&vm->release_work, __i915_vm_release); atomic_set(&vm->open, 1); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 177b42b935a1..8073438b67c8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -27,6 +27,7 @@ #include "gt/intel_reset.h" #include "i915_selftest.h" +#include "i915_vma_resource.h" #include "i915_vma_types.h" #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) @@ -200,7 +201,7 @@ struct i915_vma_ops { /* Map an object into an address space with the given cache flags. */ void (*bind_vma)(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags); /* @@ -208,7 +209,8 @@ struct i915_vma_ops { * setting the valid PTE entries to a reserved scratch page. */ void (*unbind_vma)(struct i915_address_space *vm, - struct i915_vma *vma); + struct i915_vma_resource *vma_res); + }; struct i915_address_space { @@ -263,6 +265,9 @@ struct i915_address_space { /* Flags used when creating page-table objects for this vm */ unsigned long lmem_pt_obj_flags; + /* Interval tree for pending unbind vma resources */ + struct rb_root_cached pending_unbind; + struct drm_i915_gem_object * (*alloc_pt_dma)(struct i915_address_space *vm, int sz); struct drm_i915_gem_object * @@ -285,7 +290,7 @@ struct i915_address_space { enum i915_cache_level cache_level, u32 flags); void (*insert_entries)(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags); void (*cleanup)(struct i915_address_space *vm); @@ -600,11 +605,11 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt); void ppgtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags); void ppgtt_unbind_vma(struct i915_address_space *vm, - struct i915_vma *vma); + struct i915_vma_resource *vma_res); void gtt_write_workarounds(struct intel_gt *gt); @@ -627,8 +632,8 @@ __vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long static inline struct sgt_dma { struct scatterlist *sg; dma_addr_t dma, max; -} sgt_dma(struct i915_vma *vma) { - struct scatterlist *sg = vma->pages->sgl; +} sgt_dma(struct i915_vma_resource *vma_res) { + struct scatterlist *sg = vma_res->bi.pages->sgl; dma_addr_t addr = sg_dma_address(sg); return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) }; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 91c87a3a8e0b..004e1216e654 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1069,6 +1069,10 @@ lrc_pin(struct intel_context *ce, void lrc_unpin(struct intel_context *ce) { + if (unlikely(ce->parallel.last_rq)) { + i915_request_put(ce->parallel.last_rq); + ce->parallel.last_rq = NULL; + } check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, ce->engine); } @@ -1164,6 +1168,29 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) return cs; } +/* + * On DG2 during context restore of a preempted context in GPGPU mode, + * RCS restore hang is detected. This is extremely timing dependent. + * To address this below sw wabb is implemented for DG2 A steppings. + */ +static u32 * +dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG); + *cs++ = 0x21; + + *cs++ = MI_LOAD_REGISTER_REG; + *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); + *cs++ = i915_mmio_reg_offset(GEN12_CULLBIT1); + + *cs++ = MI_LOAD_REGISTER_REG; + *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); + *cs++ = i915_mmio_reg_offset(GEN12_CULLBIT2); + + return cs; +} + static u32 * gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) { @@ -1171,6 +1198,11 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_cmd_buf_wa(ce, cs); cs = gen12_emit_restore_scratch(ce, cs); + /* Wa_22011450934:dg2 */ + if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) || + IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0)) + cs = dg2_emit_rcs_hang_wabb(ce, cs); + /* Wa_16013000631:dg2 */ if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || IS_DG2_G11(ce->engine->i915)) diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index 083b3090c69c..48e6e2f87700 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -179,32 +179,34 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt, void ppgtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags) { u32 pte_flags; - if (!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) { - vm->allocate_va_range(vm, stash, vma->node.start, vma->size); - set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); + if (!vma_res->allocated) { + vm->allocate_va_range(vm, stash, vma_res->start, + vma_res->vma_size); + vma_res->allocated = true; } /* Applicable to VLV, and gen8+ */ pte_flags = 0; - if (i915_gem_object_is_readonly(vma->obj)) + if (vma_res->bi.readonly) pte_flags |= PTE_READ_ONLY; - if (i915_gem_object_is_lmem(vma->obj)) + if (vma_res->bi.lmem) pte_flags |= PTE_LM; - vm->insert_entries(vm, vma, cache_level, pte_flags); + vm->insert_entries(vm, vma_res, cache_level, pte_flags); wmb(); } -void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma) +void ppgtt_unbind_vma(struct i915_address_space *vm, + struct i915_vma_resource *vma_res) { - if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) - vm->clear_range(vm, vma->node.start, vma->size); + if (vma_res->allocated) + vm->clear_range(vm, vma_res->start, vma_res->vma_size); } static unsigned long pd_count(u64 size, int shift) diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index cb5a67c98f30..a04e0cf4a94b 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -12,11 +12,12 @@ #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_regs.h" static int init_fake_lmem_bar(struct intel_memory_region *mem) { struct drm_i915_private *i915 = mem->i915; - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; unsigned long n; int ret; @@ -132,7 +133,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt) if (!i915->params.fake_lmem_start) return ERR_PTR(-ENODEV); - GEM_BUG_ON(i915_ggtt_has_aperture(&i915->ggtt)); + GEM_BUG_ON(i915_ggtt_has_aperture(to_gt(i915)->ggtt)); /* Your mappable aperture belongs to me now! */ mappable_end = pci_resource_len(pdev, 2); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index ae7542f70afb..82713264b96c 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -604,6 +604,15 @@ static int gen8_reset_engines(struct intel_gt *gt, */ } + /* + * Wa_22011100796:dg2, whenever Full soft reset is required, + * reset all individual engines firstly, and then do a full soft reset. + * + * This is best effort, so ignore any error from the initial reset. + */ + if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES) + gen11_reset_engines(gt, gt->info.engine_mask, 0); + if (GRAPHICS_VER(gt->i915) >= 11) ret = gen11_reset_engines(gt, engine_mask, retry); else diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index eeda1692d845..26038066e90b 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1510,6 +1510,12 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS); + + /* Wa_18018781329:dg2 */ + wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); } static void @@ -2040,7 +2046,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { + if (IS_DG2(i915)) { + /* Wa_14015227452:dg2 */ + wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); + } + + if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { /* Wa_14013392000:dg2_g11 */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); @@ -2048,15 +2059,15 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); } - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || + IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { /* Wa_14012419201:dg2 */ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); } - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(engine->i915)) { + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || + IS_DG2_G11(i915)) { /* * Wa_22012826095:dg2 * Wa_22013059131:dg2 @@ -2071,14 +2082,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } /* Wa_1308578152:dg2_g10 when first gslice is fused off */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) && + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) && needs_wa_1308578152(engine)) { wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON, GEN12_REPLAY_MODE_GRANULARITY); } - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(engine->i915)) { + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || + IS_DG2_G11(i915) || IS_DG2_G12(i915)) { /* Wa_22013037850:dg2 */ wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DISABLE_128B_EVICTION_COMMAND_UDW); @@ -2095,7 +2106,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); } - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { /* * Wa_1608949956:dg2_g10 * Wa_14010198302:dg2_g10 @@ -2114,7 +2125,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) 0, false); } - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { /* Wa_22010430635:dg2 */ wa_masked_en(wal, GEN9_ROW_CHICKEN4, @@ -2124,8 +2135,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); } - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_G11(engine->i915)) { + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || + IS_DG2_G11(i915)) { /* Wa_22012654132:dg2 */ wa_add(wal, GEN10_CACHE_MODE_SS, 0, _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), @@ -2134,8 +2145,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } /* Wa_14013202645:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || + IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY); if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 9e9ccb139ba7..83ff4c2e57c5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1384,7 +1384,7 @@ static int evict_vma(void *data) complete(&arg->completion); mutex_lock(&vm->mutex); - err = i915_gem_evict_for_node(vm, &evict, 0); + err = i915_gem_evict_for_node(vm, NULL, &evict, 0); mutex_unlock(&vm->mutex); return err; diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 8a873f6bda7f..37c38bdd5f47 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -19,7 +19,7 @@ __igt_reset_stolen(struct intel_gt *gt, intel_engine_mask_t mask, const char *msg) { - struct i915_ggtt *ggtt = >->i915->ggtt; + struct i915_ggtt *ggtt = gt->ggtt; const struct resource *dsm = >->i915->dsm; resource_size_t num_pages, page; struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index fe5d7d261797..7afdadc7656f 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -7,9 +7,9 @@ #define _ABI_GUC_ACTIONS_ABI_H /** - * DOC: HOST2GUC_REGISTER_CTB + * DOC: HOST2GUC_SELF_CFG * - * This message is used as part of the `CTB based communication`_ setup. + * This message is used by Host KMD to setup of the `GuC Self Config KLVs`_. * * This message must be sent as `MMIO HXG Message`_. * @@ -22,20 +22,18 @@ * | +-------+--------------------------------------------------------------+ * | | 27:16 | DATA0 = MBZ | * | +-------+--------------------------------------------------------------+ - * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_REGISTER_CTB` = 0x4505 | + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SELF_CFG` = 0x0508 | * +---+-------+--------------------------------------------------------------+ - * | 1 | 31:12 | RESERVED = MBZ | + * | 1 | 31:16 | **KLV_KEY** - KLV key, see `GuC Self Config KLVs`_ | * | +-------+--------------------------------------------------------------+ - * | | 11:8 | **TYPE** - type for the `CT Buffer`_ | + * | | 15:0 | **KLV_LEN** - KLV length | * | | | | - * | | | - _`GUC_CTB_TYPE_HOST2GUC` = 0 | - * | | | - _`GUC_CTB_TYPE_GUC2HOST` = 1 | - * | +-------+--------------------------------------------------------------+ - * | | 7:0 | **SIZE** - size of the `CT Buffer`_ in 4K units minus 1 | + * | | | - 32 bit KLV = 1 | + * | | | - 64 bit KLV = 2 | * +---+-------+--------------------------------------------------------------+ - * | 2 | 31:0 | **DESC_ADDR** - GGTT address of the `CTB Descriptor`_ | + * | 2 | 31:0 | **VALUE32** - Bits 31-0 of the KLV value | * +---+-------+--------------------------------------------------------------+ - * | 3 | 31:0 | **BUFF_ADDF** - GGTT address of the `CT Buffer`_ | + * | 3 | 31:0 | **VALUE64** - Bits 63-32 of the KLV value (**KLV_LEN** = 2) | * +---+-------+--------------------------------------------------------------+ * * +---+-------+--------------------------------------------------------------+ @@ -45,28 +43,25 @@ * | +-------+--------------------------------------------------------------+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | * | +-------+--------------------------------------------------------------+ - * | | 27:0 | DATA0 = MBZ | + * | | 27:0 | DATA0 = **NUM** - 1 if KLV was parsed, 0 if not recognized | * +---+-------+--------------------------------------------------------------+ */ -#define GUC_ACTION_HOST2GUC_REGISTER_CTB 0x4505 +#define GUC_ACTION_HOST2GUC_SELF_CFG 0x0508 -#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) -#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 -#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_MBZ (0xfffff << 12) -#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_TYPE (0xf << 8) -#define GUC_CTB_TYPE_HOST2GUC 0u -#define GUC_CTB_TYPE_GUC2HOST 1u -#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_SIZE (0xff << 0) -#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR GUC_HXG_REQUEST_MSG_n_DATAn -#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR GUC_HXG_REQUEST_MSG_n_DATAn +#define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffff << 16) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffff << 0) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32 GUC_HXG_REQUEST_MSG_n_DATAn +#define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn -#define HOST2GUC_REGISTER_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN -#define HOST2GUC_REGISTER_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 +#define HOST2GUC_SELF_CFG_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_SELF_CFG_RESPONSE_MSG_0_NUM GUC_HXG_RESPONSE_MSG_0_DATA0 /** - * DOC: HOST2GUC_DEREGISTER_CTB + * DOC: HOST2GUC_CONTROL_CTB * - * This message is used as part of the `CTB based communication`_ teardown. + * This H2G action allows Vf Host to enable or disable H2G and G2H `CT Buffer`_. * * This message must be sent as `MMIO HXG Message`_. * @@ -79,15 +74,12 @@ * | +-------+--------------------------------------------------------------+ * | | 27:16 | DATA0 = MBZ | * | +-------+--------------------------------------------------------------+ - * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_DEREGISTER_CTB` = 0x4506 | + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_CONTROL_CTB` = 0x4509 | * +---+-------+--------------------------------------------------------------+ - * | 1 | 31:12 | RESERVED = MBZ | - * | +-------+--------------------------------------------------------------+ - * | | 11:8 | **TYPE** - type of the `CT Buffer`_ | + * | 1 | 31:0 | **CONTROL** - control `CTB based communication`_ | * | | | | - * | | | see `GUC_ACTION_HOST2GUC_REGISTER_CTB`_ | - * | +-------+--------------------------------------------------------------+ - * | | 7:0 | RESERVED = MBZ | + * | | | - _`GUC_CTB_CONTROL_DISABLE` = 0 | + * | | | - _`GUC_CTB_CONTROL_ENABLE` = 1 | * +---+-------+--------------------------------------------------------------+ * * +---+-------+--------------------------------------------------------------+ @@ -100,16 +92,16 @@ * | | 27:0 | DATA0 = MBZ | * +---+-------+--------------------------------------------------------------+ */ -#define GUC_ACTION_HOST2GUC_DEREGISTER_CTB 0x4506 +#define GUC_ACTION_HOST2GUC_CONTROL_CTB 0x4509 -#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) -#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 -#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_MBZ (0xfffff << 12) -#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE (0xf << 8) -#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_MBZ2 (0xff << 0) +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL GUC_HXG_REQUEST_MSG_n_DATAn +#define GUC_CTB_CONTROL_DISABLE 0u +#define GUC_CTB_CONTROL_ENABLE 1u -#define HOST2GUC_DEREGISTER_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN -#define HOST2GUC_DEREGISTER_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 +#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 /* legacy definitions */ @@ -143,8 +135,12 @@ enum intel_guc_action { INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, - INTEL_GUC_ACTION_RESET_CLIENT = 0x5507, + INTEL_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, + INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002, + INTEL_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003, + INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004, + INTEL_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h index 488b6061ee89..c20658ee85a5 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h @@ -11,4 +11,27 @@ enum intel_guc_response_status { INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, }; +enum intel_guc_load_status { + INTEL_GUC_LOAD_STATUS_DEFAULT = 0x00, + INTEL_GUC_LOAD_STATUS_START = 0x01, + INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH = 0x02, + INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH = 0x03, + INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE = 0x04, + INTEL_GUC_LOAD_STATUS_GDT_DONE = 0x10, + INTEL_GUC_LOAD_STATUS_IDT_DONE = 0x20, + INTEL_GUC_LOAD_STATUS_LAPIC_DONE = 0x30, + INTEL_GUC_LOAD_STATUS_GUCINT_DONE = 0x40, + INTEL_GUC_LOAD_STATUS_DPC_READY = 0x50, + INTEL_GUC_LOAD_STATUS_DPC_ERROR = 0x60, + INTEL_GUC_LOAD_STATUS_EXCEPTION = 0x70, + INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID = 0x71, + INTEL_GUC_LOAD_STATUS_PXP_TEARDOWN_CTRL_ENABLED = 0x72, + INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START, + INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73, + INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74, + INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END, + + INTEL_GUC_LOAD_STATUS_READY = 0xF0, +}; + #endif /* _ABI_GUC_ERRORS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h new file mode 100644 index 000000000000..f0814a57c191 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _ABI_GUC_KLVS_ABI_H +#define _ABI_GUC_KLVS_ABI_H + +/** + * DOC: GuC KLV + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | **KEY** - KLV key identifier | + * | | | - `GuC Self Config KLVs`_ | + * | | | | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **LEN** - length of VALUE (in 32bit dwords) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **VALUE** - actual value of the KLV (format depends on KEY) | + * +---+-------+ | + * |...| | | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_KLV_LEN_MIN 1u +#define GUC_KLV_0_KEY (0xffff << 16) +#define GUC_KLV_0_LEN (0xffff << 0) +#define GUC_KLV_n_VALUE (0xffffffff << 0) + +/** + * DOC: GuC Self Config KLVs + * + * `GuC KLV`_ keys available for use with HOST2GUC_SELF_CFG_. + * + * _`GUC_KLV_SELF_CFG_H2G_CTB_ADDR` : 0x0902 + * Refers to 64 bit Global Gfx address of H2G `CT Buffer`_. + * Should be above WOPCM address but below APIC base address for native mode. + * + * _`GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR` : 0x0903 + * Refers to 64 bit Global Gfx address of H2G `CTB Descriptor`_. + * Should be above WOPCM address but below APIC base address for native mode. + * + * _`GUC_KLV_SELF_CFG_H2G_CTB_SIZE` : 0x0904 + * Refers to size of H2G `CT Buffer`_ in bytes. + * Should be a multiple of 4K. + * + * _`GUC_KLV_SELF_CFG_G2H_CTB_ADDR` : 0x0905 + * Refers to 64 bit Global Gfx address of G2H `CT Buffer`_. + * Should be above WOPCM address but below APIC base address for native mode. + * + * _`GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR` : 0x0906 + * Refers to 64 bit Global Gfx address of G2H `CTB Descriptor`_. + * Should be above WOPCM address but below APIC base address for native mode. + * + * _`GUC_KLV_SELF_CFG_G2H_CTB_SIZE` : 0x0907 + * Refers to size of G2H `CT Buffer`_ in bytes. + * Should be a multiple of 4K. + */ + +#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY 0x0902 +#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_LEN 2u + +#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY 0x0903 +#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_LEN 2u + +#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY 0x0904 +#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_LEN 1u + +#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY 0x0905 +#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_LEN 2u + +#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY 0x0906 +#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_LEN 2u + +#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY 0x0907 +#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN 1u + +#endif /* _ABI_GUC_KLVS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 5bab32fef120..447a976c9f25 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -184,6 +184,9 @@ void intel_guc_init_early(struct intel_guc *guc) guc->send_regs.count = GUC_MAX_MMIO_MSG_LEN; BUILD_BUG_ON(GUC_MAX_MMIO_MSG_LEN > SOFT_SCRATCH_COUNT); } + + intel_guc_enable_msg(guc, INTEL_GUC_RECV_MSG_EXCEPTION | + INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED); } void intel_guc_init_late(struct intel_guc *guc) @@ -224,32 +227,48 @@ static u32 guc_ctl_log_params_flags(struct intel_guc *guc) u32 flags; #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0) - #define UNIT SZ_1M - #define FLAG GUC_LOG_ALLOC_IN_MEGABYTE + #define LOG_UNIT SZ_1M + #define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS #else - #define UNIT SZ_4K - #define FLAG 0 + #define LOG_UNIT SZ_4K + #define LOG_FLAG 0 + #endif + + #if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0) + #define CAPTURE_UNIT SZ_1M + #define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS + #else + #define CAPTURE_UNIT SZ_4K + #define CAPTURE_FLAG 0 #endif BUILD_BUG_ON(!CRASH_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, UNIT)); + BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT)); BUILD_BUG_ON(!DEBUG_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, UNIT)); + BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT)); + BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); - BUILD_BUG_ON((CRASH_BUFFER_SIZE / UNIT - 1) > + BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) > (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); - BUILD_BUG_ON((DEBUG_BUFFER_SIZE / UNIT - 1) > + BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) > (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); + BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) > + (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT)); flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | - FLAG | - ((CRASH_BUFFER_SIZE / UNIT - 1) << GUC_LOG_CRASH_SHIFT) | - ((DEBUG_BUFFER_SIZE / UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | + CAPTURE_FLAG | + LOG_FLAG | + ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) | + ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | + ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << GUC_LOG_CAPTURE_SHIFT) | (offset << GUC_LOG_BUF_ADDR_SHIFT); - #undef UNIT - #undef FLAG + #undef LOG_UNIT + #undef LOG_FLAG + #undef CAPTURE_UNIT + #undef CAPTURE_FLAG return flags; } @@ -262,6 +281,26 @@ static u32 guc_ctl_ads_flags(struct intel_guc *guc) return flags; } +static u32 guc_ctl_wa_flags(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + u32 flags = 0; + + /* Wa_22012773006:gen11,gen12 < XeHP */ + if (GRAPHICS_VER(gt->i915) >= 11 && + GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50)) + flags |= GUC_WA_POLLCS; + + return flags; +} + +static u32 guc_ctl_devid(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + + return (INTEL_DEVID(i915) << 16) | INTEL_REVID(i915); +} + /* * Initialise the GuC parameter block before starting the firmware * transfer. These parameters are read by the firmware on startup @@ -278,6 +317,8 @@ static void guc_init_params(struct intel_guc *guc) params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc); params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc); params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc); + params[GUC_CTL_WA] = guc_ctl_wa_flags(guc); + params[GUC_CTL_DEVID] = guc_ctl_devid(guc); for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) DRM_DEBUG_DRIVER("param[%2d] = %#x\n", i, params[i]); @@ -515,9 +556,10 @@ int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, /* Make sure to handle only enabled messages */ msg = payload[0] & guc->msg_enabled_mask; - if (msg & (INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | - INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED)) - intel_guc_log_handle_flush_event(&guc->log); + if (msg & INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED) + drm_err(&guc_to_gt(guc)->i915->drm, "Received early GuC crash dump notification!\n"); + if (msg & INTEL_GUC_RECV_MSG_EXCEPTION) + drm_err(&guc_to_gt(guc)->i915->drm, "Received early GuC exception notification!\n"); return 0; } @@ -551,7 +593,7 @@ int intel_guc_suspend(struct intel_guc *guc) { int ret; u32 action[] = { - INTEL_GUC_ACTION_RESET_CLIENT, + INTEL_GUC_ACTION_CLIENT_SOFT_RESET, }; if (!intel_guc_is_ready(guc)) @@ -715,6 +757,56 @@ int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size, return 0; } +static int __guc_action_self_cfg(struct intel_guc *guc, u16 key, u16 len, u64 value) +{ + u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_SELF_CFG), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) | + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, len), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32, lower_32_bits(value)), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64, upper_32_bits(value)), + }; + int ret; + + GEM_BUG_ON(len > 2); + GEM_BUG_ON(len == 1 && upper_32_bits(value)); + + /* Self config must go over MMIO */ + ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); + + if (unlikely(ret < 0)) + return ret; + if (unlikely(ret > 1)) + return -EPROTO; + if (unlikely(!ret)) + return -ENOKEY; + + return 0; +} + +static int __guc_self_cfg(struct intel_guc *guc, u16 key, u16 len, u64 value) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + int err = __guc_action_self_cfg(guc, key, len, value); + + if (unlikely(err)) + i915_probe_error(i915, "Unsuccessful self-config (%pe) key %#hx value %#llx\n", + ERR_PTR(err), key, value); + return err; +} + +int intel_guc_self_cfg32(struct intel_guc *guc, u16 key, u32 value) +{ + return __guc_self_cfg(guc, key, 1, value); +} + +int intel_guc_self_cfg64(struct intel_guc *guc, u16 key, u64 value) +{ + return __guc_self_cfg(guc, key, 2, value); +} + /** * intel_guc_load_status - dump information about GuC load status * @guc: the GuC diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index f9240d4baa69..9d779de16613 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -119,6 +119,15 @@ struct intel_guc { * function as it might be in an atomic context (no sleeping) */ struct work_struct destroyed_worker; + /** + * @reset_fail_worker: worker to trigger a GT reset after an + * engine reset fails + */ + struct work_struct reset_fail_worker; + /** + * @reset_fail_mask: mask of engines that failed to reset + */ + intel_engine_mask_t reset_fail_mask; } submission_state; /** @@ -141,6 +150,13 @@ struct intel_guc { struct __guc_ads_blob *ads_blob; /** @ads_regset_size: size of the save/restore regsets in the ADS */ u32 ads_regset_size; + /** + * @ads_regset_count: number of save/restore registers in the ADS for + * each engine + */ + u32 ads_regset_count[I915_NUM_ENGINES]; + /** @ads_regset: save/restore regsets in the ADS */ + struct guc_mmio_reg *ads_regset; /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; /** @ads_engine_usage_size: size of engine usage in the ADS */ @@ -206,6 +222,11 @@ struct intel_guc { * context usage for overflows. */ struct delayed_work work; + + /** + * @shift: Right shift value for the gpm timestamp + */ + u32 shift; } timestamp; #ifdef CONFIG_DRM_I915_SELFTEST @@ -328,6 +349,8 @@ int intel_guc_resume(struct intel_guc *guc); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size, struct i915_vma **out_vma, void **out_vaddr); +int intel_guc_self_cfg32(struct intel_guc *guc, u16 key, u32 value); +int intel_guc_self_cfg64(struct intel_guc *guc, u16 key, u64 value); static inline bool intel_guc_is_supported(struct intel_guc *guc) { @@ -404,6 +427,8 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); int intel_guc_engine_failure_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); +int intel_guc_error_capture_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); void intel_guc_find_hung_context(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 162b89198567..7e41175618f5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -42,6 +42,10 @@ * +---------------------------------------+ * | padding | * +---------------------------------------+ <== 4K aligned + * | capture lists | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned * | private data | * +---------------------------------------+ * | padding | @@ -67,6 +71,12 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc) return PAGE_ALIGN(guc->ads_golden_ctxt_size); } +static u32 guc_ads_capture_size(struct intel_guc *guc) +{ + /* FIXME: Allocate a proper capture list */ + return PAGE_ALIGN(PAGE_SIZE); +} + static u32 guc_ads_private_data_size(struct intel_guc *guc) { return PAGE_ALIGN(guc->fw.private_data_size); @@ -87,7 +97,7 @@ static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc) return PAGE_ALIGN(offset); } -static u32 guc_ads_private_data_offset(struct intel_guc *guc) +static u32 guc_ads_capture_offset(struct intel_guc *guc) { u32 offset; @@ -97,6 +107,16 @@ static u32 guc_ads_private_data_offset(struct intel_guc *guc) return PAGE_ALIGN(offset); } +static u32 guc_ads_private_data_offset(struct intel_guc *guc) +{ + u32 offset; + + offset = guc_ads_capture_offset(guc) + + guc_ads_capture_size(guc); + + return PAGE_ALIGN(offset); +} + static u32 guc_ads_blob_size(struct intel_guc *guc) { return guc_ads_private_data_offset(guc) + @@ -188,14 +208,18 @@ static void guc_mapping_table_init(struct intel_gt *gt, /* * The save/restore register list must be pre-calculated to a temporary - * buffer of driver defined size before it can be generated in place - * inside the ADS. + * buffer before it can be copied inside the ADS. */ -#define MAX_MMIO_REGS 128 /* Arbitrary size, increase as needed */ struct temp_regset { + /* + * ptr to the section of the storage for the engine currently being + * worked on + */ struct guc_mmio_reg *registers; - u32 used; - u32 size; + /* ptr to the base of the allocated storage for all engines */ + struct guc_mmio_reg *storage; + u32 storage_used; + u32 storage_max; }; static int guc_mmio_reg_cmp(const void *a, const void *b) @@ -206,18 +230,44 @@ static int guc_mmio_reg_cmp(const void *a, const void *b) return (int)ra->offset - (int)rb->offset; } -static void guc_mmio_reg_add(struct temp_regset *regset, - u32 offset, u32 flags) +static struct guc_mmio_reg * __must_check +__mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg) { - u32 count = regset->used; + u32 pos = regset->storage_used; + struct guc_mmio_reg *slot; + + if (pos >= regset->storage_max) { + size_t size = ALIGN((pos + 1) * sizeof(*slot), PAGE_SIZE); + struct guc_mmio_reg *r = krealloc(regset->storage, + size, GFP_KERNEL); + if (!r) { + WARN_ONCE(1, "Incomplete regset list: can't add register (%d)\n", + -ENOMEM); + return ERR_PTR(-ENOMEM); + } + + regset->registers = r + (regset->registers - regset->storage); + regset->storage = r; + regset->storage_max = size / sizeof(*slot); + } + + slot = ®set->storage[pos]; + regset->storage_used++; + *slot = *reg; + + return slot; +} + +static long __must_check guc_mmio_reg_add(struct temp_regset *regset, + u32 offset, u32 flags) +{ + u32 count = regset->storage_used - (regset->registers - regset->storage); struct guc_mmio_reg reg = { .offset = offset, .flags = flags, }; struct guc_mmio_reg *slot; - GEM_BUG_ON(count >= regset->size); - /* * The mmio list is built using separate lists within the driver. * It's possible that at some point we may attempt to add the same @@ -226,11 +276,11 @@ static void guc_mmio_reg_add(struct temp_regset *regset, */ if (bsearch(®, regset->registers, count, sizeof(reg), guc_mmio_reg_cmp)) - return; + return 0; - slot = ®set->registers[count]; - regset->used++; - *slot = reg; + slot = __mmio_reg_add(regset, ®); + if (IS_ERR(slot)) + return PTR_ERR(slot); while (slot-- > regset->registers) { GEM_BUG_ON(slot[0].offset == slot[1].offset); @@ -239,6 +289,8 @@ static void guc_mmio_reg_add(struct temp_regset *regset, swap(slot[1], slot[0]); } + + return 0; } #define GUC_MMIO_REG_ADD(regset, reg, masked) \ @@ -246,62 +298,71 @@ static void guc_mmio_reg_add(struct temp_regset *regset, i915_mmio_reg_offset((reg)), \ (masked) ? GUC_REGSET_MASKED : 0) -static void guc_mmio_regset_init(struct temp_regset *regset, - struct intel_engine_cs *engine) +static int guc_mmio_regset_init(struct temp_regset *regset, + struct intel_engine_cs *engine) { const u32 base = engine->mmio_base; struct i915_wa_list *wal = &engine->wa_list; struct i915_wa *wa; unsigned int i; + int ret = 0; - regset->used = 0; + /* + * Each engine's registers point to a new start relative to + * storage + */ + regset->registers = regset->storage + regset->storage_used; - GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true); - GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false); - GUC_MMIO_REG_ADD(regset, RING_IMR(base), false); + ret |= GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true); + ret |= GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false); + ret |= GUC_MMIO_REG_ADD(regset, RING_IMR(base), false); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) - GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg); + ret |= GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg); /* Be extra paranoid and include all whitelist registers. */ for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) - GUC_MMIO_REG_ADD(regset, - RING_FORCE_TO_NONPRIV(base, i), - false); + ret |= GUC_MMIO_REG_ADD(regset, + RING_FORCE_TO_NONPRIV(base, i), + false); /* add in local MOCS registers */ for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++) - GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false); + ret |= GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false); + + return ret ? -1 : 0; } -static int guc_mmio_reg_state_query(struct intel_guc *guc) +static long guc_mmio_reg_state_create(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct intel_engine_cs *engine; enum intel_engine_id id; - struct temp_regset temp_set; - u32 total; + struct temp_regset temp_set = {}; + long total = 0; + long ret; - /* - * Need to actually build the list in order to filter out - * duplicates and other such data dependent constructions. - */ - temp_set.size = MAX_MMIO_REGS; - temp_set.registers = kmalloc_array(temp_set.size, - sizeof(*temp_set.registers), - GFP_KERNEL); - if (!temp_set.registers) - return -ENOMEM; - - total = 0; for_each_engine(engine, gt, id) { - guc_mmio_regset_init(&temp_set, engine); - total += temp_set.used; + u32 used = temp_set.storage_used; + + ret = guc_mmio_regset_init(&temp_set, engine); + if (ret < 0) + goto fail_regset_init; + + guc->ads_regset_count[id] = temp_set.storage_used - used; + total += guc->ads_regset_count[id]; } - kfree(temp_set.registers); + guc->ads_regset = temp_set.storage; + + drm_dbg(&guc_to_gt(guc)->i915->drm, "Used %zu KB for temporary ADS regset\n", + (temp_set.storage_max * sizeof(struct guc_mmio_reg)) >> 10); return total * sizeof(struct guc_mmio_reg); + +fail_regset_init: + kfree(temp_set.storage); + return ret; } static void guc_mmio_reg_state_init(struct intel_guc *guc, @@ -309,40 +370,38 @@ static void guc_mmio_reg_state_init(struct intel_guc *guc, { struct intel_gt *gt = guc_to_gt(guc); struct intel_engine_cs *engine; + struct guc_mmio_reg *ads_registers; enum intel_engine_id id; - struct temp_regset temp_set; - struct guc_mmio_reg_set *ads_reg_set; u32 addr_ggtt, offset; - u8 guc_class; offset = guc_ads_regset_offset(guc); addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; - temp_set.registers = (struct guc_mmio_reg *)(((u8 *)blob) + offset); - temp_set.size = guc->ads_regset_size / sizeof(temp_set.registers[0]); + ads_registers = (struct guc_mmio_reg *)(((u8 *)blob) + offset); + + memcpy(ads_registers, guc->ads_regset, guc->ads_regset_size); for_each_engine(engine, gt, id) { + u32 count = guc->ads_regset_count[id]; + struct guc_mmio_reg_set *ads_reg_set; + u8 guc_class; + /* Class index is checked in class converter */ GEM_BUG_ON(engine->instance >= GUC_MAX_INSTANCES_PER_CLASS); guc_class = engine_class_to_guc_class(engine->class); ads_reg_set = &blob->ads.reg_state_list[guc_class][engine->instance]; - guc_mmio_regset_init(&temp_set, engine); - if (!temp_set.used) { + if (!count) { ads_reg_set->address = 0; ads_reg_set->count = 0; continue; } ads_reg_set->address = addr_ggtt; - ads_reg_set->count = temp_set.used; + ads_reg_set->count = count; - temp_set.size -= temp_set.used; - temp_set.registers += temp_set.used; - addr_ggtt += temp_set.used * sizeof(struct guc_mmio_reg); + addr_ggtt += count * sizeof(struct guc_mmio_reg); } - - GEM_BUG_ON(temp_set.size); } static void fill_engine_enable_masks(struct intel_gt *gt, @@ -501,6 +560,26 @@ static void guc_init_golden_context(struct intel_guc *guc) GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); } +static void guc_capture_list_init(struct intel_guc *guc, struct __guc_ads_blob *blob) +{ + int i, j; + u32 addr_ggtt, offset; + + offset = guc_ads_capture_offset(guc); + addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + + /* FIXME: Populate a proper capture list */ + + for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { + for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) { + blob->ads.capture_instance[i][j] = addr_ggtt; + blob->ads.capture_class[i][j] = addr_ggtt; + } + + blob->ads.capture_global[i] = addr_ggtt; + } +} + static void __guc_ads_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -534,6 +613,9 @@ static void __guc_ads_init(struct intel_guc *guc) base = intel_guc_ggtt_offset(guc, guc->ads_vma); + /* Capture list for hang debug */ + guc_capture_list_init(guc, blob); + /* ADS */ blob->ads.scheduler_policies = base + ptr_offset(blob, policies); blob->ads.gt_system_info = base + ptr_offset(blob, system_info); @@ -561,8 +643,11 @@ int intel_guc_ads_create(struct intel_guc *guc) GEM_BUG_ON(guc->ads_vma); - /* Need to calculate the reg state size dynamically: */ - ret = guc_mmio_reg_state_query(guc); + /* + * Create reg state size dynamically on system memory to be copied to + * the final ads blob on gt init/reset + */ + ret = guc_mmio_reg_state_create(guc); if (ret < 0) return ret; guc->ads_regset_size = ret; @@ -602,6 +687,7 @@ void intel_guc_ads_destroy(struct intel_guc *guc) { i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP); guc->ads_blob = NULL; + kfree(guc->ads_regset); } static void guc_ads_private_data_reset(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index aa6dd6415202..2f7fc87a78e1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -112,18 +112,6 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) init_waitqueue_head(&ct->wq); } -static inline const char *guc_ct_buffer_type_to_str(u32 type) -{ - switch (type) { - case GUC_CTB_TYPE_HOST2GUC: - return "SEND"; - case GUC_CTB_TYPE_GUC2HOST: - return "RECV"; - default: - return "<invalid>"; - } -} - static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc) { memset(desc, 0, sizeof(*desc)); @@ -156,71 +144,65 @@ static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb, guc_ct_buffer_reset(ctb); } -static int guc_action_register_ct_buffer(struct intel_guc *guc, u32 type, - u32 desc_addr, u32 buff_addr, u32 size) +static int guc_action_control_ctb(struct intel_guc *guc, u32 control) { - u32 request[HOST2GUC_REGISTER_CTB_REQUEST_MSG_LEN] = { + u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = { FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | - FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_REGISTER_CTB), - FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_SIZE, size / SZ_4K - 1) | - FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_TYPE, type), - FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR, desc_addr), - FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR, buff_addr), + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_CONTROL_CTB), + FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL, control), }; int ret; - GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST); - GEM_BUG_ON(size % SZ_4K); + GEM_BUG_ON(control != GUC_CTB_CONTROL_DISABLE && control != GUC_CTB_CONTROL_ENABLE); - /* CT registration must go over MMIO */ + /* CT control must go over MMIO */ ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); return ret > 0 ? -EPROTO : ret; } -static int ct_register_buffer(struct intel_guc_ct *ct, u32 type, - u32 desc_addr, u32 buff_addr, u32 size) +static int ct_control_enable(struct intel_guc_ct *ct, bool enable) { int err; - err = i915_inject_probe_error(guc_to_gt(ct_to_guc(ct))->i915, -ENXIO); + err = guc_action_control_ctb(ct_to_guc(ct), enable ? + GUC_CTB_CONTROL_ENABLE : GUC_CTB_CONTROL_DISABLE); if (unlikely(err)) - return err; + CT_PROBE_ERROR(ct, "Failed to control/%s CTB (%pe)\n", + enabledisable(enable), ERR_PTR(err)); - err = guc_action_register_ct_buffer(ct_to_guc(ct), type, - desc_addr, buff_addr, size); - if (unlikely(err)) - CT_ERROR(ct, "Failed to register %s buffer (%pe)\n", - guc_ct_buffer_type_to_str(type), ERR_PTR(err)); return err; } -static int guc_action_deregister_ct_buffer(struct intel_guc *guc, u32 type) +static int ct_register_buffer(struct intel_guc_ct *ct, bool send, + u32 desc_addr, u32 buff_addr, u32 size) { - u32 request[HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_LEN] = { - FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | - FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | - FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_DEREGISTER_CTB), - FIELD_PREP(HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE, type), - }; - int ret; - - GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST); - - /* CT deregistration must go over MMIO */ - ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); + int err; - return ret > 0 ? -EPROTO : ret; -} + err = intel_guc_self_cfg64(ct_to_guc(ct), send ? + GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY : + GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY, + desc_addr); + if (unlikely(err)) + goto failed; -static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type) -{ - int err = guc_action_deregister_ct_buffer(ct_to_guc(ct), type); + err = intel_guc_self_cfg64(ct_to_guc(ct), send ? + GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY : + GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY, + buff_addr); + if (unlikely(err)) + goto failed; + err = intel_guc_self_cfg32(ct_to_guc(ct), send ? + GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY : + GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY, + size); if (unlikely(err)) - CT_ERROR(ct, "Failed to deregister %s buffer (%pe)\n", - guc_ct_buffer_type_to_str(type), ERR_PTR(err)); +failed: + CT_PROBE_ERROR(ct, "Failed to register %s buffer (%pe)\n", + send ? "SEND" : "RECV", ERR_PTR(err)); + return err; } @@ -308,7 +290,7 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct) int intel_guc_ct_enable(struct intel_guc_ct *ct) { struct intel_guc *guc = ct_to_guc(ct); - u32 base, desc, cmds; + u32 base, desc, cmds, size; void *blob; int err; @@ -333,27 +315,27 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) */ desc = base + ptrdiff(ct->ctbs.recv.desc, blob); cmds = base + ptrdiff(ct->ctbs.recv.cmds, blob); - err = ct_register_buffer(ct, GUC_CTB_TYPE_GUC2HOST, - desc, cmds, ct->ctbs.recv.size * 4); - + size = ct->ctbs.recv.size * 4; + err = ct_register_buffer(ct, false, desc, cmds, size); if (unlikely(err)) goto err_out; desc = base + ptrdiff(ct->ctbs.send.desc, blob); cmds = base + ptrdiff(ct->ctbs.send.cmds, blob); - err = ct_register_buffer(ct, GUC_CTB_TYPE_HOST2GUC, - desc, cmds, ct->ctbs.send.size * 4); + size = ct->ctbs.send.size * 4; + err = ct_register_buffer(ct, true, desc, cmds, size); + if (unlikely(err)) + goto err_out; + err = ct_control_enable(ct, true); if (unlikely(err)) - goto err_deregister; + goto err_out; ct->enabled = true; ct->stall_time = KTIME_MAX; return 0; -err_deregister: - ct_deregister_buffer(ct, GUC_CTB_TYPE_GUC2HOST); err_out: CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err)); return err; @@ -372,8 +354,7 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct) ct->enabled = false; if (intel_guc_is_fw_running(guc)) { - ct_deregister_buffer(ct, GUC_CTB_TYPE_HOST2GUC); - ct_deregister_buffer(ct, GUC_CTB_TYPE_GUC2HOST); + ct_control_enable(ct, false); } } @@ -662,6 +643,7 @@ static int ct_send(struct intel_guc_ct *ct, struct ct_request request; unsigned long flags; unsigned int sleep_period_ms = 1; + bool send_again; u32 fence; int err; @@ -671,6 +653,9 @@ static int ct_send(struct intel_guc_ct *ct, GEM_BUG_ON(!response_buf && response_buf_size); might_sleep(); +resend: + send_again = false; + /* * We use a lazy spin wait loop here as we believe that if the CT * buffers are sized correctly the flow control condition should be @@ -725,6 +710,13 @@ retry: goto unlink; } + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) == GUC_HXG_TYPE_NO_RESPONSE_RETRY) { + CT_DEBUG(ct, "retrying request %#x (%u)\n", *action, + FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status)); + send_again = true; + goto unlink; + } + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) != GUC_HXG_TYPE_RESPONSE_SUCCESS) { err = -EIO; goto unlink; @@ -747,6 +739,9 @@ unlink: list_del(&request.link); spin_unlock_irqrestore(&ct->requests.lock, flags); + if (unlikely(send_again)) + goto resend; + return err; } @@ -789,7 +784,7 @@ static struct ct_incoming_msg *ct_alloc_msg(u32 num_dwords) { struct ct_incoming_msg *msg; - msg = kmalloc(sizeof(*msg) + sizeof(u32) * num_dwords, GFP_ATOMIC); + msg = kmalloc(struct_size(msg, msg, num_dwords), GFP_ATOMIC); if (msg) msg->size = num_dwords; return msg; @@ -918,6 +913,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r GEM_BUG_ON(len < GUC_HXG_MSG_MIN_LEN); GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]) != GUC_HXG_ORIGIN_GUC); GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_SUCCESS && + FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_NO_RESPONSE_RETRY && FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_FAILURE); CT_DEBUG(ct, "response fence %u status %#x\n", fence, hxg[0]); @@ -990,9 +986,27 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r case INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION: ret = intel_guc_context_reset_process_msg(guc, payload, len); break; + case INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION: + ret = intel_guc_error_capture_process_msg(guc, payload, len); + if (unlikely(ret)) + CT_ERROR(ct, "error capture notification failed %x %*ph\n", + action, 4 * len, payload); + break; case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION: ret = intel_guc_engine_failure_process_msg(guc, payload, len); break; + case INTEL_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE: + intel_guc_log_handle_flush_event(&guc->log); + ret = 0; + break; + case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED: + CT_ERROR(ct, "Received GuC crash dump notification!\n"); + ret = 0; + break; + case INTEL_GUC_ACTION_NOTIFY_EXCEPTION: + CT_ERROR(ct, "Received GuC exception notification!\n"); + ret = 0; + break; default: ret = -EOPNOTSUPP; break; @@ -1098,6 +1112,7 @@ static int ct_handle_hxg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg) break; case GUC_HXG_TYPE_RESPONSE_SUCCESS: case GUC_HXG_TYPE_RESPONSE_FAILURE: + case GUC_HXG_TYPE_NO_RESPONSE_RETRY: err = ct_handle_response(ct, msg); break; default: diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index dcb51b53b495..a0372735cddb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -16,13 +16,15 @@ static void guc_prepare_xfer(struct intel_uncore *uncore) { - u32 shim_flags = GUC_DISABLE_SRAM_INIT_TO_ZEROES | - GUC_ENABLE_READ_CACHE_LOGIC | - GUC_ENABLE_MIA_CACHING | + u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC | GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | GUC_ENABLE_MIA_CLOCK_GATING; + if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 50)) + shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES | + GUC_ENABLE_MIA_CACHING; + /* Must program this register before loading the ucode with DMA */ intel_uncore_write(uncore, GUC_SHIM_CONTROL, shim_flags); @@ -91,11 +93,10 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, static inline bool guc_ready(struct intel_uncore *uncore, u32 *status) { u32 val = intel_uncore_read(uncore, GUC_STATUS); - u32 uk_val = val & GS_UKERNEL_MASK; + u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val); *status = val; - return (uk_val == GS_UKERNEL_READY) || - ((val & GS_MIA_CORE_STATE) && (uk_val == GS_UKERNEL_LAPIC_DONE)); + return uk_val == INTEL_GUC_LOAD_STATUS_READY; } static int guc_wait_ucode(struct intel_uncore *uncore) @@ -106,17 +107,26 @@ static int guc_wait_ucode(struct intel_uncore *uncore) /* * Wait for the GuC to start up. * NB: Docs recommend not using the interrupt for completion. - * Measurements indicate this should take no more than 20ms, so a + * Measurements indicate this should take no more than 20ms + * (assuming the GT clock is at maximum frequency). So, a * timeout here indicates that the GuC has failed and is unusable. * (Higher levels of the driver may decide to reset the GuC and * attempt the ucode load again if this happens.) + * + * FIXME: There is a known (but exceedingly unlikely) race condition + * where the asynchronous frequency management code could reduce + * the GT clock while a GuC reload is in progress (during a full + * GT reset). A fix is in progress but there are complex locking + * issues to be resolved. In the meantime bump the timeout to + * 200ms. Even at slowest clock, this should be sufficient. And + * in the working case, a larger timeout makes no difference. */ - ret = wait_for(guc_ready(uncore, &status), 100); + ret = wait_for(guc_ready(uncore, &status), 200); if (ret) { struct drm_device *drm = &uncore->i915->drm; - drm_dbg(drm, "GuC load failed: status = 0x%08X\n", status); - drm_dbg(drm, "GuC load failed: status: Reset = %d, " + drm_info(drm, "GuC load failed: status = 0x%08X\n", status); + drm_info(drm, "GuC load failed: status: Reset = %d, " "BootROM = 0x%02X, UKernel = 0x%02X, " "MIA = 0x%02X, Auth = 0x%02X\n", REG_FIELD_GET(GS_MIA_IN_RESET, status), @@ -126,13 +136,13 @@ static int guc_wait_ucode(struct intel_uncore *uncore) REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { - drm_dbg(drm, "GuC firmware signature verification failed\n"); + drm_info(drm, "GuC firmware signature verification failed\n"); ret = -ENOEXEC; } - if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) { - drm_dbg(drm, "GuC firmware exception. EIP: %#x\n", - intel_uncore_read(uncore, SOFT_SCRATCH(13))); + if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == INTEL_GUC_LOAD_STATUS_EXCEPTION) { + drm_info(drm, "GuC firmware exception. EIP: %#x\n", + intel_uncore_read(uncore, SOFT_SCRATCH(13))); ret = -ENXIO; } } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 7072e30e99f4..6a4612a852e2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -16,6 +16,7 @@ #include "abi/guc_errors_abi.h" #include "abi/guc_communication_mmio_abi.h" #include "abi/guc_communication_ctb_abi.h" +#include "abi/guc_klvs_abi.h" #include "abi/guc_messages_abi.h" /* Payload length only i.e. don't include G2H header length */ @@ -84,19 +85,24 @@ #define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7) #define GUC_CTL_LOG_PARAMS 0 -#define GUC_LOG_VALID (1 << 0) -#define GUC_LOG_NOTIFY_ON_HALF_FULL (1 << 1) -#define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3) +#define GUC_LOG_VALID BIT(0) +#define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1) +#define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2) +#define GUC_LOG_LOG_ALLOC_UNITS BIT(3) #define GUC_LOG_CRASH_SHIFT 4 #define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT) #define GUC_LOG_DEBUG_SHIFT 6 #define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT) +#define GUC_LOG_CAPTURE_SHIFT 10 +#define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT) #define GUC_LOG_BUF_ADDR_SHIFT 12 #define GUC_CTL_WA 1 +#define GUC_WA_POLLCS BIT(18) + #define GUC_CTL_FEATURE 2 -#define GUC_CTL_DISABLE_SCHEDULER (1 << 14) #define GUC_CTL_ENABLE_SLPC BIT(2) +#define GUC_CTL_DISABLE_SCHEDULER BIT(14) #define GUC_CTL_DEBUG 3 #define GUC_LOG_VERBOSITY_SHIFT 0 @@ -116,6 +122,8 @@ #define GUC_ADS_ADDR_SHIFT 1 #define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT) +#define GUC_CTL_DEVID 5 + #define GUC_CTL_MAX_DWORDS (SOFT_SCRATCH_COUNT - 2) /* [1..14] */ /* Generic GT SysInfo data types */ @@ -263,7 +271,10 @@ struct guc_mmio_reg { u32 offset; u32 value; u32 flags; -#define GUC_REGSET_MASKED (1 << 0) + u32 mask; +#define GUC_REGSET_MASKED BIT(0) +#define GUC_REGSET_MASKED_WITH_VALUE BIT(2) +#define GUC_REGSET_RESTORE_ONLY BIT(3) } __packed; /* GuC register sets */ @@ -280,6 +291,12 @@ struct guc_gt_system_info { u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX]; } __packed; +enum { + GUC_CAPTURE_LIST_INDEX_PF = 0, + GUC_CAPTURE_LIST_INDEX_VF = 1, + GUC_CAPTURE_LIST_INDEX_MAX = 2, +}; + /* GuC Additional Data Struct */ struct guc_ads { struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; @@ -291,7 +308,11 @@ struct guc_ads { u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES]; u32 eng_state_size[GUC_MAX_ENGINE_CLASSES]; u32 private_data; - u32 reserved[15]; + u32 reserved2; + u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; + u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; + u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX]; + u32 reserved[14]; } __packed; /* Engine usage stats */ @@ -312,6 +333,7 @@ struct guc_engine_usage { enum guc_log_buffer_type { GUC_DEBUG_LOG_BUFFER, GUC_CRASH_DUMP_LOG_BUFFER, + GUC_CAPTURE_LOG_BUFFER, GUC_MAX_LOG_BUFFER }; @@ -342,6 +364,7 @@ struct guc_log_buffer_state { u32 write_ptr; u32 size; u32 sampled_write_ptr; + u32 wrap_offset; union { struct { u32 flush_to_file:1; @@ -382,7 +405,7 @@ struct guc_shared_ctx_data { /* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */ enum intel_guc_recv_message { INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1), - INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER = BIT(3) + INTEL_GUC_RECV_MSG_EXCEPTION = BIT(30), }; #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 7b0b43e87244..b53f61f3101f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -56,20 +56,6 @@ static int guc_action_control_log(struct intel_guc *guc, bool enable, return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static void guc_log_enable_flush_events(struct intel_guc_log *log) -{ - intel_guc_enable_msg(log_to_guc(log), - INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | - INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED); -} - -static void guc_log_disable_flush_events(struct intel_guc_log *log) -{ - intel_guc_disable_msg(log_to_guc(log), - INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | - INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED); -} - /* * Sub buffer switch callback. Called whenever relay has to switch to a new * sub buffer, relay stays on the same sub buffer if 0 is returned. @@ -202,6 +188,8 @@ static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) return DEBUG_BUFFER_SIZE; case GUC_CRASH_DUMP_LOG_BUFFER: return CRASH_BUFFER_SIZE; + case GUC_CAPTURE_LOG_BUFFER: + return CAPTURE_BUFFER_SIZE; default: MISSING_CASE(type); } @@ -464,14 +452,19 @@ int intel_guc_log_create(struct intel_guc_log *log) * +-------------------------------+ 32B * | Debug state header | * +-------------------------------+ 64B + * | Capture state header | + * +-------------------------------+ 96B * | | * +===============================+ PAGE_SIZE (4KB) * | Crash Dump logs | * +===============================+ + CRASH_SIZE * | Debug logs | * +===============================+ + DEBUG_SIZE + * | Capture logs | + * +===============================+ + CAPTURE_SIZE */ - guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE; + guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + + CAPTURE_BUFFER_SIZE; vma = intel_guc_allocate_vma(guc, guc_log_size); if (IS_ERR(vma)) { @@ -593,8 +586,6 @@ int intel_guc_log_relay_start(struct intel_guc_log *log) if (log->relay.started) return -EEXIST; - guc_log_enable_flush_events(log); - /* * When GuC is logging without us relaying to userspace, we're ignoring * the flush notification. This means that we need to unconditionally @@ -641,7 +632,6 @@ static void guc_log_relay_stop(struct intel_guc_log *log) if (!log->relay.started) return; - guc_log_disable_flush_events(log); intel_synchronize_irq(i915); flush_work(&log->relay.flush_work); @@ -662,7 +652,8 @@ void intel_guc_log_relay_close(struct intel_guc_log *log) void intel_guc_log_handle_flush_event(struct intel_guc_log *log) { - queue_work(system_highpri_wq, &log->relay.flush_work); + if (log->relay.started) + queue_work(system_highpri_wq, &log->relay.flush_work); } static const char * @@ -673,6 +664,8 @@ stringify_guc_log_type(enum guc_log_buffer_type type) return "DEBUG"; case GUC_CRASH_DUMP_LOG_BUFFER: return "CRASH"; + case GUC_CAPTURE_LOG_BUFFER: + return "CAPTURE"; default: MISSING_CASE(type); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index fe6ab7550a14..d7e1b6471fed 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -18,12 +18,15 @@ struct intel_guc; #if defined(CONFIG_DRM_I915_DEBUG_GUC) #define CRASH_BUFFER_SIZE SZ_2M #define DEBUG_BUFFER_SIZE SZ_16M +#define CAPTURE_BUFFER_SIZE SZ_4M #elif defined(CONFIG_DRM_I915_DEBUG_GEM) #define CRASH_BUFFER_SIZE SZ_1M #define DEBUG_BUFFER_SIZE SZ_2M +#define CAPTURE_BUFFER_SIZE SZ_1M #else #define CRASH_BUFFER_SIZE SZ_8K #define DEBUG_BUFFER_SIZE SZ_64K +#define CAPTURE_BUFFER_SIZE SZ_16K #endif /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index 85846c5570c5..66027a42cda9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -22,10 +22,6 @@ #define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT) #define GS_UKERNEL_SHIFT 8 #define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT) -#define GS_UKERNEL_LAPIC_DONE (0x30 << GS_UKERNEL_SHIFT) -#define GS_UKERNEL_DPC_ERROR (0x60 << GS_UKERNEL_SHIFT) -#define GS_UKERNEL_EXCEPTION (0x70 << GS_UKERNEL_SHIFT) -#define GS_UKERNEL_READY (0xF0 << GS_UKERNEL_SHIFT) #define GS_MIA_SHIFT 16 #define GS_MIA_MASK (0x07 << GS_MIA_SHIFT) #define GS_MIA_CORE_STATE (0x01 << GS_MIA_SHIFT) @@ -98,6 +94,9 @@ #define GUC_ENABLE_MIA_CLOCK_GATING (1<<15) #define GUC_GEN10_SHIM_WC_ENABLE (1<<21) +#define GUC_SHIM_CONTROL2 _MMIO(0xc068) +#define GUC_IS_PRIVILEGED (1<<29) + #define GUC_SEND_INTERRUPT _MMIO(0xc4c8) #define GUC_SEND_TRIGGER (1<<0) #define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 0cb51c5bb765..b3a429a92c0d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1115,6 +1115,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) if (new_start == lower_32_bits(*prev_start)) return; + /* + * When gt is unparked, we update the gt timestamp and start the ping + * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt + * is unparked, all switched in contexts will have a start time that is + * within +/- POLL_TIME_CLKS of the most recent gt_stamp. + * + * If neither gt_stamp nor new_start has rolled over, then the + * gt_stamp_hi does not need to be adjusted, however if one of them has + * rolled over, we need to adjust gt_stamp_hi accordingly. + * + * The below conditions address the cases of new_start rollover and + * gt_stamp_last rollover respectively. + */ if (new_start < gt_stamp_last && (new_start - gt_stamp_last) <= POLL_TIME_CLKS) gt_stamp_hi++; @@ -1126,17 +1139,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) *prev_start = ((u64)gt_stamp_hi << 32) | new_start; } -static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) +/* + * GuC updates shared memory and KMD reads it. Since this is not synchronized, + * we run into a race where the value read is inconsistent. Sometimes the + * inconsistency is in reading the upper MSB bytes of the last_in value when + * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper + * 24 bits are zero. Since these are non-zero values, it is non-trivial to + * determine validity of these values. Instead we read the values multiple times + * until they are consistent. In test runs, 3 attempts results in consistent + * values. The upper bound is set to 6 attempts and may need to be tuned as per + * any new occurences. + */ +static void __get_engine_usage_record(struct intel_engine_cs *engine, + u32 *last_in, u32 *id, u32 *total) { struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine); + int i = 0; + + do { + *last_in = READ_ONCE(rec->last_switch_in_stamp); + *id = READ_ONCE(rec->current_context_index); + *total = READ_ONCE(rec->total_runtime); + + if (READ_ONCE(rec->last_switch_in_stamp) == *last_in && + READ_ONCE(rec->current_context_index) == *id && + READ_ONCE(rec->total_runtime) == *total) + break; + } while (++i < 6); +} + +static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) +{ struct intel_engine_guc_stats *stats = &engine->stats.guc; struct intel_guc *guc = &engine->gt->uc.guc; - u32 last_switch = rec->last_switch_in_stamp; - u32 ctx_id = rec->current_context_index; - u32 total = rec->total_runtime; + u32 last_switch, ctx_id, total; lockdep_assert_held(&guc->timestamp.lock); + __get_engine_usage_record(engine, &last_switch, &ctx_id, &total); + stats->running = ctx_id != ~0U && last_switch; if (stats->running) __extend_last_switch(guc, &stats->start_gt_clk, last_switch); @@ -1151,23 +1192,51 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) } } -static void guc_update_pm_timestamp(struct intel_guc *guc, - struct intel_engine_cs *engine, - ktime_t *now) +static u32 gpm_timestamp_shift(struct intel_gt *gt) +{ + intel_wakeref_t wakeref; + u32 reg, shift; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + reg = intel_uncore_read(gt->uncore, RPM_CONFIG0); + + shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT; + + return 3 - shift; +} + +static u64 gpm_timestamp(struct intel_gt *gt) { - u32 gt_stamp_now, gt_stamp_hi; + u32 lo, hi, old_hi, loop = 0; + + hi = intel_uncore_read(gt->uncore, MISC_STATUS1); + do { + lo = intel_uncore_read(gt->uncore, MISC_STATUS0); + old_hi = hi; + hi = intel_uncore_read(gt->uncore, MISC_STATUS1); + } while (old_hi != hi && loop++ < 2); + + return ((u64)hi << 32) | lo; +} + +static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) +{ + struct intel_gt *gt = guc_to_gt(guc); + u32 gt_stamp_lo, gt_stamp_hi; + u64 gpm_ts; lockdep_assert_held(&guc->timestamp.lock); gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); - gt_stamp_now = intel_uncore_read(engine->uncore, - RING_TIMESTAMP(engine->mmio_base)); + gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift; + gt_stamp_lo = lower_32_bits(gpm_ts); *now = ktime_get(); - if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp)) + if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp)) gt_stamp_hi++; - guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now; + guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo; } /* @@ -1210,8 +1279,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) if (!in_reset && intel_gt_pm_get_if_awake(gt)) { stats_saved = *stats; gt_stamp_saved = guc->timestamp.gt_stamp; + /* + * Update gt_clks, then gt timestamp to simplify the 'gt_stamp - + * start_gt_clk' calculation below for active engines. + */ guc_update_engine_gt_clks(engine); - guc_update_pm_timestamp(guc, engine, now); + guc_update_pm_timestamp(guc, now); intel_gt_pm_put_async(gt); if (i915_reset_count(gpu_error) != reset_count) { *stats = stats_saved; @@ -1243,8 +1316,8 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc) spin_lock_irqsave(&guc->timestamp.lock, flags); + guc_update_pm_timestamp(guc, &unused); for_each_engine(engine, gt, id) { - guc_update_pm_timestamp(guc, engine, &unused); guc_update_engine_gt_clks(engine); engine->stats.guc.prev_total = 0; } @@ -1261,10 +1334,11 @@ static void __update_guc_busyness_stats(struct intel_guc *guc) ktime_t unused; spin_lock_irqsave(&guc->timestamp.lock, flags); - for_each_engine(engine, gt, id) { - guc_update_pm_timestamp(guc, engine, &unused); + + guc_update_pm_timestamp(guc, &unused); + for_each_engine(engine, gt, id) guc_update_engine_gt_clks(engine); - } + spin_unlock_irqrestore(&guc->timestamp.lock, flags); } @@ -1337,10 +1411,15 @@ void intel_guc_busyness_park(struct intel_gt *gt) void intel_guc_busyness_unpark(struct intel_gt *gt) { struct intel_guc *guc = >->uc.guc; + unsigned long flags; + ktime_t unused; if (!guc_submission_initialized(guc)) return; + spin_lock_irqsave(&guc->timestamp.lock, flags); + guc_update_pm_timestamp(guc, &unused); + spin_unlock_irqrestore(&guc->timestamp.lock, flags); mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay); } @@ -1351,7 +1430,8 @@ submission_disabled(struct intel_guc *guc) struct i915_sched_engine * const sched_engine = guc->sched_engine; return unlikely(!sched_engine || - !__tasklet_is_enabled(&sched_engine->tasklet)); + !__tasklet_is_enabled(&sched_engine->tasklet) || + intel_gt_is_wedged(guc_to_gt(guc))); } static void disable_submission(struct intel_guc *guc) @@ -1396,8 +1476,6 @@ static void guc_flush_destroyed_contexts(struct intel_guc *guc); void intel_guc_submission_reset_prepare(struct intel_guc *guc) { - int i; - if (unlikely(!guc_submission_initialized(guc))) { /* Reset called during driver load? GuC not yet initialised! */ return; @@ -1414,21 +1492,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) guc_flush_submissions(guc); guc_flush_destroyed_contexts(guc); - - /* - * Handle any outstanding G2Hs before reset. Call IRQ handler directly - * each pass as interrupt have been disabled. We always scrub for - * outstanding G2H as it is possible for outstanding_submission_g2h to - * be incremented after the context state update. - */ - for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) { - intel_guc_to_host_event_handler(guc); -#define wait_for_reset(guc, wait_var) \ - intel_guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20)) - do { - wait_for_reset(guc, &guc->outstanding_submission_g2h); - } while (!list_empty(&guc->ct.requests.incoming)); - } + flush_work(&guc->ct.requests.worker); scrub_guc_desc_for_outstanding_g2h(guc); } @@ -1533,7 +1597,6 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) unsigned long flags; u32 head; int i, number_children = ce->parallel.number_children; - bool skip = false; struct intel_context *parent = ce; GEM_BUG_ON(intel_context_is_child(ce)); @@ -1544,23 +1607,10 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) * GuC will implicitly mark the context as non-schedulable when it sends * the reset notification. Make sure our state reflects this change. The * context will be marked enabled on resubmission. - * - * XXX: If the context is reset as a result of the request cancellation - * this G2H is received after the schedule disable complete G2H which is - * wrong as this creates a race between the request cancellation code - * re-submitting the context and this G2H handler. This is a bug in the - * GuC but can be worked around in the meantime but converting this to a - * NOP if a pending enable is in flight as this indicates that a request - * cancellation has occurred. */ spin_lock_irqsave(&ce->guc_state.lock, flags); - if (likely(!context_pending_enable(ce))) - clr_context_enabled(ce); - else - skip = true; + clr_context_enabled(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - if (unlikely(skip)) - goto out_put; /* * For each context in the relationship find the hanging request @@ -1592,7 +1642,6 @@ next_context: } __unwind_incomplete_requests(parent); -out_put: intel_context_put(parent); } @@ -1727,7 +1776,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc) { /* Reset called during driver load or during wedge? */ if (unlikely(!guc_submission_initialized(guc) || - test_bit(I915_WEDGED, &guc_to_gt(guc)->reset.flags))) { + intel_gt_is_wedged(guc_to_gt(guc)))) { return; } @@ -1746,6 +1795,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc) } static void destroyed_worker_func(struct work_struct *w); +static void reset_fail_worker_func(struct work_struct *w); /* * Set up the memory resources to be shared with the GuC (via the GGTT) @@ -1776,6 +1826,8 @@ int intel_guc_submission_init(struct intel_guc *guc) INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); INIT_WORK(&guc->submission_state.destroyed_worker, destroyed_worker_func); + INIT_WORK(&guc->submission_state.reset_fail_worker, + reset_fail_worker_func); guc->submission_state.guc_ids_bitmap = bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); @@ -1785,6 +1837,7 @@ int intel_guc_submission_init(struct intel_guc *guc) spin_lock_init(&guc->timestamp.lock); INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; + guc->timestamp.shift = gpm_timestamp_shift(gt); return 0; } @@ -2531,12 +2584,6 @@ static void guc_context_cancel_request(struct intel_context *ce, true); } - /* - * XXX: Racey if context is reset, see comment in - * __guc_reset_context(). - */ - flush_work(&ce_to_guc(ce)->ct.requests.worker); - guc_context_unblock(block_context); intel_context_put(ce); } @@ -3250,8 +3297,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) GEM_BUG_ON(!intel_context_is_parent(ce)); GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); - if (ce->parallel.last_rq) - i915_request_put(ce->parallel.last_rq); unpin_guc_id(guc, ce); lrc_unpin(ce); } @@ -3973,14 +4018,14 @@ static void guc_handle_context_reset(struct intel_guc *guc, { trace_intel_context_reset(ce); - /* - * XXX: Racey if request cancellation has occurred, see comment in - * __guc_reset_context(). - */ - if (likely(!intel_context_is_banned(ce) && - !context_blocked(ce))) { + if (likely(!intel_context_is_banned(ce))) { capture_error_state(guc, ce); guc_context_replay(ce); + } else { + drm_err(&guc_to_gt(guc)->i915->drm, + "Invalid GuC engine reset notificaion for 0x%04X on %s: banned = %d, blocked = %d", + ce->guc_id.id, ce->engine->name, intel_context_is_banned(ce), + context_blocked(ce)); } } @@ -4019,6 +4064,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, return 0; } +int intel_guc_error_capture_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len) +{ + int status; + + if (unlikely(len != 1)) { + drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + status = msg[0]; + drm_info(&guc_to_gt(guc)->i915->drm, "Got error capture: status = %d", status); + + /* FIXME: Do something with the capture */ + + return 0; +} + static struct intel_engine_cs * guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) { @@ -4031,6 +4094,26 @@ guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) return gt->engine_class[engine_class][instance]; } +static void reset_fail_worker_func(struct work_struct *w) +{ + struct intel_guc *guc = container_of(w, struct intel_guc, + submission_state.reset_fail_worker); + struct intel_gt *gt = guc_to_gt(guc); + intel_engine_mask_t reset_fail_mask; + unsigned long flags; + + spin_lock_irqsave(&guc->submission_state.lock, flags); + reset_fail_mask = guc->submission_state.reset_fail_mask; + guc->submission_state.reset_fail_mask = 0; + spin_unlock_irqrestore(&guc->submission_state.lock, flags); + + if (likely(reset_fail_mask)) + intel_gt_handle_error(gt, reset_fail_mask, + I915_ERROR_CAPTURE, + "GuC failed to reset engine mask=0x%x\n", + reset_fail_mask); +} + int intel_guc_engine_failure_process_msg(struct intel_guc *guc, const u32 *msg, u32 len) { @@ -4038,6 +4121,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, struct intel_gt *gt = guc_to_gt(guc); u8 guc_class, instance; u32 reason; + unsigned long flags; if (unlikely(len != 3)) { drm_err(>->i915->drm, "Invalid length %u", len); @@ -4062,10 +4146,15 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X", guc_class, instance, engine->name, reason); - intel_gt_handle_error(gt, engine->mask, - I915_ERROR_CAPTURE, - "GuC failed to reset %s (reason=0x%08x)\n", - engine->name, reason); + spin_lock_irqsave(&guc->submission_state.lock, flags); + guc->submission_state.reset_fail_mask |= engine->mask; + spin_unlock_irqrestore(&guc->submission_state.lock, flags); + + /* + * A GT reset flushes this worker queue (G2H handler) so we must use + * another worker to trigger a GT reset. + */ + queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); return 0; } @@ -4434,27 +4523,31 @@ static inline bool skip_handshake(struct i915_request *rq) return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); } +#define NON_SKIP_LEN 6 static u32 * emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, u32 *cs) { struct intel_context *ce = rq->context; + __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; + __maybe_unused u32 *start_fini_breadcrumb_cs = cs; GEM_BUG_ON(!intel_context_is_parent(ce)); if (unlikely(skip_handshake(rq))) { /* * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch, - * the -6 comes from the length of the emits below. + * the NON_SKIP_LEN comes from the length of the emits below. */ memset(cs, 0, sizeof(u32) * - (ce->engine->emit_fini_breadcrumb_dw - 6)); - cs += ce->engine->emit_fini_breadcrumb_dw - 6; + (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); + cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; } else { cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); } /* Emit fini breadcrumb */ + before_fini_breadcrumb_user_interrupt_cs = cs; cs = gen8_emit_ggtt_write(cs, rq->fence.seqno, i915_request_active_timeline(rq)->hwsp_offset, @@ -4464,6 +4557,12 @@ emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; + /* Ensure our math for skip + emit is correct */ + GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != + cs); + GEM_BUG_ON(start_fini_breadcrumb_cs + + ce->engine->emit_fini_breadcrumb_dw != cs); + rq->tail = intel_ring_offset(rq, cs); return cs; @@ -4506,22 +4605,25 @@ emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, u32 *cs) { struct intel_context *ce = rq->context; + __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; + __maybe_unused u32 *start_fini_breadcrumb_cs = cs; GEM_BUG_ON(!intel_context_is_child(ce)); if (unlikely(skip_handshake(rq))) { /* * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch, - * the -6 comes from the length of the emits below. + * the NON_SKIP_LEN comes from the length of the emits below. */ memset(cs, 0, sizeof(u32) * - (ce->engine->emit_fini_breadcrumb_dw - 6)); - cs += ce->engine->emit_fini_breadcrumb_dw - 6; + (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); + cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; } else { cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); } /* Emit fini breadcrumb */ + before_fini_breadcrumb_user_interrupt_cs = cs; cs = gen8_emit_ggtt_write(cs, rq->fence.seqno, i915_request_active_timeline(rq)->hwsp_offset, @@ -4531,11 +4633,19 @@ emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; + /* Ensure our math for skip + emit is correct */ + GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != + cs); + GEM_BUG_ON(start_fini_breadcrumb_cs + + ce->engine->emit_fini_breadcrumb_dw != cs); + rq->tail = intel_ring_offset(rq, cs); return cs; } +#undef NON_SKIP_LEN + static struct intel_context * guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, unsigned long flags) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index d10b227ac4aa..556829de9c17 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -124,6 +124,7 @@ int intel_huc_auth(struct intel_huc *huc) } intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING); + drm_info(>->i915->drm, "HuC authenticated\n"); return 0; fail: diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 09ed29df67bc..da199aa6989f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -432,6 +432,15 @@ static int __uc_check_hw(struct intel_uc *uc) return 0; } +static void print_fw_ver(struct intel_uc *uc, struct intel_uc_fw *fw) +{ + struct drm_i915_private *i915 = uc_to_gt(uc)->i915; + + drm_info(&i915->drm, "%s firmware %s version %u.%u\n", + intel_uc_fw_type_repr(fw->type), fw->path, + fw->major_ver_found, fw->minor_ver_found); +} + static int __uc_init_hw(struct intel_uc *uc) { struct drm_i915_private *i915 = uc_to_gt(uc)->i915; @@ -442,6 +451,11 @@ static int __uc_init_hw(struct intel_uc *uc) GEM_BUG_ON(!intel_uc_supports_guc(uc)); GEM_BUG_ON(!intel_uc_wants_guc(uc)); + print_fw_ver(uc, &guc->fw); + + if (intel_uc_uses_huc(uc)) + print_fw_ver(uc, &huc->fw); + if (!intel_uc_fw_is_loadable(&guc->fw)) { ret = __uc_check_hw(uc) || intel_uc_fw_is_overridden(&guc->fw) || @@ -507,24 +521,11 @@ static int __uc_init_hw(struct intel_uc *uc) intel_rps_lower_unslice(&uc_to_gt(uc)->rps); } - drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n", - intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path, - guc->fw.major_ver_found, guc->fw.minor_ver_found, - "submission", + drm_info(&i915->drm, "GuC submission %s\n", enableddisabled(intel_uc_uses_guc_submission(uc))); - - drm_info(&i915->drm, "GuC SLPC: %s\n", + drm_info(&i915->drm, "GuC SLPC %s\n", enableddisabled(intel_uc_uses_guc_slpc(uc))); - if (intel_uc_uses_huc(uc)) { - drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n", - intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC), - huc->fw.path, - huc->fw.major_ver_found, huc->fw.minor_ver_found, - "authenticated", - yesno(intel_huc_is_authenticated(huc))); - } - return 0; /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 549d5919dc70..c88113044494 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -52,21 +52,21 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * firmware as TGL. */ #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ - fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3)) \ - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0)) \ - fw_def(DG1, 0, guc_def(dg1, 62, 0, 0)) \ - fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0)) \ - fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0)) \ - fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0)) \ - fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0)) \ - fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0)) \ - fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0)) \ - fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0)) + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \ + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3)) \ + fw_def(DG1, 0, guc_def(dg1, 69, 0, 3)) \ + fw_def(ROCKETLAKE, 0, guc_def(tgl, 69, 0, 3)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 69, 0, 3)) \ + fw_def(JASPERLAKE, 0, guc_def(ehl, 69, 0, 3)) \ + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 69, 0, 3)) \ + fw_def(ICELAKE, 0, guc_def(icl, 69, 0, 3)) \ + fw_def(COMETLAKE, 5, guc_def(cml, 69, 0, 3)) \ + fw_def(COMETLAKE, 0, guc_def(kbl, 69, 0, 3)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 69, 0, 3)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 69, 0, 3)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 69, 0, 3)) \ + fw_def(BROXTON, 0, guc_def(bxt, 69, 0, 3)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 69, 0, 3)) #define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \ @@ -451,20 +451,19 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj = uc_fw->obj; struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; - struct i915_vma *dummy = &uc_fw->dummy; + struct i915_vma_resource *dummy = &uc_fw->dummy; u32 pte_flags = 0; - dummy->node.start = uc_fw_ggtt_offset(uc_fw); - dummy->node.size = obj->base.size; - dummy->pages = obj->mm.pages; - dummy->vm = &ggtt->vm; + dummy->start = uc_fw_ggtt_offset(uc_fw); + dummy->node_size = obj->base.size; + dummy->bi.pages = obj->mm.pages; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size); + GEM_BUG_ON(dummy->node_size > ggtt->uc_fw.size); /* uc_fw->obj cache domains were not controlled across suspend */ if (i915_gem_object_has_struct_page(obj)) - drm_clflush_sg(dummy->pages); + drm_clflush_sg(dummy->bi.pages); if (i915_gem_object_is_lmem(obj)) pte_flags |= PTE_LM; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index d9d1dc0b4cbb..3229018877d3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -85,7 +85,7 @@ struct intel_uc_fw { * threaded as it done during driver load (inherently single threaded) * or during a GT reset (mutex guarantees single threaded). */ - struct i915_vma dummy; + struct i915_vma_resource dummy; struct i915_vma *rsa_data; /* diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index d3327b802b76..a115894d5896 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -157,7 +157,7 @@ static int intel_guc_steal_guc_ids(void *arg) wakeref = intel_runtime_pm_get(gt->uncore->rpm); engine = intel_selftest_find_any_engine(gt); sv = guc->submission_state.num_guc_ids; - guc->submission_state.num_guc_ids = 4096; + guc->submission_state.num_guc_ids = 512; /* Create spinner to block requests in below loop */ ce[context_index] = intel_context_create(engine); |