diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
96 files changed, 1609 insertions, 1316 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c index 8fe0499308ff..8116fd5987e2 100644 --- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c @@ -169,7 +169,7 @@ static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs, return cs; } -u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs) +u32 *gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs) { return __gen2_emit_breadcrumb(rq, cs, 16, 8); } @@ -179,7 +179,7 @@ u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) return __gen2_emit_breadcrumb(rq, cs, 8, 8); } -/* Just userspace ABI convention to limit the wa batch bo to a resonable size */ +/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */ #define I830_BATCH_LIMIT SZ_256K #define I830_TLB_ENTRIES (2) #define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT) @@ -248,7 +248,7 @@ int i830_emit_bb_start(struct i915_request *rq, return 0; } -int gen3_emit_bb_start(struct i915_request *rq, +int gen2_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { @@ -292,29 +292,12 @@ int gen4_emit_bb_start(struct i915_request *rq, void gen2_irq_enable(struct intel_engine_cs *engine) { - struct drm_i915_private *i915 = engine->i915; - - i915->irq_mask &= ~engine->irq_enable_mask; - intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask); - ENGINE_POSTING_READ16(engine, RING_IMR); -} - -void gen2_irq_disable(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - - i915->irq_mask |= engine->irq_enable_mask; - intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask); -} - -void gen3_irq_enable(struct intel_engine_cs *engine) -{ engine->i915->irq_mask &= ~engine->irq_enable_mask; intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR); } -void gen3_irq_disable(struct intel_engine_cs *engine) +void gen2_irq_disable(struct intel_engine_cs *engine) { engine->i915->irq_mask |= engine->irq_enable_mask; intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.h b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h index a5cd64a65c9e..7b37560fc356 100644 --- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.h +++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h @@ -15,13 +15,13 @@ int gen2_emit_flush(struct i915_request *rq, u32 mode); int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode); int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode); -u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs); +u32 *gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs); u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs); int i830_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags); -int gen3_emit_bb_start(struct i915_request *rq, +int gen2_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags); int gen4_emit_bb_start(struct i915_request *rq, @@ -30,8 +30,6 @@ int gen4_emit_bb_start(struct i915_request *rq, void gen2_irq_enable(struct intel_engine_cs *engine); void gen2_irq_disable(struct intel_engine_cs *engine); -void gen3_irq_enable(struct intel_engine_cs *engine); -void gen3_irq_disable(struct intel_engine_cs *engine); void gen5_irq_enable(struct intel_engine_cs *engine); void gen5_irq_disable(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c index d38b914d1206..6e89112f68ae 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -399,7 +399,8 @@ static void emit_batch(struct i915_vma * const vma, batch_add(&cmds, MI_LOAD_REGISTER_IMM(2)); batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_0_GEN7)); batch_add(&cmds, 0xffff0000 | - ((IS_IVB_GT1(i915) || IS_VALLEYVIEW(i915)) ? + (((IS_IVYBRIDGE(i915) && INTEL_INFO(i915)->gt == 1) || + IS_VALLEYVIEW(i915)) ? HIZ_RAW_STALL_OPT_DISABLE : 0)); batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_1)); diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index e1bf13e3d307..e9f65f27b53f 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -189,9 +189,6 @@ static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine) { i915_reg_t reg = gen12_get_aux_inv_reg(engine); - if (IS_PONTEVECCHIO(engine->i915)) - return false; - /* * So far platforms supported by i915 having flat ccs do not require * AUX invalidation. Check also whether the engine requires it. @@ -743,21 +740,25 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs) } /* Wa_14014475959:dg2 */ -#define CCS_SEMAPHORE_PPHWSP_OFFSET 0x540 -static u32 ccs_semaphore_offset(struct i915_request *rq) +/* Wa_16019325821 */ +/* Wa_14019159160 */ +#define HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET 0x540 +static u32 hold_switchout_semaphore_offset(struct i915_request *rq) { return i915_ggtt_offset(rq->context->state) + - (LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET; + (LRC_PPHWSP_PN * PAGE_SIZE) + HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET; } /* Wa_14014475959:dg2 */ -static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs) +/* Wa_16019325821 */ +/* Wa_14019159160 */ +static u32 *hold_switchout_emit_wa_busywait(struct i915_request *rq, u32 *cs) { int i; *cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL | MI_ATOMIC_MOVE; - *cs++ = ccs_semaphore_offset(rq); + *cs++ = hold_switchout_semaphore_offset(rq); *cs++ = 0; *cs++ = 1; @@ -773,7 +774,7 @@ static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs) MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_EQ_SDD; *cs++ = 0; - *cs++ = ccs_semaphore_offset(rq); + *cs++ = hold_switchout_semaphore_offset(rq); *cs++ = 0; return cs; @@ -790,8 +791,10 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) cs = gen12_emit_preempt_busywait(rq, cs); /* Wa_14014475959:dg2 */ - if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine)) - cs = ccs_emit_wa_busywait(rq, cs); + /* Wa_16019325821 */ + /* Wa_14019159160 */ + if (intel_engine_uses_wa_hold_switchout(rq->engine)) + cs = hold_switchout_emit_wa_busywait(rq, cs); rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -827,7 +830,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) cs = gen12_emit_pipe_control(cs, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); - if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 55)) /* Wa_1409600907 */ flags |= PIPE_CONTROL_DEPTH_STALL; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index fa46d2308b0e..398d60a66410 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -500,11 +500,11 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, } static void -xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, - struct i915_vma_resource *vma_res, - struct sgt_dma *iter, - unsigned int pat_index, - u32 flags) +xehp_ppgtt_insert_huge(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, + struct sgt_dma *iter, + unsigned int pat_index, + u32 flags) { const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); unsigned int rem = sg_dma_len(iter->sg); @@ -741,8 +741,8 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm, struct sgt_dma iter = sgt_dma(vma_res); if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) { - if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 50)) - xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags); + if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 55)) + xehp_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags); else gen8_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags); } else { @@ -781,11 +781,11 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); } -static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - unsigned int pat_index, - u32 flags) +static void xehp_ppgtt_insert_entry_lm(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + unsigned int pat_index, + u32 flags) { u64 idx = offset >> GEN8_PTE_SHIFT; struct i915_page_directory * const pdp = @@ -810,15 +810,15 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, pat_index, flags); } -static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - unsigned int pat_index, - u32 flags) +static void xehp_ppgtt_insert_entry(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + unsigned int pat_index, + u32 flags) { if (flags & PTE_LM) - return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset, - pat_index, flags); + return xehp_ppgtt_insert_entry_lm(vm, addr, offset, + pat_index, flags); return gen8_ppgtt_insert_entry(vm, addr, offset, pat_index, flags); } @@ -961,6 +961,9 @@ static int gen8_init_rsvd(struct i915_address_space *vm) struct i915_vma *vma; int ret; + if (!intel_gt_needs_wa_16018031267(vm->gt)) + return 0; + /* The memory will be used only by GPU. */ obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, I915_BO_ALLOC_VOLATILE | @@ -1042,7 +1045,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; ppgtt->vm.insert_entries = gen8_ppgtt_insert; if (HAS_64K_PAGES(gt->i915)) - ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry; + ppgtt->vm.insert_page = xehp_ppgtt_insert_entry; else ppgtt->vm.insert_page = gen8_ppgtt_insert_entry; ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index d650beb8ed22..cc866773ba6f 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -70,7 +70,7 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) if (!--b->irq_enabled) b->irq_disable(b); - WRITE_ONCE(b->irq_armed, 0); + WRITE_ONCE(b->irq_armed, NULL); intel_gt_pm_put_async(b->irq_engine->gt, wakeref); } @@ -263,8 +263,13 @@ static void signal_irq_work(struct irq_work *work) i915_request_put(rq); } + /* Lazy irq enabling after HW submission */ if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers)) intel_breadcrumbs_arm_irq(b); + + /* And confirm that we still want irqs enabled before we yield */ + if (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) + intel_breadcrumbs_disarm_irq(b); } struct intel_breadcrumbs * @@ -315,13 +320,7 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) return; /* Kick the work once more to drain the signalers, and disarm the irq */ - irq_work_sync(&b->irq_work); - while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { - local_irq_disable(); - signal_irq_work(&b->irq_work); - local_irq_enable(); - cond_resched(); - } + irq_work_queue(&b->irq_work); } void intel_breadcrumbs_free(struct kref *kref) @@ -404,7 +403,7 @@ static void insert_breadcrumb(struct i915_request *rq) * the request as it may have completed and raised the interrupt as * we were attaching it into the lists. */ - if (!b->irq_armed || __i915_request_is_complete(rq)) + if (!READ_ONCE(b->irq_armed) || __i915_request_is_complete(rq)) irq_work_queue(&b->irq_work); } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index a2f1245741bb..b1b8695ba7c9 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -27,6 +27,8 @@ static void rcu_context_free(struct rcu_head *rcu) struct intel_context *ce = container_of(rcu, typeof(*ce), rcu); trace_intel_context_free(ce); + if (intel_context_has_own_state(ce)) + fput(ce->default_state); kmem_cache_free(slab_ce, ce); } diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 25564c01507e..9f523999acd1 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -375,6 +375,28 @@ intel_context_clear_nopreempt(struct intel_context *ce) clear_bit(CONTEXT_NOPREEMPT, &ce->flags); } +#if IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API) +static inline bool intel_context_has_own_state(const struct intel_context *ce) +{ + return test_bit(CONTEXT_OWN_STATE, &ce->flags); +} + +static inline bool intel_context_set_own_state(struct intel_context *ce) +{ + return test_and_set_bit(CONTEXT_OWN_STATE, &ce->flags); +} +#else +static inline bool intel_context_has_own_state(const struct intel_context *ce) +{ + return false; +} + +static inline bool intel_context_set_own_state(struct intel_context *ce) +{ + return true; +} +#endif + u64 intel_context_get_total_runtime_ns(struct intel_context *ce); u64 intel_context_get_avg_runtime_ns(struct intel_context *ce); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 7eccbd70d89f..98c7f6052069 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -99,6 +99,8 @@ struct intel_context { struct i915_address_space *vm; struct i915_gem_context __rcu *gem_context; + struct file *default_state; + /* * @signal_lock protects the list of requests that need signaling, * @signals. While there are any requests that need signaling, @@ -130,6 +132,8 @@ struct intel_context { #define CONTEXT_PERMA_PIN 11 #define CONTEXT_IS_PARKING 12 #define CONTEXT_EXITING 13 +#define CONTEXT_LOW_LATENCY 14 +#define CONTEXT_OWN_STATE 15 struct { u64 timeout_us; diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 40269e4c1e31..325da0414d94 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -126,9 +126,6 @@ execlists_active(const struct intel_engine_execlists *execlists) return active; } -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); - static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1ade568ffbfa..b721bbd23356 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -308,7 +308,7 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class) /* * There is a discrepancy here between the size reported * by the register and the size of the context layout - * in the docs. Both are described as authorative! + * in the docs. Both are described as authoritative! * * The discrepancy is on the order of a few cachelines, * but the total is under one page (4k), which is our @@ -497,9 +497,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->logical_mask = BIT(logical_instance); __sprint_engine_name(engine); - if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) && - __ffs(CCS_MASK(engine->gt)) == engine->instance) || - engine->class == RENDER_CLASS) + if ((engine->class == COMPUTE_CLASS || engine->class == RENDER_CLASS) && + __ffs(CCS_MASK(engine->gt) | RCS_MASK(engine->gt)) == engine->instance) engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE; /* features common between engines sharing EUs */ @@ -589,7 +588,7 @@ u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value) * NB: The GuC API only supports 32bit values. However, the limit is further * reduced due to internal calculations which would otherwise overflow. */ - if (intel_guc_submission_is_wanted(&engine->gt->uc.guc)) + if (intel_guc_submission_is_wanted(gt_to_guc(engine->gt))) value = min_t(u64, value, guc_policy_max_preempt_timeout_ms()); value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); @@ -610,7 +609,7 @@ u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value) * NB: The GuC API only supports 32bit values. However, the limit is further * reduced due to internal calculations which would otherwise overflow. */ - if (intel_guc_submission_is_wanted(&engine->gt->uc.guc)) + if (intel_guc_submission_is_wanted(gt_to_guc(engine->gt))) value = min_t(u64, value, guc_policy_max_exec_quantum_ms()); value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); @@ -678,8 +677,8 @@ void intel_engines_release(struct intel_gt *gt) * in case we aborted before completely initialising the engines. */ GEM_BUG_ON(intel_gt_pm_is_awake(gt)); - if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) - __intel_gt_reset(gt, ALL_ENGINES); + if (!intel_gt_gpu_reset_clobbers_display(gt)) + intel_gt_reset_all_engines(gt); /* Decouple the backend; but keep the layout for late GPU resets */ for_each_engine(engine, gt, id) { @@ -694,6 +693,8 @@ void intel_engines_release(struct intel_gt *gt) memset(&engine->reset, 0, sizeof(engine->reset)); } + + llist_del_all(>->i915->uabi_engines_llist); } void intel_engine_free_request_pool(struct intel_engine_cs *engine) @@ -765,14 +766,13 @@ static void engine_mask_apply_media_fuses(struct intel_gt *gt) * and bits have disable semantices. */ media_fuse = intel_uncore_read(gt->uncore, GEN11_GT_VEBOX_VDBOX_DISABLE); - if (MEDIA_VER_FULL(i915) < IP_VER(12, 50)) + if (MEDIA_VER_FULL(i915) < IP_VER(12, 55)) media_fuse = ~media_fuse; - vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; - vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> - GEN11_GT_VEBOX_DISABLE_SHIFT; + vdbox_mask = REG_FIELD_GET(GEN11_GT_VDBOX_DISABLE_MASK, media_fuse); + vebox_mask = REG_FIELD_GET(GEN11_GT_VEBOX_DISABLE_MASK, media_fuse); - if (MEDIA_VER_FULL(i915) >= IP_VER(12, 50)) { + if (MEDIA_VER_FULL(i915) >= IP_VER(12, 55)) { fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1); gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1); } else { @@ -839,44 +839,12 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt) } } -static void engine_mask_apply_copy_fuses(struct intel_gt *gt) -{ - struct drm_i915_private *i915 = gt->i915; - struct intel_gt_info *info = >->info; - unsigned long meml3_mask; - unsigned long quad; - - if (!(GRAPHICS_VER_FULL(i915) >= IP_VER(12, 60) && - GRAPHICS_VER_FULL(i915) < IP_VER(12, 70))) - return; - - meml3_mask = intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3); - meml3_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, meml3_mask); - - /* - * Link Copy engines may be fused off according to meml3_mask. Each - * bit is a quad that houses 2 Link Copy and two Sub Copy engines. - */ - for_each_clear_bit(quad, &meml3_mask, GEN12_MAX_MSLICES) { - unsigned int instance = quad * 2 + 1; - intel_engine_mask_t mask = GENMASK(_BCS(instance + 1), - _BCS(instance)); - - if (mask & info->engine_mask) { - gt_dbg(gt, "bcs%u fused off\n", instance); - gt_dbg(gt, "bcs%u fused off\n", instance + 1); - - info->engine_mask &= ~mask; - } - } -} - /* * Determine which engines are fused off in our particular hardware. * Note that we have a catch-22 situation where we need to be able to access * the blitter forcewake domain to read the engine fuses, but at the same time * we need to know which engines are available on the system to know which - * forcewake domains are present. We solve this by intializing the forcewake + * forcewake domains are present. We solve this by initializing the forcewake * domains based on the full engine mask in the platform capabilities before * calling this function and pruning the domains for fused-off engines * afterwards. @@ -889,7 +857,6 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) engine_mask_apply_media_fuses(gt); engine_mask_apply_compute_fuses(gt); - engine_mask_apply_copy_fuses(gt); /* * The only use of the GSC CS is to load and communicate with the GSC @@ -908,6 +875,29 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) info->engine_mask &= ~BIT(GSC0); } + /* + * Do not create the command streamer for CCS slices beyond the first. + * All the workload submitted to the first engine will be shared among + * all the slices. + * + * Once the user will be allowed to customize the CCS mode, then this + * check needs to be removed. + */ + if (IS_DG2(gt->i915)) { + u8 first_ccs = __ffs(CCS_MASK(gt)); + + /* + * Store the number of active cslices before + * changing the CCS engine configuration + */ + gt->ccs.cslices = CCS_MASK(gt); + + /* Mask off all the CCS engine */ + info->engine_mask &= ~GENMASK(CCS3, CCS0); + /* Put back in the first CCS engine */ + info->engine_mask |= BIT(_CCS(first_ccs)); + } + return info->engine_mask; } @@ -1193,7 +1183,6 @@ static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine) if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 74) || GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) || GRAPHICS_VER_FULL(i915) == IP_VER(12, 70) || - GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) || GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) { regs = xehp_regs; num = ARRAY_SIZE(xehp_regs); @@ -1421,7 +1410,7 @@ create_ggtt_bind_context(struct intel_engine_cs *engine) /* * MI_UPDATE_GTT can insert up to 511 PTE entries and there could be multiple - * bind requets at a time so get a bigger ring. + * bind requests at a time so get a bigger ring. */ return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_512K, I915_GEM_HWS_GGTT_BIND_ADDR, @@ -1543,7 +1532,7 @@ int intel_engines_init(struct intel_gt *gt) /** * intel_engine_cleanup_common - cleans up the engine state created by - * the common initiailizers. + * the common initializers. * @engine: Engine to cleanup. * * This cleans up everything created by the common helpers. diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 96bdb93a948d..fb7bff27b45a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -279,9 +279,6 @@ static int __engine_park(struct intel_wakeref *wf) intel_engine_park_heartbeat(engine); intel_breadcrumbs_park(engine->breadcrumbs); - /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN); - if (engine->park) engine->park(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index a8eac59e3779..1c4784cb296c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -15,6 +15,7 @@ #define HEAD_WRAP_COUNT 0xFFE00000 #define HEAD_WRAP_ONE 0x00200000 #define HEAD_ADDR 0x001FFFFC +#define HEAD_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */ #define RING_START(base) _MMIO((base) + 0x38) #define RING_CTL(base) _MMIO((base) + 0x3c) #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ @@ -26,7 +27,6 @@ #define RING_VALID_MASK 0x00000001 #define RING_VALID 0x00000001 #define RING_INVALID 0x00000000 -#define RING_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */ #define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */ #define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */ #define RING_SYNC_0(base) _MMIO((base) + 0x40) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 960e6be2042f..155b6255a63e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -237,7 +237,7 @@ struct intel_engine_execlists { */ struct i915_request * const *active; /** - * @inflight: the set of contexts submitted and acknowleged by HW + * @inflight: the set of contexts submitted and acknowledged by HW * * The set of inflight contexts is managed by reading CS events * from the HW. On a context-switch event (not preemption), we @@ -260,7 +260,7 @@ struct intel_engine_execlists { unsigned int port_mask; /** - * @virtual: Queue of requets on a virtual engine, sorted by priority. + * @virtual: Queue of requests on a virtual engine, sorted by priority. * Each RB entry is a struct i915_priolist containing a list of requests * of the same priority. */ @@ -343,6 +343,11 @@ struct intel_engine_guc_stats { * @start_gt_clk: GT clock time of last idle to active transition. */ u64 start_gt_clk; + + /** + * @total: The last value of total returned + */ + u64 total; }; union intel_engine_tlb_inv_reg { @@ -586,7 +591,7 @@ struct intel_engine_cs { #define I915_ENGINE_HAS_RCS_REG_STATE BIT(9) #define I915_ENGINE_HAS_EU_PRIORITY BIT(10) #define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11) -#define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT BIT(12) +#define I915_ENGINE_USES_WA_HOLD_SWITCHOUT BIT(12) unsigned int flags; /* @@ -696,10 +701,12 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) } /* Wa_14014475959:dg2 */ +/* Wa_16019325821 */ +/* Wa_14019159160 */ static inline bool -intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine) +intel_engine_uses_wa_hold_switchout(struct intel_engine_cs *engine) { - return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; + return engine->flags & I915_ENGINE_USES_WA_HOLD_SWITCHOUT; } #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 42aade0faf2d..03baa7fa0a27 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -405,15 +405,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) return active; } -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) -{ - struct intel_engine_cs *engine = - container_of(execlists, typeof(*engine), execlists); - - return __unwind_incomplete_requests(engine); -} - static void execlists_context_status_change(struct i915_request *rq, unsigned long status) { @@ -493,7 +484,7 @@ __execlists_schedule_in(struct i915_request *rq) /* Use a fixed tag for OA and friends */ GEM_BUG_ON(ce->tag <= BITS_PER_LONG); ce->lrc.ccid = ce->tag; - } else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { + } else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) { /* We don't need a strict matching tag, just different values */ unsigned int tag = ffs(READ_ONCE(engine->context_tag)); @@ -613,7 +604,7 @@ static void __execlists_schedule_out(struct i915_request * const rq, intel_engine_add_retire(engine, ce->timeline); ccid = ce->lrc.ccid; - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) { ccid >>= XEHP_SW_CTX_ID_SHIFT - 32; ccid &= XEHP_MAX_CONTEXT_HW_ID; } else { @@ -1907,7 +1898,7 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", head, upper_32_bits(csb), lower_32_bits(csb)); - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) promote = xehp_csb_parse(csb); else if (GRAPHICS_VER(engine->i915) >= 12) promote = gen12_csb_parse(csb); @@ -2511,7 +2502,7 @@ static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir) ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)); ENGINE_TRACE(engine, "semaphore yield: %08x\n", engine->execlists.yield); - if (del_timer(&engine->execlists.timer)) + if (timer_delete(&engine->execlists.timer)) tasklet = true; } @@ -2898,7 +2889,7 @@ static void enable_error_interrupt(struct intel_engine_cs *engine) drm_err(&engine->i915->drm, "engine '%s' resumed still in error: %08x\n", engine->name, status); - __intel_gt_reset(engine->gt, engine->mask); + intel_gt_reset_engine(engine); } /* @@ -3272,6 +3263,9 @@ static void execlists_park(struct intel_engine_cs *engine) { cancel_timer(&engine->execlists.timer); cancel_timer(&engine->execlists.preempt); + + /* Reset upon idling, or we may delay the busy wakeup. */ + WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); } static void add_to_engine(struct i915_request *rq) @@ -3312,11 +3306,7 @@ static void remove_from_engine(struct i915_request *rq) static bool can_preempt(struct intel_engine_cs *engine) { - if (GRAPHICS_VER(engine->i915) > 8) - return true; - - /* GPGPU on bdw requires extra w/a; not implemented */ - return engine->class != RENDER_CLASS; + return GRAPHICS_VER(engine->i915) > 8; } static void kick_execlists(const struct i915_request *rq, int prio) @@ -3380,8 +3370,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) static void execlists_shutdown(struct intel_engine_cs *engine) { /* Synchronise with residual timers and any softirq they raise */ - del_timer_sync(&engine->execlists.timer); - del_timer_sync(&engine->execlists.preempt); + timer_delete_sync(&engine->execlists.timer); + timer_delete_sync(&engine->execlists.preempt); tasklet_kill(&engine->sched_engine->tasklet); } @@ -3479,7 +3469,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) } } - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) { if (intel_engine_has_preemption(engine)) engine->emit_bb_start = xehp_emit_bb_start; else @@ -3582,7 +3572,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); if (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 50)) { + GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 55)) { execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); } diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index ec1cbe229f0e..46a5aa4ab9c8 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -9,10 +9,9 @@ #include <linux/stop_machine.h> #include <drm/drm_managed.h> -#include <drm/i915_drm.h> -#include <drm/intel-gtt.h> +#include <drm/intel/i915_drm.h> +#include <drm/intel/intel-gtt.h> -#include "display/intel_display.h" #include "gem/i915_gem_lmem.h" #include "intel_context.h" @@ -108,11 +107,12 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915) /** * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM * @vm: The VM to suspend the mappings for + * @evict_all: Evict all VMAs * * Suspend the memory mappings for all objects mapped to HW via the GGTT or a * DPT page table. */ -void i915_ggtt_suspend_vm(struct i915_address_space *vm) +void i915_ggtt_suspend_vm(struct i915_address_space *vm, bool evict_all) { struct i915_vma *vma, *vn; int save_skip_rewrite; @@ -158,7 +158,7 @@ retry: goto retry; } - if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { + if (evict_all || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { i915_vma_wait_for_bind(vma); __i915_vma_evict(vma, false); @@ -173,13 +173,15 @@ retry: vm->skip_pte_rewrite = save_skip_rewrite; mutex_unlock(&vm->mutex); + + drm_WARN_ON(&vm->i915->drm, evict_all && !list_empty(&vm->bound_list)); } void i915_ggtt_suspend(struct i915_ggtt *ggtt) { struct intel_gt *gt; - i915_ggtt_suspend_vm(&ggtt->vm); + i915_ggtt_suspend_vm(&ggtt->vm, false); ggtt->invalidate(ggtt); list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) @@ -231,11 +233,8 @@ static void guc_ggtt_ct_invalidate(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; intel_wakeref_t wakeref; - with_intel_runtime_pm_if_active(uncore->rpm, wakeref) { - struct intel_guc *guc = >->uc.guc; - - intel_guc_invalidate_tlb_guc(guc); - } + with_intel_runtime_pm_if_active(uncore->rpm, wakeref) + intel_guc_invalidate_tlb_guc(gt_to_guc(gt)); } static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) @@ -246,7 +245,7 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) gen8_ggtt_invalidate(ggtt); list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) { - if (intel_guc_tlb_invalidation_is_available(>->uc.guc)) + if (intel_guc_tlb_invalidation_is_available(gt_to_guc(gt))) guc_ggtt_ct_invalidate(gt); else if (GRAPHICS_VER(i915) >= 12) intel_uncore_write_fw(gt->uncore, @@ -290,6 +289,14 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr, return pte; } +static dma_addr_t gen8_ggtt_pte_decode(u64 pte, bool *is_present, bool *is_local) +{ + *is_present = pte & GEN8_PAGE_PRESENT; + *is_local = pte & GEN12_GGTT_PTE_LM; + + return pte & GEN12_GGTT_PTE_ADDR_MASK; +} + static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt) { struct intel_gt *gt = ggtt->vm.gt; @@ -436,6 +443,11 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) writeq(pte, addr); } +static gen8_pte_t gen8_get_pte(void __iomem *addr) +{ + return readq(addr); +} + static void gen8_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, @@ -451,6 +463,16 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, ggtt->invalidate(ggtt); } +static dma_addr_t gen8_ggtt_read_entry(struct i915_address_space *vm, + u64 offset, bool *is_present, bool *is_local) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t __iomem *pte = + (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; + + return ggtt->vm.pte_decode(gen8_get_pte(pte), is_present, is_local); +} + static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm, dma_addr_t addr, u64 offset, unsigned int pat_index, u32 flags) @@ -606,6 +628,17 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm, ggtt->invalidate(ggtt); } +static dma_addr_t gen6_ggtt_read_entry(struct i915_address_space *vm, + u64 offset, + bool *is_present, bool *is_local) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen6_pte_t __iomem *pte = + (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; + + return vm->pte_decode(ioread32(pte), is_present, is_local); +} + /* * Binds an object into the global gtt with the specified cache level. * The object will be accessible to the GPU via commands whose operands @@ -770,6 +803,14 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm, vm->clear_range(vm, vma_res->start, vma_res->vma_size); } +dma_addr_t intel_ggtt_read_entry(struct i915_address_space *vm, + u64 offset, bool *is_present, bool *is_local) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + + return ggtt->vm.read_entry(vm, offset, is_present, is_local); +} + /* * Reserve the top of the GuC address space for firmware images. Addresses * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC, @@ -1246,6 +1287,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.scratch_range = gen8_ggtt_clear_range; ggtt->vm.insert_entries = gen8_ggtt_insert_entries; + ggtt->vm.read_entry = gen8_ggtt_read_entry; /* * Serialize GTT updates with aperture access on BXT if VT-d is on, @@ -1292,6 +1334,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) else ggtt->vm.pte_encode = gen8_ggtt_pte_encode; + ggtt->vm.pte_decode = gen8_ggtt_pte_decode; + return ggtt_probe_common(ggtt, size); } @@ -1391,6 +1435,14 @@ static u64 iris_pte_encode(dma_addr_t addr, return pte; } +static dma_addr_t gen6_pte_decode(u64 pte, bool *is_present, bool *is_local) +{ + *is_present = pte & GEN6_PTE_VALID; + *is_local = false; + + return ((pte & 0xff0) << 28) | (pte & ~0xfff); +} + static int gen6_gmch_probe(struct i915_ggtt *ggtt) { struct drm_i915_private *i915 = ggtt->vm.i915; @@ -1429,6 +1481,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.scratch_range = gen6_ggtt_clear_range; ggtt->vm.insert_page = gen6_ggtt_insert_page; ggtt->vm.insert_entries = gen6_ggtt_insert_entries; + ggtt->vm.read_entry = gen6_ggtt_read_entry; ggtt->vm.cleanup = gen6_gmch_remove; ggtt->invalidate = gen6_ggtt_invalidate; @@ -1444,6 +1497,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) else ggtt->vm.pte_encode = snb_pte_encode; + ggtt->vm.pte_decode = gen6_pte_decode; + ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; @@ -1549,6 +1604,7 @@ int i915_ggtt_enable_hw(struct drm_i915_private *i915) /** * i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM * @vm: The VM to restore the mappings for + * @all_evicted: Were all VMAs expected to be evicted on suspend? * * Restore the memory mappings for all objects mapped to HW via the GGTT or a * DPT page table. @@ -1556,13 +1612,18 @@ int i915_ggtt_enable_hw(struct drm_i915_private *i915) * Returns %true if restoring the mapping for any object that was in a write * domain before suspend. */ -bool i915_ggtt_resume_vm(struct i915_address_space *vm) +bool i915_ggtt_resume_vm(struct i915_address_space *vm, bool all_evicted) { struct i915_vma *vma; bool write_domain_objs = false; drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt); + if (all_evicted) { + drm_WARN_ON(&vm->i915->drm, !list_empty(&vm->bound_list)); + return false; + } + /* First fill our portion of the GTT with scratch pages */ vm->clear_range(vm, 0, vm->total); @@ -1602,7 +1663,7 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) intel_gt_check_and_clear_faults(gt); - flush = i915_ggtt_resume_vm(&ggtt->vm); + flush = i915_ggtt_resume_vm(&ggtt->vm, false); if (drm_mm_node_allocated(&ggtt->error_capture)) ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start, diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index 40371b8a9bbb..0c723e7c71a2 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -298,6 +298,7 @@ void i915_vma_revoke_fence(struct i915_vma *vma) return; GEM_BUG_ON(fence->vma != vma); + i915_active_wait(&fence->active); GEM_BUG_ON(!i915_active_is_idle(&fence->active)); GEM_BUG_ON(atomic_read(&fence->pin_count)); @@ -327,6 +328,7 @@ static bool fence_is_active(const struct i915_fence_reg *fence) static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt) { + struct intel_display *display = &ggtt->vm.i915->display; struct i915_fence_reg *active = NULL; struct i915_fence_reg *fence, *fn; @@ -352,7 +354,7 @@ static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt) } /* Wait for completion of pending flips which consume fences */ - if (intel_has_pending_fb_unpin(ggtt->vm.i915)) + if (intel_has_pending_fb_unpin(display)) return ERR_PTR(-EAGAIN); return ERR_PTR(-ENOBUFS); @@ -417,7 +419,6 @@ out_unpin: * For an untiled surface, this removes any existing fence. * * Returns: - * * 0 on success, negative error code on failure. */ int i915_vma_pin_fence(struct i915_vma *vma) @@ -749,7 +750,7 @@ static void swizzle_page(struct page *page) char *vaddr; int i; - vaddr = kmap(page); + vaddr = kmap_local_page(page); for (i = 0; i < PAGE_SIZE; i += 128) { memcpy(temp, &vaddr[i], 64); @@ -757,7 +758,7 @@ static void swizzle_page(struct page *page) memcpy(&vaddr[i + 64], temp, 64); } - kunmap(page); + kunmap_local(vaddr); } /** diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c index 866c416afb73..c5f5f0bdfb2c 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c @@ -5,7 +5,7 @@ #include "intel_ggtt_gmch.h" -#include <drm/intel-gtt.h> +#include <drm/intel/intel-gtt.h> #include <linux/agp_backend.h> @@ -27,6 +27,13 @@ static void gmch_ggtt_insert_page(struct i915_address_space *vm, intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); } +static dma_addr_t gmch_ggtt_read_entry(struct i915_address_space *vm, + u64 offset, bool *is_present, bool *is_local) +{ + return intel_gmch_gtt_read_entry(offset >> PAGE_SHIFT, + is_present, is_local); +} + static void gmch_ggtt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, unsigned int pat_index, @@ -103,6 +110,7 @@ int intel_ggtt_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.insert_entries = gmch_ggtt_insert_entries; ggtt->vm.clear_range = gmch_ggtt_clear_range; ggtt->vm.scratch_range = gmch_ggtt_clear_range; + ggtt->vm.read_entry = gmch_ggtt_read_entry; ggtt->vm.cleanup = gmch_ggtt_remove; ggtt->invalidate = gmch_ggtt_invalidate; diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 2bd8d98d2110..5394bc7d4daf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -220,6 +220,7 @@ #define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1) #define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) #define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) +#define CMD_3DSTATE_MESH_CONTROL ((0x3 << 29) | (0x3 << 27) | (0x0 << 24) | (0x77 << 16) | (0x3)) #define XY_CTRL_SURF_INSTR_SIZE 5 #define MI_FLUSH_DW_SIZE 3 diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c index 6d440de8ba01..c43febc862dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.c +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -103,19 +103,6 @@ static const struct gsc_def gsc_def_dg1[] = { } }; -static const struct gsc_def gsc_def_xehpsdv[] = { - { - /* HECI1 not enabled on the device. */ - }, - { - .name = "mei-gscfi", - .bar = DG1_GSC_HECI2_BASE, - .bar_size = GSC_BAR_LENGTH, - .use_polling = true, - .slow_firmware = true, - } -}; - static const struct gsc_def gsc_def_dg2[] = { { .name = "mei-gsc", @@ -188,8 +175,6 @@ static void gsc_init_one(struct drm_i915_private *i915, struct intel_gsc *gsc, if (IS_DG1(i915)) { def = &gsc_def_dg1[intf_id]; - } else if (IS_XEHPSDV(i915)) { - def = &gsc_def_xehpsdv[intf_id]; } else if (IS_DG2(i915)) { def = &gsc_def_dg2[intf_id]; } else { @@ -299,7 +284,7 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id) if (gt->gsc.intf[intf_id].irq < 0) return; - ret = generic_handle_irq(gt->gsc.intf[intf_id].irq); + ret = generic_handle_irq_safe(gt->gsc.intf[intf_id].irq); if (ret) gt_err_ratelimited(gt, "error handling GSC irq: %d\n", ret); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index a425db5ed3a2..3d3b1ba76e2b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -4,7 +4,7 @@ */ #include <drm/drm_managed.h> -#include <drm/intel-gtt.h> +#include <drm/intel/intel-gtt.h> #include "gem/i915_gem_internal.h" #include "gem/i915_gem_lmem.h" @@ -185,7 +185,7 @@ int intel_gt_init_hw(struct intel_gt *gt) if (IS_HASWELL(i915)) intel_uncore_write(uncore, HSW_MI_PREDICATE_RESULT_2, - IS_HASWELL_GT3(i915) ? + INTEL_INFO(i915)->gt == 3 ? LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); /* Apply the GT workarounds... */ @@ -278,7 +278,7 @@ intel_gt_clear_error_registers(struct intel_gt *gt, intel_uncore_posting_read(uncore, XELPMP_RING_FAULT_REG); - } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG, RING_FAULT_VALID, 0); intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG); @@ -302,25 +302,48 @@ static void gen6_check_faults(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - u32 fault; for_each_engine(engine, gt, id) { + u32 fault; + fault = GEN6_RING_FAULT_REG_READ(engine); + if (fault & RING_FAULT_VALID) { gt_dbg(gt, "Unexpected fault\n" - "\tAddr: 0x%08lx\n" + "\tAddr: 0x%08x\n" "\tAddress space: %s\n" "\tSource ID: %d\n" "\tType: %d\n", - fault & PAGE_MASK, + fault & RING_FAULT_VADDR_MASK, fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); + REG_FIELD_GET(RING_FAULT_SRCID_MASK, fault), + REG_FIELD_GET(RING_FAULT_FAULT_TYPE_MASK, fault)); } } } +static void gen8_report_fault(struct intel_gt *gt, u32 fault, + u32 fault_data0, u32 fault_data1) +{ + u64 fault_addr; + + fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | + ((u64)fault_data0 << 12); + + gt_dbg(gt, "Unexpected fault\n" + "\tAddr: 0x%08x_%08x\n" + "\tAddress space: %s\n" + "\tEngine ID: %d\n" + "\tSource ID: %d\n" + "\tType: %d\n", + upper_32_bits(fault_addr), lower_32_bits(fault_addr), + fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", + REG_FIELD_GET(RING_FAULT_ENGINE_ID_MASK, fault), + REG_FIELD_GET(RING_FAULT_SRCID_MASK, fault), + REG_FIELD_GET(RING_FAULT_FAULT_TYPE_MASK, fault)); +} + static void xehp_check_faults(struct intel_gt *gt) { u32 fault; @@ -333,28 +356,10 @@ static void xehp_check_faults(struct intel_gt *gt) * toward the primary instance. */ fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG); - if (fault & RING_FAULT_VALID) { - u32 fault_data0, fault_data1; - u64 fault_addr; - - fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0); - fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1); - - fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | - ((u64)fault_data0 << 12); - - gt_dbg(gt, "Unexpected fault\n" - "\tAddr: 0x%08x_%08x\n" - "\tAddress space: %s\n" - "\tEngine ID: %d\n" - "\tSource ID: %d\n" - "\tType: %d\n", - upper_32_bits(fault_addr), lower_32_bits(fault_addr), - fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", - GEN8_RING_FAULT_ENGINE_ID(fault), - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); - } + if (fault & RING_FAULT_VALID) + gen8_report_fault(gt, fault, + intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0), + intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1)); } static void gen8_check_faults(struct intel_gt *gt) @@ -374,28 +379,10 @@ static void gen8_check_faults(struct intel_gt *gt) } fault = intel_uncore_read(uncore, fault_reg); - if (fault & RING_FAULT_VALID) { - u32 fault_data0, fault_data1; - u64 fault_addr; - - fault_data0 = intel_uncore_read(uncore, fault_data0_reg); - fault_data1 = intel_uncore_read(uncore, fault_data1_reg); - - fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | - ((u64)fault_data0 << 12); - - gt_dbg(gt, "Unexpected fault\n" - "\tAddr: 0x%08x_%08x\n" - "\tAddress space: %s\n" - "\tEngine ID: %d\n" - "\tSource ID: %d\n" - "\tType: %d\n", - upper_32_bits(fault_addr), lower_32_bits(fault_addr), - fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", - GEN8_RING_FAULT_ENGINE_ID(fault), - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); - } + if (fault & RING_FAULT_VALID) + gen8_report_fault(gt, fault, + intel_uncore_read(uncore, fault_data0_reg), + intel_uncore_read(uncore, fault_data1_reg)); } void intel_gt_check_and_clear_faults(struct intel_gt *gt) @@ -403,7 +390,7 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt) struct drm_i915_private *i915 = gt->i915; /* From GEN8 onwards we only have one 'All Engine Fault Register' */ - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) xehp_check_faults(gt); else if (GRAPHICS_VER(i915) >= 8) gen8_check_faults(gt); @@ -832,7 +819,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt) /* Scrub all HW state upon release */ with_intel_runtime_pm(gt->uncore->rpm, wakeref) - __intel_gt_reset(gt, ALL_ENGINES); + intel_gt_reset_all_engines(gt); } void intel_gt_driver_release(struct intel_gt *gt) @@ -1024,6 +1011,12 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt, return I915_MAP_WC; } +bool intel_gt_needs_wa_16018031267(struct intel_gt *gt) +{ + /* Wa_16018031267, Wa_16018063123 */ + return IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 55), IP_VER(12, 71)); +} + bool intel_gt_needs_wa_22016122933(struct intel_gt *gt) { return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 608f5c872928..998ca029b73a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -82,17 +82,18 @@ struct drm_printer; ##__VA_ARGS__); \ } while (0) -#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \ - IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \ - engine->class == COPY_ENGINE_CLASS && engine->instance == 0) - static inline bool gt_is_root(struct intel_gt *gt) { return !gt->info.id; } +bool intel_gt_needs_wa_16018031267(struct intel_gt *gt); bool intel_gt_needs_wa_22016122933(struct intel_gt *gt); +#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \ + intel_gt_needs_wa_16018031267(engine->gt) && \ + engine->class == COPY_ENGINE_CLASS && engine->instance == 0) + static inline struct intel_gt *uc_to_gt(struct intel_uc *uc) { return container_of(uc, struct intel_gt, uc); @@ -123,6 +124,11 @@ static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc) return guc_to_gt(guc)->i915; } +static inline struct intel_guc *gt_to_guc(struct intel_gt *gt) +{ + return >->uc.guc; +} + void intel_gt_common_init_early(struct intel_gt *gt); int intel_root_gt_init_early(struct drm_i915_private *i915); int intel_gt_assign_ggtt(struct intel_gt *gt); @@ -168,7 +174,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt) int intel_gt_probe_all(struct drm_i915_private *i915); int intel_gt_tiles_init(struct drm_i915_private *i915); -void intel_gt_release_all(struct drm_i915_private *i915); #define for_each_gt(gt__, i915__, id__) \ for ((id__) = 0; \ @@ -202,4 +207,10 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt, void intel_gt_bind_context_set_ready(struct intel_gt *gt); void intel_gt_bind_context_set_unready(struct intel_gt *gt); bool intel_gt_is_bind_context_ready(struct intel_gt *gt); + +static inline void intel_gt_set_wedged_async(struct intel_gt *gt) +{ + queue_work(system_highpri_wq, >->wedge); +} + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c new file mode 100644 index 000000000000..3c62a44e9106 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_ccs_mode.h" +#include "intel_gt_regs.h" + +unsigned int intel_gt_apply_ccs_mode(struct intel_gt *gt) +{ + int cslice; + u32 mode = 0; + int first_ccs = __ffs(CCS_MASK(gt)); + + if (!IS_DG2(gt->i915)) + return 0; + + /* Build the value for the fixed CCS load balancing */ + for (cslice = 0; cslice < I915_MAX_CCS; cslice++) { + if (gt->ccs.cslices & BIT(cslice)) + /* + * If available, assign the cslice + * to the first available engine... + */ + mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs); + + else + /* + * ... otherwise, mark the cslice as + * unavailable if no CCS dispatches here + */ + mode |= XEHP_CCS_MODE_CSLICE(cslice, + XEHP_CCS_MODE_CSLICE_MASK); + } + + return mode; +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h new file mode 100644 index 000000000000..55547f2ff426 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef __INTEL_GT_CCS_MODE_H__ +#define __INTEL_GT_CCS_MODE_H__ + +struct intel_gt; + +unsigned int intel_gt_apply_ccs_mode(struct intel_gt *gt); + +#endif /* __INTEL_GT_CCS_MODE_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 7c9be4fd1c8c..6c499692d61e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -9,6 +9,7 @@ #include "intel_gt_clock_utils.h" #include "intel_gt_print.h" #include "intel_gt_regs.h" +#include "soc/intel_dram.h" static u32 read_reference_ts_freq(struct intel_uncore *uncore) { @@ -34,9 +35,7 @@ static u32 gen11_get_crystal_clock_freq(struct intel_uncore *uncore, u32 f24_mhz = 24000000; u32 f25_mhz = 25000000; u32 f38_4_mhz = 38400000; - u32 crystal_clock = - (rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> - GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + u32 crystal_clock = rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK; switch (crystal_clock) { case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: @@ -79,8 +78,7 @@ static u32 gen11_read_clock_frequency(struct intel_uncore *uncore) * register increments from this frequency (it might * increment only every few clock cycle). */ - freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> - GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + freq >>= 3 - REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0); } return freq; @@ -101,8 +99,7 @@ static u32 gen9_read_clock_frequency(struct intel_uncore *uncore) * register increments from this frequency (it might * increment only every few clock cycle). */ - freq >>= 3 - ((ctc_reg & CTC_SHIFT_PARAMETER_MASK) >> - CTC_SHIFT_PARAMETER_SHIFT); + freq >>= 3 - REG_FIELD_GET(CTC_SHIFT_PARAMETER_MASK, ctc_reg); } return freq; @@ -151,7 +148,7 @@ static u32 gen4_read_clock_frequency(struct intel_uncore *uncore) * * Testing on actual hardware has shown there is no /16. */ - return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000; + return DIV_ROUND_CLOSEST(i9xx_fsb_freq(uncore->i915), 4) * 1000; } static u32 read_clock_frequency(struct intel_uncore *uncore) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 77fb57223465..75e802e10be2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -68,9 +68,9 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, struct intel_gt *media_gt = gt->i915->media_gt; if (instance == OTHER_GUC_INSTANCE) - return guc_irq_handler(>->uc.guc, iir); + return guc_irq_handler(gt_to_guc(gt), iir); if (instance == OTHER_MEDIA_GUC_INSTANCE && media_gt) - return guc_irq_handler(&media_gt->uc.guc, iir); + return guc_irq_handler(gt_to_guc(media_gt), iir); if (instance == OTHER_GTPM_INSTANCE) return gen11_rps_irq_handler(>->rps, iir); @@ -442,7 +442,7 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl) iir = raw_reg_read(regs, GEN8_GT_IIR(2)); if (likely(iir)) { gen6_rps_irq_handler(>->rps, iir); - guc_irq_handler(>->uc.guc, iir >> 16); + guc_irq_handler(gt_to_guc(gt), iir >> 16); raw_reg_write(regs, GEN8_GT_IIR(2), iir); } } @@ -452,10 +452,10 @@ void gen8_gt_irq_reset(struct intel_gt *gt) { struct intel_uncore *uncore = gt->uncore; - GEN8_IRQ_RESET_NDX(uncore, GT, 0); - GEN8_IRQ_RESET_NDX(uncore, GT, 1); - GEN8_IRQ_RESET_NDX(uncore, GT, 2); - GEN8_IRQ_RESET_NDX(uncore, GT, 3); + gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(0)); + gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(1)); + gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(2)); + gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(3)); } void gen8_gt_irq_postinstall(struct intel_gt *gt) @@ -476,14 +476,14 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt) gt->pm_ier = 0x0; gt->pm_imr = ~gt->pm_ier; - GEN8_IRQ_INIT_NDX(uncore, GT, 0, ~gt_interrupts[0], gt_interrupts[0]); - GEN8_IRQ_INIT_NDX(uncore, GT, 1, ~gt_interrupts[1], gt_interrupts[1]); + gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(0), ~gt_interrupts[0], gt_interrupts[0]); + gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(1), ~gt_interrupts[1], gt_interrupts[1]); /* * RPS interrupts will get enabled/disabled on demand when RPS itself - * is enabled/disabled. Same wil be the case for GuC interrupts. + * is enabled/disabled. Same will be the case for GuC interrupts. */ - GEN8_IRQ_INIT_NDX(uncore, GT, 2, gt->pm_imr, gt->pm_ier); - GEN8_IRQ_INIT_NDX(uncore, GT, 3, ~gt_interrupts[3], gt_interrupts[3]); + gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(2), gt->pm_imr, gt->pm_ier); + gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(3), ~gt_interrupts[3], gt_interrupts[3]); } static void gen5_gt_update_irq(struct intel_gt *gt, @@ -514,9 +514,9 @@ void gen5_gt_irq_reset(struct intel_gt *gt) { struct intel_uncore *uncore = gt->uncore; - GEN3_IRQ_RESET(uncore, GT); + gen2_irq_reset(uncore, GT_IRQ_REGS); if (GRAPHICS_VER(gt->i915) >= 6) - GEN3_IRQ_RESET(uncore, GEN6_PM); + gen2_irq_reset(uncore, GEN6_PM_IRQ_REGS); } void gen5_gt_irq_postinstall(struct intel_gt *gt) @@ -538,7 +538,7 @@ void gen5_gt_irq_postinstall(struct intel_gt *gt) else gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT; - GEN3_IRQ_INIT(uncore, GT, gt->gt_imr, gt_irqs); + gen2_irq_init(uncore, GT_IRQ_REGS, gt->gt_imr, gt_irqs); if (GRAPHICS_VER(gt->i915) >= 6) { /* @@ -551,6 +551,6 @@ void gen5_gt_irq_postinstall(struct intel_gt *gt) } gt->pm_imr = 0xffffffff; - GEN3_IRQ_INIT(uncore, GEN6_PM, gt->pm_imr, pm_irqs); + gen2_irq_init(uncore, GEN6_PM_IRQ_REGS, gt->pm_imr, pm_irqs); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index e253750a51c5..a60822e2b5d4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -57,51 +57,18 @@ static const struct intel_mmio_range icl_l3bank_steering_table[] = { * are of a "GAM" subclass that has special rules. Thus we use a separate * GAM table farther down for those. */ -static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = { +static const struct intel_mmio_range dg2_mslice_steering_table[] = { { 0x00DD00, 0x00DDFF }, { 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */ {}, }; -static const struct intel_mmio_range xehpsdv_gam_steering_table[] = { - { 0x004000, 0x004AFF }, - { 0x00C800, 0x00CFFF }, - {}, -}; - -static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = { - { 0x00B000, 0x00B0FF }, - { 0x00D800, 0x00D8FF }, - {}, -}; - static const struct intel_mmio_range dg2_lncf_steering_table[] = { { 0x00B000, 0x00B0FF }, { 0x00D880, 0x00D8FF }, {}, }; -/* - * We have several types of MCR registers on PVC where steering to (0,0) - * will always provide us with a non-terminated value. We'll stick them - * all in the same table for simplicity. - */ -static const struct intel_mmio_range pvc_instance0_steering_table[] = { - { 0x004000, 0x004AFF }, /* HALF-BSLICE */ - { 0x008800, 0x00887F }, /* CC */ - { 0x008A80, 0x008AFF }, /* TILEPSMI */ - { 0x00B000, 0x00B0FF }, /* HALF-BSLICE */ - { 0x00B100, 0x00B3FF }, /* L3BANK */ - { 0x00C800, 0x00CFFF }, /* HALF-BSLICE */ - { 0x00D800, 0x00D8FF }, /* HALF-BSLICE */ - { 0x00DD00, 0x00DDFF }, /* BSLICE */ - { 0x00E900, 0x00E9FF }, /* HALF-BSLICE */ - { 0x00EC00, 0x00EEFF }, /* HALF-BSLICE */ - { 0x00F000, 0x00FFFF }, /* HALF-BSLICE */ - { 0x024180, 0x0241FF }, /* HALF-BSLICE */ - {}, -}; - static const struct intel_mmio_range xelpg_instance0_steering_table[] = { { 0x000B00, 0x000BFF }, /* SQIDI */ { 0x001000, 0x001FFF }, /* SQIDI */ @@ -154,9 +121,8 @@ void intel_gt_mcr_init(struct intel_gt *gt) gt->info.mslice_mask = intel_slicemask_from_xehp_dssmask(gt->info.sseu.subslice_mask, GEN_DSS_PER_MSLICE); - gt->info.mslice_mask |= - (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & - GEN12_MEML3_EN_MASK); + gt->info.mslice_mask |= REG_FIELD_GET(GEN12_MEML3_EN_MASK, + intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3)); if (!gt->info.mslice_mask) /* should be impossible! */ gt_warn(gt, "mslice mask all zero!\n"); @@ -185,22 +151,16 @@ void intel_gt_mcr_init(struct intel_gt *gt) gt->steering_table[INSTANCE0] = xelpg_instance0_steering_table; gt->steering_table[L3BANK] = xelpg_l3bank_steering_table; gt->steering_table[DSS] = xelpg_dss_steering_table; - } else if (IS_PONTEVECCHIO(i915)) { - gt->steering_table[INSTANCE0] = pvc_instance0_steering_table; } else if (IS_DG2(i915)) { - gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; + gt->steering_table[MSLICE] = dg2_mslice_steering_table; gt->steering_table[LNCF] = dg2_lncf_steering_table; /* * No need to hook up the GAM table since it has a dedicated * steering control register on DG2 and can use implicit * steering. */ - } else if (IS_XEHPSDV(i915)) { - gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; - gt->steering_table[LNCF] = xehpsdv_lncf_steering_table; - gt->steering_table[GAM] = xehpsdv_gam_steering_table; } else if (GRAPHICS_VER(i915) >= 11 && - GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) { + GRAPHICS_VER_FULL(i915) < IP_VER(12, 55)) { gt->steering_table[L3BANK] = icl_l3bank_steering_table; gt->info.l3bank_mask = ~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & @@ -278,7 +238,7 @@ static u32 rw_with_mcr_steering_fw(struct intel_gt *gt, * to remain in multicast mode for reads. There's no real * downside to this, so we'll just go ahead and do so on all * platforms; we'll only clear the multicast bit from the mask - * when exlicitly doing a write operation. + * when explicitly doing a write operation. */ if (rw_flag == FW_REG_WRITE) mcr_mask |= GEN11_MCR_MULTICAST; @@ -821,8 +781,6 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, for (int i = 0; i < NUM_STEERING_TYPES; i++) if (gt->steering_table[i]) report_steering_type(p, gt, i, dump_table); - } else if (IS_PONTEVECCHIO(gt->i915)) { - report_steering_type(p, gt, INSTANCE0, dump_table); } else if (HAS_MSLICE_STEERING(gt->i915)) { report_steering_type(p, gt, MSLICE, dump_table); report_steering_type(p, gt, LNCF, dump_table); @@ -842,10 +800,7 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, unsigned int *group, unsigned int *instance) { - if (IS_PONTEVECCHIO(gt->i915)) { - *group = dss / GEN_DSS_PER_CSLICE; - *instance = dss % GEN_DSS_PER_CSLICE; - } else if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) { + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 55)) { *group = dss / GEN_DSS_PER_GSLICE; *instance = dss % GEN_DSS_PER_GSLICE; } else { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h index 01ac565a56a4..a67a4c35a4fa 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h @@ -54,7 +54,7 @@ int intel_gt_mcr_wait_for_reg(struct intel_gt *gt, * the topology, so we lookup the DSS ID directly in "slice 0." */ #define _HAS_SS(ss_, gt_, group_, instance_) ( \ - GRAPHICS_VER_FULL(gt_->i915) >= IP_VER(12, 50) ? \ + GRAPHICS_VER_FULL(gt_->i915) >= IP_VER(12, 55) ? \ intel_sseu_has_subslice(&(gt_)->info.sseu, 0, ss_) : \ intel_sseu_has_subslice(&(gt_)->info.sseu, group_, instance_)) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 220ac4f92edf..3182f19b9837 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -70,6 +70,7 @@ static int __gt_unpark(struct intel_wakeref *wf) { struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); struct drm_i915_private *i915 = gt->i915; + struct intel_display *display = &i915->display; GT_TRACE(gt, "\n"); @@ -84,7 +85,7 @@ static int __gt_unpark(struct intel_wakeref *wf) * Work around it by grabbing a GT IRQ power domain whilst there is any * GT activity, preventing any DC state transitions. */ - gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + gt->awake = intel_display_power_get(display, POWER_DOMAIN_GT_IRQ); GEM_BUG_ON(!gt->awake); intel_rc6_unpark(>->rc6); @@ -103,6 +104,7 @@ static int __gt_park(struct intel_wakeref *wf) struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); intel_wakeref_t wakeref = fetch_and_zero(>->awake); struct drm_i915_private *i915 = gt->i915; + struct intel_display *display = &i915->display; GT_TRACE(gt, "\n"); @@ -120,7 +122,7 @@ static int __gt_park(struct intel_wakeref *wf) /* Defer dropping the display power well for 100ms, it's slow! */ GEM_BUG_ON(!wakeref); - intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref); + intel_display_power_put_async(display, POWER_DOMAIN_GT_IRQ, wakeref); return 0; } @@ -156,10 +158,10 @@ void intel_gt_pm_init(struct intel_gt *gt) static bool reset_engines(struct intel_gt *gt) { - if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + if (intel_gt_gpu_reset_clobbers_display(gt)) return false; - return __intel_gt_reset(gt, ALL_ENGINES) == 0; + return intel_gt_reset_all_engines(gt) == 0; } static void gt_sanitize(struct intel_gt *gt, bool force) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 911fd0160221..6f25c747bc29 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -35,7 +35,7 @@ static inline void __intel_gt_pm_get(struct intel_gt *gt) static inline intel_wakeref_t intel_gt_pm_get_if_awake(struct intel_gt *gt) { if (!intel_wakeref_get_if_active(>->wakeref)) - return 0; + return NULL; return intel_wakeref_track(>->wakeref); } @@ -73,7 +73,7 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t ha } #define with_intel_gt_pm(gt, wf) \ - for (wf = intel_gt_pm_get(gt); wf; intel_gt_pm_put(gt, wf), wf = 0) + for ((wf) = intel_gt_pm_get(gt); (wf); intel_gt_pm_put((gt), (wf)), (wf) = NULL) /** * with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent @@ -84,7 +84,7 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t ha * @wf: pointer to a temporary wakeref. */ #define with_intel_gt_pm_if_awake(gt, wf) \ - for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt, wf), wf = 0) + for ((wf) = intel_gt_pm_get_if_awake(gt); (wf); intel_gt_pm_put_async((gt), (wf)), (wf) = NULL) static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { @@ -105,9 +105,13 @@ int intel_gt_runtime_resume(struct intel_gt *gt); ktime_t intel_gt_get_awake_time(const struct intel_gt *gt); +#define INTEL_WAKEREF_MOCK_GT ERR_PTR(-ENODEV) + static inline bool is_mock_gt(const struct intel_gt *gt) { - return I915_SELFTEST_ONLY(gt->awake == -ENODEV); + BUILD_BUG_ON(INTEL_WAKEREF_DEF == INTEL_WAKEREF_MOCK_GT); + + return I915_SELFTEST_ONLY(gt->awake == INTEL_WAKEREF_MOCK_GT); } #endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 7114c116e928..b635aa2820d9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -71,6 +71,8 @@ static int fw_domains_show(struct seq_file *m, void *data) struct intel_uncore_forcewake_domain *fw_domain; unsigned int tmp; + spin_lock_irq(&uncore->lock); + seq_printf(m, "user.bypass_count = %u\n", uncore->user_forcewake_count); @@ -79,6 +81,8 @@ static int fw_domains_show(struct seq_file *m, void *data) intel_uncore_forcewake_domain_to_str(fw_domain->id), READ_ONCE(fw_domain->wake_count)); + spin_unlock_irq(&uncore->lock); + return 0; } DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains); @@ -367,7 +371,6 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p) vlv_punit_put(i915); drm_printf(p, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); - drm_printf(p, "DDR freq: %d MHz\n", i915->mem_freq); drm_printf(p, "actual GPU freq: %d MHz\n", intel_gpu_freq(rps, (freq_sts >> 8) & 0xff)); @@ -392,10 +395,6 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p) drm_puts(p, "no P-state info available\n"); } - drm_printf(p, "Current CD clock frequency: %d kHz\n", i915->display.cdclk.hw.cdclk); - drm_printf(p, "Max CD clock frequency: %d kHz\n", i915->display.cdclk.max_cdclk_freq); - drm_printf(p, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq); - intel_runtime_pm_put(uncore->rpm, wakeref); } @@ -432,7 +431,7 @@ static int llc_show(struct seq_file *m, void *data) max_gpu_freq /= GEN9_FREQ_SCALER; } - seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); + seq_puts(m, "GPU freq (MHz)\tEffective GPU freq (MHz)\tEffective Ring freq (MHz)\n"); wakeref = intel_runtime_pm_get(gt->uncore->rpm); for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { @@ -538,7 +537,7 @@ static bool rps_eval(void *data) { struct intel_gt *gt = data; - if (intel_guc_slpc_is_used(>->uc.guc)) + if (intel_guc_slpc_is_used(gt_to_guc(gt))) return false; else return HAS_RPS(gt->i915); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 50962cfd1353..7421ed18d8d1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -30,18 +30,15 @@ /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 _MMIO(0xd00) -#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 -#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) -#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 0 -#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 1 -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (0x7 << GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 0 -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 1 -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ 2 -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ 3 -#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1 -#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT) +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK REG_GENMASK(5, 3) +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 0) +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 1) +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 2) +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 3) +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK REG_BIT(3) +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ REG_FIELD_PREP(GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 0) +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ REG_FIELD_PREP(GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 1) +#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) #define RPM_CONFIG1 _MMIO(0xd04) #define GEN10_GT_NOA_ENABLE (1 << 9) @@ -326,6 +323,12 @@ _RING_FAULT_REG_VCS, \ _RING_FAULT_REG_VECS, \ _RING_FAULT_REG_BCS)) +#define RING_FAULT_VADDR_MASK REG_GENMASK(31, 12) /* pre-bdw */ +#define RING_FAULT_ENGINE_ID_MASK REG_GENMASK(16, 12) /* bdw+ */ +#define RING_FAULT_GTTSEL_MASK REG_BIT(11) /* pre-bdw */ +#define RING_FAULT_SRCID_MASK REG_GENMASK(10, 3) +#define RING_FAULT_FAULT_TYPE_MASK REG_GENMASK(2, 1) /* ivb+ */ +#define RING_FAULT_VALID REG_BIT(0) #define ERROR_GEN6 _MMIO(0x40a0) @@ -385,6 +388,8 @@ #define GEN8_FAULT_TLB_DATA0 _MMIO(0x4b10) #define GEN8_FAULT_TLB_DATA1 _MMIO(0x4b14) +#define FAULT_GTT_SEL REG_BIT(4) +#define FAULT_VA_HIGH_BITS REG_GENMASK(3, 0) #define GEN11_GACB_PERF_CTRL _MMIO(0x4b80) #define GEN11_HASH_CTRL_MASK (0x3 << 12 | 0xf << 0) @@ -409,6 +414,9 @@ #define GEN7_SO_PRIM_STORAGE_NEEDED(n) _MMIO(0x5240 + (n) * 8) #define GEN7_SO_PRIM_STORAGE_NEEDED_UDW(n) _MMIO(0x5240 + (n) * 8 + 4) +#define GEN8_WM_CHICKEN2 MCR_REG(0x5584) +#define WAIT_ON_DEPTH_STALL_DONE_DISABLE REG_BIT(5) + #define GEN9_WM_CHICKEN3 _MMIO(0x5588) #define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) @@ -432,6 +440,7 @@ #define XEHPG_INSTDONE_GEOM_SVG MCR_REG(0x666c) #define CACHE_MODE_0_GEN7 _MMIO(0x7000) /* IVB+ */ +#define DISABLE_REPACKING_FOR_COMPRESSION REG_BIT(15) /* jsl+ */ #define RC_OP_FLUSH_ENABLE (1 << 0) #define HIZ_RAW_STALL_OPT_DISABLE (1 << 2) #define CACHE_MODE_1 _MMIO(0x7004) /* IVB+ */ @@ -503,11 +512,12 @@ #define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11) #define GEN9_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + (slice) * 0x4) +#define GEN9_PGCTL_SS_ACK(subslice) REG_BIT(2 + (subslice) * 2) +#define GEN9_PGCTL_SLICE_ACK REG_BIT(0) + #define GEN10_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + ((slice) / 3) * 0x34 + \ ((slice) % 3) * 0x4) -#define GEN9_PGCTL_SLICE_ACK (1 << 0) -#define GEN9_PGCTL_SS_ACK(subslice) (1 << (2 + (subslice) * 2)) -#define GEN10_PGCTL_VALID_SS_MASK(slice) ((slice) == 0 ? 0x7F : 0x1F) +#define GEN10_PGCTL_VALID_SS_MASK(slice) ((slice) == 0 ? REG_GENMASK(6, 0) : REG_GENMASK(4, 0)) #define GEN9_SS01_EU_PGCTL_ACK(slice) _MMIO(0x805c + (slice) * 0x8) #define GEN10_SS01_EU_PGCTL_ACK(slice) _MMIO(0x805c + ((slice) / 3) * 0x30 + \ @@ -515,14 +525,14 @@ #define GEN9_SS23_EU_PGCTL_ACK(slice) _MMIO(0x8060 + (slice) * 0x8) #define GEN10_SS23_EU_PGCTL_ACK(slice) _MMIO(0x8060 + ((slice) / 3) * 0x30 + \ ((slice) % 3) * 0x8) -#define GEN9_PGCTL_SSA_EU08_ACK (1 << 0) -#define GEN9_PGCTL_SSA_EU19_ACK (1 << 2) -#define GEN9_PGCTL_SSA_EU210_ACK (1 << 4) -#define GEN9_PGCTL_SSA_EU311_ACK (1 << 6) -#define GEN9_PGCTL_SSB_EU08_ACK (1 << 8) -#define GEN9_PGCTL_SSB_EU19_ACK (1 << 10) -#define GEN9_PGCTL_SSB_EU210_ACK (1 << 12) -#define GEN9_PGCTL_SSB_EU311_ACK (1 << 14) +#define GEN9_PGCTL_SSB_EU311_ACK REG_BIT(14) +#define GEN9_PGCTL_SSB_EU210_ACK REG_BIT(12) +#define GEN9_PGCTL_SSB_EU19_ACK REG_BIT(10) +#define GEN9_PGCTL_SSB_EU08_ACK REG_BIT(8) +#define GEN9_PGCTL_SSA_EU311_ACK REG_BIT(6) +#define GEN9_PGCTL_SSA_EU210_ACK REG_BIT(4) +#define GEN9_PGCTL_SSA_EU19_ACK REG_BIT(2) +#define GEN9_PGCTL_SSA_EU08_ACK REG_BIT(0) #define VF_PREEMPTION _MMIO(0x83a4) #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) @@ -579,7 +589,7 @@ #define GEN10_L3BANK_MASK 0x0F /* on Xe_HP the same fuses indicates mslices instead of L3 banks */ #define GEN12_MAX_MSLICES 4 -#define GEN12_MEML3_EN_MASK 0x0F +#define GEN12_MEML3_EN_MASK REG_GENMASK(3, 0) #define HSW_PAVP_FUSE1 _MMIO(0x911c) #define XEHP_SFC_ENABLE_MASK REG_GENMASK(27, 24) @@ -589,37 +599,30 @@ #define HSW_F1_EU_DIS_6EUS 2 #define GEN8_FUSE2 _MMIO(0x9120) -#define GEN8_F2_SS_DIS_SHIFT 21 -#define GEN8_F2_SS_DIS_MASK (0x7 << GEN8_F2_SS_DIS_SHIFT) -#define GEN8_F2_S_ENA_SHIFT 25 -#define GEN8_F2_S_ENA_MASK (0x7 << GEN8_F2_S_ENA_SHIFT) -#define GEN9_F2_SS_DIS_SHIFT 20 -#define GEN9_F2_SS_DIS_MASK (0xf << GEN9_F2_SS_DIS_SHIFT) -#define GEN10_F2_S_ENA_SHIFT 22 -#define GEN10_F2_S_ENA_MASK (0x3f << GEN10_F2_S_ENA_SHIFT) -#define GEN10_F2_SS_DIS_SHIFT 18 -#define GEN10_F2_SS_DIS_MASK (0xf << GEN10_F2_SS_DIS_SHIFT) +#define GEN10_F2_S_ENA_MASK REG_GENMASK(27, 22) +#define GEN10_F2_SS_DIS_MASK REG_GENMASK(21, 18) +#define GEN8_F2_S_ENA_MASK REG_GENMASK(27, 25) +#define GEN9_F2_SS_DIS_MASK REG_GENMASK(23, 20) +#define GEN8_F2_SS_DIS_MASK REG_GENMASK(23, 21) #define GEN8_EU_DISABLE0 _MMIO(0x9134) #define GEN9_EU_DISABLE(slice) _MMIO(0x9134 + (slice) * 0x4) #define GEN11_EU_DISABLE _MMIO(0x9134) -#define GEN8_EU_DIS0_S0_MASK 0xffffff -#define GEN8_EU_DIS0_S1_SHIFT 24 -#define GEN8_EU_DIS0_S1_MASK (0xff << GEN8_EU_DIS0_S1_SHIFT) -#define GEN11_EU_DIS_MASK 0xFF +#define GEN8_EU_DIS0_S1_MASK REG_GENMASK(31, 24) +#define GEN8_EU_DIS0_S0_MASK REG_GENMASK(23, 0) +#define GEN11_EU_DIS_MASK REG_GENMASK(7, 0) #define XEHP_EU_ENABLE _MMIO(0x9134) -#define XEHP_EU_ENA_MASK 0xFF +#define XEHP_EU_ENA_MASK REG_GENMASK(7, 0) #define GEN8_EU_DISABLE1 _MMIO(0x9138) -#define GEN8_EU_DIS1_S1_MASK 0xffff -#define GEN8_EU_DIS1_S2_SHIFT 16 -#define GEN8_EU_DIS1_S2_MASK (0xffff << GEN8_EU_DIS1_S2_SHIFT) +#define GEN8_EU_DIS1_S2_MASK REG_GENMASK(31, 16) +#define GEN8_EU_DIS1_S1_MASK REG_GENMASK(15, 0) #define GEN11_GT_SLICE_ENABLE _MMIO(0x9138) -#define GEN11_GT_S_ENA_MASK 0xFF +#define GEN11_GT_S_ENA_MASK REG_GENMASK(7, 0) #define GEN8_EU_DISABLE2 _MMIO(0x913c) -#define GEN8_EU_DIS2_S2_MASK 0xff +#define GEN8_EU_DIS2_S2_MASK REG_GENMASK(7, 0) #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913c) #define GEN12_GT_GEOMETRY_DSS_ENABLE _MMIO(0x913c) @@ -627,9 +630,8 @@ #define GEN10_EU_DISABLE3 _MMIO(0x9140) #define GEN10_EU_DIS_SS_MASK 0xff #define GEN11_GT_VEBOX_VDBOX_DISABLE _MMIO(0x9140) -#define GEN11_GT_VDBOX_DISABLE_MASK 0xff -#define GEN11_GT_VEBOX_DISABLE_SHIFT 16 -#define GEN11_GT_VEBOX_DISABLE_MASK (0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT) +#define GEN11_GT_VEBOX_DISABLE_MASK REG_GENMASK(19, 16) +#define GEN11_GT_VDBOX_DISABLE_MASK REG_GENMASK(7, 0) #define GEN12_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144) #define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT _MMIO(0x9148) @@ -718,44 +720,11 @@ #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) #define VFUNIT_CLKGATE_DIS REG_BIT(20) -#define TSGUNIT_CLKGATE_DIS REG_BIT(17) /* XEHPSDV */ #define CG3DDISCFEG_CLKGATE_DIS REG_BIT(17) /* DG2 */ #define GAMEDIA_CLKGATE_DIS REG_BIT(11) #define HSUNIT_CLKGATE_DIS REG_BIT(8) #define VSUNIT_CLKGATE_DIS REG_BIT(3) -#define UNSLCGCTL9440 _MMIO(0x9440) -#define GAMTLBOACS_CLKGATE_DIS REG_BIT(28) -#define GAMTLBVDBOX5_CLKGATE_DIS REG_BIT(27) -#define GAMTLBVDBOX6_CLKGATE_DIS REG_BIT(26) -#define GAMTLBVDBOX3_CLKGATE_DIS REG_BIT(24) -#define GAMTLBVDBOX4_CLKGATE_DIS REG_BIT(23) -#define GAMTLBVDBOX7_CLKGATE_DIS REG_BIT(22) -#define GAMTLBVDBOX2_CLKGATE_DIS REG_BIT(21) -#define GAMTLBVDBOX0_CLKGATE_DIS REG_BIT(17) -#define GAMTLBKCR_CLKGATE_DIS REG_BIT(16) -#define GAMTLBGUC_CLKGATE_DIS REG_BIT(15) -#define GAMTLBBLT_CLKGATE_DIS REG_BIT(14) -#define GAMTLBVDBOX1_CLKGATE_DIS REG_BIT(6) - -#define UNSLCGCTL9444 _MMIO(0x9444) -#define GAMTLBGFXA0_CLKGATE_DIS REG_BIT(30) -#define GAMTLBGFXA1_CLKGATE_DIS REG_BIT(29) -#define GAMTLBCOMPA0_CLKGATE_DIS REG_BIT(28) -#define GAMTLBCOMPA1_CLKGATE_DIS REG_BIT(27) -#define GAMTLBCOMPB0_CLKGATE_DIS REG_BIT(26) -#define GAMTLBCOMPB1_CLKGATE_DIS REG_BIT(25) -#define GAMTLBCOMPC0_CLKGATE_DIS REG_BIT(24) -#define GAMTLBCOMPC1_CLKGATE_DIS REG_BIT(23) -#define GAMTLBCOMPD0_CLKGATE_DIS REG_BIT(22) -#define GAMTLBCOMPD1_CLKGATE_DIS REG_BIT(21) -#define GAMTLBMERT_CLKGATE_DIS REG_BIT(20) -#define GAMTLBVEBOX3_CLKGATE_DIS REG_BIT(19) -#define GAMTLBVEBOX2_CLKGATE_DIS REG_BIT(18) -#define GAMTLBVEBOX1_CLKGATE_DIS REG_BIT(17) -#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16) -#define LTCDD_CLKGATE_DIS REG_BIT(10) - #define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) #define XEHP_SLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x94d4) #define SARBUNIT_CLKGATE_DIS (1 << 5) @@ -765,9 +734,6 @@ #define L3_CLKGATE_DIS REG_BIT(16) #define L3_CR2X_CLKGATE_DIS REG_BIT(17) -#define SCCGCTL94DC MCR_REG(0x94dc) -#define CG3DDISURB REG_BIT(14) - #define UNSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x94e4) #define VSUNIT_CLKGATE_DIS_TGL REG_BIT(19) #define PSDUNIT_CLKGATE_DIS REG_BIT(5) @@ -913,11 +879,10 @@ /* GPM unit config (Gen9+) */ #define CTC_MODE _MMIO(0xa26c) -#define CTC_SOURCE_PARAMETER_MASK 1 -#define CTC_SOURCE_CRYSTAL_CLOCK 0 -#define CTC_SOURCE_DIVIDE_LOGIC 1 -#define CTC_SHIFT_PARAMETER_SHIFT 1 -#define CTC_SHIFT_PARAMETER_MASK (0x3 << CTC_SHIFT_PARAMETER_SHIFT) +#define CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) +#define CTC_SOURCE_PARAMETER_MASK REG_BIT(0) +#define CTC_SOURCE_CRYSTAL_CLOCK REG_FIELD_PREP(CTC_SOURCE_PARAMETER_MASK, 0) +#define CTC_SOURCE_DIVIDE_LOGIC REG_FIELD_PREP(CTC_SOURCE_PARAMETER_MASK, 1) /* GPM MSG_IDLE */ #define MSG_IDLE_CS _MMIO(0x8000) @@ -961,12 +926,12 @@ #define CHV_POWER_SS0_SIG1 _MMIO(0xa720) #define CHV_POWER_SS0_SIG2 _MMIO(0xa724) #define CHV_POWER_SS1_SIG1 _MMIO(0xa728) -#define CHV_SS_PG_ENABLE (1 << 1) -#define CHV_EU08_PG_ENABLE (1 << 9) -#define CHV_EU19_PG_ENABLE (1 << 17) -#define CHV_EU210_PG_ENABLE (1 << 25) +#define CHV_EU210_PG_ENABLE REG_BIT(25) +#define CHV_EU19_PG_ENABLE REG_BIT(17) +#define CHV_EU08_PG_ENABLE REG_BIT(9) +#define CHV_SS_PG_ENABLE REG_BIT(1) #define CHV_POWER_SS1_SIG2 _MMIO(0xa72c) -#define CHV_EU311_PG_ENABLE (1 << 1) +#define CHV_EU311_PG_ENABLE REG_BIT(1) #define GEN7_SARCHKMD _MMIO(0xb000) #define GEN7_DISABLE_DEMAND_PREFETCH (1 << 31) @@ -989,10 +954,6 @@ #define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C47FF8C #define GEN7_L3AGDIS (1 << 19) -#define XEHPC_LNCFMISCCFGREG0 MCR_REG(0xb01c) -#define XEHPC_HOSTCACHEEN REG_BIT(1) -#define XEHPC_OVRLSCCC REG_BIT(0) - #define GEN7_L3CNTLREG2 _MMIO(0xb020) /* MOCS (Memory Object Control State) registers */ @@ -1046,20 +1007,9 @@ #define XEHP_L3SQCREG5 MCR_REG(0xb158) #define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) -#define MLTICTXCTL MCR_REG(0xb170) -#define TDONRENDER REG_BIT(2) - #define XEHP_L3SCQREG7 MCR_REG(0xb188) #define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3) -#define XEHPC_L3SCRUB MCR_REG(0xb18c) -#define SCRUB_CL_DWNGRADE_SHARED REG_BIT(12) -#define SCRUB_RATE_PER_BANK_MASK REG_GENMASK(2, 0) -#define SCRUB_RATE_4B_PER_CLK REG_FIELD_PREP(SCRUB_RATE_PER_BANK_MASK, 0x6) - -#define L3SQCREG1_CCS0 MCR_REG(0xb200) -#define FLUSHALLNONCOH REG_BIT(5) - #define GEN11_GLBLINVL _MMIO(0xb404) #define GEN11_BANK_HASH_ADDR_EXCL_MASK (0x7f << 5) #define GEN11_BANK_HASH_ADDR_EXCL_BIT0 (1 << 5) @@ -1085,17 +1035,12 @@ #define XEHP_FAULT_TLB_DATA0 MCR_REG(0xceb8) #define GEN12_FAULT_TLB_DATA1 _MMIO(0xcebc) #define XEHP_FAULT_TLB_DATA1 MCR_REG(0xcebc) -#define FAULT_VA_HIGH_BITS (0xf << 0) -#define FAULT_GTT_SEL (1 << 4) +/* see GEN8_FAULT_TLB_DATA0/1 */ #define GEN12_RING_FAULT_REG _MMIO(0xcec4) #define XEHP_RING_FAULT_REG MCR_REG(0xcec4) #define XELPMP_RING_FAULT_REG _MMIO(0xcec4) -#define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7) -#define RING_FAULT_GTTSEL_MASK (1 << 11) -#define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff) -#define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3) -#define RING_FAULT_VALID (1 << 0) +/* see GEN8_RING_FAULT_REG */ #define GEN12_GFX_TLB_INV_CR _MMIO(0xced8) #define XEHP_GFX_TLB_INV_CR MCR_REG(0xced8) @@ -1109,7 +1054,6 @@ #define XEHP_COMPCTX_TLB_INV_CR MCR_REG(0xcf04) #define XELPMP_GSC_TLB_INV_CR _MMIO(0xcf04) /* media GT only */ -#define XEHP_MERT_MOD_CTRL MCR_REG(0xcf28) #define RENDER_MOD_CTRL MCR_REG(0xcf2c) #define COMP_MOD_CTRL MCR_REG(0xcf30) #define XELPMP_GSC_MOD_CTRL _MMIO(0xcf30) /* media GT only */ @@ -1185,7 +1129,6 @@ #define EU_PERF_CNTL4 PERF_REG(0xe45c) #define GEN9_ROW_CHICKEN4 MCR_REG(0xe48c) -#define GEN12_DISABLE_GRF_CLEAR REG_BIT(13) #define XEHP_DIS_BBL_SYSPIPE REG_BIT(11) #define GEN12_DISABLE_TDL_PUSH REG_BIT(9) #define GEN11_DIS_PICK_2ND_EU REG_BIT(7) @@ -1202,7 +1145,6 @@ #define FLOW_CONTROL_ENABLE REG_BIT(15) #define UGM_BACKUP_MODE REG_BIT(13) #define MDQ_ARBITRATION_MODE REG_BIT(12) -#define SYSTOLIC_DOP_CLOCK_GATING_DIS REG_BIT(10) #define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE REG_BIT(8) #define STALL_DOP_GATING_DISABLE REG_BIT(5) #define THROTTLE_12_5 REG_GENMASK(4, 2) @@ -1215,6 +1157,7 @@ #define GEN12_DISABLE_EARLY_READ REG_BIT(14) #define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12) #define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) +#define XELPG_DISABLE_TDL_SVHS_GATING REG_BIT(1) #define GEN12_DISABLE_DOP_GATING REG_BIT(0) #define RT_CTRL MCR_REG(0xe530) @@ -1477,19 +1420,21 @@ #define ECOBITS_PPGTT_CACHE4B (0 << 8) #define GEN12_RCU_MODE _MMIO(0x14800) +#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1) #define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0) +#define XEHP_CCS_MODE _MMIO(0x14804) +#define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */ +#define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1) +#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH)) + #define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168) -#define CHV_FGT_DISABLE_SS0 (1 << 10) -#define CHV_FGT_DISABLE_SS1 (1 << 11) -#define CHV_FGT_EU_DIS_SS0_R0_SHIFT 16 -#define CHV_FGT_EU_DIS_SS0_R0_MASK (0xf << CHV_FGT_EU_DIS_SS0_R0_SHIFT) -#define CHV_FGT_EU_DIS_SS0_R1_SHIFT 20 -#define CHV_FGT_EU_DIS_SS0_R1_MASK (0xf << CHV_FGT_EU_DIS_SS0_R1_SHIFT) -#define CHV_FGT_EU_DIS_SS1_R0_SHIFT 24 -#define CHV_FGT_EU_DIS_SS1_R0_MASK (0xf << CHV_FGT_EU_DIS_SS1_R0_SHIFT) -#define CHV_FGT_EU_DIS_SS1_R1_SHIFT 28 -#define CHV_FGT_EU_DIS_SS1_R1_MASK (0xf << CHV_FGT_EU_DIS_SS1_R1_SHIFT) +#define CHV_FGT_EU_DIS_SS1_R1_MASK REG_GENMASK(31, 28) +#define CHV_FGT_EU_DIS_SS1_R0_MASK REG_GENMASK(27, 24) +#define CHV_FGT_EU_DIS_SS0_R1_MASK REG_GENMASK(23, 20) +#define CHV_FGT_EU_DIS_SS0_R0_MASK REG_GENMASK(19, 16) +#define CHV_FGT_DISABLE_SS1 REG_BIT(11) +#define CHV_FGT_DISABLE_SS0 REG_BIT(10) #define BCS_SWCTRL _MMIO(0x22200) #define BCS_SRC_Y REG_BIT(0) @@ -1519,6 +1464,10 @@ GEN6_PM_RP_DOWN_THRESHOLD | \ GEN6_PM_RP_DOWN_TIMEOUT) +#define GEN6_PM_IRQ_REGS I915_IRQ_REGS(GEN6_PMIMR, \ + GEN6_PMIER, \ + GEN6_PMIIR) + #define GEN7_GT_SCRATCH(i) _MMIO(0x4f100 + (i) * 4) #define GEN7_GT_SCRATCH_REG_NUM 8 @@ -1600,6 +1549,8 @@ #define VLV_RENDER_C0_COUNT _MMIO(0x138118) #define VLV_MEDIA_C0_COUNT _MMIO(0x13811c) +#define PCU_PWM_FAN_SPEED _MMIO(0x138140) + #define GEN12_RPSTAT1 _MMIO(0x1381b4) #define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0) #define GEN12_CAGF_MASK REG_GENMASK(19, 11) @@ -1679,11 +1630,6 @@ #define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000) -#define GT0_PACKAGE_ENERGY_STATUS _MMIO(0x250004) -#define GT0_PACKAGE_RAPL_LIMIT _MMIO(0x250008) -#define GT0_PACKAGE_POWER_SKU_UNIT _MMIO(0x250068) -#define GT0_PLATFORM_ENERGY_STATUS _MMIO(0x25006c) - /* * Standalone Media's non-engine GT registers are located at their regular GT * offsets plus 0x380000. This extra offset is stored inside the intel_uncore diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index c0b202223940..1154cd2b7c34 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -442,7 +442,7 @@ static ssize_t slpc_ignore_eff_freq_show(struct kobject *kobj, char *buff) { struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); - struct intel_guc_slpc *slpc = >->uc.guc.slpc; + struct intel_guc_slpc *slpc = >_to_guc(gt)->slpc; return sysfs_emit(buff, "%u\n", slpc->ignore_eff_freq); } @@ -452,7 +452,7 @@ static ssize_t slpc_ignore_eff_freq_store(struct kobject *kobj, const char *buff, size_t count) { struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); - struct intel_guc_slpc *slpc = >->uc.guc.slpc; + struct intel_guc_slpc *slpc = >_to_guc(gt)->slpc; int err; u32 val; @@ -464,6 +464,45 @@ static ssize_t slpc_ignore_eff_freq_store(struct kobject *kobj, return err ?: count; } +static ssize_t slpc_power_profile_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buff) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + + switch (slpc->power_profile) { + case SLPC_POWER_PROFILES_BASE: + return sysfs_emit(buff, "[%s] %s\n", "base", "power_saving"); + case SLPC_POWER_PROFILES_POWER_SAVING: + return sysfs_emit(buff, "%s [%s]\n", "base", "power_saving"); + } + + return sysfs_emit(buff, "%u\n", slpc->power_profile); +} + +static ssize_t slpc_power_profile_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buff, size_t count) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + char power_saving[] = "power_saving"; + char base[] = "base"; + int err; + u32 val; + + if (!strncmp(buff, power_saving, sizeof(power_saving) - 1)) + val = SLPC_POWER_PROFILES_POWER_SAVING; + else if (!strncmp(buff, base, sizeof(base) - 1)) + val = SLPC_POWER_PROFILES_BASE; + else + return -EINVAL; + + err = intel_guc_slpc_set_power_profile(slpc, val); + return err ?: count; +} + struct intel_gt_bool_throttle_attr { struct attribute attr; ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr, @@ -573,7 +612,6 @@ static ssize_t media_freq_factor_show(struct kobject *kobj, char *buff) { struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); - struct intel_guc_slpc *slpc = >->uc.guc.slpc; intel_wakeref_t wakeref; u32 mode; @@ -581,20 +619,12 @@ static ssize_t media_freq_factor_show(struct kobject *kobj, * Retrieve media_ratio_mode from GEN6_RPNSWREQ bit 13 set by * GuC. GEN6_RPNSWREQ:13 value 0 represents 1:2 and 1 represents 1:1 */ - if (IS_XEHPSDV(gt->i915) && - slpc->media_ratio_mode == SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL) { - /* - * For XEHPSDV dynamic mode GEN6_RPNSWREQ:13 does not contain - * the media_ratio_mode, just return the cached media ratio - */ - mode = slpc->media_ratio_mode; - } else { - with_intel_runtime_pm(gt->uncore->rpm, wakeref) - mode = intel_uncore_read(gt->uncore, GEN6_RPNSWREQ); - mode = REG_FIELD_GET(GEN12_MEDIA_FREQ_RATIO, mode) ? - SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE : - SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO; - } + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + mode = intel_uncore_read(gt->uncore, GEN6_RPNSWREQ); + + mode = REG_FIELD_GET(GEN12_MEDIA_FREQ_RATIO, mode) ? + SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE : + SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO; return sysfs_emit(buff, "%u\n", media_ratio_mode_to_factor(mode)); } @@ -604,7 +634,7 @@ static ssize_t media_freq_factor_store(struct kobject *kobj, const char *buff, size_t count) { struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); - struct intel_guc_slpc *slpc = >->uc.guc.slpc; + struct intel_guc_slpc *slpc = >_to_guc(gt)->slpc; u32 factor, mode; int err; @@ -677,6 +707,7 @@ INTEL_GT_ATTR_RO(media_RP0_freq_mhz); INTEL_GT_ATTR_RO(media_RPn_freq_mhz); INTEL_GT_ATTR_RW(slpc_ignore_eff_freq); +INTEL_GT_ATTR_RW(slpc_power_profile); static const struct attribute *media_perf_power_attrs[] = { &attr_media_freq_factor.attr, @@ -873,6 +904,13 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) gt_warn(gt, "failed to create ignore_eff_freq sysfs (%pe)", ERR_PTR(ret)); } + if (intel_uc_uses_guc_slpc(>->uc)) { + ret = sysfs_create_file(kobj, &attr_slpc_power_profile.attr); + if (ret) + gt_warn(gt, "failed to create slpc_power_profile sysfs (%pe)", + ERR_PTR(ret)); + } + if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) { ret = sysfs_create_files(kobj, throttle_reason_attrs); if (ret) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index def7dd0eb6f1..bcee084b1f27 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -207,6 +207,14 @@ struct intel_gt { [MAX_ENGINE_INSTANCE + 1]; enum intel_submission_method submission_method; + struct { + /* + * Mask of the non fused CCS slices + * to be used for the load balancing + */ + intel_engine_mask_t cslices; + } ccs; + /* * Default address space (either GGTT or ppGTT depending on arch). * @@ -284,6 +292,8 @@ struct intel_gt { struct gt_defaults defaults; struct kobject *sysfs_defaults; + struct work_struct wedge; + struct i915_perf_gt perf; /** link: &ggtt.gt_list */ diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 7811a8c9da06..afbc5c769308 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -176,7 +176,6 @@ static void clear_vm_list(struct list_head *list) i915_vma_destroy_locked(vma); i915_gem_object_put(obj); } - } } @@ -680,7 +679,7 @@ void setup_private_pat(struct intel_gt *gt) if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) xelpg_setup_private_ppat(gt); - else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) xehp_setup_private_ppat(gt); else if (GRAPHICS_VER(i915) >= 12) tgl_setup_private_ppat(uncore); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 6b85222ee3ea..9d3a3ad567a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -312,6 +312,7 @@ struct i915_address_space { u64 (*pte_encode)(dma_addr_t addr, unsigned int pat_index, u32 flags); /* Create a valid PTE */ + dma_addr_t (*pte_decode)(u64 pte, bool *is_present, bool *is_local); #define PTE_READ_ONLY BIT(0) #define PTE_LM BIT(1) @@ -340,6 +341,8 @@ struct i915_address_space { struct i915_vma_resource *vma_res, unsigned int pat_index, u32 flags); + dma_addr_t (*read_entry)(struct i915_address_space *vm, + u64 offset, bool *is_present, bool *is_local); void (*cleanup)(struct i915_address_space *vm); void (*foreach)(struct i915_address_space *vm, @@ -590,6 +593,9 @@ void intel_ggtt_bind_vma(struct i915_address_space *vm, void intel_ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma_resource *vma_res); +dma_addr_t intel_ggtt_read_entry(struct i915_address_space *vm, + u64 offset, bool *is_present, bool *is_local); + int i915_ggtt_probe_hw(struct drm_i915_private *i915); int i915_ggtt_init_hw(struct drm_i915_private *i915); int i915_ggtt_enable_hw(struct drm_i915_private *i915); @@ -608,8 +614,8 @@ int i915_ppgtt_init_hw(struct intel_gt *gt); struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt, unsigned long lmem_pt_obj_flags); -void i915_ggtt_suspend_vm(struct i915_address_space *vm); -bool i915_ggtt_resume_vm(struct i915_address_space *vm); +void i915_ggtt_suspend_vm(struct i915_address_space *vm, bool evict_all); +bool i915_ggtt_resume_vm(struct i915_address_space *vm, bool all_evicted); void i915_ggtt_suspend(struct i915_ggtt *gtt); void i915_ggtt_resume(struct i915_ggtt *ggtt); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 7c367ba8d9dc..c481b56fa67d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -546,47 +546,6 @@ static const u8 gen12_rcs_offsets[] = { END }; -static const u8 xehp_rcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - END -}; - static const u8 dg2_rcs_offsets[] = { NOP(1), LRI(15, POSTED), @@ -695,8 +654,6 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) return mtl_rcs_offsets; else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return dg2_rcs_offsets; - else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) - return xehp_rcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_rcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 11) @@ -719,7 +676,7 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return 0x70; else if (GRAPHICS_VER(engine->i915) >= 12) return 0x60; @@ -733,7 +690,7 @@ static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) static int lrc_ring_bb_offset(const struct intel_engine_cs *engine) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return 0x80; else if (GRAPHICS_VER(engine->i915) >= 12) return 0x70; @@ -748,7 +705,7 @@ static int lrc_ring_bb_offset(const struct intel_engine_cs *engine) static int lrc_ring_gpr0(const struct intel_engine_cs *engine) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return 0x84; else if (GRAPHICS_VER(engine->i915) >= 12) return 0x74; @@ -794,8 +751,7 @@ static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) { - - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) /* * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL * simply to match the RCS context image layout. @@ -863,8 +819,10 @@ static bool ctx_needs_runalone(const struct intel_context *ce) bool ctx_is_protected = false; /* - * On MTL and newer platforms, protected contexts require setting - * the LRC run-alone bit or else the encryption will not happen. + * Wa_14019159160 - Case 2. + * On some platforms, protected contexts require setting + * the LRC run-alone bit or else the encryption/decryption will not happen. + * NOTE: Case 2 only applies to PXP use-case of said workaround. */ if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 70) && (ce->engine->class == COMPUTE_CLASS || ce->engine->class == RENDER_CLASS)) { @@ -893,6 +851,7 @@ static void init_common_regs(u32 * const regs, if (GRAPHICS_VER(engine->i915) < 11) ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | CTX_CTRL_RS_CTX_ENABLE); + /* Wa_14019159160 - Case 2.*/ if (ctx_needs_runalone(ce)) ctl |= _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_RUNALONE_MODE); regs[CTX_CONTEXT_CONTROL] = ctl; @@ -1060,9 +1019,8 @@ void lrc_init_state(struct intel_context *ce, set_redzone(state, engine); - if (engine->default_state) { - shmem_read(engine->default_state, 0, - state, engine->context_size); + if (ce->default_state) { + shmem_read(ce->default_state, 0, state, engine->context_size); __set_bit(CONTEXT_VALID_BIT, &ce->flags); inhibit = false; } @@ -1174,6 +1132,9 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) GEM_BUG_ON(ce->state); + if (!intel_context_has_own_state(ce)) + ce->default_state = engine->default_state; + vma = __lrc_alloc_state(ce, engine); if (IS_ERR(vma)) return PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 576e5ef0289b..aff5aca591e6 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -35,9 +35,9 @@ static bool engine_supports_migration(struct intel_engine_cs *engine) return true; } -static void xehpsdv_toggle_pdes(struct i915_address_space *vm, - struct i915_page_table *pt, - void *data) +static void xehp_toggle_pdes(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data) { struct insert_pte_data *d = data; @@ -52,9 +52,9 @@ static void xehpsdv_toggle_pdes(struct i915_address_space *vm, d->offset += SZ_2M; } -static void xehpsdv_insert_pte(struct i915_address_space *vm, - struct i915_page_table *pt, - void *data) +static void xehp_insert_pte(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data) { struct insert_pte_data *d = data; @@ -120,7 +120,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt) * 512 entry layout using 4K GTT pages. The other two windows just map * lmem pages and must use the new compact 32 entry layout using 64K GTT * pages, which ensures we can address any lmem object that the user - * throws at us. We then also use the xehpsdv_toggle_pdes as a way of + * throws at us. We then also use the xehp_toggle_pdes as a way of * just toggling the PDE bit(GEN12_PDE_64K) for us, to enable the * compact layout for each of these page-tables, that fall within the * [CHUNK_SIZE, 3 * CHUNK_SIZE) range. @@ -209,12 +209,12 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt) /* Now allow the GPU to rewrite the PTE via its own ppGTT */ if (HAS_64K_PAGES(gt->i915)) { vm->vm.foreach(&vm->vm, base, d.offset - base, - xehpsdv_insert_pte, &d); + xehp_insert_pte, &d); d.offset = base + CHUNK_SZ; vm->vm.foreach(&vm->vm, d.offset, 2 * CHUNK_SZ, - xehpsdv_toggle_pdes, &d); + xehp_toggle_pdes, &d); } else { vm->vm.foreach(&vm->vm, base, d.offset - base, insert_pte, &d); @@ -304,7 +304,7 @@ struct intel_context *intel_migrate_create_context(struct intel_migrate *m) struct intel_context *ce; /* - * We randomly distribute contexts across the engines upon constrction, + * We randomly distribute contexts across the engines upon construction, * as they all share the same pinned vm, and so in order to allow * multiple blits to run in parallel, we must construct each blit * to use a different range of the vm for its GTT. This has to be @@ -646,7 +646,7 @@ calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem, * When CHUNK_SZ is passed all the pages upto CHUNK_SZ * will be taken for the blt. in Flat-ccs supported * platform Smem obj will have more pages than required - * for main meory hence limit it to the required size + * for main memory hence limit it to the required size * for main memory */ return min_t(u64, bytes_to_cpy, CHUNK_SZ); @@ -925,7 +925,7 @@ static int emit_clear(struct i915_request *rq, u32 offset, int size, GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) ring_sz = XY_FAST_COLOR_BLT_DW; else if (ver >= 8) ring_sz = 8; @@ -936,7 +936,7 @@ static int emit_clear(struct i915_request *rq, u32 offset, int size, if (IS_ERR(cs)) return PTR_ERR(cs); - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 | (XY_FAST_COLOR_BLT_DW - 2); *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 25c1023eb5f9..5dd8121f4b15 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -53,7 +53,6 @@ struct drm_i915_mocs_table { /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ -#define PVC_NUM_MOCS_ENTRIES 3 #define MTL_NUM_MOCS_ENTRIES 16 /* (e)LLC caching options */ @@ -315,7 +314,6 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = { }; static const struct drm_i915_mocs_entry dg1_mocs_table[] = { - /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), /* WB - L3 */ @@ -367,31 +365,6 @@ static const struct drm_i915_mocs_entry gen12_mocs_table[] = { L3_3_WB), }; -static const struct drm_i915_mocs_entry xehpsdv_mocs_table[] = { - /* wa_1608975824 */ - MOCS_ENTRY(0, 0, L3_3_WB | L3_LKUP(1)), - - /* UC - Coherent; GO:L3 */ - MOCS_ENTRY(1, 0, L3_1_UC | L3_LKUP(1)), - /* UC - Coherent; GO:Memory */ - MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Non-Coherent; GO:Memory */ - MOCS_ENTRY(3, 0, L3_1_UC | L3_GLBGO(1)), - /* UC - Non-Coherent; GO:L3 */ - MOCS_ENTRY(4, 0, L3_1_UC), - - /* WB */ - MOCS_ENTRY(5, 0, L3_3_WB | L3_LKUP(1)), - - /* HW Reserved - SW program but never use. */ - MOCS_ENTRY(48, 0, L3_3_WB | L3_LKUP(1)), - MOCS_ENTRY(49, 0, L3_1_UC | L3_LKUP(1)), - MOCS_ENTRY(60, 0, L3_1_UC), - MOCS_ENTRY(61, 0, L3_1_UC), - MOCS_ENTRY(62, 0, L3_1_UC), - MOCS_ENTRY(63, 0, L3_1_UC), -}; - static const struct drm_i915_mocs_entry dg2_mocs_table[] = { /* UC - Coherent; GO:L3 */ MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)), @@ -404,17 +377,6 @@ static const struct drm_i915_mocs_entry dg2_mocs_table[] = { MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), }; -static const struct drm_i915_mocs_entry pvc_mocs_table[] = { - /* Error */ - MOCS_ENTRY(0, 0, L3_3_WB), - - /* UC */ - MOCS_ENTRY(1, 0, L3_1_UC), - - /* WB */ - MOCS_ENTRY(2, 0, L3_3_WB), -}; - static const struct drm_i915_mocs_entry mtl_mocs_table[] = { /* Error - Reserved for Non-Use */ MOCS_ENTRY(0, @@ -501,25 +463,12 @@ static unsigned int get_mocs_settings(struct drm_i915_private *i915, table->n_entries = MTL_NUM_MOCS_ENTRIES; table->uc_index = 9; table->unused_entries_index = 1; - } else if (IS_PONTEVECCHIO(i915)) { - table->size = ARRAY_SIZE(pvc_mocs_table); - table->table = pvc_mocs_table; - table->n_entries = PVC_NUM_MOCS_ENTRIES; - table->uc_index = 1; - table->wb_index = 2; - table->unused_entries_index = 2; } else if (IS_DG2(i915)) { table->size = ARRAY_SIZE(dg2_mocs_table); table->table = dg2_mocs_table; table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->unused_entries_index = 3; - } else if (IS_XEHPSDV(i915)) { - table->size = ARRAY_SIZE(xehpsdv_mocs_table); - table->table = xehpsdv_mocs_table; - table->uc_index = 2; - table->n_entries = GEN9_NUM_MOCS_ENTRIES; - table->unused_entries_index = 5; } else if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; @@ -670,7 +619,7 @@ static void init_l3cc_table(struct intel_gt *gt, intel_gt_mcr_lock(gt, &flags); for_each_l3cc(l3cc, table, i) - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 55)) intel_gt_mcr_multicast_write_fw(gt, XEHP_LNCFCMOCS(i), l3cc); else intel_uncore_write_fw(gt->uncore, GEN9_LNCFCMOCS(i), l3cc); @@ -725,7 +674,7 @@ void intel_mocs_init(struct intel_gt *gt) __init_mocs_table(gt->uncore, &table, global_mocs_offset()); /* - * Initialize the L3CC table as part of mocs initalization to make + * Initialize the L3CC table as part of mocs initialization to make * sure the LNCFCMOCSx registers are programmed for the subsequent * memory transactions including guc transactions */ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 8f4b3c8af09c..9ca42589da4d 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -109,7 +109,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) * thus allowing GuC to control RC6 entry/exit fully instead. * We will not set the HW ENABLE and EI bits */ - if (!intel_guc_rc_enable(>->uc.guc)) + if (!intel_guc_rc_enable(gt_to_guc(gt))) rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE; else rc6->ctl_enable = @@ -117,23 +117,12 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | GEN6_RC_CTL_EI_MODE(1); - /* - * BSpec 52698 - Render powergating must be off. - * FIXME BSpec is outdated, disabling powergating for MTL is just - * temporary wa and should be removed after fixing real cause - * of forcewake timeouts. - */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) - pg_enable = - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; - else - pg_enable = - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; + pg_enable = + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; - if (GRAPHICS_VER(gt->i915) >= 12) { + if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) { for (i = 0; i < I915_MAX_VCS; i++) if (HAS_ENGINE(gt, _VCS(i))) pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) | @@ -569,7 +558,7 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6) struct intel_gt *gt = rc6_to_gt(rc6); /* Take control of RC6 back from GuC */ - intel_guc_rc_disable(>->uc.guc); + intel_guc_rc_disable(gt_to_guc(gt)); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); if (GRAPHICS_VER(i915) >= 9) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index c8e9aa41fdea..dbdcfe130ad4 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -764,7 +764,7 @@ wa_14015076503_end(struct intel_gt *gt, intel_engine_mask_t engine_mask) HECI_H_GS1_ER_PREP, 0); } -int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) +static int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) { const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1; reset_func reset; @@ -879,8 +879,17 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt) intel_engine_mask_t awake = 0; enum intel_engine_id id; - /* For GuC mode, ensure submission is disabled before stopping ring */ - intel_uc_reset_prepare(>->uc); + /** + * For GuC mode with submission enabled, ensure submission + * is disabled before stopping ring. + * + * For GuC mode with submission disabled, ensure that GuC is not + * sanitized, do that after engine reset. reset_prepare() + * is followed by engine reset which in this mode requires GuC to + * process any CSB FIFO entries generated by the resets. + */ + if (intel_uc_uses_guc_submission(>->uc)) + intel_uc_reset_prepare(>->uc); for_each_engine(engine, gt, id) { if (intel_engine_pm_get_if_awake(engine)) @@ -977,8 +986,8 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) awake = reset_prepare(gt); /* Even if the GPU reset fails, it should still stop the engines */ - if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) - __intel_gt_reset(gt, ALL_ENGINES); + if (!intel_gt_gpu_reset_clobbers_display(gt)) + intel_gt_reset_all_engines(gt); for_each_engine(engine, gt, id) engine->submit_request = nop_submit_request; @@ -1004,6 +1013,15 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) GT_TRACE(gt, "end\n"); } +static void set_wedged_work(struct work_struct *w) +{ + struct intel_gt *gt = container_of(w, struct intel_gt, wedge); + intel_wakeref_t wf; + + with_intel_runtime_pm(gt->uncore->rpm, wf) + __intel_gt_set_wedged(gt); +} + void intel_gt_set_wedged(struct intel_gt *gt) { intel_wakeref_t wakeref; @@ -1016,7 +1034,7 @@ void intel_gt_set_wedged(struct intel_gt *gt) if (GEM_SHOW_DEBUG()) { struct drm_printer p = drm_dbg_printer(>->i915->drm, - DRM_UT_DRIVER, __func__); + DRM_UT_DRIVER, NULL); struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1080,7 +1098,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT); dma_fence_put(fence); - /* Restart iteration after droping lock */ + /* Restart iteration after dropping lock */ spin_lock(&timelines->lock); tl = list_entry(&timelines->active_list, typeof(*tl), link); } @@ -1088,8 +1106,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) /* We must reset pending GPU events before restoring our submission */ ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */ - if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) - ok = __intel_gt_reset(gt, ALL_ENGINES) == 0; + if (!intel_gt_gpu_reset_clobbers_display(gt)) + ok = intel_gt_reset_all_engines(gt) == 0; if (!ok) { /* * Warn CI about the unrecoverable wedged condition. @@ -1133,10 +1151,10 @@ static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) { int err, i; - err = __intel_gt_reset(gt, ALL_ENGINES); + err = intel_gt_reset_all_engines(gt); for (i = 0; err && i < RESET_MAX_RETRIES; i++) { msleep(10 * (i + 1)); - err = __intel_gt_reset(gt, ALL_ENGINES); + err = intel_gt_reset_all_engines(gt); } if (err) return err; @@ -1159,6 +1177,13 @@ static int resume(struct intel_gt *gt) return 0; } +bool intel_gt_gpu_reset_clobbers_display(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + return INTEL_INFO(i915)->gpu_reset_clobbers_display; +} + /** * intel_gt_reset - reset chip after a hang * @gt: #intel_gt to reset @@ -1180,6 +1205,7 @@ void intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask, const char *reason) { + struct intel_display *display = >->i915->display; intel_engine_mask_t awake; int ret; @@ -1214,19 +1240,22 @@ void intel_gt_reset(struct intel_gt *gt, goto error; } - if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) - intel_runtime_pm_disable_interrupts(gt->i915); + if (intel_gt_gpu_reset_clobbers_display(gt)) + intel_irq_suspend(gt->i915); if (do_reset(gt, stalled_mask)) { gt_err(gt, "Failed to reset chip\n"); goto taint; } - if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) - intel_runtime_pm_enable_interrupts(gt->i915); + if (intel_gt_gpu_reset_clobbers_display(gt)) + intel_irq_resume(gt->i915); - intel_overlay_reset(gt->i915); + intel_overlay_reset(display); + /* sanitize uC after engine reset */ + if (!intel_uc_uses_guc_submission(>->uc)) + intel_uc_reset_prepare(>->uc); /* * Next we need to restore the context, but we don't use those * yet either... @@ -1270,7 +1299,30 @@ error: goto finish; } -static int intel_gt_reset_engine(struct intel_engine_cs *engine) +/** + * intel_gt_reset_all_engines() - Reset all engines in the given gt. + * @gt: the GT to reset all engines for. + * + * This function resets all engines within the given gt. + * + * Returns: + * Zero on success, negative error code on failure. + */ +int intel_gt_reset_all_engines(struct intel_gt *gt) +{ + return __intel_gt_reset(gt, ALL_ENGINES); +} + +/** + * intel_gt_reset_engine() - Reset a specific engine within a gt. + * @engine: engine to be reset. + * + * This function resets the specified engine within a gt. + * + * Returns: + * Zero on success, negative error code on failure. + */ +int intel_gt_reset_engine(struct intel_engine_cs *engine) { return __intel_gt_reset(engine->gt, engine->mask); } @@ -1348,6 +1400,11 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) return err; } +static void display_reset_modeset_stuck(void *gt) +{ + intel_gt_set_wedged(gt); +} + static void intel_gt_reset_global(struct intel_gt *gt, u32 engine_mask, const char *reason) @@ -1365,15 +1422,33 @@ static void intel_gt_reset_global(struct intel_gt *gt, /* Use a watchdog to ensure that our reset completes */ intel_wedge_on_timeout(&w, gt, 60 * HZ) { - intel_display_reset_prepare(gt->i915); + struct drm_i915_private *i915 = gt->i915; + struct intel_display *display = &i915->display; + bool need_display_reset; + bool reset_display; + + need_display_reset = intel_gt_gpu_reset_clobbers_display(gt) && + intel_has_gpu_reset(gt); + + reset_display = intel_display_reset_test(display) || + need_display_reset; + + if (reset_display) + reset_display = intel_display_reset_prepare(display, + display_reset_modeset_stuck, + gt); intel_gt_reset(gt, engine_mask, reason); - intel_display_reset_finish(gt->i915); + if (reset_display) + intel_display_reset_finish(display, !need_display_reset); } if (!test_bit(I915_WEDGED, >->reset.flags)) kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); + else + drm_dev_wedged_event(>->i915->drm, + DRM_WEDGE_RECOVERY_REBIND | DRM_WEDGE_RECOVERY_BUS_RESET); } /** @@ -1434,7 +1509,7 @@ void intel_gt_handle_error(struct intel_gt *gt, intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { local_bh_disable(); for_each_engine_masked(engine, gt, engine_mask, tmp) { - BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); + BUILD_BUG_ON(I915_RESET_BACKOFF >= I915_RESET_ENGINE); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)) continue; @@ -1579,6 +1654,7 @@ void intel_gt_init_reset(struct intel_gt *gt) init_waitqueue_head(>->reset.queue); mutex_init(>->reset.mutex); init_srcu_struct(>->reset.backoff_srcu); + INIT_WORK(>->wedge, set_wedged_work); /* * While undesirable to wait inside the shrinker, complain anyway. @@ -1605,7 +1681,7 @@ static void intel_wedge_me(struct work_struct *work) struct intel_wedge_me *w = container_of(work, typeof(*w), work.work); gt_err(w->gt, "%s timed out, cancelling all in-flight rendering.\n", w->name); - intel_gt_set_wedged(w->gt); + set_wedged_work(&w->gt->wedge); } void __intel_init_wedge(struct intel_wedge_me *w, diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index f615b30b81c5..724ea6d64f33 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -28,6 +28,8 @@ void intel_gt_handle_error(struct intel_gt *gt, const char *fmt, ...); #define I915_ERROR_CAPTURE BIT(0) +bool intel_gt_gpu_reset_clobbers_display(struct intel_gt *gt); + void intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask, const char *reason); @@ -54,7 +56,8 @@ int intel_gt_terminally_wedged(struct intel_gt *gt); void intel_gt_set_wedged_on_init(struct intel_gt *gt); void intel_gt_set_wedged_on_fini(struct intel_gt *gt); -int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask); +int intel_gt_reset_engine(struct intel_engine_cs *engine); +int intel_gt_reset_all_engines(struct intel_gt *gt); int intel_reset_guc(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h index 80351f0a856c..4f5fd393af6f 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset_types.h +++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h @@ -41,8 +41,7 @@ struct intel_reset { */ unsigned long flags; #define I915_RESET_BACKOFF 0 -#define I915_RESET_MODESET 1 -#define I915_RESET_ENGINE 2 +#define I915_RESET_ENGINE 1 #define I915_WEDGED_ON_INIT (BITS_PER_LONG - 3) #define I915_WEDGED_ON_FINI (BITS_PER_LONG - 2) #define I915_WEDGED (BITS_PER_LONG - 1) diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 59da4b7bd262..b74d9205c0f5 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -308,30 +308,6 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) return cs; } -/* Align the ring tail to a cacheline boundary */ -int intel_ring_cacheline_align(struct i915_request *rq) -{ - int num_dwords; - void *cs; - - num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); - if (num_dwords == 0) - return 0; - - num_dwords = CACHELINE_DWORDS - num_dwords; - GEM_BUG_ON(num_dwords & 1); - - cs = intel_ring_begin(rq, num_dwords); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); - intel_ring_advance(rq, cs + num_dwords); - - GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); - return 0; -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_ring.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index 1b32dadfb8c3..64b322e25f36 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -16,7 +16,6 @@ struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size); u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords); -int intel_ring_cacheline_align(struct i915_request *rq); unsigned int intel_ring_update_space(struct intel_ring *ring); diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 92085ffd23de..2a6d79abf25b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -26,6 +26,7 @@ #include "shmem_utils.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" +#include "intel_gt_print.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. @@ -192,6 +193,7 @@ static bool stop_ring(struct intel_engine_cs *engine) static int xcs_resume(struct intel_engine_cs *engine) { struct intel_ring *ring = engine->legacy.ring; + ktime_t kt; ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n", ring->head, ring->tail); @@ -229,10 +231,33 @@ static int xcs_resume(struct intel_engine_cs *engine) set_pp_dir(engine); - /* First wake the ring up to an empty/idle ring */ - ENGINE_WRITE_FW(engine, RING_HEAD, ring->head); + /* + * First wake the ring up to an empty/idle ring. + * Use 50ms of delay to let the engine write successfully + * for all platforms. Experimented with different values and + * determined that 50ms works best based on testing. + */ + for ((kt) = ktime_get() + (50 * NSEC_PER_MSEC); + ktime_before(ktime_get(), (kt)); cpu_relax()) { + /* + * In case of resets fails because engine resumes from + * incorrect RING_HEAD and then GPU may be then fed + * to invalid instructions, which may lead to unrecoverable + * hang. So at first write doesn't succeed then try again. + */ + ENGINE_WRITE_FW(engine, RING_HEAD, ring->head); + if (ENGINE_READ_FW(engine, RING_HEAD) == ring->head) + break; + } + ENGINE_WRITE_FW(engine, RING_TAIL, ring->head); - ENGINE_POSTING_READ(engine, RING_TAIL); + if (ENGINE_READ_FW(engine, RING_HEAD) != ENGINE_READ_FW(engine, RING_TAIL)) { + ENGINE_TRACE(engine, "failed to reset empty ring: [%x, %x]: %x\n", + ENGINE_READ_FW(engine, RING_HEAD), + ENGINE_READ_FW(engine, RING_TAIL), + ring->head); + goto err; + } ENGINE_WRITE_FW(engine, RING_CTL, RING_CTL_SIZE(ring->size) | RING_VALID); @@ -241,12 +266,16 @@ static int xcs_resume(struct intel_engine_cs *engine) if (__intel_wait_for_register_fw(engine->uncore, RING_CTL(engine->mmio_base), RING_VALID, RING_VALID, - 5000, 0, NULL)) + 5000, 0, NULL)) { + ENGINE_TRACE(engine, "failed to restart\n"); goto err; + } - if (GRAPHICS_VER(engine->i915) > 2) + if (GRAPHICS_VER(engine->i915) > 2) { ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); + ENGINE_POSTING_READ(engine, RING_MI_MODE); + } /* Now awake, let it get started */ if (ring->tail != ring->head) { @@ -259,16 +288,16 @@ static int xcs_resume(struct intel_engine_cs *engine) return 0; err: - drm_err(&engine->i915->drm, - "%s initialization failed; " - "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", - engine->name, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_CTL) & RING_VALID, - ENGINE_READ(engine, RING_HEAD), ring->head, - ENGINE_READ(engine, RING_TAIL), ring->tail, - ENGINE_READ(engine, RING_START), - i915_ggtt_offset(ring->vma)); + gt_err(engine->gt, "%s initialization failed\n", engine->name); + ENGINE_TRACE(engine, + "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", + ENGINE_READ(engine, RING_CTL), + ENGINE_READ(engine, RING_CTL) & RING_VALID, + ENGINE_READ(engine, RING_HEAD), ring->head, + ENGINE_READ(engine, RING_TAIL), ring->tail, + ENGINE_READ(engine, RING_START), + i915_ggtt_offset(ring->vma)); + GEM_TRACE_DUMP(); return -EIO; } @@ -336,7 +365,13 @@ static void reset_prepare(struct intel_engine_cs *engine) ENGINE_READ_FW(engine, RING_HEAD), ENGINE_READ_FW(engine, RING_TAIL), ENGINE_READ_FW(engine, RING_START)); - if (!stop_ring(engine)) { + /* + * Sometimes engine head failed to set to zero even after writing into it. + * Use wait_for_atomic() with 20ms delay to let engine resumes from + * correct RING_HEAD. Experimented different values and determined + * that 20ms works best based on testing. + */ + if (wait_for_atomic((!stop_ring(engine) == 0), 20)) { drm_err(&engine->i915->drm, "failed to set %s head to zero " "ctl %08x head %08x tail %08x start %08x\n", @@ -474,8 +509,7 @@ static int ring_context_init_default_state(struct intel_context *ce, if (IS_ERR(vaddr)) return PTR_ERR(vaddr); - shmem_read(ce->engine->default_state, 0, - vaddr, ce->engine->context_size); + shmem_read(ce->default_state, 0, vaddr, ce->engine->context_size); i915_gem_object_flush_map(obj); __i915_gem_object_release_map(obj); @@ -491,7 +525,7 @@ static int ring_context_pre_pin(struct intel_context *ce, struct i915_address_space *vm; int err = 0; - if (ce->engine->default_state && + if (ce->default_state && !test_bit(CONTEXT_VALID_BIT, &ce->flags)) { err = ring_context_init_default_state(ce, ww); if (err) @@ -570,10 +604,12 @@ static int ring_context_alloc(struct intel_context *ce) { struct intel_engine_cs *engine = ce->engine; + if (!intel_context_has_own_state(ce)) + ce->default_state = engine->default_state; + /* One ringbuffer to rule them all */ GEM_BUG_ON(!engine->legacy.ring); ce->ring = engine->legacy.ring; - ce->timeline = intel_timeline_get(engine->legacy.timeline); GEM_BUG_ON(ce->state); if (engine->context_size) { @@ -586,6 +622,8 @@ static int ring_context_alloc(struct intel_context *ce) ce->state = vma; } + ce->timeline = intel_timeline_get(engine->legacy.timeline); + return 0; } @@ -1088,9 +1126,6 @@ static void setup_irq(struct intel_engine_cs *engine) } else if (GRAPHICS_VER(i915) >= 5) { engine->irq_enable = gen5_irq_enable; engine->irq_disable = gen5_irq_disable; - } else if (GRAPHICS_VER(i915) >= 3) { - engine->irq_enable = gen3_irq_enable; - engine->irq_disable = gen3_irq_disable; } else { engine->irq_enable = gen2_irq_enable; engine->irq_disable = gen2_irq_disable; @@ -1144,7 +1179,7 @@ static void setup_common(struct intel_engine_cs *engine) * equivalent to our next initial bread so we can elide * engine->emit_init_breadcrumb(). */ - engine->emit_fini_breadcrumb = gen3_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen2_emit_breadcrumb; if (GRAPHICS_VER(i915) == 5) engine->emit_fini_breadcrumb = gen5_emit_breadcrumb; @@ -1157,7 +1192,7 @@ static void setup_common(struct intel_engine_cs *engine) else if (IS_I830(i915) || IS_I845G(i915)) engine->emit_bb_start = i830_emit_bb_start; else - engine->emit_bb_start = gen3_emit_bb_start; + engine->emit_bb_start = gen2_emit_bb_start; } static void setup_rcs(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 4feef874e6d6..eb89948cc112 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -5,10 +5,10 @@ #include <linux/string_helpers.h> -#include <drm/i915_drm.h> +#include <drm/intel/i915_drm.h> #include "display/intel_display.h" -#include "display/intel_display_irq.h" +#include "display/intel_display_rps.h" #include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" @@ -52,7 +52,7 @@ static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); - return >->uc.guc.slpc; + return >_to_guc(gt)->slpc; } static bool rps_uses_slpc(struct intel_rps *rps) @@ -74,7 +74,7 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) static void rps_timer(struct timer_list *t) { - struct intel_rps *rps = from_timer(rps, t, timer); + struct intel_rps *rps = timer_container_of(rps, t, timer); struct intel_gt *gt = rps_to_gt(rps); struct intel_engine_cs *engine; ktime_t dt, last, timestamp; @@ -161,7 +161,7 @@ static void rps_start_timer(struct intel_rps *rps) static void rps_stop_timer(struct intel_rps *rps) { - del_timer_sync(&rps->timer); + timer_delete_sync(&rps->timer); rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); cancel_work_sync(&rps->work); } @@ -265,10 +265,10 @@ static const struct cparams { u16 c; } cparams[] = { { 1, 1333, 301, 28664 }, - { 1, 1066, 294, 24460 }, + { 1, 1067, 294, 24460 }, { 1, 800, 294, 25192 }, { 0, 1333, 276, 27605 }, - { 0, 1066, 276, 27605 }, + { 0, 1067, 276, 27605 }, { 0, 800, 231, 23784 }, }; @@ -280,15 +280,16 @@ static void gen5_rps_init(struct intel_rps *rps) u32 rgvmodectl; int c_m, i; - if (i915->fsb_freq <= 3200) + if (i915->fsb_freq <= 3200000) c_m = 0; - else if (i915->fsb_freq <= 4800) + else if (i915->fsb_freq <= 4800000) c_m = 1; else c_m = 2; for (i = 0; i < ARRAY_SIZE(cparams); i++) { - if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) { + if (cparams[i].i == c_m && + cparams[i].t == DIV_ROUND_CLOSEST(i915->mem_freq, 1000)) { rps->ips.m = cparams[i].m; rps->ips.c = cparams[i].c; break; @@ -549,6 +550,7 @@ static unsigned int init_emon(struct intel_uncore *uncore) static bool gen5_rps_enable(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_display *display = &i915->display; struct intel_uncore *uncore = rps_to_uncore(rps); u8 fstart, vstart; u32 rgvmodectl; @@ -606,9 +608,7 @@ static bool gen5_rps_enable(struct intel_rps *rps) rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC); rps->ips.last_time2 = ktime_get_raw_ns(); - spin_lock(&i915->irq_lock); - ilk_enable_display_irq(i915, DE_PCU_EVENT); - spin_unlock(&i915->irq_lock); + ilk_display_rps_enable(display); spin_unlock_irq(&mchdev_lock); @@ -620,14 +620,13 @@ static bool gen5_rps_enable(struct intel_rps *rps) static void gen5_rps_disable(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_display *display = &i915->display; struct intel_uncore *uncore = rps_to_uncore(rps); u16 rgvswctl; spin_lock_irq(&mchdev_lock); - spin_lock(&i915->irq_lock); - ilk_disable_display_irq(i915, DE_PCU_EVENT); - spin_unlock(&i915->irq_lock); + ilk_display_rps_disable(display); rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); @@ -1000,6 +999,10 @@ void intel_rps_dec_waiters(struct intel_rps *rps) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + /* Don't decrement num_waiters for req where increment was skipped */ + if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) + return; + intel_guc_slpc_dec_waiters(slpc); } else { atomic_dec(&rps->num_waiters); @@ -1013,6 +1016,10 @@ void intel_rps_boost(struct i915_request *rq) if (i915_request_signaled(rq) || i915_request_has_waitboost(rq)) return; + /* Waitboost is not needed for contexts marked with a Freq hint */ + if (test_bit(CONTEXT_LOW_LATENCY, &rq->context->flags)) + return; + /* Serializes with i915_request_retire() */ if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) { struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; @@ -1020,11 +1027,19 @@ void intel_rps_boost(struct i915_request *rq) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); - if (slpc->min_freq_softlimit >= slpc->boost_freq) + /* Waitboost should not be done with power saving profile */ + if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) return; /* Return if old value is non zero */ if (!atomic_fetch_inc(&slpc->num_waiters)) { + /* + * Skip queuing boost work if frequency is already boosted, + * but still increment num_waiters. + */ + if (slpc->min_freq_softlimit >= slpc->boost_freq) + return; + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", rq->fence.context, rq->fence.seqno); queue_work(rps_to_gt(rps)->i915->unordered_wq, @@ -1086,11 +1101,7 @@ static u32 intel_rps_read_state_cap(struct intel_rps *rps) struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); - if (IS_PONTEVECCHIO(i915)) - return intel_uncore_read(uncore, PVC_RP_STATE_CAP); - else if (IS_XEHPSDV(i915)) - return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP); - else if (IS_GEN9_LP(i915)) + if (IS_GEN9_LP(i915)) return intel_uncore_read(uncore, BXT_RP_STATE_CAP); else return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h index 6507fa3f6d1e..ece445109305 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -40,7 +40,7 @@ enum { /** * struct intel_rps_freq_caps - rps freq capabilities * @rp0_freq: non-overclocked max frequency - * @rp1_freq: "less than" RP0 power/freqency + * @rp1_freq: "less than" RP0 power/frequency * @min_freq: aka RPn, minimum frequency * * Freq caps exposed by HW, values are in "hw units" and intel_gpu_freq() @@ -57,7 +57,7 @@ struct intel_rps { /* * work, interrupts_enabled and pm_iir are protected by - * i915->irq_lock + * gt->irq_lock */ struct timer_list timer; struct work_struct work; @@ -90,7 +90,7 @@ struct intel_rps { u8 boost_freq; /* Frequency to request when wait boosting */ u8 idle_freq; /* Frequency to request when we are idle */ u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */ - u8 rp1_freq; /* "less than" RP0 power/freqency */ + u8 rp1_freq; /* "less than" RP0 power/frequency */ u8 rp0_freq; /* Non-overclocked max frequency. */ u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */ diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c b/drivers/gpu/drm/i915/gt/intel_sa_media.c index 8c1dbcbcbc4f..2945526d52d1 100644 --- a/drivers/gpu/drm/i915/gt/intel_sa_media.c +++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c @@ -27,7 +27,7 @@ int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr, /* * Standalone media shares the general MMIO space with the primary - * GT. We'll re-use the primary GT's mapping. + * GT. We'll reuse the primary GT's mapping. */ uncore->regs = intel_uncore_regs(&i915->uncore); if (drm_WARN_ON(&i915->drm, uncore->regs == NULL)) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 6a3246240e81..9501d323d0d3 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -214,13 +214,8 @@ static void xehp_sseu_info_init(struct intel_gt *gt) int num_compute_regs, num_geometry_regs; int eu; - if (IS_PONTEVECCHIO(gt->i915)) { - num_geometry_regs = 0; - num_compute_regs = 2; - } else { - num_geometry_regs = 1; - num_compute_regs = 1; - } + num_geometry_regs = 1; + num_compute_regs = 1; /* * The concept of slice has been removed in Xe_HP. To be compatible @@ -241,7 +236,8 @@ static void xehp_sseu_info_init(struct intel_gt *gt) GEN12_GT_COMPUTE_DSS_ENABLE, XEHPC_GT_COMPUTE_DSS_ENABLE_EXT); - eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; + eu_en_fuse = REG_FIELD_GET(XEHP_EU_ENA_MASK, + intel_uncore_read(uncore, XEHP_EU_ENABLE)); if (HAS_ONE_EU_PER_FUSE_BIT(gt->i915)) eu_en = eu_en_fuse; @@ -274,15 +270,15 @@ static void gen12_sseu_info_init(struct intel_gt *gt) * Although gen12 architecture supported multiple slices, TGL, RKL, * DG1, and ADL only had a single slice. */ - s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & - GEN11_GT_S_ENA_MASK; + s_en = REG_FIELD_GET(GEN11_GT_S_ENA_MASK, + intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE)); drm_WARN_ON(>->i915->drm, s_en != 0x1); g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); /* one bit per pair of EUs */ - eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & - GEN11_EU_DIS_MASK); + eu_en_fuse = ~REG_FIELD_GET(GEN11_EU_DIS_MASK, + intel_uncore_read(uncore, GEN11_EU_DISABLE)); for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) if (eu_en_fuse & BIT(eu)) @@ -311,14 +307,14 @@ static void gen11_sseu_info_init(struct intel_gt *gt) * Although gen11 architecture supported multiple slices, ICL and * EHL/JSL only had a single slice in practice. */ - s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & - GEN11_GT_S_ENA_MASK; + s_en = REG_FIELD_GET(GEN11_GT_S_ENA_MASK, + intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE)); drm_WARN_ON(>->i915->drm, s_en != 0x1); ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE); - eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & - GEN11_EU_DIS_MASK); + eu_en = ~REG_FIELD_GET(GEN11_EU_DIS_MASK, + intel_uncore_read(uncore, GEN11_EU_DISABLE)); gen11_compute_sseu_info(sseu, ss_en, eu_en); @@ -340,10 +336,8 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) if (!(fuse & CHV_FGT_DISABLE_SS0)) { u8 disabled_mask = - ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> - CHV_FGT_EU_DIS_SS0_R0_SHIFT) | - (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> - CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); + REG_FIELD_GET(CHV_FGT_EU_DIS_SS0_R0_MASK, fuse) | + REG_FIELD_GET(CHV_FGT_EU_DIS_SS0_R1_MASK, fuse) << hweight32(CHV_FGT_EU_DIS_SS0_R0_MASK); sseu->subslice_mask.hsw[0] |= BIT(0); sseu_set_eus(sseu, 0, 0, ~disabled_mask & 0xFF); @@ -351,10 +345,8 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) if (!(fuse & CHV_FGT_DISABLE_SS1)) { u8 disabled_mask = - ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> - CHV_FGT_EU_DIS_SS1_R0_SHIFT) | - (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> - CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); + REG_FIELD_GET(CHV_FGT_EU_DIS_SS1_R0_MASK, fuse) | + REG_FIELD_GET(CHV_FGT_EU_DIS_SS1_R1_MASK, fuse) << hweight32(CHV_FGT_EU_DIS_SS1_R0_MASK); sseu->subslice_mask.hsw[0] |= BIT(1); sseu_set_eus(sseu, 0, 1, ~disabled_mask & 0xFF); @@ -390,7 +382,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt) int s, ss; fuse2 = intel_uncore_read(uncore, GEN8_FUSE2); - sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + sseu->slice_mask = REG_FIELD_GET(GEN8_F2_S_ENA_MASK, fuse2); /* BXT has a single slice and at most 3 subslices. */ intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3, @@ -401,8 +393,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt) * to each of the enabled slices. */ subslice_mask = (1 << sseu->max_subslices) - 1; - subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> - GEN9_F2_SS_DIS_SHIFT); + subslice_mask &= ~REG_FIELD_GET(GEN9_F2_SS_DIS_MASK, fuse2); /* * Iterate through enabled slices and subslices to @@ -495,7 +486,7 @@ static void bdw_sseu_info_init(struct intel_gt *gt) u32 eu_disable0, eu_disable1, eu_disable2; fuse2 = intel_uncore_read(uncore, GEN8_FUSE2); - sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + sseu->slice_mask = REG_FIELD_GET(GEN8_F2_S_ENA_MASK, fuse2); intel_sseu_set_info(sseu, 3, 3, 8); /* @@ -503,18 +494,18 @@ static void bdw_sseu_info_init(struct intel_gt *gt) * to each of the enabled slices. */ subslice_mask = GENMASK(sseu->max_subslices - 1, 0); - subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> - GEN8_F2_SS_DIS_SHIFT); + subslice_mask &= ~REG_FIELD_GET(GEN8_F2_SS_DIS_MASK, fuse2); eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0); eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1); eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2); - eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK; - eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) | - ((eu_disable1 & GEN8_EU_DIS1_S1_MASK) << - (32 - GEN8_EU_DIS0_S1_SHIFT)); - eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) | - ((eu_disable2 & GEN8_EU_DIS2_S2_MASK) << - (32 - GEN8_EU_DIS1_S2_SHIFT)); + eu_disable[0] = + REG_FIELD_GET(GEN8_EU_DIS0_S0_MASK, eu_disable0); + eu_disable[1] = + REG_FIELD_GET(GEN8_EU_DIS0_S1_MASK, eu_disable0) | + REG_FIELD_GET(GEN8_EU_DIS1_S1_MASK, eu_disable1) << hweight32(GEN8_EU_DIS0_S1_MASK); + eu_disable[2] = + REG_FIELD_GET(GEN8_EU_DIS1_S2_MASK, eu_disable1) | + REG_FIELD_GET(GEN8_EU_DIS2_S2_MASK, eu_disable2) << hweight32(GEN8_EU_DIS1_S2_MASK); /* * Iterate through enabled slices and subslices to @@ -642,7 +633,7 @@ void intel_sseu_info_init(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) xehp_sseu_info_init(gt); else if (GRAPHICS_VER(i915) >= 12) gen12_sseu_info_init(gt); @@ -692,7 +683,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt, * According to documentation software must consider the configuration * as 2x4x8 and hardware will translate this to 1x8x8. * - * Furthemore, even though SScount is three bits, maximum documented + * Furthermore, even though SScount is three bits, maximum documented * value for it is four. From this some rules/restrictions follow: * * 1. @@ -851,7 +842,7 @@ void intel_sseu_print_topology(struct drm_i915_private *i915, { if (sseu->max_slices == 0) drm_printf(p, "Unavailable\n"); - else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) sseu_print_xehp_topology(sseu, p); else sseu_print_hsw_topology(sseu, p); diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c index 4bb13d1890e3..2487768bc230 100644 --- a/drivers/gpu/drm/i915/gt/intel_tlb.c +++ b/drivers/gpu/drm/i915/gt/intel_tlb.c @@ -122,7 +122,7 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno) { intel_wakeref_t wakeref; - if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + if (is_mock_gt(gt)) return; if (intel_gt_is_wedged(gt)) @@ -132,7 +132,7 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno) return; with_intel_gt_pm_if_awake(gt, wakeref) { - struct intel_guc *guc = >->uc.guc; + struct intel_guc *guc = gt_to_guc(gt); mutex_lock(>->tlb.invalidate_lock); if (tlb_seqno_passed(gt, seqno)) diff --git a/drivers/gpu/drm/i915/gt/intel_wopcm.h b/drivers/gpu/drm/i915/gt/intel_wopcm.h index 17d6aa86008a..d2038b6de5e7 100644 --- a/drivers/gpu/drm/i915/gt/intel_wopcm.h +++ b/drivers/gpu/drm/i915/gt/intel_wopcm.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2017-2018 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index d67d44611c28..b37e400f74e5 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -10,12 +10,15 @@ #include "intel_engine_regs.h" #include "intel_gpu_commands.h" #include "intel_gt.h" +#include "intel_gt_ccs_mode.h" #include "intel_gt_mcr.h" #include "intel_gt_print.h" #include "intel_gt_regs.h" #include "intel_ring.h" #include "intel_workarounds.h" +#include "display/intel_fbc_regs.h" + /** * DOC: Hardware workarounds * @@ -51,7 +54,8 @@ * registers belonging to BCS, VCS or VECS should be implemented in * xcs_engine_wa_init(). Workarounds for registers not belonging to a specific * engine's MMIO range but that are part of of the common RCS/CCS reset domain - * should be implemented in general_render_compute_wa_init(). + * should be implemented in general_render_compute_wa_init(). The settings + * about the CCS load balancing should be added in ccs_engine_wa_mode(). * * - GT workarounds: the list of these WAs is applied whenever these registers * revert to their default values: on GPU reset, suspend/resume [1]_, etc. @@ -107,9 +111,8 @@ static void wa_init_finish(struct i915_wa_list *wal) { /* Trim unused entries. */ if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) { - struct i915_wa *list = kmemdup(wal->list, - wal->count * sizeof(*list), - GFP_KERNEL); + struct i915_wa *list = kmemdup_array(wal->list, wal->count, + sizeof(*list), GFP_KERNEL); if (list) { kfree(wal->list); @@ -153,7 +156,7 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */ struct i915_wa *list; - list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa), + list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*list), GFP_KERNEL); if (!list) { drm_err(&i915->drm, "No space for workaround init!\n"); @@ -258,12 +261,6 @@ wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set) } static void -wa_mcr_write(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set) -{ - wa_mcr_write_clr_set(wal, reg, ~0, set); -} - -static void wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) { wa_write_clr_set(wal, reg, set, set); @@ -421,7 +418,7 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, /* WaForceContextSaveRestoreNonCoherent:bdw */ HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ - (IS_BROADWELL_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); + (INTEL_INFO(i915)->gt == 3 ? HDC_FENCE_DEST_SLM_DISABLE : 0)); } static void chv_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -694,16 +691,17 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, struct drm_i915_private *i915 = engine->i915; /* - * Wa_1409142259:tgl,dg1,adl-p + * Wa_1409142259:tgl,dg1,adl-p,adl-n * Wa_1409347922:tgl,dg1,adl-p * Wa_1409252684:tgl,dg1,adl-p * Wa_1409217633:tgl,dg1,adl-p * Wa_1409207793:tgl,dg1,adl-p - * Wa_1409178076:tgl,dg1,adl-p - * Wa_1408979724:tgl,dg1,adl-p - * Wa_14010443199:tgl,rkl,dg1,adl-p - * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p - * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p + * Wa_1409178076:tgl,dg1,adl-p,adl-n + * Wa_1408979724:tgl,dg1,adl-p,adl-n + * Wa_14010443199:tgl,rkl,dg1,adl-p,adl-n + * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p,adl-n + * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p,adl-n + * Wa_22010465259:tgl,rkl,dg1,adl-s,adl-p,adl-n */ wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); @@ -744,6 +742,12 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_1606376872 */ wa_masked_en(wal, COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC); } + + /* + * This bit must be set to enable performance optimization for fast + * clears. + */ + wa_mcr_write_or(wal, GEN8_WM_CHICKEN2, WAIT_ON_DEPTH_STALL_DONE_DISABLE); } static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -918,12 +922,8 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) xelpg_ctx_workarounds_init(engine, wal); - else if (IS_PONTEVECCHIO(i915)) - ; /* noop; none at this time */ else if (IS_DG2(i915)) dg2_ctx_workarounds_init(engine, wal); - else if (IS_XEHPSDV(i915)) - ; /* noop; none at this time */ else if (IS_DG1(i915)) dg1_ctx_workarounds_init(engine, wal); else if (GRAPHICS_VER(i915) == 12) @@ -980,7 +980,12 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) if (ret) return ret; - cs = intel_ring_begin(rq, (wal->count * 2 + 2)); + if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || + IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS) + cs = intel_ring_begin(rq, (wal->count * 2 + 6)); + else + cs = intel_ring_begin(rq, (wal->count * 2 + 2)); + if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1010,6 +1015,15 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) } *cs++ = MI_NOOP; + /* Wa_14019789679 */ + if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || + IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS) { + *cs++ = CMD_3DSTATE_MESH_CONTROL; + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + } + intel_uncore_forcewake_put__locked(uncore, fw); spin_unlock(&uncore->lock); intel_gt_mcr_unlock(wal->gt, flags); @@ -1311,7 +1325,7 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) * We'll do our default/implicit steering based on GSLICE (in the * sliceid field) and DSS (in the subsliceid field). If we can * find overlap between the valid MSLICE and/or LNCF values with - * a suitable GSLICE, then we can just re-use the default value and + * a suitable GSLICE, then we can just reuse the default value and * skip and explicit steering at runtime. * * We only need to look for overlap between GSLICE/MSLICE/LNCF to find @@ -1350,9 +1364,6 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) gt->steering_table[MSLICE] = NULL; } - if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0)) - gt->steering_table[GAM] = NULL; - slice = __ffs(slice_mask); subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) % GEN_DSS_PER_GSLICE; @@ -1380,20 +1391,6 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) } static void -pvc_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) -{ - unsigned int dss; - - /* - * Setup implicit steering for COMPUTE and DSS ranges to the first - * non-fused-off DSS. All other types of MCR registers will be - * explicitly steered. - */ - dss = intel_sseu_find_first_xehp_dss(>->info.sseu, 0, 0); - __add_mcr_wa(gt, wal, dss / GEN_DSS_PER_CSLICE, dss % GEN_DSS_PER_CSLICE); -} - -static void icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; @@ -1520,76 +1517,6 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) } static void -xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) -{ - struct drm_i915_private *i915 = gt->i915; - - xehp_init_mcr(gt, wal); - - /* Wa_1409757795:xehpsdv */ - wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB); - - /* Wa_18011725039:xehpsdv */ - if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) { - wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER); - wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); - } - - /* Wa_16011155590:xehpsdv */ - if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) - wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, - TSGUNIT_CLKGATE_DIS); - - /* Wa_14011780169:xehpsdv */ - if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) { - wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | - GAMTLBVDBOX7_CLKGATE_DIS | - GAMTLBVDBOX6_CLKGATE_DIS | - GAMTLBVDBOX5_CLKGATE_DIS | - GAMTLBVDBOX4_CLKGATE_DIS | - GAMTLBVDBOX3_CLKGATE_DIS | - GAMTLBVDBOX2_CLKGATE_DIS | - GAMTLBVDBOX1_CLKGATE_DIS | - GAMTLBVDBOX0_CLKGATE_DIS | - GAMTLBKCR_CLKGATE_DIS | - GAMTLBGUC_CLKGATE_DIS | - GAMTLBBLT_CLKGATE_DIS); - wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS | - GAMTLBGFXA1_CLKGATE_DIS | - GAMTLBCOMPA0_CLKGATE_DIS | - GAMTLBCOMPA1_CLKGATE_DIS | - GAMTLBCOMPB0_CLKGATE_DIS | - GAMTLBCOMPB1_CLKGATE_DIS | - GAMTLBCOMPC0_CLKGATE_DIS | - GAMTLBCOMPC1_CLKGATE_DIS | - GAMTLBCOMPD0_CLKGATE_DIS | - GAMTLBCOMPD1_CLKGATE_DIS | - GAMTLBMERT_CLKGATE_DIS | - GAMTLBVEBOX3_CLKGATE_DIS | - GAMTLBVEBOX2_CLKGATE_DIS | - GAMTLBVEBOX1_CLKGATE_DIS | - GAMTLBVEBOX0_CLKGATE_DIS); - } - - /* Wa_16012725990:xehpsdv */ - if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER)) - wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS); - - /* Wa_14011060649:xehpsdv */ - wa_14011060649(gt, wal); - - /* Wa_14012362059:xehpsdv */ - wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); - - /* Wa_14014368820:xehpsdv */ - wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL, - INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE); - - /* Wa_14010670810:xehpsdv */ - wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); -} - -static void dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { xehp_init_mcr(gt, wal); @@ -1632,27 +1559,10 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) } static void -pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) -{ - pvc_init_mcr(gt, wal); - - /* Wa_14015795083 */ - wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); - - /* Wa_18018781329 */ - wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); - wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); - wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB); - wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB); - - /* Wa_16016694945 */ - wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC); -} - -static void xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { /* Wa_14018575942 / Wa_18018781329 */ + wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); /* Wa_22016670082 */ @@ -1700,6 +1610,14 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB); + /* + * Wa_14018575942 + * + * Issue is seen on media KPI test running on VDBOX engine + * especially VP9 encoding WLs + */ + wa_write_or(wal, XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB); + /* Wa_22016670082 */ wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE); @@ -1724,12 +1642,6 @@ static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); } - if (IS_PONTEVECCHIO(gt->i915)) { - wa_mcr_write(wal, XEHPC_L3SCRUB, - SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); - wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN); - } - if (IS_DG2(gt->i915)) { wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); @@ -1754,12 +1666,8 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) xelpg_gt_workarounds_init(gt, wal); - else if (IS_PONTEVECCHIO(i915)) - pvc_gt_workarounds_init(gt, wal); else if (IS_DG2(i915)) dg2_gt_workarounds_init(gt, wal); - else if (IS_XEHPSDV(i915)) - xehpsdv_gt_workarounds_init(gt, wal); else if (IS_DG1(i915)) dg1_gt_workarounds_init(gt, wal); else if (GRAPHICS_VER(i915) == 12) @@ -2170,37 +2078,13 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine) case RENDER_CLASS: /* Required by recommended tuning setting (not a workaround) */ whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3); - + whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1); break; default: break; } } -static void blacklist_trtt(struct intel_engine_cs *engine) -{ - struct i915_wa_list *w = &engine->whitelist; - - /* - * Prevent read/write access to [0x4400, 0x4600) which covers - * the TRTT range across all engines. Note that normally userspace - * cannot access the other engines' trtt control, but for simplicity - * we cover the entire range on each engine. - */ - whitelist_reg_ext(w, _MMIO(0x4400), - RING_FORCE_TO_NONPRIV_DENY | - RING_FORCE_TO_NONPRIV_RANGE_64); - whitelist_reg_ext(w, _MMIO(0x4500), - RING_FORCE_TO_NONPRIV_DENY | - RING_FORCE_TO_NONPRIV_RANGE_64); -} - -static void pvc_whitelist_build(struct intel_engine_cs *engine) -{ - /* Wa_16014440446:pvc */ - blacklist_trtt(engine); -} - static void xelpg_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; @@ -2209,7 +2093,7 @@ static void xelpg_whitelist_build(struct intel_engine_cs *engine) case RENDER_CLASS: /* Required by recommended tuning setting (not a workaround) */ whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3); - + whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1); break; default: break; @@ -2227,12 +2111,8 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) ; /* none yet */ else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) xelpg_whitelist_build(engine); - else if (IS_PONTEVECCHIO(i915)) - pvc_whitelist_build(engine); else if (IS_DG2(i915)) dg2_whitelist_build(engine); - else if (IS_XEHPSDV(i915)) - ; /* none needed */ else if (GRAPHICS_VER(i915) == 12) tgl_whitelist_build(engine); else if (GRAPHICS_VER(i915) == 11) @@ -2426,6 +2306,15 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_RC_SEMA_IDLE_MSG_DISABLE); } + if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) { + /* + * "Disable Repacking for Compression (masked R/W access) + * before rendering compressed surfaces for display." + */ + wa_masked_en(wal, CACHE_MODE_0_GEN7, + DISABLE_REPACKING_FOR_COMPRESSION); + } + if (GRAPHICS_VER(i915) == 11) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, @@ -2664,7 +2553,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN7_FF_DS_SCHED_HW); /* WaDisablePSDDualDispatchEnable:ivb */ - if (IS_IVB_GT1(i915)) + if (INTEL_INFO(i915)->gt == 1) wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); @@ -2813,10 +2702,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) static void ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) { - /* Wa_14014999345:pvc */ - wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC); - } + /* boilerplate for any CCS engine workaround */ } /* @@ -2849,10 +2735,34 @@ add_render_compute_tuning_settings(struct intel_gt *gt, wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE, THREAD_EX_ARB_MODE_RR_AFTER_DEP); - if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 55)) wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC); } +static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + struct intel_gt *gt = engine->gt; + u32 mode; + + if (!IS_DG2(gt->i915)) + return; + + /* + * Wa_14019159160: This workaround, along with others, leads to + * significant challenges in utilizing load balancing among the + * CCS slices. Consequently, an architectural decision has been + * made to completely disable automatic CCS load balancing. + */ + wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE); + + /* + * After having disabled automatic load balancing we need to + * assign all slices to a single CCS. We will call it CCS mode 1 + */ + mode = intel_gt_apply_ccs_mode(gt); + wa_masked_en(wal, XEHP_CCS_MODE, mode); +} + /* * The workarounds in this function apply to shared registers in * the general render reset domain that aren't tied to a @@ -2891,10 +2801,14 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) || - IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74))) + IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74))) { /* Wa_14017856879 */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH); + /* Wa_14020495402 */ + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, XELPG_DISABLE_TDL_SVHS_GATING); + } + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) /* @@ -2922,21 +2836,15 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || - IS_PONTEVECCHIO(i915) || IS_DG2(i915)) { /* Wa_22014226127 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); } - if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) { + if (IS_DG2(i915)) { /* Wa_14015227452:dg2,pvc */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); - /* Wa_16015675438:dg2,pvc */ - wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); - } - - if (IS_DG2(i915)) { /* * Wa_16011620976:dg2_g11 * Wa_22015475538:dg2 @@ -2972,22 +2880,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li 0 /* write-only, so skip validation */, true); } - - if (IS_XEHPSDV(i915)) { - /* Wa_1409954639 */ - wa_mcr_masked_en(wal, - GEN8_ROW_CHICKEN, - SYSTOLIC_DOP_CLOCK_GATING_DIS); - - /* Wa_1607196519 */ - wa_mcr_masked_en(wal, - GEN9_ROW_CHICKEN4, - GEN12_DISABLE_GRF_CLEAR); - - /* Wa_14010449647:xehpsdv */ - wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, - GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); - } } static void @@ -3003,8 +2895,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal * to a single RCS/CCS engine's workaround list since * they're reset as part of the general render domain reset. */ - if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) + if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) { general_render_compute_wa_init(engine, wal); + ccs_engine_wa_mode(engine, wal); + } if (engine->class == COMPUTE_CLASS) ccs_engine_wa_init(engine, wal); @@ -3068,7 +2962,7 @@ static bool mcr_range(struct drm_i915_private *i915, u32 offset) const struct i915_range *mcr_ranges; int i; - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) mcr_ranges = mcr_ranges_xehp; else if (GRAPHICS_VER(i915) >= 12) mcr_ranges = mcr_ranges_gen12; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index c0637bf799a3..79741f043f03 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -108,7 +108,7 @@ static void advance(struct i915_request *request) static void hw_delay_complete(struct timer_list *t) { - struct mock_engine *engine = from_timer(engine, t, hw_delay); + struct mock_engine *engine = timer_container_of(engine, t, hw_delay); struct i915_request *request; unsigned long flags; @@ -297,7 +297,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine) struct i915_request *rq; unsigned long flags; - del_timer_sync(&mock->hw_delay); + timer_delete_sync(&mock->hw_delay); spin_lock_irqsave(&engine->sched_engine->lock, flags); @@ -432,7 +432,7 @@ void mock_engine_flush(struct intel_engine_cs *engine) container_of(engine, typeof(*mock), base); struct i915_request *request, *rn; - del_timer_sync(&mock->hw_delay); + timer_delete_sync(&mock->hw_delay); spin_lock_irq(&mock->hw_lock); list_for_each_entry_safe(request, rn, &mock->hw_queue, mock.link) diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 12eca750f7d0..5eb46700dc4e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -286,7 +286,7 @@ out_engine: if (intel_engine_pm_is_awake(engine)) { struct drm_printer p = drm_dbg_printer(&engine->i915->drm, - DRM_UT_DRIVER, __func__); + DRM_UT_DRIVER, NULL); intel_engine_dump(engine, &p, "%s is still awake:%d after idle-barriers\n", diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index ef014df4c4fc..9e4f0e417b3b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -193,115 +193,6 @@ static int live_idle_pulse(void *arg) return err; } -static int cmp_u32(const void *_a, const void *_b) -{ - const u32 *a = _a, *b = _b; - - return *a - *b; -} - -static int __live_heartbeat_fast(struct intel_engine_cs *engine) -{ - const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6)); - struct intel_context *ce; - struct i915_request *rq; - ktime_t t0, t1; - u32 times[5]; - int err; - int i; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - intel_engine_pm_get(engine); - - err = intel_engine_set_heartbeat(engine, 1); - if (err) - goto err_pm; - - for (i = 0; i < ARRAY_SIZE(times); i++) { - do { - /* Manufacture a tick */ - intel_engine_park_heartbeat(engine); - GEM_BUG_ON(engine->heartbeat.systole); - engine->serial++; /* pretend we are not idle! */ - intel_engine_unpark_heartbeat(engine); - - flush_delayed_work(&engine->heartbeat.work); - if (!delayed_work_pending(&engine->heartbeat.work)) { - pr_err("%s: heartbeat %d did not start\n", - engine->name, i); - err = -EINVAL; - goto err_pm; - } - - rcu_read_lock(); - rq = READ_ONCE(engine->heartbeat.systole); - if (rq) - rq = i915_request_get_rcu(rq); - rcu_read_unlock(); - } while (!rq); - - t0 = ktime_get(); - while (rq == READ_ONCE(engine->heartbeat.systole)) - yield(); /* work is on the local cpu! */ - t1 = ktime_get(); - - i915_request_put(rq); - times[i] = ktime_us_delta(t1, t0); - } - - sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL); - - pr_info("%s: Heartbeat delay: %uus [%u, %u]\n", - engine->name, - times[ARRAY_SIZE(times) / 2], - times[0], - times[ARRAY_SIZE(times) - 1]); - - /* - * Ideally, the upper bound on min work delay would be something like - * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we - * are, even with system_wq_highpri, at the mercy of the CPU scheduler - * and may be stuck behind some slow work for many millisecond. Such - * as our very own display workers. - */ - if (times[ARRAY_SIZE(times) / 2] > error_threshold) { - pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n", - engine->name, - times[ARRAY_SIZE(times) / 2], - error_threshold); - err = -EINVAL; - } - - reset_heartbeat(engine); -err_pm: - intel_engine_pm_put(engine); - intel_context_put(ce); - return err; -} - -static int live_heartbeat_fast(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - /* Check that the heartbeat ticks at the desired rate. */ - if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) - return 0; - - for_each_engine(engine, gt, id) { - err = __live_heartbeat_fast(engine); - if (err) - break; - } - - return err; -} - static int __live_heartbeat_off(struct intel_engine_cs *engine) { int err; @@ -372,7 +263,6 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(live_idle_flush), SUBTEST(live_idle_pulse), - SUBTEST(live_heartbeat_fast), SUBTEST(live_heartbeat_off), }; int saved_hangcheck; diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 4202df5b8c12..0454eb1814bb 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -53,7 +53,7 @@ static int wait_for_submit(struct intel_engine_cs *engine, if (i915_request_completed(rq)) /* that was quick! */ return 0; - /* Wait until the HW has acknowleged the submission (or err) */ + /* Wait until the HW has acknowledged the submission (or err) */ intel_engine_flush_submission(engine); if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq)) return 0; @@ -93,7 +93,7 @@ static int wait_for_reset(struct intel_engine_cs *engine, return -EINVAL; } - /* Give the request a jiffie to complete after flushing the worker */ + /* Give the request a jiffy to complete after flushing the worker */ if (i915_request_wait(rq, 0, max(0l, (long)(timeout - jiffies)) + 1) < 0) { pr_err("%s: hanging request %llx:%lld did not complete\n", @@ -1198,7 +1198,7 @@ static int live_timeslice_rewind(void *arg) ENGINE_TRACE(engine, "forcing tasklet for rewind\n"); while (i915_request_is_active(rq[A2])) { /* semaphore yield! */ /* Wait for the timeslice to kick in */ - del_timer(&engine->execlists.timer); + timer_delete(&engine->execlists.timer); tasklet_hi_schedule(&engine->sched_engine->tasklet); intel_engine_flush_submission(engine); } @@ -2357,7 +2357,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg) /* force preempt reset [failure] */ while (!engine->execlists.pending[0]) intel_engine_flush_submission(engine); - del_timer_sync(&engine->execlists.preempt); + timer_delete_sync(&engine->execlists.preempt); intel_engine_flush_submission(engine); cancel_reset_timeout(engine); @@ -3426,7 +3426,7 @@ static int live_preempt_timeout(void *arg) cpu_relax(); saved_timeout = engine->props.preempt_timeout_ms; - engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ + engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */ i915_request_get(rq); i915_request_add(rq); @@ -3574,7 +3574,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) arg[id].batch = NULL; arg[id].count = 0; - worker[id] = kthread_create_worker(0, "igt/smoke:%d", id); + worker[id] = kthread_run_worker(0, "igt/smoke:%d", id); if (IS_ERR(worker[id])) { err = PTR_ERR(worker[id]); break; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 0dd4d00ee894..f057c16410e7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -319,7 +319,7 @@ static int igt_hang_sanitycheck(void *arg) i915_request_add(rq); timeout = 0; - intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */) + intel_wedge_on_timeout(&w, gt, HZ / 5 /* 200ms */) timeout = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); if (intel_gt_is_wedged(gt)) @@ -548,7 +548,7 @@ static int igt_reset_fail_engine(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; - /* Check that we can recover from engine-reset failues */ + /* Check that we can recover from engine-reset failures */ if (!intel_has_reset_engine(gt)) return 0; @@ -1025,7 +1025,7 @@ static int __igt_reset_engines(struct intel_gt *gt, threads[tmp].engine = other; threads[tmp].flags = flags; - worker = kthread_create_worker(0, "igt/%s", + worker = kthread_run_worker(0, "igt/%s", other->name); if (IS_ERR(worker)) { err = PTR_ERR(worker); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index e17b8777d21d..23f04f6f8fba 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -7,6 +7,7 @@ #include "gem/i915_gem_internal.h" +#include "i915_drv.h" #include "i915_selftest.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" @@ -63,7 +64,7 @@ static int wait_for_submit(struct intel_engine_cs *engine, if (i915_request_completed(rq)) /* that was quick! */ return 0; - /* Wait until the HW has acknowleged the submission (or err) */ + /* Wait until the HW has acknowledged the submission (or err) */ intel_engine_flush_submission(engine); if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq)) return 0; @@ -859,6 +860,14 @@ static int live_lrc_timestamp(void *arg) }; /* + * This test was designed to isolate a hardware bug. + * The bug was found and fixed in future generations but + * now the test pollutes our CI on previous generation. + */ + if (GRAPHICS_VER(gt->i915) == 12) + return 0; + + /* * We want to verify that the timestamp is saved and restore across * context switches and is monotonic. * diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 3eff364ccf3a..54bc447efce0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -262,7 +262,7 @@ static int clear(struct intel_migrate *migrate, { struct drm_i915_private *i915 = migrate->context->engine->i915; struct drm_i915_gem_object *obj; - struct i915_request *rq; + struct i915_request *rq = NULL; struct i915_gem_ww_ctx ww; u32 *vaddr, val = 0; bool ccs_cap = false; @@ -336,7 +336,7 @@ static int clear(struct intel_migrate *migrate, if (vaddr[x] != val) { pr_err("%ps failed, (%u != %u), offset: %zu\n", - fn, vaddr[x], val, x * sizeof(u32)); + fn, vaddr[x], val, x * sizeof(u32)); igt_hexdump(vaddr + i * 1024, 4096); err = -EINVAL; } @@ -537,7 +537,7 @@ struct spinner_timer { static void spinner_kill(struct timer_list *timer) { - struct spinner_timer *st = from_timer(st, timer, timer); + struct spinner_timer *st = timer_container_of(st, timer, timer); igt_spinner_end(&st->spin); pr_info("%s\n", __func__); @@ -660,8 +660,8 @@ static int live_emit_pte_full_ring(void *arg) out_rq: i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */ - del_timer_sync(&st.timer); - destroy_timer_on_stack(&st.timer); + timer_delete_sync(&st.timer); + timer_destroy_on_stack(&st.timer); out_unpin: intel_context_unpin(ce); out_put: diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 1aa1446c8fb0..41716ed454b7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -8,6 +8,7 @@ #include "intel_gpu_commands.h" #include "intel_gt_requests.h" #include "intel_ring.h" +#include "intel_rps.h" #include "selftest_rc6.h" #include "selftests/i915_random.h" @@ -32,12 +33,22 @@ int live_rc6_manual(void *arg) { struct intel_gt *gt = arg; struct intel_rc6 *rc6 = >->rc6; - u64 rc0_power, rc6_power; + struct intel_rps *rps = >->rps; intel_wakeref_t wakeref; + u64 rc0_sample_energy[2]; + u64 rc6_sample_energy[2]; + u64 sleep_time = 1000; + u32 rc0_freq = 0; + u32 rc6_freq = 0; + u64 rc0_power; + u64 rc6_power; bool has_power; + u64 threshold; ktime_t dt; u64 res[2]; int err = 0; + u64 diff; + /* * Our claim is that we can "encourage" the GPU to enter rc6 at will. @@ -56,16 +67,18 @@ int live_rc6_manual(void *arg) /* Force RC6 off for starters */ __intel_rc6_disable(rc6); - msleep(1); /* wakeup is not immediate, takes about 100us on icl */ + /* wakeup is not immediate, takes about 100us on icl */ + usleep_range(1000, 2000); res[0] = rc6_residency(rc6); dt = ktime_get(); - rc0_power = librapl_energy_uJ(); - msleep(1000); - rc0_power = librapl_energy_uJ() - rc0_power; + rc0_sample_energy[0] = librapl_energy_uJ(); + msleep(sleep_time); + rc0_sample_energy[1] = librapl_energy_uJ() - rc0_sample_energy[0]; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); + rc0_freq = intel_rps_read_actual_frequency_fw(rps); if ((res[1] - res[0]) >> 10) { pr_err("RC6 residency increased by %lldus while disabled for 1000ms!\n", (res[1] - res[0]) >> 10); @@ -74,10 +87,15 @@ int live_rc6_manual(void *arg) } if (has_power) { - rc0_power = div64_u64(NSEC_PER_SEC * rc0_power, + rc0_power = div64_u64(NSEC_PER_SEC * rc0_sample_energy[1], ktime_to_ns(dt)); + if (!rc0_power) { - pr_err("No power measured while in RC0\n"); + if (rc0_freq) + pr_debug("No power measured while in RC0! GPU Freq: %uMHz in RC0\n", + rc0_freq); + else + pr_err("No power and freq measured while in RC0\n"); err = -EINVAL; goto out_unlock; } @@ -89,9 +107,10 @@ int live_rc6_manual(void *arg) res[0] = rc6_residency(rc6); intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL); dt = ktime_get(); - rc6_power = librapl_energy_uJ(); - msleep(100); - rc6_power = librapl_energy_uJ() - rc6_power; + rc6_sample_energy[0] = librapl_energy_uJ(); + msleep(sleep_time); + rc6_freq = intel_rps_read_actual_frequency_fw(rps); + rc6_sample_energy[1] = librapl_energy_uJ() - rc6_sample_energy[0]; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); if (res[1] == res[0]) { @@ -103,12 +122,24 @@ int live_rc6_manual(void *arg) } if (has_power) { - rc6_power = div64_u64(NSEC_PER_SEC * rc6_power, + rc6_power = div64_u64(NSEC_PER_SEC * rc6_sample_energy[1], ktime_to_ns(dt)); - pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n", + pr_info("GPU consumed %lluuW in RC0 and %lluuW in RC6\n", rc0_power, rc6_power); + if (2 * rc6_power > rc0_power) { - pr_err("GPU leaked energy while in RC6!\n"); + pr_err("GPU leaked energy while in RC6!\n" + "GPU Freq: %uMHz in RC6 and %uMHz in RC0\n" + "RC0 energy before & after sleep respectively: %lluuJ %lluuJ\n" + "RC6 energy before & after sleep respectively: %lluuJ %lluuJ\n", + rc6_freq, rc0_freq, rc0_sample_energy[0], rc0_sample_energy[1], + rc6_sample_energy[0], rc6_sample_energy[1]); + + diff = res[1] - res[0]; + threshold = (9 * NSEC_PER_MSEC * sleep_time) / 10; + if (diff < threshold) + pr_err("Did not enter RC6 properly, RC6 start residency=%lluns, RC6 end residency=%lluns\n", + res[0], res[1]); err = -EINVAL; goto out_unlock; } @@ -211,7 +242,7 @@ int live_rc6_ctx_wa(void *arg) i915_reset_engine_count(error, engine); const u32 *res; - /* Use a sacrifical context */ + /* Use a sacrificial context */ ce = intel_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index f40de408cd3a..2cfc23c58e90 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -281,7 +281,7 @@ static int igt_atomic_reset(void *arg) awake = reset_prepare(gt); p->critical_section_begin(); - err = __intel_gt_reset(gt, ALL_ENGINES); + err = intel_gt_reset_all_engines(gt); p->critical_section_end(); reset_finish(gt, awake); diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index dcef8d498919..73bc91c6ea07 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -22,7 +22,7 @@ #include "selftests/igt_spinner.h" #include "selftests/librapl.h" -/* Try to isolate the impact of cstates from determing frequency response */ +/* Try to isolate the impact of cstates from determining frequency response */ #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */ static void dummy_rps_work(struct work_struct *wrk) @@ -477,12 +477,13 @@ int live_rps_control(void *arg) limit, intel_gpu_freq(rps, limit), min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt)); - if (limit == rps->min_freq) { - pr_err("%s: GPU throttled to minimum!\n", - engine->name); + if (limit != rps->max_freq) { + u32 throttle = intel_uncore_read(gt->uncore, + intel_gt_perf_limit_reasons_reg(gt)); + + pr_warn("%s: GPU throttled with reasons 0x%08x\n", + engine->name, throttle & GT0_PERF_LIMIT_REASONS_MASK); show_pstate_limits(rps); - err = -ENODEV; - break; } if (igt_flush_test(gt->i915)) { @@ -1115,7 +1116,7 @@ static u64 measure_power(struct intel_rps *rps, int *freq) for (i = 0; i < 5; i++) x[i] = __measure_power(5); - *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2; + *freq = (*freq + read_cagf(rps)) / 2; /* A simple triangle filter for better result stability */ sort(x, 5, sizeof(*x), cmp_u64, NULL); @@ -1125,6 +1126,7 @@ static u64 measure_power(struct intel_rps *rps, int *freq) static u64 measure_power_at(struct intel_rps *rps, int *freq) { *freq = rps_set_check(rps, *freq); + msleep(100); return measure_power(rps, freq); } diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index 302d0540295d..e61bb0bad12c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -53,7 +53,7 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq) static int slpc_set_freq(struct intel_gt *gt, u32 freq) { int err; - struct intel_guc_slpc *slpc = >->uc.guc.slpc; + struct intel_guc_slpc *slpc = >_to_guc(gt)->slpc; err = slpc_set_max_freq(slpc, freq); if (err) { @@ -95,6 +95,21 @@ static int slpc_restore_freq(struct intel_guc_slpc *slpc, u32 min, u32 max) return 0; } +static u64 slpc_measure_power(struct intel_rps *rps, int *freq) +{ + u64 x[5]; + int i; + + for (i = 0; i < 5; i++) + x[i] = __measure_power(5); + + *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2; + + /* A simple triangle filter for better result stability */ + sort(x, 5, sizeof(*x), cmp_u64, NULL); + return div_u64(x[1] + 2 * x[2] + x[3], 4); +} + static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power) { int err = 0; @@ -103,7 +118,7 @@ static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power) if (err) return err; *freq = intel_rps_read_actual_frequency(>->rps); - *power = measure_power(>->rps, freq); + *power = slpc_measure_power(>->rps, freq); return err; } @@ -182,7 +197,7 @@ static int vary_min_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, static int slpc_power(struct intel_gt *gt, struct intel_engine_cs *engine) { - struct intel_guc_slpc *slpc = >->uc.guc.slpc; + struct intel_guc_slpc *slpc = >_to_guc(gt)->slpc; struct { u64 power; int freq; @@ -262,7 +277,7 @@ static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, static int run_test(struct intel_gt *gt, int test_type) { - struct intel_guc_slpc *slpc = >->uc.guc.slpc; + struct intel_guc_slpc *slpc = >_to_guc(gt)->slpc; struct intel_rps *rps = >->rps; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -489,7 +504,7 @@ static int live_slpc_tile_interaction(void *arg) return -ENOMEM; for_each_gt(gt, i915, i) { - threads[i].worker = kthread_create_worker(0, "igt/slpc_parallel:%d", gt->info.id); + threads[i].worker = kthread_run_worker(0, "igt/slpc_parallel:%d", gt->info.id); if (IS_ERR(threads[i].worker)) { ret = PTR_ERR(threads[i].worker); diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c index 3941f2d6fa47..69ed946a39e5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_tlb.c +++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c @@ -143,7 +143,7 @@ pte_tlbinv(struct intel_context *ce, if (ce->engine->class == OTHER_CLASS) msleep(200); else - msleep(10); + usleep_range(10000, 20000); if (va == vb) { if (!i915_request_completed(rq)) { diff --git a/drivers/gpu/drm/i915/gt/shaders/README b/drivers/gpu/drm/i915/gt/shaders/README index e7e96d7073c7..22f8dabed434 100644 --- a/drivers/gpu/drm/i915/gt/shaders/README +++ b/drivers/gpu/drm/i915/gt/shaders/README @@ -10,7 +10,7 @@ i915/gt/shaders/clear_kernel directory. The generated .c files should never be modified directly. Instead, any modification needs to be done on the on their respective ASM files and build instructions below -needes to be followed. +needs to be followed. Building ======== @@ -24,7 +24,7 @@ on building. Please make sure your Mesa tool is compiled with "-Dtools=intel" and "-Ddri-drivers=i965", and run this script from IGT source root directory" -The instructions bellow assume: +The instructions below assume: * IGT gpu tools source code is located on your home directory (~) as ~/igt * Mesa source code is located on your home directory (~) as ~/mesa and built under the ~/mesa/build directory @@ -43,4 +43,4 @@ igt $ ./scripts/generate_clear_kernel.sh -g ivb \ ~/igt/lib/i915/shaders/clear_kernel/hsw.asm ~ $ cd ~/igt igt $ ./scripts/generate_clear_kernel.sh -g hsw \ - -m ~/mesa/build/src/intel/tools/i965_asm
\ No newline at end of file + -m ~/mesa/build/src/intel/tools/i965_asm diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm index 5fdf384bb621..6c0c89daf96c 100644 --- a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm +++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm @@ -24,7 +24,7 @@ mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; * DW 1.4 - Rsvd (intended for context ID) * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count) - * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count) + * DW 1.7 - Rsvd MBZ (intended for Total Thread Count) * * Binding Table * diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm index 97c7ac9e3854..27c28e63d6cc 100644 --- a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm +++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm @@ -24,7 +24,7 @@ mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; * DW 1.4 - Rsvd (intended for context ID) * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count) - * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count) + * DW 1.7 - Rsvd MBZ (intended for Total Thread Count) * * Binding Table * diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index bccc3a1200bc..365c4b8b04f4 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/shmem_fs.h> +#include <linux/vmalloc.h> #include "i915_drv.h" #include "gem/i915_gem_object.h" @@ -39,7 +40,7 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj) if (i915_gem_object_is_shmem(obj)) { file = obj->base.filp; - atomic_long_inc(&file->f_count); + get_file(file); return file; } @@ -107,7 +108,7 @@ static int __shmem_rw(struct file *file, loff_t off, if (IS_ERR(page)) return PTR_ERR(page); - vaddr = kmap(page); + vaddr = kmap_local_page(page); if (write) { memcpy(vaddr + offset_in_page(off), ptr, this); set_page_dirty(page); @@ -115,7 +116,7 @@ static int __shmem_rw(struct file *file, loff_t off, memcpy(ptr, vaddr + offset_in_page(off), this); } mark_page_accessed(page); - kunmap(page); + kunmap_local(vaddr); put_page(page); len -= this; @@ -142,11 +143,11 @@ int shmem_read_to_iosys_map(struct file *file, loff_t off, if (IS_ERR(page)) return PTR_ERR(page); - vaddr = kmap(page); + vaddr = kmap_local_page(page); iosys_map_memcpy_to(map, map_off, vaddr + offset_in_page(off), this); mark_page_accessed(page); - kunmap(page); + kunmap_local(vaddr); put_page(page); len -= this; diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c index 021f51d9b456..aab2759067d2 100644 --- a/drivers/gpu/drm/i915/gt/sysfs_engines.c +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c @@ -530,9 +530,8 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915) err_object: kobject_put(kobj); err_engine: - dev_err(kdev, "Failed to add sysfs engine '%s'\n", - engine->name); - break; + dev_warn(kdev, "Failed to add sysfs engine '%s'\n", + engine->name); } } } diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h index 811add10c30d..6de87ae5669e 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h @@ -207,6 +207,32 @@ struct slpc_shared_data { u8 reserved_mode_definition[4096]; } __packed; +struct slpc_context_frequency_request { + u32 frequency_request:16; + u32 reserved:12; + u32 is_compute:1; + u32 ignore_busyness:1; + u32 is_minimum:1; + u32 is_predefined:1; +} __packed; + +#define SLPC_CTX_FREQ_REQ_IS_COMPUTE REG_BIT(28) + +struct slpc_optimized_strategies { + u32 compute:1; + u32 async_flip:1; + u32 media:1; + u32 vsync_flip:1; + u32 reserved:28; +} __packed; + +#define SLPC_OPTIMIZED_STRATEGY_COMPUTE REG_BIT(0) + +enum slpc_power_profiles { + SLPC_POWER_PROFILES_BASE = 0x0, + SLPC_POWER_PROFILES_POWER_SAVING = 0x1 +}; + /** * DOC: SLPC H2G MESSAGE FORMAT * diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h index dabeaf4f245f..00d6402333f8 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h @@ -36,6 +36,7 @@ enum intel_guc_load_status { INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START, INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73, INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74, + INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR = 0x75, INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END, INTEL_GUC_LOAD_STATUS_READY = 0xF0, diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h index 58012edd4eb0..0c709e6c15be 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h @@ -29,9 +29,9 @@ */ #define GUC_KLV_LEN_MIN 1u -#define GUC_KLV_0_KEY (0xffff << 16) -#define GUC_KLV_0_LEN (0xffff << 0) -#define GUC_KLV_n_VALUE (0xffffffff << 0) +#define GUC_KLV_0_KEY (0xffffu << 16) +#define GUC_KLV_0_LEN (0xffffu << 0) +#define GUC_KLV_n_VALUE (0xffffffffu << 0) /** * DOC: GuC Self Config KLVs @@ -101,4 +101,13 @@ enum { GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5, }; +/* + * Workaround keys: + */ +enum { + GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE = 0x9001, + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002, + GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE = 0x9006, +}; + #endif /* _ABI_GUC_KLVS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h index 1fc0c17b1230..803c0379d97d 100644 --- a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h @@ -81,7 +81,7 @@ struct guc_debug_capture_list { * * intel_guc_capture module uses these structures to maintain static * tables (per unique platform) that consists of lists of registers - * (offsets, names, flags,...) that are used at the ADS regisration + * (offsets, names, flags,...) that are used at the ADS registration * time as well as during runtime processing and reporting of error- * capture states generated by GuC just prior to engine reset events. */ @@ -200,7 +200,7 @@ struct intel_guc_state_capture { * dynamically allocate new nodes when receiving the G2H notification * because the event handlers for all G2H event-processing is called * by the ct processing worker queue and when that queue is being - * processed, there is no absoluate guarantee that we are not in the + * processed, there is no absolute guarantee that we are not in the * midst of a GT reset operation (which doesn't allow allocations). */ struct list_head cachelist; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index e2e42b3e0d5d..d550eb6edfb8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -80,6 +80,7 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s const struct intel_gsc_cpd_header_v2 *cpd_header = NULL; const struct intel_gsc_cpd_entry *cpd_entry = NULL; const struct intel_gsc_manifest_header *manifest; + struct intel_uc_fw_ver min_ver = { 0 }; size_t min_size = sizeof(*layout); int i; @@ -212,6 +213,50 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s } } + /* + * ARL SKUs require newer firmwares, but the blob is actually common + * across all MTL and ARL SKUs, so we need to do an explicit version check + * here rather than using a separate table entry. If a too old version + * is found, then just don't use GSC rather than aborting the driver load. + * Note that the major number in the GSC FW version is used to indicate + * the platform, so we expect it to always be 102 for MTL/ARL binaries. + */ + if (IS_ARROWLAKE_S(gt->i915)) + min_ver = (struct intel_uc_fw_ver){ 102, 0, 10, 1878 }; + else if (IS_ARROWLAKE_H(gt->i915) || IS_ARROWLAKE_U(gt->i915)) + min_ver = (struct intel_uc_fw_ver){ 102, 1, 15, 1926 }; + + if (IS_METEORLAKE(gt->i915) && gsc->release.major != 102) { + gt_info(gt, "Invalid GSC firmware for MTL/ARL, got %d.%d.%d.%d but need 102.x.x.x", + gsc->release.major, gsc->release.minor, + gsc->release.patch, gsc->release.build); + return -EINVAL; + } + + if (min_ver.major) { + bool too_old = false; + + if (gsc->release.minor < min_ver.minor) { + too_old = true; + } else if (gsc->release.minor == min_ver.minor) { + if (gsc->release.patch < min_ver.patch) { + too_old = true; + } else if (gsc->release.patch == min_ver.patch) { + if (gsc->release.build < min_ver.build) + too_old = true; + } + } + + if (too_old) { + gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least %d.%d.%d.%d", + gsc->release.major, gsc->release.minor, + gsc->release.patch, gsc->release.build, + min_ver.major, min_ver.minor, + min_ver.patch, min_ver.build); + return -EINVAL; + } + } + return 0; } @@ -298,7 +343,7 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc) memcpy_toio(gsc->local_vaddr, src, gsc->fw.size); memset_io(gsc->local_vaddr + gsc->fw.size, 0, gsc->local->size - gsc->fw.size); - intel_guc_write_barrier(>->uc.guc); + intel_guc_write_barrier(gt_to_guc(gt)); i915_gem_object_unpin_map(gsc->fw.obj); @@ -351,7 +396,7 @@ static int gsc_fw_query_compatibility_version(struct intel_gsc_uc *gsc) void *vaddr; int err; - err = intel_guc_allocate_and_map_vma(>->uc.guc, GSC_VER_PKT_SZ * 2, + err = intel_guc_allocate_and_map_vma(gt_to_guc(gt), GSC_VER_PKT_SZ * 2, &vma, &vaddr); if (err) { gt_err(gt, "failed to allocate vma for GSC version query\n"); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c index 40817ebcca71..d8edd7c054c8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c @@ -5,8 +5,8 @@ #include <linux/component.h> -#include <drm/i915_component.h> -#include <drm/i915_gsc_proxy_mei_interface.h> +#include <drm/intel/i915_component.h> +#include <drm/intel/i915_gsc_proxy_mei_interface.h> #include "gt/intel_gt.h" #include "gt/intel_gt_print.h" @@ -358,7 +358,8 @@ static int proxy_channel_alloc(struct intel_gsc_uc *gsc) void *vaddr; int err; - err = intel_guc_allocate_and_map_vma(>->uc.guc, GSC_PROXY_CHANNEL_SIZE, + err = intel_guc_allocate_and_map_vma(gt_to_guc(gt), + GSC_PROXY_CHANNEL_SIZE, &vma, &vaddr); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c index 453d855dd1de..3d3191deb0ab 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c @@ -302,7 +302,7 @@ void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc) { struct intel_gt *gt = gsc_uc_to_gt(gsc); - if (!intel_uc_fw_is_loadable(&gsc->fw)) + if (!intel_uc_fw_is_loadable(&gsc->fw) || intel_uc_fw_is_in_error(&gsc->fw)) return; if (intel_gsc_uc_fw_init_done(gsc)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 2b450c43bbd7..9df80c325fc1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -239,8 +239,16 @@ static u32 guc_ctl_debug_flags(struct intel_guc *guc) static u32 guc_ctl_feature_flags(struct intel_guc *guc) { + struct intel_gt *gt = guc_to_gt(guc); u32 flags = 0; + /* + * Enable PXP GuC autoteardown flow. + * NB: MTL does things differently. + */ + if (HAS_PXP(gt->i915) && !IS_METEORLAKE(gt->i915)) + flags |= GUC_CTL_ENABLE_GUC_PXP_CTL; + if (!intel_guc_submission_is_used(guc)) flags |= GUC_CTL_DISABLE_SCHEDULER; @@ -286,7 +294,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) /* Wa_22012773006:gen11,gen12 < XeHP */ if (GRAPHICS_VER(gt->i915) >= 11 && - GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50)) + GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 55)) flags |= GUC_WA_POLLCS; /* Wa_14014475959 */ @@ -294,6 +302,11 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) IS_DG2(gt->i915)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; + /* Wa_16019325821 */ + /* Wa_14019159160 */ + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) + flags |= GUC_WA_RCS_CCS_SWITCHOUT; + /* * Wa_14012197797 * Wa_22011391025 @@ -315,15 +328,12 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) if (IS_DG2_G11(gt->i915)) flags |= GUC_WA_CONTEXT_ISOLATION; - /* Wa_16015675438 */ - if (!RCS_MASK(gt)) - flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; - - /* Wa_14018913170 */ - if (GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 7, 0)) { - if (IS_DG2(gt->i915) || IS_METEORLAKE(gt->i915) || IS_PONTEVECCHIO(gt->i915)) - flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; - } + /* + * Wa_14018913170: Applicable to all platforms supported by i915 so + * don't bother testing for all X/Y/Z platforms explicitly. + */ + if (GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 7, 0)) + flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; return flags; } @@ -680,7 +690,7 @@ int intel_guc_suspend(struct intel_guc *guc) * H2G MMIO command completes. * * Don't abort on a failure code from the GuC. Keep going and do the - * clean up in santize() and re-initialisation on resume and hopefully + * clean up in sanitize() and re-initialisation on resume and hopefully * the error here won't be problematic. */ ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index be70c46604b4..053780f562c1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -204,6 +204,8 @@ struct intel_guc { struct guc_mmio_reg *ads_regset; /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; + /** @ads_waklv_size: size of workaround KLVs */ + u32 ads_waklv_size; /** @ads_capture_size: size of register lists in the ADS used for error capture */ u32 ads_capture_size; @@ -293,7 +295,7 @@ struct intel_guc { */ struct work_struct dead_guc_worker; /** - * @last_dead_guc_jiffies: timestamp of previous 'dead guc' occurrance + * @last_dead_guc_jiffies: timestamp of previous 'dead guc' occurrence * used to prevent a fundamentally broken system from continuously * reloading the GuC. */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index f7372f736a77..46fabbfc775e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -46,6 +46,10 @@ * +---------------------------------------+ * | padding | * +---------------------------------------+ <== 4K aligned + * | w/a KLVs | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned * | capture lists | * +---------------------------------------+ * | padding | @@ -88,6 +92,11 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc) return PAGE_ALIGN(guc->ads_golden_ctxt_size); } +static u32 guc_ads_waklv_size(struct intel_guc *guc) +{ + return PAGE_ALIGN(guc->ads_waklv_size); +} + static u32 guc_ads_capture_size(struct intel_guc *guc) { return PAGE_ALIGN(guc->ads_capture_size); @@ -113,7 +122,7 @@ static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc) return PAGE_ALIGN(offset); } -static u32 guc_ads_capture_offset(struct intel_guc *guc) +static u32 guc_ads_waklv_offset(struct intel_guc *guc) { u32 offset; @@ -123,6 +132,16 @@ static u32 guc_ads_capture_offset(struct intel_guc *guc) return PAGE_ALIGN(offset); } +static u32 guc_ads_capture_offset(struct intel_guc *guc) +{ + u32 offset; + + offset = guc_ads_waklv_offset(guc) + + guc_ads_waklv_size(guc); + + return PAGE_ALIGN(offset); +} + static u32 guc_ads_private_data_offset(struct intel_guc *guc) { u32 offset; @@ -393,7 +412,7 @@ static int guc_mmio_regset_init(struct temp_regset *regset, /* add in local MOCS registers */ for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) ret |= GUC_MCR_REG_ADD(gt, regset, XEHP_LNCFCMOCS(i), false); else ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false); @@ -503,7 +522,7 @@ static void fill_engine_enable_masks(struct intel_gt *gt, #define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) #define XEHP_LR_HW_CONTEXT_SIZE (96 * sizeof(u32)) -#define LR_HW_CONTEXT_SZ(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50) ? \ +#define LR_HW_CONTEXT_SZ(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55) ? \ XEHP_LR_HW_CONTEXT_SIZE : \ LR_HW_CONTEXT_SIZE) #define LRC_SKIP_SIZE(i915) (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SZ(i915)) @@ -796,6 +815,73 @@ engine_instance_list: return PAGE_ALIGN(total_size); } +static void guc_waklv_enable_simple(struct intel_guc *guc, u32 *offset, u32 *remain, u32 klv_id) +{ + u32 size; + u32 klv_entry[] = { + /* 16:16 key/length */ + FIELD_PREP(GUC_KLV_0_KEY, klv_id) | + FIELD_PREP(GUC_KLV_0_LEN, 0), + /* 0 dwords data */ + }; + + size = sizeof(klv_entry); + GEM_BUG_ON(*remain < size); + + iosys_map_memcpy_to(&guc->ads_map, *offset, klv_entry, size); + *offset += size; + *remain -= size; +} + +static void guc_waklv_init(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + u32 offset, addr_ggtt, remain, size; + + if (!intel_uc_uses_guc_submission(>->uc)) + return; + + if (GUC_FIRMWARE_VER(guc) < MAKE_GUC_VER(70, 10, 0)) + return; + + GEM_BUG_ON(iosys_map_is_null(&guc->ads_map)); + offset = guc_ads_waklv_offset(guc); + remain = guc_ads_waklv_size(guc); + + /* Wa_14019159160 */ + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { + guc_waklv_enable_simple(guc, &offset, &remain, + GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE); + guc_waklv_enable_simple(guc, &offset, &remain, + GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE); + } + + /* Wa_16021333562 */ + if ((GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 21, 1)) && + (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || + IS_MEDIA_GT_IP_RANGE(gt, IP_VER(13, 0), IP_VER(13, 0)) || + IS_DG2(gt->i915))) + guc_waklv_enable_simple(guc, &offset, &remain, + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED); + + size = guc_ads_waklv_size(guc) - remain; + if (!size) + return; + + offset = guc_ads_waklv_offset(guc); + addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + + ads_blob_write(guc, ads.wa_klv_addr_lo, addr_ggtt); + ads_blob_write(guc, ads.wa_klv_addr_hi, 0); + ads_blob_write(guc, ads.wa_klv_size, size); +} + +static int guc_prep_waklv(struct intel_guc *guc) +{ + /* Fudge something chunky for now: */ + return PAGE_SIZE; +} + static void __guc_ads_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -843,6 +929,9 @@ static void __guc_ads_init(struct intel_guc *guc) /* MMIO save/restore list */ guc_mmio_reg_state_init(guc); + /* Workaround KLV list */ + guc_waklv_init(guc); + /* Private Data */ ads_blob_write(guc, ads.private_data, base + guc_ads_private_data_offset(guc)); @@ -886,6 +975,12 @@ int intel_guc_ads_create(struct intel_guc *guc) return ret; guc->ads_capture_size = ret; + /* And don't forget the workaround KLVs: */ + ret = guc_prep_waklv(guc); + if (ret < 0) + return ret; + guc->ads_waklv_size = ret; + /* Now the total size can be determined: */ size = guc_ads_blob_size(guc); @@ -961,7 +1056,7 @@ u32 intel_guc_engine_usage_offset(struct intel_guc *guc) struct iosys_map intel_guc_engine_usage_record_map(struct intel_engine_cs *engine) { - struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_guc *guc = gt_to_guc(engine->gt); u8 guc_class = engine_class_to_guc_class(engine->class); size_t offset = offsetof(struct __guc_ads_blob, engine_usage.engines[guc_class][ilog2(engine->logical_mask)]); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index a1cd40d80517..9547fff672bd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -51,6 +51,7 @@ { RING_ESR(0), 0, 0, "ESR" }, \ { RING_DMA_FADD(0), 0, 0, "RING_DMA_FADD_LDW" }, \ { RING_DMA_FADD_UDW(0), 0, 0, "RING_DMA_FADD_UDW" }, \ + { RING_EIR(0), 0, 0, "EIR" }, \ { RING_IPEIR(0), 0, 0, "IPEIR" }, \ { RING_IPEHR(0), 0, 0, "IPEHR" }, \ { RING_INSTPS(0), 0, 0, "INSTPS" }, \ @@ -80,9 +81,6 @@ { GEN8_RING_PDP_LDW(0, 3), 0, 0, "PDP3_LDW" }, \ { GEN8_RING_PDP_UDW(0, 3), 0, 0, "PDP3_UDW" } -#define COMMON_BASE_HAS_EU \ - { EIR, 0, 0, "EIR" } - #define COMMON_BASE_RENDER \ { GEN7_SC_INSTDONE, 0, 0, "GEN7_SC_INSTDONE" } @@ -105,7 +103,6 @@ static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = { /* XE_LP Render / Compute Per-Class */ static const struct __guc_mmio_reg_descr xe_lp_rc_class_regs[] = { - COMMON_BASE_HAS_EU, COMMON_BASE_RENDER, COMMON_GEN12BASE_RENDER, }; @@ -148,7 +145,6 @@ static const struct __guc_mmio_reg_descr gen8_global_regs[] = { }; static const struct __guc_mmio_reg_descr gen8_rc_class_regs[] = { - COMMON_BASE_HAS_EU, COMMON_BASE_RENDER, }; @@ -1441,7 +1437,7 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf, if (!cap || !ee->engine) return -ENODEV; - guc = &ee->engine->gt->uc.guc; + guc = gt_to_guc(ee->engine->gt); i915_error_printf(ebuf, "global --- GuC Error Capture on %s command stream:\n", ee->engine->name); @@ -1543,7 +1539,7 @@ bool intel_guc_capture_is_matching_engine(struct intel_gt *gt, if (!gt || !ce || !engine) return false; - guc = >->uc.guc; + guc = gt_to_guc(gt); if (!guc->capture) return false; @@ -1573,7 +1569,7 @@ void intel_guc_capture_get_matching_node(struct intel_gt *gt, if (!gt || !ee || !ce) return; - guc = >->uc.guc; + guc = gt_to_guc(gt); if (!guc->capture) return; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 52332bb14339..e7ccfa520df3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -26,7 +26,7 @@ static void guc_prepare_xfer(struct intel_gt *gt) GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | GUC_ENABLE_MIA_CLOCK_GATING; - if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 55)) shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES | GUC_ENABLE_MIA_CACHING; @@ -115,6 +115,7 @@ static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool case INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID: case INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID: case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: + case INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR: *success = false; return true; } @@ -144,7 +145,7 @@ static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool * an end user should hit the timeout is in case of extreme thermal throttling. * And a system that is that hot during boot is probably dead anyway! */ -#if defined(CONFIG_DRM_I915_DEBUG_GEM) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) #define GUC_LOAD_RETRY_LIMIT 20 #else #define GUC_LOAD_RETRY_LIMIT 3 @@ -241,6 +242,11 @@ static int guc_wait_ucode(struct intel_guc *guc) ret = -EPERM; break; + case INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR: + guc_info(guc, "invalid w/a KLV entry\n"); + ret = -EINVAL; + break; + case INTEL_GUC_LOAD_STATUS_HWCONFIG_START: guc_info(guc, "still extracting hwconfig table.\n"); ret = -ETIMEDOUT; @@ -253,13 +259,14 @@ static int guc_wait_ucode(struct intel_guc *guc) } else if (delta_ms > 200) { guc_warn(guc, "excessive init time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n", delta_ms, status, count, ret); - guc_warn(guc, "excessive init time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n", - intel_rps_read_actual_frequency(>->rps), before_freq, + guc_warn(guc, "excessive init time: [freq = %dMHz -> %dMHz vs %dMHz, perf_limit_reasons = 0x%08X]\n", + before_freq, intel_rps_read_actual_frequency(>->rps), + intel_rps_get_requested_frequency(>->rps), intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt))); } else { - guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n", - delta_ms, intel_rps_read_actual_frequency(>->rps), - before_freq, status, count, ret); + guc_dbg(guc, "init took %lldms, freq = %dMHz -> %dMHz vs %dMHz, status = 0x%08X, count = %d, ret = %d\n", + delta_ms, before_freq, intel_rps_read_actual_frequency(>->rps), + intel_rps_get_requested_frequency(>->rps), status, count, ret); } return ret; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 8ae1846431da..eded00f0c7e1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -96,14 +96,16 @@ #define GUC_WA_GAM_CREDITS BIT(10) #define GUC_WA_DUAL_QUEUE BIT(11) #define GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13) -#define GUC_WA_CONTEXT_ISOLATION BIT(15) #define GUC_WA_PRE_PARSER BIT(14) +#define GUC_WA_CONTEXT_ISOLATION BIT(15) +#define GUC_WA_RCS_CCS_SWITCHOUT BIT(16) #define GUC_WA_HOLD_CCS_SWITCHOUT BIT(17) #define GUC_WA_POLLCS BIT(18) #define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21) #define GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22) #define GUC_CTL_FEATURE 2 +#define GUC_CTL_ENABLE_GUC_PXP_CTL BIT(1) #define GUC_CTL_ENABLE_SLPC BIT(2) #define GUC_CTL_DISABLE_SCHEDULER BIT(14) @@ -294,7 +296,7 @@ struct guc_update_scheduling_policy_header { } __packed; /* - * Can't dynmically allocate memory for the scheduling policy KLV because + * Can't dynamically allocate memory for the scheduling policy KLV because * it will be sent from within the reset path. Need a fixed size lump on * the stack instead :(. * @@ -406,7 +408,7 @@ enum guc_capture_type { GUC_CAPTURE_LIST_TYPE_MAX, }; -/* Class indecies for capture_class and capture_instance arrays */ +/* Class indices for capture_class and capture_instance arrays */ enum { GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0, GUC_CAPTURE_LIST_CLASS_VIDEO = 1, @@ -430,7 +432,10 @@ struct guc_ads { u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX]; - u32 reserved[14]; + u32 wa_klv_addr_lo; + u32 wa_klv_addr_hi; + u32 wa_klv_size; + u32 reserved[11]; } __packed; /* Engine usage stats */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c index cc9569af7f0c..868195c33f5b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c @@ -7,6 +7,7 @@ #include "gt/intel_hwconfig.h" #include "i915_drv.h" #include "i915_memcpy.h" +#include "intel_guc_print.h" /* * GuC has a blob containing hardware configuration information (HWConfig). @@ -42,6 +43,8 @@ static int __guc_action_get_hwconfig(struct intel_guc *guc, }; int ret; + guc_dbg(guc, "Querying HW config table: size = %d, offset = 0x%08X\n", + ggtt_size, ggtt_offset); ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); if (ret == -ENXIO) return -ENOENT; @@ -111,7 +114,7 @@ static bool has_table(struct drm_i915_private *i915) static int guc_hwconfig_init(struct intel_gt *gt) { struct intel_hwconfig *hwconfig = >->info.hwconfig; - struct intel_guc *guc = >->uc.guc; + struct intel_guc *guc = gt_to_guc(gt); int ret; if (!has_table(gt->i915)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index bf16351c9349..e8a04e476c57 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -14,11 +14,11 @@ #include "intel_guc_log.h" #include "intel_guc_print.h" -#if defined(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_2M #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_16M #define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M -#elif defined(CONFIG_DRM_I915_DEBUG_GEM) +#elif IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_1M #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_2M #define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 3e681ab6fbf9..d5ee6e5e1443 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -15,6 +15,34 @@ #include "gt/intel_gt_regs.h" #include "gt/intel_rps.h" +/** + * DOC: SLPC - Dynamic Frequency management + * + * Single Loop Power Control (SLPC) is a GuC algorithm that manages + * GT frequency based on busyness and how KMD initializes it. SLPC is + * almost completely in control after initialization except for a few + * scenarios mentioned below. + * + * KMD uses the concept of waitboost to ramp frequency to RP0 when there + * are pending submissions for a context. It achieves this by sending GuC a + * request to update the min frequency to RP0. Waitboost is disabled + * when the request retires. + * + * Another form of frequency control happens through per-context hints. + * A context can be marked as low latency during creation. That will ensure + * that SLPC uses an aggressive frequency ramp when that context is active. + * + * Power profiles add another level of control to these mechanisms. + * When power saving profile is chosen, SLPC will use conservative + * thresholds to ramp frequency, thus saving power. KMD will disable + * waitboosts as well, which achieves further power savings. Base profile + * is default and ensures balanced performance for any workload. + * + * Lastly, users have some level of control through sysfs, where min/max + * frequency values can be altered and the use of efficient freq + * can be toggled. + */ + static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc) { return container_of(slpc, struct intel_guc, slpc); @@ -265,6 +293,8 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) slpc->num_boosts = 0; slpc->media_ratio_mode = SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL; + slpc->power_profile = SLPC_POWER_PROFILES_BASE; + mutex_init(&slpc->lock); INIT_WORK(&slpc->boost_work, slpc_boost_work); @@ -357,21 +387,29 @@ static u32 slpc_decode_max_freq(struct intel_guc_slpc *slpc) GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); } -static void slpc_shared_data_reset(struct slpc_shared_data *data) +static void slpc_shared_data_reset(struct intel_guc_slpc *slpc) { - memset(data, 0, sizeof(struct slpc_shared_data)); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + struct slpc_shared_data *data = slpc->vaddr; + memset(data, 0, sizeof(struct slpc_shared_data)); data->header.size = sizeof(struct slpc_shared_data); /* Enable only GTPERF task, disable others */ slpc_mem_set_enabled(data, SLPC_PARAM_TASK_ENABLE_GTPERF, SLPC_PARAM_TASK_DISABLE_GTPERF); - slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_BALANCER, - SLPC_PARAM_TASK_DISABLE_BALANCER); + /* + * Don't allow balancer related algorithms on platforms before + * Xe_LPG, where GuC started to restrict it to TDP limited scenarios. + */ + if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 70)) { + slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_BALANCER, + SLPC_PARAM_TASK_DISABLE_BALANCER); - slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_DCC, - SLPC_PARAM_TASK_DISABLE_DCC); + slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_DCC, + SLPC_PARAM_TASK_DISABLE_DCC); + } } /** @@ -537,6 +575,20 @@ int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val) return ret; } +int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + ret = slpc_set_param(slpc, + SLPC_PARAM_STRATEGIES, + val); + + return ret; +} + int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val) { struct drm_i915_private *i915 = slpc_to_i915(slpc); @@ -553,6 +605,34 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val) return ret; } +int intel_guc_slpc_set_power_profile(struct intel_guc_slpc *slpc, u32 val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + if (val > SLPC_POWER_PROFILES_POWER_SAVING) + return -EINVAL; + + mutex_lock(&slpc->lock); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + ret = slpc_set_param(slpc, + SLPC_PARAM_POWER_PROFILE, + val); + if (ret) + guc_err(slpc_to_guc(slpc), + "Failed to set power profile to %d: %pe\n", + val, ERR_PTR(ret)); + else + slpc->power_profile = val; + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&slpc->lock); + + return ret; +} + void intel_guc_pm_intrmsk_enable(struct intel_gt *gt) { u32 pm_intrmsk_mbz = 0; @@ -672,7 +752,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) GEM_BUG_ON(!slpc->vma); - slpc_shared_data_reset(slpc->vaddr); + slpc_shared_data_reset(slpc); ret = slpc_reset(slpc); if (unlikely(ret < 0)) { @@ -711,6 +791,16 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) /* Set cached media freq ratio mode */ intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode); + /* Enable SLPC Optimized Strategy for compute */ + intel_guc_slpc_set_strategy(slpc, SLPC_OPTIMIZED_STRATEGY_COMPUTE); + + /* Set cached value of power_profile */ + ret = intel_guc_slpc_set_power_profile(slpc, slpc->power_profile); + if (unlikely(ret)) { + guc_probe_error(guc, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret)); + return ret; + } + return 0; } @@ -774,6 +864,23 @@ int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p drm_printf(p, "\tSLPC state: %s\n", slpc_get_state_string(slpc)); drm_printf(p, "\tGTPERF task active: %s\n", str_yes_no(slpc_tasks->status & SLPC_GTPERF_TASK_ENABLED)); + drm_printf(p, "\tDCC enabled: %s\n", + str_yes_no(slpc_tasks->status & + SLPC_DCC_TASK_ENABLED)); + drm_printf(p, "\tDCC in: %s\n", + str_yes_no(slpc_tasks->status & SLPC_IN_DCC)); + drm_printf(p, "\tBalancer enabled: %s\n", + str_yes_no(slpc_tasks->status & + SLPC_BALANCER_ENABLED)); + drm_printf(p, "\tIBC enabled: %s\n", + str_yes_no(slpc_tasks->status & + SLPC_IBC_TASK_ENABLED)); + drm_printf(p, "\tBalancer IA LMT enabled: %s\n", + str_yes_no(slpc_tasks->status & + SLPC_BALANCER_IA_LMT_ENABLED)); + drm_printf(p, "\tBalancer IA LMT active: %s\n", + str_yes_no(slpc_tasks->status & + SLPC_BALANCER_IA_LMT_ACTIVE)); drm_printf(p, "\tMax freq: %u MHz\n", slpc_decode_max_freq(slpc)); drm_printf(p, "\tMin freq: %u MHz\n", diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 6ac6503c39d4..fc9f761b4372 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -45,5 +45,7 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); void intel_guc_slpc_boost(struct intel_guc_slpc *slpc); void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val); +int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val); +int intel_guc_slpc_set_power_profile(struct intel_guc_slpc *slpc, u32 val); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index a88651331497..83673b10ac4e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -33,6 +33,9 @@ struct intel_guc_slpc { u32 max_freq_softlimit; bool ignore_eff_freq; + /* Base or power saving */ + u32 power_profile; + /* cached media ratio mode */ u32 media_ratio_mode; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index f3dcae4b9d45..127316d2c8aa 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -398,7 +398,7 @@ static inline void set_context_guc_id_invalid(struct intel_context *ce) static inline struct intel_guc *ce_to_guc(struct intel_context *ce) { - return &ce->engine->gt->uc.guc; + return gt_to_guc(ce->engine->gt); } static inline struct i915_priolist *to_priolist(struct rb_node *rb) @@ -633,7 +633,7 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc, atomic_inc(&guc->outstanding_submission_g2h); ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); - if (ret) + if (ret && g2h_len_dw) atomic_dec(&guc->outstanding_submission_g2h); return ret; @@ -1223,7 +1223,7 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) * determine validity of these values. Instead we read the values multiple times * until they are consistent. In test runs, 3 attempts results in consistent * values. The upper bound is set to 6 attempts and may need to be tuned as per - * any new occurences. + * any new occurrences. */ static void __get_engine_usage_record(struct intel_engine_cs *engine, u32 *last_in, u32 *id, u32 *total) @@ -1243,10 +1243,25 @@ static void __get_engine_usage_record(struct intel_engine_cs *engine, } while (++i < 6); } +static void __set_engine_usage_record(struct intel_engine_cs *engine, + u32 last_in, u32 id, u32 total) +{ + struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine); + +#define record_write(map_, field_, val_) \ + iosys_map_wr_field(map_, 0, struct guc_engine_usage_record, field_, val_) + + record_write(&rec_map, last_switch_in_stamp, last_in); + record_write(&rec_map, current_context_index, id); + record_write(&rec_map, total_runtime, total); + +#undef record_write +} + static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) { struct intel_engine_guc_stats *stats = &engine->stats.guc; - struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_guc *guc = gt_to_guc(engine->gt); u32 last_switch, ctx_id, total; lockdep_assert_held(&guc->timestamp.lock); @@ -1270,15 +1285,12 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) static u32 gpm_timestamp_shift(struct intel_gt *gt) { intel_wakeref_t wakeref; - u32 reg, shift; + u32 reg; with_intel_runtime_pm(gt->uncore->rpm, wakeref) reg = intel_uncore_read(gt->uncore, RPM_CONFIG0); - shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> - GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT; - - return 3 - shift; + return 3 - REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); } static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) @@ -1311,7 +1323,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc; struct i915_gpu_error *gpu_error = &engine->i915->gpu_error; struct intel_gt *gt = engine->gt; - struct intel_guc *guc = >->uc.guc; + struct intel_guc *guc = gt_to_guc(gt); u64 total, gt_stamp_saved; unsigned long flags; u32 reset_count; @@ -1339,7 +1351,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) * start_gt_clk is derived from GuC state. To get a consistent * view of activity, we query the GuC state only if gt is awake. */ - wakeref = in_reset ? 0 : intel_gt_pm_get_if_awake(gt); + wakeref = in_reset ? NULL : intel_gt_pm_get_if_awake(gt); if (wakeref) { stats_saved = *stats; gt_stamp_saved = guc->timestamp.gt_stamp; @@ -1363,9 +1375,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) total += intel_gt_clock_interval_to_ns(gt, clk); } + if (total > stats->total) + stats->total = total; + spin_unlock_irqrestore(&guc->timestamp.lock, flags); - return ns_to_ktime(total); + return ns_to_ktime(stats->total); } static void guc_enable_busyness_worker(struct intel_guc *guc) @@ -1403,14 +1418,17 @@ static void guc_cancel_busyness_worker(struct intel_guc *guc) * Trying to pass a 'need_sync' or 'in_reset' flag all the way down through * every possible call stack is unfeasible. It would be too intrusive to many * areas that really don't care about the GuC backend. However, there is the - * 'reset_in_progress' flag available, so just use that. + * I915_RESET_BACKOFF flag and the gt->reset.mutex can be tested for is_locked. + * So just use those. Note that testing both is required due to the hideously + * complex nature of the i915 driver's reset code paths. * * And note that in the case of a reset occurring during driver unload - * (wedge_on_fini), skipping the cancel in _prepare (when the reset flag is set - * is fine because there is another cancel in _finish (when the reset flag is - * not). + * (wedged_on_fini), skipping the cancel in reset_prepare/reset_fini (when the + * reset flag/mutex are set) is fine because there is another explicit cancel in + * intel_guc_submission_fini (when the reset flag/mutex are not). */ - if (guc_to_gt(guc)->uc.reset_in_progress) + if (mutex_is_locked(&guc_to_gt(guc)->reset.mutex) || + test_bit(I915_RESET_BACKOFF, &guc_to_gt(guc)->reset.flags)) cancel_delayed_work(&guc->timestamp.work); else cancel_delayed_work_sync(&guc->timestamp.work); @@ -1424,19 +1442,43 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc) unsigned long flags; ktime_t unused; - guc_cancel_busyness_worker(guc); - spin_lock_irqsave(&guc->timestamp.lock, flags); guc_update_pm_timestamp(guc, &unused); for_each_engine(engine, gt, id) { + struct intel_engine_guc_stats *stats = &engine->stats.guc; + guc_update_engine_gt_clks(engine); - engine->stats.guc.prev_total = 0; + + /* + * If resetting a running context, accumulate the active + * time as well since there will be no context switch. + */ + if (stats->running) { + u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; + + stats->total_gt_clks += clk; + } + stats->prev_total = 0; + stats->running = 0; } spin_unlock_irqrestore(&guc->timestamp.lock, flags); } +static void __update_guc_busyness_running_state(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned long flags; + + spin_lock_irqsave(&guc->timestamp.lock, flags); + for_each_engine(engine, gt, id) + engine->stats.guc.running = false; + spin_unlock_irqrestore(&guc->timestamp.lock, flags); +} + static void __update_guc_busyness_stats(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -1542,6 +1584,9 @@ err_trylock: static int guc_action_enable_usage_stats(struct intel_guc *guc) { + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; u32 offset = intel_guc_engine_usage_offset(guc); u32 action[] = { INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, @@ -1549,6 +1594,9 @@ static int guc_action_enable_usage_stats(struct intel_guc *guc) 0, }; + for_each_engine(engine, gt, id) + __set_engine_usage_record(engine, 0, 0xffffffff, 0); + return intel_guc_send(guc, action, ARRAY_SIZE(action)); } @@ -1576,11 +1624,14 @@ static void guc_fini_engine_stats(struct intel_guc *guc) void intel_guc_busyness_park(struct intel_gt *gt) { - struct intel_guc *guc = >->uc.guc; + struct intel_guc *guc = gt_to_guc(gt); if (!guc_submission_initialized(guc)) return; + /* Assume no engines are running and set running state to false */ + __update_guc_busyness_running_state(guc); + /* * There is a race with suspend flow where the worker runs after suspend * and causes an unclaimed register access warning. Cancel the worker @@ -1603,7 +1654,7 @@ void intel_guc_busyness_park(struct intel_gt *gt) void intel_guc_busyness_unpark(struct intel_gt *gt) { - struct intel_guc *guc = >->uc.guc; + struct intel_guc *guc = gt_to_guc(gt); unsigned long flags; ktime_t unused; @@ -1687,6 +1738,10 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) spin_lock_irq(guc_to_gt(guc)->irq_lock); spin_unlock_irq(guc_to_gt(guc)->irq_lock); + /* Flush tasklet */ + tasklet_disable(&guc->ct.receive_tasklet); + tasklet_enable(&guc->ct.receive_tasklet); + guc_flush_submissions(guc); guc_flush_destroyed_contexts(guc); flush_work(&guc->ct.requests.worker); @@ -2004,12 +2059,7 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc) void intel_guc_submission_reset_finish(struct intel_guc *guc) { - /* - * Ensure the busyness worker gets cancelled even on a fatal wedge. - * Note that reset_prepare is not allowed to because it confuses lockdep. - */ - if (guc_submission_initialized(guc)) - guc_cancel_busyness_worker(guc); + int outstanding; /* Reset called during driver load or during wedge? */ if (unlikely(!guc_submission_initialized(guc) || @@ -2020,11 +2070,14 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc) /* * Technically possible for either of these values to be non-zero here, - * but very unlikely + harmless. Regardless let's add a warn so we can + * but very unlikely + harmless. Regardless let's add an error so we can * see in CI if this happens frequently / a precursor to taking down the * machine. */ - GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); + outstanding = atomic_read(&guc->outstanding_submission_g2h); + if (outstanding) + guc_err(guc, "Unexpected outstanding GuC to Host response(s) in reset finish: %d\n", + outstanding); atomic_set(&guc->outstanding_submission_g2h, 0); intel_guc_global_policies_update(guc); @@ -2136,6 +2189,7 @@ void intel_guc_submission_fini(struct intel_guc *guc) if (!guc->submission_initialized) return; + guc_fini_engine_stats(guc); guc_flush_destroyed_contexts(guc); guc_lrc_desc_pool_destroy_v69(guc); i915_sched_engine_put(guc->sched_engine); @@ -2194,7 +2248,7 @@ static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) static void guc_submit_request(struct i915_request *rq) { struct i915_sched_engine *sched_engine = rq->engine->sched_engine; - struct intel_guc *guc = &rq->engine->gt->uc.guc; + struct intel_guc *guc = gt_to_guc(rq->engine->gt); unsigned long flags; /* Will be called from irq-context when using foreign fences. */ @@ -2220,11 +2274,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) order_base_2(ce->parallel.number_children + 1)); else - ret = ida_simple_get(&guc->submission_state.guc_ids, - NUMBER_MULTI_LRC_GUC_ID(guc), - guc->submission_state.num_guc_ids, - GFP_KERNEL | __GFP_RETRY_MAYFAIL | - __GFP_NOWARN); + ret = ida_alloc_range(&guc->submission_state.guc_ids, + NUMBER_MULTI_LRC_GUC_ID(guc), + guc->submission_state.num_guc_ids - 1, + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(ret < 0)) return ret; @@ -2247,8 +2300,8 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) + 1)); } else { --guc->submission_state.guc_ids_in_use; - ida_simple_remove(&guc->submission_state.guc_ids, - ce->guc_id.id); + ida_free(&guc->submission_state.guc_ids, + ce->guc_id.id); } clr_ctx_id_mapping(guc, ce->guc_id.id); set_context_guc_id_invalid(ce); @@ -2645,6 +2698,7 @@ MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) +MAKE_CONTEXT_POLICY_ADD(slpc_ctx_freq_req, SLPM_GT_FREQUENCY) #undef MAKE_CONTEXT_POLICY_ADD @@ -2660,10 +2714,11 @@ static int __guc_context_set_context_policies(struct intel_guc *guc, static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) { struct intel_engine_cs *engine = ce->engine; - struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_guc *guc = gt_to_guc(engine->gt); struct context_policy policy; u32 execution_quantum; u32 preemption_timeout; + u32 slpc_ctx_freq_req = 0; unsigned long flags; int ret; @@ -2675,11 +2730,15 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) execution_quantum = engine->props.timeslice_duration_ms * 1000; preemption_timeout = engine->props.preempt_timeout_ms * 1000; + if (ce->flags & BIT(CONTEXT_LOW_LATENCY)) + slpc_ctx_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE; + __guc_context_policy_start_klv(&policy, ce->guc_id.id); __guc_context_policy_add_priority(&policy, ce->guc_state.prio); __guc_context_policy_add_execution_quantum(&policy, execution_quantum); __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); + __guc_context_policy_add_slpc_ctx_freq_req(&policy, slpc_ctx_freq_req); if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) __guc_context_policy_add_preempt_to_idle(&policy, 1); @@ -2736,7 +2795,7 @@ static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) static void prepare_context_registration_info_v69(struct intel_context *ce) { struct intel_engine_cs *engine = ce->engine; - struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_guc *guc = gt_to_guc(engine->gt); u32 ctx_id = ce->guc_id.id; struct guc_lrc_desc_v69 *desc; struct intel_context *child; @@ -2805,7 +2864,7 @@ static void prepare_context_registration_info_v70(struct intel_context *ce, struct guc_ctxt_registration_info *info) { struct intel_engine_cs *engine = ce->engine; - struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_guc *guc = gt_to_guc(engine->gt); u32 ctx_id = ce->guc_id.id; GEM_BUG_ON(!engine->mask); @@ -2842,9 +2901,9 @@ static void prepare_context_registration_info_v70(struct intel_context *ce, ce->parallel.guc.wqi_tail = 0; ce->parallel.guc.wqi_head = 0; - wq_desc_offset = i915_ggtt_offset(ce->state) + + wq_desc_offset = (u64)i915_ggtt_offset(ce->state) + __get_parent_scratch_offset(ce); - wq_base_offset = i915_ggtt_offset(ce->state) + + wq_base_offset = (u64)i915_ggtt_offset(ce->state) + __get_wq_offset(ce); info->wq_desc_lo = lower_32_bits(wq_desc_offset); info->wq_desc_hi = upper_32_bits(wq_desc_offset); @@ -2868,7 +2927,7 @@ static int try_context_registration(struct intel_context *ce, bool loop) { struct intel_engine_cs *engine = ce->engine; struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; - struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_guc *guc = gt_to_guc(engine->gt); intel_wakeref_t wakeref; u32 ctx_id = ce->guc_id.id; bool context_registered; @@ -2949,7 +3008,7 @@ static int __guc_context_pin(struct intel_context *ce, /* * GuC context gets pinned in guc_request_alloc. See that function for - * explaination of why. + * explanation of why. */ return lrc_pin(ce, engine, vaddr); @@ -3384,18 +3443,29 @@ static inline int guc_lrc_desc_unpin(struct intel_context *ce) * GuC is active, lets destroy this context, but at this point we can still be racing * with suspend, so we undo everything if the H2G fails in deregister_context so * that GuC reset will find this context during clean up. + * + * There is a race condition where the reset code could have altered + * this context's state and done a wakeref put before we try to + * deregister it here. So check if the context is still set to be + * destroyed before undoing earlier changes, to avoid two wakeref puts + * on the same context. */ ret = deregister_context(ce, ce->guc_id.id); if (ret) { - spin_lock(&ce->guc_state.lock); - set_context_registered(ce); - clr_context_destroyed(ce); - spin_unlock(&ce->guc_state.lock); + bool pending_destroyed; + spin_lock_irqsave(&ce->guc_state.lock, flags); + pending_destroyed = context_destroyed(ce); + if (pending_destroyed) { + set_context_registered(ce); + clr_context_destroyed(ce); + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); /* * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements * the wakeref immediately but per function spec usage call this after unlock. */ - intel_wakeref_put_async(>->wakeref); + if (pending_destroyed) + intel_wakeref_put_async(>->wakeref); } return ret; @@ -4267,20 +4337,25 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq, u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); /* Short circuit function */ - if (prio < I915_PRIORITY_NORMAL || - rq->guc_prio == GUC_PRIO_FINI || - (rq->guc_prio != GUC_PRIO_INIT && - !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) + if (prio < I915_PRIORITY_NORMAL) return; spin_lock(&ce->guc_state.lock); - if (rq->guc_prio != GUC_PRIO_FINI) { - if (rq->guc_prio != GUC_PRIO_INIT) - sub_context_inflight_prio(ce, rq->guc_prio); - rq->guc_prio = new_guc_prio; - add_context_inflight_prio(ce, rq->guc_prio); - update_context_prio(ce); - } + + if (rq->guc_prio == GUC_PRIO_FINI) + goto exit; + + if (!new_guc_prio_higher(rq->guc_prio, new_guc_prio)) + goto exit; + + if (rq->guc_prio != GUC_PRIO_INIT) + sub_context_inflight_prio(ce, rq->guc_prio); + + rq->guc_prio = new_guc_prio; + add_context_inflight_prio(ce, rq->guc_prio); + update_context_prio(ce); + +exit: spin_unlock(&ce->guc_state.lock); } @@ -4496,7 +4571,13 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) if (engine->class == COMPUTE_CLASS) if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_DG2(engine->i915)) - engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; + engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT; + + /* Wa_16019325821 */ + /* Wa_14019159160 */ + if ((engine->class == COMPUTE_CLASS || engine->class == RENDER_CLASS) && + IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) + engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT; /* * TODO: GuC supports timeslicing and semaphores as well, but they're @@ -4507,7 +4588,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) */ engine->emit_bb_start = gen8_emit_bb_start; - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) engine->emit_bb_start = xehp_emit_bb_start; } @@ -4549,7 +4630,7 @@ static void guc_sched_engine_destroy(struct kref *kref) int intel_guc_submission_setup(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_guc *guc = gt_to_guc(engine->gt); /* * The setup relies on several assumptions (e.g. irqs always enabled) @@ -5308,7 +5389,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, void intel_guc_find_hung_context(struct intel_engine_cs *engine) { - struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_guc *guc = gt_to_guc(engine->gt); struct intel_context *ce; struct i915_request *rq; unsigned long index; @@ -5370,7 +5451,7 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine, struct i915_request *hung_rq, struct drm_printer *m) { - struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_guc *guc = gt_to_guc(engine->gt); struct intel_context *ce; unsigned long index; unsigned long flags; @@ -5462,12 +5543,20 @@ static inline void guc_log_context(struct drm_printer *p, { drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); - drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", - ce->ring->head, - ce->lrc_reg_state[CTX_RING_HEAD]); - drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", - ce->ring->tail, - ce->lrc_reg_state[CTX_RING_TAIL]); + if (intel_context_pin_if_active(ce)) { + drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", + ce->ring->head, + ce->lrc_reg_state[CTX_RING_HEAD]); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", + ce->ring->tail, + ce->lrc_reg_state[CTX_RING_TAIL]); + intel_context_unpin(ce); + } else { + drm_printf(p, "\t\tLRC Head: Internal %u, Memory not pinned\n", + ce->ring->head); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory not pinned\n", + ce->ring->tail); + } drm_printf(p, "\t\tContext Pin Count: %u\n", atomic_read(&ce->pin_count)); drm_printf(p, "\t\tGuC ID Ref Count: %u\n", @@ -5822,7 +5911,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, if (!ve) return ERR_PTR(-ENOMEM); - guc = &siblings[0]->gt->uc.guc; + guc = gt_to_guc(siblings[0]->gt); ve->base.i915 = siblings[0]->i915; ve->base.gt = siblings[0]->gt; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 0945b177d5f9..456d3372eef8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -231,8 +231,8 @@ static void delayed_huc_load_init(struct intel_huc *huc) sw_fence_dummy_notify); i915_sw_fence_commit(&huc->delayed_load.fence); - hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - huc->delayed_load.timer.function = huc_delayed_load_timer_callback; + hrtimer_setup(&huc->delayed_load.timer, huc_delayed_load_timer_callback, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); } static void delayed_huc_load_fini(struct intel_huc *huc) @@ -317,6 +317,11 @@ void intel_huc_init_early(struct intel_huc *huc) } } +void intel_huc_fini_late(struct intel_huc *huc) +{ + delayed_huc_load_fini(huc); +} + #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy") static int check_huc_loading_mode(struct intel_huc *huc) { @@ -385,7 +390,7 @@ int intel_huc_init(struct intel_huc *huc) if (HAS_ENGINE(gt, GSC0)) { struct i915_vma *vma; - vma = intel_guc_allocate_vma(>->uc.guc, PXP43_HUC_AUTH_INOUT_SIZE * 2); + vma = intel_guc_allocate_vma(gt_to_guc(gt), PXP43_HUC_AUTH_INOUT_SIZE * 2); if (IS_ERR(vma)) { err = PTR_ERR(vma); huc_info(huc, "Failed to allocate heci pkt\n"); @@ -414,12 +419,6 @@ out: void intel_huc_fini(struct intel_huc *huc) { - /* - * the fence is initialized in init_early, so we need to clean it up - * even if HuC loading is off. - */ - delayed_huc_load_fini(huc); - if (huc->heci_pkt) i915_vma_unpin_and_release(&huc->heci_pkt, 0); @@ -427,19 +426,6 @@ void intel_huc_fini(struct intel_huc *huc) intel_uc_fw_fini(&huc->fw); } -void intel_huc_suspend(struct intel_huc *huc) -{ - if (!intel_uc_fw_is_loadable(&huc->fw)) - return; - - /* - * in the unlikely case that we're suspending before the GSC has - * completed its loading sequence, just stop waiting. We'll restart - * on resume. - */ - delayed_huc_load_complete(huc); -} - static const char *auth_mode_string(struct intel_huc *huc, enum intel_huc_authentication_type type) { @@ -455,7 +441,7 @@ static const char *auth_mode_string(struct intel_huc *huc, * an end user should hit the timeout is in case of extreme thermal throttling. * And a system that is that hot during boot is probably dead anyway! */ -#if defined(CONFIG_DRM_I915_DEBUG_GEM) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) #define HUC_LOAD_RETRY_LIMIT 20 #else #define HUC_LOAD_RETRY_LIMIT 3 @@ -502,13 +488,15 @@ int intel_huc_wait_for_auth_complete(struct intel_huc *huc, if (delta_ms > 50) { huc_warn(huc, "excessive auth time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n", delta_ms, huc->status[type].reg.reg, count, ret); - huc_warn(huc, "excessive auth time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n", - intel_rps_read_actual_frequency(>->rps), before_freq, + huc_warn(huc, "excessive auth time: [freq = %dMHz -> %dMHz vs %dMHz, perf_limit_reasons = 0x%08X]\n", + before_freq, intel_rps_read_actual_frequency(>->rps), + intel_rps_get_requested_frequency(>->rps), intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt))); } else { - huc_dbg(huc, "auth took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n", - delta_ms, intel_rps_read_actual_frequency(>->rps), - before_freq, huc->status[type].reg.reg, count, ret); + huc_dbg(huc, "auth took %lldms, freq = %dMHz -> %dMHz vs %dMHz, status = 0x%08X, count = %d, ret = %d\n", + delta_ms, before_freq, intel_rps_read_actual_frequency(>->rps), + intel_rps_get_requested_frequency(>->rps), + huc->status[type].reg.reg, count, ret); } /* mark the load process as complete even if the wait failed */ @@ -540,7 +528,7 @@ int intel_huc_wait_for_auth_complete(struct intel_huc *huc, int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type) { struct intel_gt *gt = huc_to_gt(huc); - struct intel_guc *guc = >->uc.guc; + struct intel_guc *guc = gt_to_guc(gt); int ret; if (!intel_uc_fw_is_loaded(&huc->fw)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index ba5cb08e9e7b..921ad4b1687f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -55,9 +55,9 @@ struct intel_huc { int intel_huc_sanitize(struct intel_huc *huc); void intel_huc_init_early(struct intel_huc *huc); +void intel_huc_fini_late(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); -void intel_huc_suspend(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type); int intel_huc_wait_for_auth_complete(struct intel_huc *huc, enum intel_huc_authentication_type type); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 6dfe5d9456c6..4a3493e8d433 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -50,10 +50,6 @@ static void uc_expand_default_options(struct intel_uc *uc) /* Default: enable HuC authentication and GuC submission */ i915->params.enable_guc = ENABLE_GUC_LOAD_HUC | ENABLE_GUC_SUBMISSION; - - /* XEHPSDV and PVC do not use HuC */ - if (IS_XEHPSDV(i915) || IS_PONTEVECCHIO(i915)) - i915->params.enable_guc &= ~ENABLE_GUC_LOAD_HUC; } /* Reset GuC providing us with fresh state for both GuC and HuC. @@ -103,7 +99,7 @@ static void __confirm_options(struct intel_uc *uc) } if (!intel_uc_supports_guc(uc)) - gt_info(gt, "Incompatible option enable_guc=%d - %s\n", + gt_info(gt, "Incompatible option enable_guc=%d - %s\n", i915->params.enable_guc, "GuC is not supported!"); if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION && @@ -140,6 +136,7 @@ void intel_uc_init_late(struct intel_uc *uc) void intel_uc_driver_late_release(struct intel_uc *uc) { + intel_huc_fini_late(&uc->huc); } /** @@ -516,7 +513,7 @@ static int __uc_init_hw(struct intel_uc *uc) ERR_PTR(ret), attempts); } - /* Did we succeded or run out of retries? */ + /* Did we succeed or run out of retries? */ if (ret) goto err_log_capture; @@ -637,6 +634,10 @@ void intel_uc_reset_finish(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; + /* + * NB: The wedge code path results in prepare -> prepare -> finish -> finish. + * So this function is sometimes called with the in-progress flag not set. + */ uc->reset_in_progress = false; /* Firmware expected to be running when this function is called */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 756093eaf2ad..ec33ad942115 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -698,12 +698,18 @@ static int check_gsc_manifest(struct intel_gt *gt, const struct firmware *fw, struct intel_uc_fw *uc_fw) { + int ret; + switch (uc_fw->type) { case INTEL_UC_FW_TYPE_HUC: - intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size); + ret = intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size); + if (ret) + return ret; break; case INTEL_UC_FW_TYPE_GSC: - intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size); + ret = intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size); + if (ret) + return ret; break; default: MISSING_CASE(uc_fw->type); @@ -807,7 +813,7 @@ static int try_firmware_load(struct intel_uc_fw *uc_fw, const struct firmware ** static int check_mtl_huc_guc_compatibility(struct intel_gt *gt, struct intel_uc_fw_file *huc_selected) { - struct intel_uc_fw_file *guc_selected = >->uc.guc.fw.file_selected; + struct intel_uc_fw_file *guc_selected = >_to_guc(gt)->fw.file_selected; struct intel_uc_fw_ver *huc_ver = &huc_selected->ver; struct intel_uc_fw_ver *guc_ver = &guc_selected->ver; bool new_huc, new_guc; @@ -1209,7 +1215,7 @@ static int uc_fw_rsa_data_create(struct intel_uc_fw *uc_fw) * since its GGTT offset will be GuC accessible. */ GEM_BUG_ON(uc_fw->rsa_size > PAGE_SIZE); - vma = intel_guc_allocate_vma(>->uc.guc, PAGE_SIZE); + vma = intel_guc_allocate_vma(gt_to_guc(gt), PAGE_SIZE); if (IS_ERR(vma)) return PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 9a431726c8d5..ac7b3aad2222 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -258,6 +258,11 @@ static inline bool intel_uc_fw_is_running(struct intel_uc_fw *uc_fw) return __intel_uc_fw_status(uc_fw) == INTEL_UC_FIRMWARE_RUNNING; } +static inline bool intel_uc_fw_is_in_error(struct intel_uc_fw *uc_fw) +{ + return intel_uc_fw_status_to_error(__intel_uc_fw_status(uc_fw)) != 0; +} + static inline bool intel_uc_fw_is_overridden(const struct intel_uc_fw *uc_fw) { return uc_fw->user_overridden; diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index c900aac85adb..68feb55654f7 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -144,7 +144,7 @@ err: static int intel_guc_steal_guc_ids(void *arg) { struct intel_gt *gt = arg; - struct intel_guc *guc = >->uc.guc; + struct intel_guc *guc = gt_to_guc(gt); int ret, sv, context_index = 0; intel_wakeref_t wakeref; struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c index 26fdc392fce6..83801c992488 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c @@ -64,7 +64,7 @@ static int intel_hang_guc(void *arg) old_beat = engine->props.heartbeat_interval_ms; ret = intel_engine_set_heartbeat(engine, BEAT_INTERVAL); if (ret) { - gt_err(gt, "Failed to boost heatbeat interval: %pe\n", ERR_PTR(ret)); + gt_err(gt, "Failed to boost heartbeat interval: %pe\n", ERR_PTR(ret)); goto err; } |