summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/gen2_engine_cs.c23
-rw-r--r--drivers/gpu/drm/i915/gt/gen2_engine_cs.h6
-rw-r--r--drivers/gpu/drm/i915/gt/gen7_renderclear.c3
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c31
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c43
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.c17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.h22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c77
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_regs.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c30
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c43
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gsc.c15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c39
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.c30
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.c52
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h73
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c45
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c66
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.c22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.c54
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.c14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c79
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.c24
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c74
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c25
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.c13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_tlb.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c301
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_context.c3
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c120
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_execlists.c6
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c4
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_migrate.c4
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.c21
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_reset.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.c1
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_slpc.c8
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_tlb.c4
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.c3
-rw-r--r--drivers/gpu/drm/i915/gt/sysfs_engines.c5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h15
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c49
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c30
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c124
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c12
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c20
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h11
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c17
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c305
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c81
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc.c2
87 files changed, 1247 insertions, 1060 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
index 8fe0499308ff..4904d0f4162c 100644
--- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
@@ -169,7 +169,7 @@ static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
return cs;
}
-u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+u32 *gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}
@@ -248,7 +248,7 @@ int i830_emit_bb_start(struct i915_request *rq,
return 0;
}
-int gen3_emit_bb_start(struct i915_request *rq,
+int gen2_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
unsigned int dispatch_flags)
{
@@ -292,29 +292,12 @@ int gen4_emit_bb_start(struct i915_request *rq,
void gen2_irq_enable(struct intel_engine_cs *engine)
{
- struct drm_i915_private *i915 = engine->i915;
-
- i915->irq_mask &= ~engine->irq_enable_mask;
- intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
- ENGINE_POSTING_READ16(engine, RING_IMR);
-}
-
-void gen2_irq_disable(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
-
- i915->irq_mask |= engine->irq_enable_mask;
- intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
-}
-
-void gen3_irq_enable(struct intel_engine_cs *engine)
-{
engine->i915->irq_mask &= ~engine->irq_enable_mask;
intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}
-void gen3_irq_disable(struct intel_engine_cs *engine)
+void gen2_irq_disable(struct intel_engine_cs *engine)
{
engine->i915->irq_mask |= engine->irq_enable_mask;
intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.h b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h
index a5cd64a65c9e..7b37560fc356 100644
--- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.h
+++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h
@@ -15,13 +15,13 @@ int gen2_emit_flush(struct i915_request *rq, u32 mode);
int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode);
int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode);
-u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs);
+u32 *gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs);
u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs);
int i830_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
unsigned int dispatch_flags);
-int gen3_emit_bb_start(struct i915_request *rq,
+int gen2_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
unsigned int dispatch_flags);
int gen4_emit_bb_start(struct i915_request *rq,
@@ -30,8 +30,6 @@ int gen4_emit_bb_start(struct i915_request *rq,
void gen2_irq_enable(struct intel_engine_cs *engine);
void gen2_irq_disable(struct intel_engine_cs *engine);
-void gen3_irq_enable(struct intel_engine_cs *engine);
-void gen3_irq_disable(struct intel_engine_cs *engine);
void gen5_irq_enable(struct intel_engine_cs *engine);
void gen5_irq_disable(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
index d38b914d1206..6e89112f68ae 100644
--- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
@@ -399,7 +399,8 @@ static void emit_batch(struct i915_vma * const vma,
batch_add(&cmds, MI_LOAD_REGISTER_IMM(2));
batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_0_GEN7));
batch_add(&cmds, 0xffff0000 |
- ((IS_IVB_GT1(i915) || IS_VALLEYVIEW(i915)) ?
+ (((IS_IVYBRIDGE(i915) && INTEL_INFO(i915)->gt == 1) ||
+ IS_VALLEYVIEW(i915)) ?
HIZ_RAW_STALL_OPT_DISABLE :
0));
batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_1));
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 86a04afff64b..e9f65f27b53f 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -189,9 +189,6 @@ static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine)
{
i915_reg_t reg = gen12_get_aux_inv_reg(engine);
- if (IS_PONTEVECCHIO(engine->i915))
- return false;
-
/*
* So far platforms supported by i915 having flat ccs do not require
* AUX invalidation. Check also whether the engine requires it.
@@ -226,7 +223,7 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
static int mtl_dummy_pipe_control(struct i915_request *rq)
{
/* Wa_14016712196 */
- if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 71)) ||
+ if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
IS_DG2(rq->i915)) {
u32 *cs;
@@ -743,21 +740,25 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
}
/* Wa_14014475959:dg2 */
-#define CCS_SEMAPHORE_PPHWSP_OFFSET 0x540
-static u32 ccs_semaphore_offset(struct i915_request *rq)
+/* Wa_16019325821 */
+/* Wa_14019159160 */
+#define HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET 0x540
+static u32 hold_switchout_semaphore_offset(struct i915_request *rq)
{
return i915_ggtt_offset(rq->context->state) +
- (LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
+ (LRC_PPHWSP_PN * PAGE_SIZE) + HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET;
}
/* Wa_14014475959:dg2 */
-static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
+/* Wa_16019325821 */
+/* Wa_14019159160 */
+static u32 *hold_switchout_emit_wa_busywait(struct i915_request *rq, u32 *cs)
{
int i;
*cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
MI_ATOMIC_MOVE;
- *cs++ = ccs_semaphore_offset(rq);
+ *cs++ = hold_switchout_semaphore_offset(rq);
*cs++ = 0;
*cs++ = 1;
@@ -773,7 +774,7 @@ static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_EQ_SDD;
*cs++ = 0;
- *cs++ = ccs_semaphore_offset(rq);
+ *cs++ = hold_switchout_semaphore_offset(rq);
*cs++ = 0;
return cs;
@@ -790,8 +791,10 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
cs = gen12_emit_preempt_busywait(rq, cs);
/* Wa_14014475959:dg2 */
- if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
- cs = ccs_emit_wa_busywait(rq, cs);
+ /* Wa_16019325821 */
+ /* Wa_14019159160 */
+ if (intel_engine_uses_wa_hold_switchout(rq->engine))
+ cs = hold_switchout_emit_wa_busywait(rq, cs);
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
@@ -822,12 +825,12 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
flags |= PIPE_CONTROL_FLUSH_L3;
/* Wa_14016712196 */
- if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
/* dummy PIPE_CONTROL + depth flush */
cs = gen12_emit_pipe_control(cs, 0,
PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
- if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
+ if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 55))
/* Wa_1409600907 */
flags |= PIPE_CONTROL_DEPTH_STALL;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index fa46d2308b0e..398d60a66410 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -500,11 +500,11 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
}
static void
-xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
- struct i915_vma_resource *vma_res,
- struct sgt_dma *iter,
- unsigned int pat_index,
- u32 flags)
+xehp_ppgtt_insert_huge(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res,
+ struct sgt_dma *iter,
+ unsigned int pat_index,
+ u32 flags)
{
const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
unsigned int rem = sg_dma_len(iter->sg);
@@ -741,8 +741,8 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
struct sgt_dma iter = sgt_dma(vma_res);
if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
- if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 50))
- xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
+ if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 55))
+ xehp_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
else
gen8_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
} else {
@@ -781,11 +781,11 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
}
-static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
- dma_addr_t addr,
- u64 offset,
- unsigned int pat_index,
- u32 flags)
+static void xehp_ppgtt_insert_entry_lm(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ unsigned int pat_index,
+ u32 flags)
{
u64 idx = offset >> GEN8_PTE_SHIFT;
struct i915_page_directory * const pdp =
@@ -810,15 +810,15 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, pat_index, flags);
}
-static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
- dma_addr_t addr,
- u64 offset,
- unsigned int pat_index,
- u32 flags)
+static void xehp_ppgtt_insert_entry(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ unsigned int pat_index,
+ u32 flags)
{
if (flags & PTE_LM)
- return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
- pat_index, flags);
+ return xehp_ppgtt_insert_entry_lm(vm, addr, offset,
+ pat_index, flags);
return gen8_ppgtt_insert_entry(vm, addr, offset, pat_index, flags);
}
@@ -961,6 +961,9 @@ static int gen8_init_rsvd(struct i915_address_space *vm)
struct i915_vma *vma;
int ret;
+ if (!intel_gt_needs_wa_16018031267(vm->gt))
+ return 0;
+
/* The memory will be used only by GPU. */
obj = i915_gem_object_create_lmem(i915, PAGE_SIZE,
I915_BO_ALLOC_VOLATILE |
@@ -1042,7 +1045,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
ppgtt->vm.insert_entries = gen8_ppgtt_insert;
if (HAS_64K_PAGES(gt->i915))
- ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
+ ppgtt->vm.insert_page = xehp_ppgtt_insert_entry;
else
ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index d650beb8ed22..cc866773ba6f 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -70,7 +70,7 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
if (!--b->irq_enabled)
b->irq_disable(b);
- WRITE_ONCE(b->irq_armed, 0);
+ WRITE_ONCE(b->irq_armed, NULL);
intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
}
@@ -263,8 +263,13 @@ static void signal_irq_work(struct irq_work *work)
i915_request_put(rq);
}
+ /* Lazy irq enabling after HW submission */
if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
intel_breadcrumbs_arm_irq(b);
+
+ /* And confirm that we still want irqs enabled before we yield */
+ if (READ_ONCE(b->irq_armed) && !atomic_read(&b->active))
+ intel_breadcrumbs_disarm_irq(b);
}
struct intel_breadcrumbs *
@@ -315,13 +320,7 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
return;
/* Kick the work once more to drain the signalers, and disarm the irq */
- irq_work_sync(&b->irq_work);
- while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
- local_irq_disable();
- signal_irq_work(&b->irq_work);
- local_irq_enable();
- cond_resched();
- }
+ irq_work_queue(&b->irq_work);
}
void intel_breadcrumbs_free(struct kref *kref)
@@ -404,7 +403,7 @@ static void insert_breadcrumb(struct i915_request *rq)
* the request as it may have completed and raised the interrupt as
* we were attaching it into the lists.
*/
- if (!b->irq_armed || __i915_request_is_complete(rq))
+ if (!READ_ONCE(b->irq_armed) || __i915_request_is_complete(rq))
irq_work_queue(&b->irq_work);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index a2f1245741bb..b1b8695ba7c9 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -27,6 +27,8 @@ static void rcu_context_free(struct rcu_head *rcu)
struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);
trace_intel_context_free(ce);
+ if (intel_context_has_own_state(ce))
+ fput(ce->default_state);
kmem_cache_free(slab_ce, ce);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 25564c01507e..9f523999acd1 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -375,6 +375,28 @@ intel_context_clear_nopreempt(struct intel_context *ce)
clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
}
+#if IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API)
+static inline bool intel_context_has_own_state(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_OWN_STATE, &ce->flags);
+}
+
+static inline bool intel_context_set_own_state(struct intel_context *ce)
+{
+ return test_and_set_bit(CONTEXT_OWN_STATE, &ce->flags);
+}
+#else
+static inline bool intel_context_has_own_state(const struct intel_context *ce)
+{
+ return false;
+}
+
+static inline bool intel_context_set_own_state(struct intel_context *ce)
+{
+ return true;
+}
+#endif
+
u64 intel_context_get_total_runtime_ns(struct intel_context *ce);
u64 intel_context_get_avg_runtime_ns(struct intel_context *ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 7eccbd70d89f..98c7f6052069 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -99,6 +99,8 @@ struct intel_context {
struct i915_address_space *vm;
struct i915_gem_context __rcu *gem_context;
+ struct file *default_state;
+
/*
* @signal_lock protects the list of requests that need signaling,
* @signals. While there are any requests that need signaling,
@@ -130,6 +132,8 @@ struct intel_context {
#define CONTEXT_PERMA_PIN 11
#define CONTEXT_IS_PARKING 12
#define CONTEXT_EXITING 13
+#define CONTEXT_LOW_LATENCY 14
+#define CONTEXT_OWN_STATE 15
struct {
u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 40269e4c1e31..325da0414d94 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -126,9 +126,6 @@ execlists_active(const struct intel_engine_execlists *execlists)
return active;
}
-struct i915_request *
-execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
-
static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 40687806d22a..4d30a86016f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -497,9 +497,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
engine->logical_mask = BIT(logical_instance);
__sprint_engine_name(engine);
- if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
- __ffs(CCS_MASK(engine->gt)) == engine->instance) ||
- engine->class == RENDER_CLASS)
+ if ((engine->class == COMPUTE_CLASS || engine->class == RENDER_CLASS) &&
+ __ffs(CCS_MASK(engine->gt) | RCS_MASK(engine->gt)) == engine->instance)
engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
/* features common between engines sharing EUs */
@@ -589,7 +588,7 @@ u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value)
* NB: The GuC API only supports 32bit values. However, the limit is further
* reduced due to internal calculations which would otherwise overflow.
*/
- if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
+ if (intel_guc_submission_is_wanted(gt_to_guc(engine->gt)))
value = min_t(u64, value, guc_policy_max_preempt_timeout_ms());
value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
@@ -610,7 +609,7 @@ u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value)
* NB: The GuC API only supports 32bit values. However, the limit is further
* reduced due to internal calculations which would otherwise overflow.
*/
- if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
+ if (intel_guc_submission_is_wanted(gt_to_guc(engine->gt)))
value = min_t(u64, value, guc_policy_max_exec_quantum_ms());
value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
@@ -679,7 +678,7 @@ void intel_engines_release(struct intel_gt *gt)
*/
GEM_BUG_ON(intel_gt_pm_is_awake(gt));
if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- __intel_gt_reset(gt, ALL_ENGINES);
+ intel_gt_reset_all_engines(gt);
/* Decouple the backend; but keep the layout for late GPU resets */
for_each_engine(engine, gt, id) {
@@ -694,6 +693,8 @@ void intel_engines_release(struct intel_gt *gt)
memset(&engine->reset, 0, sizeof(engine->reset));
}
+
+ llist_del_all(&gt->i915->uabi_engines_llist);
}
void intel_engine_free_request_pool(struct intel_engine_cs *engine)
@@ -765,14 +766,14 @@ static void engine_mask_apply_media_fuses(struct intel_gt *gt)
* and bits have disable semantices.
*/
media_fuse = intel_uncore_read(gt->uncore, GEN11_GT_VEBOX_VDBOX_DISABLE);
- if (MEDIA_VER_FULL(i915) < IP_VER(12, 50))
+ if (MEDIA_VER_FULL(i915) < IP_VER(12, 55))
media_fuse = ~media_fuse;
vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
GEN11_GT_VEBOX_DISABLE_SHIFT;
- if (MEDIA_VER_FULL(i915) >= IP_VER(12, 50)) {
+ if (MEDIA_VER_FULL(i915) >= IP_VER(12, 55)) {
fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1);
} else {
@@ -839,38 +840,6 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
}
}
-static void engine_mask_apply_copy_fuses(struct intel_gt *gt)
-{
- struct drm_i915_private *i915 = gt->i915;
- struct intel_gt_info *info = &gt->info;
- unsigned long meml3_mask;
- unsigned long quad;
-
- if (!(GRAPHICS_VER_FULL(i915) >= IP_VER(12, 60) &&
- GRAPHICS_VER_FULL(i915) < IP_VER(12, 70)))
- return;
-
- meml3_mask = intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3);
- meml3_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, meml3_mask);
-
- /*
- * Link Copy engines may be fused off according to meml3_mask. Each
- * bit is a quad that houses 2 Link Copy and two Sub Copy engines.
- */
- for_each_clear_bit(quad, &meml3_mask, GEN12_MAX_MSLICES) {
- unsigned int instance = quad * 2 + 1;
- intel_engine_mask_t mask = GENMASK(_BCS(instance + 1),
- _BCS(instance));
-
- if (mask & info->engine_mask) {
- gt_dbg(gt, "bcs%u fused off\n", instance);
- gt_dbg(gt, "bcs%u fused off\n", instance + 1);
-
- info->engine_mask &= ~mask;
- }
- }
-}
-
/*
* Determine which engines are fused off in our particular hardware.
* Note that we have a catch-22 situation where we need to be able to access
@@ -889,7 +858,6 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
engine_mask_apply_media_fuses(gt);
engine_mask_apply_compute_fuses(gt);
- engine_mask_apply_copy_fuses(gt);
/*
* The only use of the GSC CS is to load and communicate with the GSC
@@ -908,6 +876,29 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
info->engine_mask &= ~BIT(GSC0);
}
+ /*
+ * Do not create the command streamer for CCS slices beyond the first.
+ * All the workload submitted to the first engine will be shared among
+ * all the slices.
+ *
+ * Once the user will be allowed to customize the CCS mode, then this
+ * check needs to be removed.
+ */
+ if (IS_DG2(gt->i915)) {
+ u8 first_ccs = __ffs(CCS_MASK(gt));
+
+ /*
+ * Store the number of active cslices before
+ * changing the CCS engine configuration
+ */
+ gt->ccs.cslices = CCS_MASK(gt);
+
+ /* Mask off all the CCS engine */
+ info->engine_mask &= ~GENMASK(CCS3, CCS0);
+ /* Put back in the first CCS engine */
+ info->engine_mask |= BIT(_CCS(first_ccs));
+ }
+
return info->engine_mask;
}
@@ -1190,9 +1181,9 @@ static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
num = ARRAY_SIZE(xelpmp_regs);
}
} else {
- if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) ||
+ if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 74) ||
+ GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) ||
GRAPHICS_VER_FULL(i915) == IP_VER(12, 70) ||
- GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
regs = xehp_regs;
num = ARRAY_SIZE(xehp_regs);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 1a8e2b7db013..8d4bb95f8424 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -96,7 +96,8 @@ static void heartbeat_commit(struct i915_request *rq,
static void show_heartbeat(const struct i915_request *rq,
struct intel_engine_cs *engine)
{
- struct drm_printer p = drm_debug_printer("heartbeat");
+ struct drm_printer p =
+ drm_dbg_printer(&engine->i915->drm, DRM_UT_DRIVER, "heartbeat");
if (!rq) {
intel_engine_dump(engine, &p,
@@ -290,6 +291,9 @@ static int __intel_engine_pulse(struct intel_engine_cs *engine)
heartbeat_commit(rq, &attr);
GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
+ /* Ensure the forced pulse gets a full period to execute */
+ next_heartbeat(engine);
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 96bdb93a948d..fb7bff27b45a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -279,9 +279,6 @@ static int __engine_park(struct intel_wakeref *wf)
intel_engine_park_heartbeat(engine);
intel_breadcrumbs_park(engine->breadcrumbs);
- /* Must be reset upon idling, or we may miss the busy wakeup. */
- GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN);
-
if (engine->park)
engine->park(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index a8eac59e3779..1c4784cb296c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -15,6 +15,7 @@
#define HEAD_WRAP_COUNT 0xFFE00000
#define HEAD_WRAP_ONE 0x00200000
#define HEAD_ADDR 0x001FFFFC
+#define HEAD_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */
#define RING_START(base) _MMIO((base) + 0x38)
#define RING_CTL(base) _MMIO((base) + 0x3c)
#define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */
@@ -26,7 +27,6 @@
#define RING_VALID_MASK 0x00000001
#define RING_VALID 0x00000001
#define RING_INVALID 0x00000000
-#define RING_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */
#define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */
#define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */
#define RING_SYNC_0(base) _MMIO((base) + 0x40)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 960e6be2042f..fe1f85e5dda3 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -343,6 +343,11 @@ struct intel_engine_guc_stats {
* @start_gt_clk: GT clock time of last idle to active transition.
*/
u64 start_gt_clk;
+
+ /**
+ * @total: The last value of total returned
+ */
+ u64 total;
};
union intel_engine_tlb_inv_reg {
@@ -586,7 +591,7 @@ struct intel_engine_cs {
#define I915_ENGINE_HAS_RCS_REG_STATE BIT(9)
#define I915_ENGINE_HAS_EU_PRIORITY BIT(10)
#define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11)
-#define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT BIT(12)
+#define I915_ENGINE_USES_WA_HOLD_SWITCHOUT BIT(12)
unsigned int flags;
/*
@@ -696,10 +701,12 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
}
/* Wa_14014475959:dg2 */
+/* Wa_16019325821 */
+/* Wa_14019159160 */
static inline bool
-intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine)
+intel_engine_uses_wa_hold_switchout(struct intel_engine_cs *engine)
{
- return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+ return engine->flags & I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
}
#endif /* __INTEL_ENGINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 42aade0faf2d..4a80ffa1b962 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -405,15 +405,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
return active;
}
-struct i915_request *
-execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
-{
- struct intel_engine_cs *engine =
- container_of(execlists, typeof(*engine), execlists);
-
- return __unwind_incomplete_requests(engine);
-}
-
static void
execlists_context_status_change(struct i915_request *rq, unsigned long status)
{
@@ -493,7 +484,7 @@ __execlists_schedule_in(struct i915_request *rq)
/* Use a fixed tag for OA and friends */
GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
ce->lrc.ccid = ce->tag;
- } else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
+ } else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) {
/* We don't need a strict matching tag, just different values */
unsigned int tag = ffs(READ_ONCE(engine->context_tag));
@@ -613,7 +604,7 @@ static void __execlists_schedule_out(struct i915_request * const rq,
intel_engine_add_retire(engine, ce->timeline);
ccid = ce->lrc.ccid;
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) {
ccid >>= XEHP_SW_CTX_ID_SHIFT - 32;
ccid &= XEHP_MAX_CONTEXT_HW_ID;
} else {
@@ -1907,7 +1898,7 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
head, upper_32_bits(csb), lower_32_bits(csb));
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
promote = xehp_csb_parse(csb);
else if (GRAPHICS_VER(engine->i915) >= 12)
promote = gen12_csb_parse(csb);
@@ -2898,7 +2889,7 @@ static void enable_error_interrupt(struct intel_engine_cs *engine)
drm_err(&engine->i915->drm,
"engine '%s' resumed still in error: %08x\n",
engine->name, status);
- __intel_gt_reset(engine->gt, engine->mask);
+ intel_gt_reset_engine(engine);
}
/*
@@ -3272,6 +3263,9 @@ static void execlists_park(struct intel_engine_cs *engine)
{
cancel_timer(&engine->execlists.timer);
cancel_timer(&engine->execlists.preempt);
+
+ /* Reset upon idling, or we may delay the busy wakeup. */
+ WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN);
}
static void add_to_engine(struct i915_request *rq)
@@ -3312,11 +3306,7 @@ static void remove_from_engine(struct i915_request *rq)
static bool can_preempt(struct intel_engine_cs *engine)
{
- if (GRAPHICS_VER(engine->i915) > 8)
- return true;
-
- /* GPGPU on bdw requires extra w/a; not implemented */
- return engine->class != RENDER_CLASS;
+ return GRAPHICS_VER(engine->i915) > 8;
}
static void kick_execlists(const struct i915_request *rq, int prio)
@@ -3479,7 +3469,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
}
}
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) {
if (intel_engine_has_preemption(engine))
engine->emit_bb_start = xehp_emit_bb_start;
else
@@ -3582,7 +3572,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
if (GRAPHICS_VER(engine->i915) >= 11 &&
- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 50)) {
+ GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 55)) {
execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 21a7e3191c18..f6c59f20832f 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -9,10 +9,9 @@
#include <linux/stop_machine.h>
#include <drm/drm_managed.h>
-#include <drm/i915_drm.h>
-#include <drm/intel-gtt.h>
+#include <drm/intel/i915_drm.h>
+#include <drm/intel/intel-gtt.h>
-#include "display/intel_display.h"
#include "gem/i915_gem_lmem.h"
#include "intel_context.h"
@@ -24,6 +23,7 @@
#include "intel_ring.h"
#include "i915_drv.h"
#include "i915_pci.h"
+#include "i915_reg.h"
#include "i915_request.h"
#include "i915_scatterlist.h"
#include "i915_utils.h"
@@ -107,11 +107,12 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915)
/**
* i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
* @vm: The VM to suspend the mappings for
+ * @evict_all: Evict all VMAs
*
* Suspend the memory mappings for all objects mapped to HW via the GGTT or a
* DPT page table.
*/
-void i915_ggtt_suspend_vm(struct i915_address_space *vm)
+void i915_ggtt_suspend_vm(struct i915_address_space *vm, bool evict_all)
{
struct i915_vma *vma, *vn;
int save_skip_rewrite;
@@ -157,7 +158,7 @@ retry:
goto retry;
}
- if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
+ if (evict_all || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
i915_vma_wait_for_bind(vma);
__i915_vma_evict(vma, false);
@@ -172,13 +173,15 @@ retry:
vm->skip_pte_rewrite = save_skip_rewrite;
mutex_unlock(&vm->mutex);
+
+ drm_WARN_ON(&vm->i915->drm, evict_all && !list_empty(&vm->bound_list));
}
void i915_ggtt_suspend(struct i915_ggtt *ggtt)
{
struct intel_gt *gt;
- i915_ggtt_suspend_vm(&ggtt->vm);
+ i915_ggtt_suspend_vm(&ggtt->vm, false);
ggtt->invalidate(ggtt);
list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
@@ -230,11 +233,8 @@ static void guc_ggtt_ct_invalidate(struct intel_gt *gt)
struct intel_uncore *uncore = gt->uncore;
intel_wakeref_t wakeref;
- with_intel_runtime_pm_if_active(uncore->rpm, wakeref) {
- struct intel_guc *guc = &gt->uc.guc;
-
- intel_guc_invalidate_tlb_guc(guc);
- }
+ with_intel_runtime_pm_if_active(uncore->rpm, wakeref)
+ intel_guc_invalidate_tlb_guc(gt_to_guc(gt));
}
static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
@@ -245,7 +245,7 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
gen8_ggtt_invalidate(ggtt);
list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
- if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc))
+ if (intel_guc_tlb_invalidation_is_available(gt_to_guc(gt)))
guc_ggtt_ct_invalidate(gt);
else if (GRAPHICS_VER(i915) >= 12)
intel_uncore_write_fw(gt->uncore,
@@ -1152,13 +1152,20 @@ static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
phys_addr_t phys_addr;
u32 pte_flags;
int ret;
GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
- phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
+
+ if (i915_direct_stolen_access(i915)) {
+ drm_dbg(&i915->drm, "Using direct GSM access\n");
+ phys_addr = intel_uncore_read64(uncore, GEN6_GSMBASE) & GEN11_BDSM_MASK;
+ } else {
+ phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
+ }
if (needs_wc_ggtt_mapping(i915))
ggtt->gsm = ioremap_wc(phys_addr, size);
@@ -1541,6 +1548,7 @@ int i915_ggtt_enable_hw(struct drm_i915_private *i915)
/**
* i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM
* @vm: The VM to restore the mappings for
+ * @all_evicted: Were all VMAs expected to be evicted on suspend?
*
* Restore the memory mappings for all objects mapped to HW via the GGTT or a
* DPT page table.
@@ -1548,13 +1556,18 @@ int i915_ggtt_enable_hw(struct drm_i915_private *i915)
* Returns %true if restoring the mapping for any object that was in a write
* domain before suspend.
*/
-bool i915_ggtt_resume_vm(struct i915_address_space *vm)
+bool i915_ggtt_resume_vm(struct i915_address_space *vm, bool all_evicted)
{
struct i915_vma *vma;
bool write_domain_objs = false;
drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
+ if (all_evicted) {
+ drm_WARN_ON(&vm->i915->drm, !list_empty(&vm->bound_list));
+ return false;
+ }
+
/* First fill our portion of the GTT with scratch pages */
vm->clear_range(vm, 0, vm->total);
@@ -1594,7 +1607,7 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
intel_gt_check_and_clear_faults(gt);
- flush = i915_ggtt_resume_vm(&ggtt->vm);
+ flush = i915_ggtt_resume_vm(&ggtt->vm, false);
if (drm_mm_node_allocated(&ggtt->error_capture))
ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index 40371b8a9bbb..0ffba50981e3 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -298,6 +298,7 @@ void i915_vma_revoke_fence(struct i915_vma *vma)
return;
GEM_BUG_ON(fence->vma != vma);
+ i915_active_wait(&fence->active);
GEM_BUG_ON(!i915_active_is_idle(&fence->active));
GEM_BUG_ON(atomic_read(&fence->pin_count));
@@ -417,7 +418,6 @@ out_unpin:
* For an untiled surface, this removes any existing fence.
*
* Returns:
- *
* 0 on success, negative error code on failure.
*/
int i915_vma_pin_fence(struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
index 866c416afb73..59eed0a0ce90 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
@@ -5,7 +5,7 @@
#include "intel_ggtt_gmch.h"
-#include <drm/intel-gtt.h>
+#include <drm/intel/intel-gtt.h>
#include <linux/agp_backend.h>
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 2bd8d98d2110..5394bc7d4daf 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -220,6 +220,7 @@
#define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1)
#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2)
+#define CMD_3DSTATE_MESH_CONTROL ((0x3 << 29) | (0x3 << 27) | (0x0 << 24) | (0x77 << 16) | (0x3))
#define XY_CTRL_SURF_INSTR_SIZE 5
#define MI_FLUSH_DW_SIZE 3
diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c
index 6d440de8ba01..1e925c75fb08 100644
--- a/drivers/gpu/drm/i915/gt/intel_gsc.c
+++ b/drivers/gpu/drm/i915/gt/intel_gsc.c
@@ -103,19 +103,6 @@ static const struct gsc_def gsc_def_dg1[] = {
}
};
-static const struct gsc_def gsc_def_xehpsdv[] = {
- {
- /* HECI1 not enabled on the device. */
- },
- {
- .name = "mei-gscfi",
- .bar = DG1_GSC_HECI2_BASE,
- .bar_size = GSC_BAR_LENGTH,
- .use_polling = true,
- .slow_firmware = true,
- }
-};
-
static const struct gsc_def gsc_def_dg2[] = {
{
.name = "mei-gsc",
@@ -188,8 +175,6 @@ static void gsc_init_one(struct drm_i915_private *i915, struct intel_gsc *gsc,
if (IS_DG1(i915)) {
def = &gsc_def_dg1[intf_id];
- } else if (IS_XEHPSDV(i915)) {
- def = &gsc_def_xehpsdv[intf_id];
} else if (IS_DG2(i915)) {
def = &gsc_def_dg2[intf_id];
} else {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index a425db5ed3a2..c4a351ebf395 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -4,7 +4,7 @@
*/
#include <drm/drm_managed.h>
-#include <drm/intel-gtt.h>
+#include <drm/intel/intel-gtt.h>
#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
@@ -185,7 +185,7 @@ int intel_gt_init_hw(struct intel_gt *gt)
if (IS_HASWELL(i915))
intel_uncore_write(uncore,
HSW_MI_PREDICATE_RESULT_2,
- IS_HASWELL_GT3(i915) ?
+ INTEL_INFO(i915)->gt == 3 ?
LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
/* Apply the GT workarounds... */
@@ -278,7 +278,7 @@ intel_gt_clear_error_registers(struct intel_gt *gt,
intel_uncore_posting_read(uncore,
XELPMP_RING_FAULT_REG);
- } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) {
intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG,
RING_FAULT_VALID, 0);
intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
@@ -302,7 +302,7 @@ static void gen6_check_faults(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- u32 fault;
+ unsigned long fault;
for_each_engine(engine, gt, id) {
fault = GEN6_RING_FAULT_REG_READ(engine);
@@ -310,8 +310,8 @@ static void gen6_check_faults(struct intel_gt *gt)
gt_dbg(gt, "Unexpected fault\n"
"\tAddr: 0x%08lx\n"
"\tAddress space: %s\n"
- "\tSource ID: %d\n"
- "\tType: %d\n",
+ "\tSource ID: %ld\n"
+ "\tType: %ld\n",
fault & PAGE_MASK,
fault & RING_FAULT_GTTSEL_MASK ?
"GGTT" : "PPGTT",
@@ -403,7 +403,7 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915;
/* From GEN8 onwards we only have one 'All Engine Fault Register' */
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
xehp_check_faults(gt);
else if (GRAPHICS_VER(i915) >= 8)
gen8_check_faults(gt);
@@ -832,7 +832,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
/* Scrub all HW state upon release */
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
- __intel_gt_reset(gt, ALL_ENGINES);
+ intel_gt_reset_all_engines(gt);
}
void intel_gt_driver_release(struct intel_gt *gt)
@@ -1024,6 +1024,12 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
return I915_MAP_WC;
}
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt)
+{
+ /* Wa_16018031267, Wa_16018063123 */
+ return IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 55), IP_VER(12, 71));
+}
+
bool intel_gt_needs_wa_22016122933(struct intel_gt *gt)
{
return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 608f5c872928..998ca029b73a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -82,17 +82,18 @@ struct drm_printer;
##__VA_ARGS__); \
} while (0)
-#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
- IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \
- engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
-
static inline bool gt_is_root(struct intel_gt *gt)
{
return !gt->info.id;
}
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt);
bool intel_gt_needs_wa_22016122933(struct intel_gt *gt);
+#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
+ intel_gt_needs_wa_16018031267(engine->gt) && \
+ engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
+
static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
{
return container_of(uc, struct intel_gt, uc);
@@ -123,6 +124,11 @@ static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
return guc_to_gt(guc)->i915;
}
+static inline struct intel_guc *gt_to_guc(struct intel_gt *gt)
+{
+ return &gt->uc.guc;
+}
+
void intel_gt_common_init_early(struct intel_gt *gt);
int intel_root_gt_init_early(struct drm_i915_private *i915);
int intel_gt_assign_ggtt(struct intel_gt *gt);
@@ -168,7 +174,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
int intel_gt_probe_all(struct drm_i915_private *i915);
int intel_gt_tiles_init(struct drm_i915_private *i915);
-void intel_gt_release_all(struct drm_i915_private *i915);
#define for_each_gt(gt__, i915__, id__) \
for ((id__) = 0; \
@@ -202,4 +207,10 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
void intel_gt_bind_context_set_ready(struct intel_gt *gt);
void intel_gt_bind_context_set_unready(struct intel_gt *gt);
bool intel_gt_is_bind_context_ready(struct intel_gt *gt);
+
+static inline void intel_gt_set_wedged_async(struct intel_gt *gt)
+{
+ queue_work(system_highpri_wq, &gt->wedge);
+}
+
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
new file mode 100644
index 000000000000..3c62a44e9106
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
+#include "intel_gt_regs.h"
+
+unsigned int intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+ int cslice;
+ u32 mode = 0;
+ int first_ccs = __ffs(CCS_MASK(gt));
+
+ if (!IS_DG2(gt->i915))
+ return 0;
+
+ /* Build the value for the fixed CCS load balancing */
+ for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
+ if (gt->ccs.cslices & BIT(cslice))
+ /*
+ * If available, assign the cslice
+ * to the first available engine...
+ */
+ mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
+
+ else
+ /*
+ * ... otherwise, mark the cslice as
+ * unavailable if no CCS dispatches here
+ */
+ mode |= XEHP_CCS_MODE_CSLICE(cslice,
+ XEHP_CCS_MODE_CSLICE_MASK);
+ }
+
+ return mode;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
new file mode 100644
index 000000000000..55547f2ff426
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CCS_MODE_H__
+#define __INTEL_GT_CCS_MODE_H__
+
+struct intel_gt;
+
+unsigned int intel_gt_apply_ccs_mode(struct intel_gt *gt);
+
+#endif /* __INTEL_GT_CCS_MODE_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
index 7c9be4fd1c8c..6e63505fe478 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
@@ -9,6 +9,7 @@
#include "intel_gt_clock_utils.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
+#include "soc/intel_dram.h"
static u32 read_reference_ts_freq(struct intel_uncore *uncore)
{
@@ -151,7 +152,7 @@ static u32 gen4_read_clock_frequency(struct intel_uncore *uncore)
*
* Testing on actual hardware has shown there is no /16.
*/
- return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000;
+ return DIV_ROUND_CLOSEST(i9xx_fsb_freq(uncore->i915), 4) * 1000;
}
static u32 read_clock_frequency(struct intel_uncore *uncore)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 77fb57223465..1240d44eeb85 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -68,9 +68,9 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance,
struct intel_gt *media_gt = gt->i915->media_gt;
if (instance == OTHER_GUC_INSTANCE)
- return guc_irq_handler(&gt->uc.guc, iir);
+ return guc_irq_handler(gt_to_guc(gt), iir);
if (instance == OTHER_MEDIA_GUC_INSTANCE && media_gt)
- return guc_irq_handler(&media_gt->uc.guc, iir);
+ return guc_irq_handler(gt_to_guc(media_gt), iir);
if (instance == OTHER_GTPM_INSTANCE)
return gen11_rps_irq_handler(&gt->rps, iir);
@@ -442,7 +442,7 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
iir = raw_reg_read(regs, GEN8_GT_IIR(2));
if (likely(iir)) {
gen6_rps_irq_handler(&gt->rps, iir);
- guc_irq_handler(&gt->uc.guc, iir >> 16);
+ guc_irq_handler(gt_to_guc(gt), iir >> 16);
raw_reg_write(regs, GEN8_GT_IIR(2), iir);
}
}
@@ -452,10 +452,10 @@ void gen8_gt_irq_reset(struct intel_gt *gt)
{
struct intel_uncore *uncore = gt->uncore;
- GEN8_IRQ_RESET_NDX(uncore, GT, 0);
- GEN8_IRQ_RESET_NDX(uncore, GT, 1);
- GEN8_IRQ_RESET_NDX(uncore, GT, 2);
- GEN8_IRQ_RESET_NDX(uncore, GT, 3);
+ gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(0));
+ gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(1));
+ gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(2));
+ gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(3));
}
void gen8_gt_irq_postinstall(struct intel_gt *gt)
@@ -476,14 +476,14 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
gt->pm_ier = 0x0;
gt->pm_imr = ~gt->pm_ier;
- GEN8_IRQ_INIT_NDX(uncore, GT, 0, ~gt_interrupts[0], gt_interrupts[0]);
- GEN8_IRQ_INIT_NDX(uncore, GT, 1, ~gt_interrupts[1], gt_interrupts[1]);
+ gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(0), ~gt_interrupts[0], gt_interrupts[0]);
+ gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(1), ~gt_interrupts[1], gt_interrupts[1]);
/*
* RPS interrupts will get enabled/disabled on demand when RPS itself
* is enabled/disabled. Same wil be the case for GuC interrupts.
*/
- GEN8_IRQ_INIT_NDX(uncore, GT, 2, gt->pm_imr, gt->pm_ier);
- GEN8_IRQ_INIT_NDX(uncore, GT, 3, ~gt_interrupts[3], gt_interrupts[3]);
+ gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(2), gt->pm_imr, gt->pm_ier);
+ gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(3), ~gt_interrupts[3], gt_interrupts[3]);
}
static void gen5_gt_update_irq(struct intel_gt *gt,
@@ -514,9 +514,9 @@ void gen5_gt_irq_reset(struct intel_gt *gt)
{
struct intel_uncore *uncore = gt->uncore;
- GEN3_IRQ_RESET(uncore, GT);
+ gen2_irq_reset(uncore, GT_IRQ_REGS);
if (GRAPHICS_VER(gt->i915) >= 6)
- GEN3_IRQ_RESET(uncore, GEN6_PM);
+ gen2_irq_reset(uncore, GEN6_PM_IRQ_REGS);
}
void gen5_gt_irq_postinstall(struct intel_gt *gt)
@@ -538,7 +538,7 @@ void gen5_gt_irq_postinstall(struct intel_gt *gt)
else
gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT;
- GEN3_IRQ_INIT(uncore, GT, gt->gt_imr, gt_irqs);
+ gen2_irq_init(uncore, GT_IRQ_REGS, gt->gt_imr, gt_irqs);
if (GRAPHICS_VER(gt->i915) >= 6) {
/*
@@ -551,6 +551,6 @@ void gen5_gt_irq_postinstall(struct intel_gt *gt)
}
gt->pm_imr = 0xffffffff;
- GEN3_IRQ_INIT(uncore, GEN6_PM, gt->pm_imr, pm_irqs);
+ gen2_irq_init(uncore, GEN6_PM_IRQ_REGS, gt->pm_imr, pm_irqs);
}
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index e253750a51c5..b8912bd6c08e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -57,51 +57,18 @@ static const struct intel_mmio_range icl_l3bank_steering_table[] = {
* are of a "GAM" subclass that has special rules. Thus we use a separate
* GAM table farther down for those.
*/
-static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
+static const struct intel_mmio_range dg2_mslice_steering_table[] = {
{ 0x00DD00, 0x00DDFF },
{ 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */
{},
};
-static const struct intel_mmio_range xehpsdv_gam_steering_table[] = {
- { 0x004000, 0x004AFF },
- { 0x00C800, 0x00CFFF },
- {},
-};
-
-static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
- { 0x00B000, 0x00B0FF },
- { 0x00D800, 0x00D8FF },
- {},
-};
-
static const struct intel_mmio_range dg2_lncf_steering_table[] = {
{ 0x00B000, 0x00B0FF },
{ 0x00D880, 0x00D8FF },
{},
};
-/*
- * We have several types of MCR registers on PVC where steering to (0,0)
- * will always provide us with a non-terminated value. We'll stick them
- * all in the same table for simplicity.
- */
-static const struct intel_mmio_range pvc_instance0_steering_table[] = {
- { 0x004000, 0x004AFF }, /* HALF-BSLICE */
- { 0x008800, 0x00887F }, /* CC */
- { 0x008A80, 0x008AFF }, /* TILEPSMI */
- { 0x00B000, 0x00B0FF }, /* HALF-BSLICE */
- { 0x00B100, 0x00B3FF }, /* L3BANK */
- { 0x00C800, 0x00CFFF }, /* HALF-BSLICE */
- { 0x00D800, 0x00D8FF }, /* HALF-BSLICE */
- { 0x00DD00, 0x00DDFF }, /* BSLICE */
- { 0x00E900, 0x00E9FF }, /* HALF-BSLICE */
- { 0x00EC00, 0x00EEFF }, /* HALF-BSLICE */
- { 0x00F000, 0x00FFFF }, /* HALF-BSLICE */
- { 0x024180, 0x0241FF }, /* HALF-BSLICE */
- {},
-};
-
static const struct intel_mmio_range xelpg_instance0_steering_table[] = {
{ 0x000B00, 0x000BFF }, /* SQIDI */
{ 0x001000, 0x001FFF }, /* SQIDI */
@@ -185,22 +152,16 @@ void intel_gt_mcr_init(struct intel_gt *gt)
gt->steering_table[INSTANCE0] = xelpg_instance0_steering_table;
gt->steering_table[L3BANK] = xelpg_l3bank_steering_table;
gt->steering_table[DSS] = xelpg_dss_steering_table;
- } else if (IS_PONTEVECCHIO(i915)) {
- gt->steering_table[INSTANCE0] = pvc_instance0_steering_table;
} else if (IS_DG2(i915)) {
- gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
+ gt->steering_table[MSLICE] = dg2_mslice_steering_table;
gt->steering_table[LNCF] = dg2_lncf_steering_table;
/*
* No need to hook up the GAM table since it has a dedicated
* steering control register on DG2 and can use implicit
* steering.
*/
- } else if (IS_XEHPSDV(i915)) {
- gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
- gt->steering_table[LNCF] = xehpsdv_lncf_steering_table;
- gt->steering_table[GAM] = xehpsdv_gam_steering_table;
} else if (GRAPHICS_VER(i915) >= 11 &&
- GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
+ GRAPHICS_VER_FULL(i915) < IP_VER(12, 55)) {
gt->steering_table[L3BANK] = icl_l3bank_steering_table;
gt->info.l3bank_mask =
~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
@@ -821,8 +782,6 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
for (int i = 0; i < NUM_STEERING_TYPES; i++)
if (gt->steering_table[i])
report_steering_type(p, gt, i, dump_table);
- } else if (IS_PONTEVECCHIO(gt->i915)) {
- report_steering_type(p, gt, INSTANCE0, dump_table);
} else if (HAS_MSLICE_STEERING(gt->i915)) {
report_steering_type(p, gt, MSLICE, dump_table);
report_steering_type(p, gt, LNCF, dump_table);
@@ -842,10 +801,7 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss,
unsigned int *group, unsigned int *instance)
{
- if (IS_PONTEVECCHIO(gt->i915)) {
- *group = dss / GEN_DSS_PER_CSLICE;
- *instance = dss % GEN_DSS_PER_CSLICE;
- } else if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) {
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 55)) {
*group = dss / GEN_DSS_PER_GSLICE;
*instance = dss % GEN_DSS_PER_GSLICE;
} else {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
index 01ac565a56a4..a67a4c35a4fa 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
@@ -54,7 +54,7 @@ int intel_gt_mcr_wait_for_reg(struct intel_gt *gt,
* the topology, so we lookup the DSS ID directly in "slice 0."
*/
#define _HAS_SS(ss_, gt_, group_, instance_) ( \
- GRAPHICS_VER_FULL(gt_->i915) >= IP_VER(12, 50) ? \
+ GRAPHICS_VER_FULL(gt_->i915) >= IP_VER(12, 55) ? \
intel_sseu_has_subslice(&(gt_)->info.sseu, 0, ss_) : \
intel_sseu_has_subslice(&(gt_)->info.sseu, group_, instance_))
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 220ac4f92edf..c08fdb65cc69 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -159,7 +159,7 @@ static bool reset_engines(struct intel_gt *gt)
if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
return false;
- return __intel_gt_reset(gt, ALL_ENGINES) == 0;
+ return intel_gt_reset_all_engines(gt) == 0;
}
static void gt_sanitize(struct intel_gt *gt, bool force)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index 911fd0160221..6f25c747bc29 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -35,7 +35,7 @@ static inline void __intel_gt_pm_get(struct intel_gt *gt)
static inline intel_wakeref_t intel_gt_pm_get_if_awake(struct intel_gt *gt)
{
if (!intel_wakeref_get_if_active(&gt->wakeref))
- return 0;
+ return NULL;
return intel_wakeref_track(&gt->wakeref);
}
@@ -73,7 +73,7 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t ha
}
#define with_intel_gt_pm(gt, wf) \
- for (wf = intel_gt_pm_get(gt); wf; intel_gt_pm_put(gt, wf), wf = 0)
+ for ((wf) = intel_gt_pm_get(gt); (wf); intel_gt_pm_put((gt), (wf)), (wf) = NULL)
/**
* with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent
@@ -84,7 +84,7 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t ha
* @wf: pointer to a temporary wakeref.
*/
#define with_intel_gt_pm_if_awake(gt, wf) \
- for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt, wf), wf = 0)
+ for ((wf) = intel_gt_pm_get_if_awake(gt); (wf); intel_gt_pm_put_async((gt), (wf)), (wf) = NULL)
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
@@ -105,9 +105,13 @@ int intel_gt_runtime_resume(struct intel_gt *gt);
ktime_t intel_gt_get_awake_time(const struct intel_gt *gt);
+#define INTEL_WAKEREF_MOCK_GT ERR_PTR(-ENODEV)
+
static inline bool is_mock_gt(const struct intel_gt *gt)
{
- return I915_SELFTEST_ONLY(gt->awake == -ENODEV);
+ BUILD_BUG_ON(INTEL_WAKEREF_DEF == INTEL_WAKEREF_MOCK_GT);
+
+ return I915_SELFTEST_ONLY(gt->awake == INTEL_WAKEREF_MOCK_GT);
}
#endif /* INTEL_GT_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 7114c116e928..b635aa2820d9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -71,6 +71,8 @@ static int fw_domains_show(struct seq_file *m, void *data)
struct intel_uncore_forcewake_domain *fw_domain;
unsigned int tmp;
+ spin_lock_irq(&uncore->lock);
+
seq_printf(m, "user.bypass_count = %u\n",
uncore->user_forcewake_count);
@@ -79,6 +81,8 @@ static int fw_domains_show(struct seq_file *m, void *data)
intel_uncore_forcewake_domain_to_str(fw_domain->id),
READ_ONCE(fw_domain->wake_count));
+ spin_unlock_irq(&uncore->lock);
+
return 0;
}
DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains);
@@ -367,7 +371,6 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p)
vlv_punit_put(i915);
drm_printf(p, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
- drm_printf(p, "DDR freq: %d MHz\n", i915->mem_freq);
drm_printf(p, "actual GPU freq: %d MHz\n",
intel_gpu_freq(rps, (freq_sts >> 8) & 0xff));
@@ -392,10 +395,6 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p)
drm_puts(p, "no P-state info available\n");
}
- drm_printf(p, "Current CD clock frequency: %d kHz\n", i915->display.cdclk.hw.cdclk);
- drm_printf(p, "Max CD clock frequency: %d kHz\n", i915->display.cdclk.max_cdclk_freq);
- drm_printf(p, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq);
-
intel_runtime_pm_put(uncore->rpm, wakeref);
}
@@ -432,7 +431,7 @@ static int llc_show(struct seq_file *m, void *data)
max_gpu_freq /= GEN9_FREQ_SCALER;
}
- seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
+ seq_puts(m, "GPU freq (MHz)\tEffective GPU freq (MHz)\tEffective Ring freq (MHz)\n");
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
@@ -538,7 +537,7 @@ static bool rps_eval(void *data)
{
struct intel_gt *gt = data;
- if (intel_guc_slpc_is_used(&gt->uc.guc))
+ if (intel_guc_slpc_is_used(gt_to_guc(gt)))
return false;
else
return HAS_RPS(gt->i915);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 50962cfd1353..6dba65e54cdb 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -432,6 +432,7 @@
#define XEHPG_INSTDONE_GEOM_SVG MCR_REG(0x666c)
#define CACHE_MODE_0_GEN7 _MMIO(0x7000) /* IVB+ */
+#define DISABLE_REPACKING_FOR_COMPRESSION REG_BIT(15) /* jsl+ */
#define RC_OP_FLUSH_ENABLE (1 << 0)
#define HIZ_RAW_STALL_OPT_DISABLE (1 << 2)
#define CACHE_MODE_1 _MMIO(0x7004) /* IVB+ */
@@ -718,44 +719,11 @@
#define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434)
#define VFUNIT_CLKGATE_DIS REG_BIT(20)
-#define TSGUNIT_CLKGATE_DIS REG_BIT(17) /* XEHPSDV */
#define CG3DDISCFEG_CLKGATE_DIS REG_BIT(17) /* DG2 */
#define GAMEDIA_CLKGATE_DIS REG_BIT(11)
#define HSUNIT_CLKGATE_DIS REG_BIT(8)
#define VSUNIT_CLKGATE_DIS REG_BIT(3)
-#define UNSLCGCTL9440 _MMIO(0x9440)
-#define GAMTLBOACS_CLKGATE_DIS REG_BIT(28)
-#define GAMTLBVDBOX5_CLKGATE_DIS REG_BIT(27)
-#define GAMTLBVDBOX6_CLKGATE_DIS REG_BIT(26)
-#define GAMTLBVDBOX3_CLKGATE_DIS REG_BIT(24)
-#define GAMTLBVDBOX4_CLKGATE_DIS REG_BIT(23)
-#define GAMTLBVDBOX7_CLKGATE_DIS REG_BIT(22)
-#define GAMTLBVDBOX2_CLKGATE_DIS REG_BIT(21)
-#define GAMTLBVDBOX0_CLKGATE_DIS REG_BIT(17)
-#define GAMTLBKCR_CLKGATE_DIS REG_BIT(16)
-#define GAMTLBGUC_CLKGATE_DIS REG_BIT(15)
-#define GAMTLBBLT_CLKGATE_DIS REG_BIT(14)
-#define GAMTLBVDBOX1_CLKGATE_DIS REG_BIT(6)
-
-#define UNSLCGCTL9444 _MMIO(0x9444)
-#define GAMTLBGFXA0_CLKGATE_DIS REG_BIT(30)
-#define GAMTLBGFXA1_CLKGATE_DIS REG_BIT(29)
-#define GAMTLBCOMPA0_CLKGATE_DIS REG_BIT(28)
-#define GAMTLBCOMPA1_CLKGATE_DIS REG_BIT(27)
-#define GAMTLBCOMPB0_CLKGATE_DIS REG_BIT(26)
-#define GAMTLBCOMPB1_CLKGATE_DIS REG_BIT(25)
-#define GAMTLBCOMPC0_CLKGATE_DIS REG_BIT(24)
-#define GAMTLBCOMPC1_CLKGATE_DIS REG_BIT(23)
-#define GAMTLBCOMPD0_CLKGATE_DIS REG_BIT(22)
-#define GAMTLBCOMPD1_CLKGATE_DIS REG_BIT(21)
-#define GAMTLBMERT_CLKGATE_DIS REG_BIT(20)
-#define GAMTLBVEBOX3_CLKGATE_DIS REG_BIT(19)
-#define GAMTLBVEBOX2_CLKGATE_DIS REG_BIT(18)
-#define GAMTLBVEBOX1_CLKGATE_DIS REG_BIT(17)
-#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16)
-#define LTCDD_CLKGATE_DIS REG_BIT(10)
-
#define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4)
#define XEHP_SLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x94d4)
#define SARBUNIT_CLKGATE_DIS (1 << 5)
@@ -765,9 +733,6 @@
#define L3_CLKGATE_DIS REG_BIT(16)
#define L3_CR2X_CLKGATE_DIS REG_BIT(17)
-#define SCCGCTL94DC MCR_REG(0x94dc)
-#define CG3DDISURB REG_BIT(14)
-
#define UNSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x94e4)
#define VSUNIT_CLKGATE_DIS_TGL REG_BIT(19)
#define PSDUNIT_CLKGATE_DIS REG_BIT(5)
@@ -989,10 +954,6 @@
#define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C47FF8C
#define GEN7_L3AGDIS (1 << 19)
-#define XEHPC_LNCFMISCCFGREG0 MCR_REG(0xb01c)
-#define XEHPC_HOSTCACHEEN REG_BIT(1)
-#define XEHPC_OVRLSCCC REG_BIT(0)
-
#define GEN7_L3CNTLREG2 _MMIO(0xb020)
/* MOCS (Memory Object Control State) registers */
@@ -1046,20 +1007,9 @@
#define XEHP_L3SQCREG5 MCR_REG(0xb158)
#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0)
-#define MLTICTXCTL MCR_REG(0xb170)
-#define TDONRENDER REG_BIT(2)
-
#define XEHP_L3SCQREG7 MCR_REG(0xb188)
#define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3)
-#define XEHPC_L3SCRUB MCR_REG(0xb18c)
-#define SCRUB_CL_DWNGRADE_SHARED REG_BIT(12)
-#define SCRUB_RATE_PER_BANK_MASK REG_GENMASK(2, 0)
-#define SCRUB_RATE_4B_PER_CLK REG_FIELD_PREP(SCRUB_RATE_PER_BANK_MASK, 0x6)
-
-#define L3SQCREG1_CCS0 MCR_REG(0xb200)
-#define FLUSHALLNONCOH REG_BIT(5)
-
#define GEN11_GLBLINVL _MMIO(0xb404)
#define GEN11_BANK_HASH_ADDR_EXCL_MASK (0x7f << 5)
#define GEN11_BANK_HASH_ADDR_EXCL_BIT0 (1 << 5)
@@ -1109,7 +1059,6 @@
#define XEHP_COMPCTX_TLB_INV_CR MCR_REG(0xcf04)
#define XELPMP_GSC_TLB_INV_CR _MMIO(0xcf04) /* media GT only */
-#define XEHP_MERT_MOD_CTRL MCR_REG(0xcf28)
#define RENDER_MOD_CTRL MCR_REG(0xcf2c)
#define COMP_MOD_CTRL MCR_REG(0xcf30)
#define XELPMP_GSC_MOD_CTRL _MMIO(0xcf30) /* media GT only */
@@ -1185,7 +1134,6 @@
#define EU_PERF_CNTL4 PERF_REG(0xe45c)
#define GEN9_ROW_CHICKEN4 MCR_REG(0xe48c)
-#define GEN12_DISABLE_GRF_CLEAR REG_BIT(13)
#define XEHP_DIS_BBL_SYSPIPE REG_BIT(11)
#define GEN12_DISABLE_TDL_PUSH REG_BIT(9)
#define GEN11_DIS_PICK_2ND_EU REG_BIT(7)
@@ -1202,7 +1150,6 @@
#define FLOW_CONTROL_ENABLE REG_BIT(15)
#define UGM_BACKUP_MODE REG_BIT(13)
#define MDQ_ARBITRATION_MODE REG_BIT(12)
-#define SYSTOLIC_DOP_CLOCK_GATING_DIS REG_BIT(10)
#define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE REG_BIT(8)
#define STALL_DOP_GATING_DISABLE REG_BIT(5)
#define THROTTLE_12_5 REG_GENMASK(4, 2)
@@ -1215,6 +1162,7 @@
#define GEN12_DISABLE_EARLY_READ REG_BIT(14)
#define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12)
#define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8)
+#define XELPG_DISABLE_TDL_SVHS_GATING REG_BIT(1)
#define GEN12_DISABLE_DOP_GATING REG_BIT(0)
#define RT_CTRL MCR_REG(0xe530)
@@ -1477,8 +1425,14 @@
#define ECOBITS_PPGTT_CACHE4B (0 << 8)
#define GEN12_RCU_MODE _MMIO(0x14800)
+#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1)
#define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0)
+#define XEHP_CCS_MODE _MMIO(0x14804)
+#define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */
+#define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1)
+#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH))
+
#define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168)
#define CHV_FGT_DISABLE_SS0 (1 << 10)
#define CHV_FGT_DISABLE_SS1 (1 << 11)
@@ -1519,6 +1473,10 @@
GEN6_PM_RP_DOWN_THRESHOLD | \
GEN6_PM_RP_DOWN_TIMEOUT)
+#define GEN6_PM_IRQ_REGS I915_IRQ_REGS(GEN6_PMIMR, \
+ GEN6_PMIER, \
+ GEN6_PMIIR)
+
#define GEN7_GT_SCRATCH(i) _MMIO(0x4f100 + (i) * 4)
#define GEN7_GT_SCRATCH_REG_NUM 8
@@ -1600,6 +1558,8 @@
#define VLV_RENDER_C0_COUNT _MMIO(0x138118)
#define VLV_MEDIA_C0_COUNT _MMIO(0x13811c)
+#define PCU_PWM_FAN_SPEED _MMIO(0x138140)
+
#define GEN12_RPSTAT1 _MMIO(0x1381b4)
#define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0)
#define GEN12_CAGF_MASK REG_GENMASK(19, 11)
@@ -1679,11 +1639,6 @@
#define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000)
-#define GT0_PACKAGE_ENERGY_STATUS _MMIO(0x250004)
-#define GT0_PACKAGE_RAPL_LIMIT _MMIO(0x250008)
-#define GT0_PACKAGE_POWER_SKU_UNIT _MMIO(0x250068)
-#define GT0_PLATFORM_ENERGY_STATUS _MMIO(0x25006c)
-
/*
* Standalone Media's non-engine GT registers are located at their regular GT
* offsets plus 0x380000. This extra offset is stored inside the intel_uncore
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index f0dea54880af..d7784650e4d9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -176,27 +176,13 @@ static u32 get_residency(struct intel_gt *gt, enum intel_rc6_res_type id)
return DIV_ROUND_CLOSEST_ULL(res, 1000);
}
-static u8 get_rc6_mask(struct intel_gt *gt)
-{
- u8 mask = 0;
-
- if (HAS_RC6(gt->i915))
- mask |= BIT(0);
- if (HAS_RC6p(gt->i915))
- mask |= BIT(1);
- if (HAS_RC6pp(gt->i915))
- mask |= BIT(2);
-
- return mask;
-}
-
static ssize_t rc6_enable_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buff)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
- return sysfs_emit(buff, "%x\n", get_rc6_mask(gt));
+ return sysfs_emit(buff, "%x\n", gt->rc6.enabled);
}
static ssize_t rc6_enable_dev_show(struct device *dev,
@@ -205,7 +191,7 @@ static ssize_t rc6_enable_dev_show(struct device *dev,
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(&dev->kobj, attr->attr.name);
- return sysfs_emit(buff, "%x\n", get_rc6_mask(gt));
+ return sysfs_emit(buff, "%x\n", gt->rc6.enabled);
}
static u32 __rc6_residency_ms_show(struct intel_gt *gt)
@@ -456,7 +442,7 @@ static ssize_t slpc_ignore_eff_freq_show(struct kobject *kobj,
char *buff)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
- struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+ struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
return sysfs_emit(buff, "%u\n", slpc->ignore_eff_freq);
}
@@ -466,7 +452,7 @@ static ssize_t slpc_ignore_eff_freq_store(struct kobject *kobj,
const char *buff, size_t count)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
- struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+ struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
int err;
u32 val;
@@ -587,7 +573,6 @@ static ssize_t media_freq_factor_show(struct kobject *kobj,
char *buff)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
- struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
intel_wakeref_t wakeref;
u32 mode;
@@ -595,20 +580,12 @@ static ssize_t media_freq_factor_show(struct kobject *kobj,
* Retrieve media_ratio_mode from GEN6_RPNSWREQ bit 13 set by
* GuC. GEN6_RPNSWREQ:13 value 0 represents 1:2 and 1 represents 1:1
*/
- if (IS_XEHPSDV(gt->i915) &&
- slpc->media_ratio_mode == SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL) {
- /*
- * For XEHPSDV dynamic mode GEN6_RPNSWREQ:13 does not contain
- * the media_ratio_mode, just return the cached media ratio
- */
- mode = slpc->media_ratio_mode;
- } else {
- with_intel_runtime_pm(gt->uncore->rpm, wakeref)
- mode = intel_uncore_read(gt->uncore, GEN6_RPNSWREQ);
- mode = REG_FIELD_GET(GEN12_MEDIA_FREQ_RATIO, mode) ?
- SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE :
- SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO;
- }
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ mode = intel_uncore_read(gt->uncore, GEN6_RPNSWREQ);
+
+ mode = REG_FIELD_GET(GEN12_MEDIA_FREQ_RATIO, mode) ?
+ SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE :
+ SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO;
return sysfs_emit(buff, "%u\n", media_ratio_mode_to_factor(mode));
}
@@ -618,7 +595,7 @@ static ssize_t media_freq_factor_store(struct kobject *kobj,
const char *buff, size_t count)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
- struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+ struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
u32 factor, mode;
int err;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index def7dd0eb6f1..bcee084b1f27 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -207,6 +207,14 @@ struct intel_gt {
[MAX_ENGINE_INSTANCE + 1];
enum intel_submission_method submission_method;
+ struct {
+ /*
+ * Mask of the non fused CCS slices
+ * to be used for the load balancing
+ */
+ intel_engine_mask_t cslices;
+ } ccs;
+
/*
* Default address space (either GGTT or ppGTT depending on arch).
*
@@ -284,6 +292,8 @@ struct intel_gt {
struct gt_defaults defaults;
struct kobject *sysfs_defaults;
+ struct work_struct wedge;
+
struct i915_perf_gt perf;
/** link: &ggtt.gt_list */
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 86f73fe558ca..30b128b1fde7 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -24,7 +24,8 @@
bool i915_ggtt_require_binder(struct drm_i915_private *i915)
{
/* Wa_13010847436 & Wa_14019519902 */
- return MEDIA_VER_FULL(i915) == IP_VER(13, 0);
+ return !i915_direct_stolen_access(i915) &&
+ MEDIA_VER_FULL(i915) == IP_VER(13, 0);
}
static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
@@ -679,7 +680,7 @@ void setup_private_pat(struct intel_gt *gt)
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
xelpg_setup_private_ppat(gt);
- else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+ else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
xehp_setup_private_ppat(gt);
else if (GRAPHICS_VER(i915) >= 12)
tgl_setup_private_ppat(uncore);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 6b85222ee3ea..0a36ea751b63 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -608,8 +608,8 @@ int i915_ppgtt_init_hw(struct intel_gt *gt);
struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
unsigned long lmem_pt_obj_flags);
-void i915_ggtt_suspend_vm(struct i915_address_space *vm);
-bool i915_ggtt_resume_vm(struct i915_address_space *vm);
+void i915_ggtt_suspend_vm(struct i915_address_space *vm, bool evict_all);
+bool i915_ggtt_resume_vm(struct i915_address_space *vm, bool all_evicted);
void i915_ggtt_suspend(struct i915_ggtt *gtt);
void i915_ggtt_resume(struct i915_ggtt *ggtt);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7c367ba8d9dc..51847a846002 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -546,47 +546,6 @@ static const u8 gen12_rcs_offsets[] = {
END
};
-static const u8 xehp_rcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
-
- END
-};
-
static const u8 dg2_rcs_offsets[] = {
NOP(1),
LRI(15, POSTED),
@@ -695,8 +654,6 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
return mtl_rcs_offsets;
else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return dg2_rcs_offsets;
- else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
- return xehp_rcs_offsets;
else if (GRAPHICS_VER(engine->i915) >= 12)
return gen12_rcs_offsets;
else if (GRAPHICS_VER(engine->i915) >= 11)
@@ -719,7 +676,7 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return 0x70;
else if (GRAPHICS_VER(engine->i915) >= 12)
return 0x60;
@@ -733,7 +690,7 @@ static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
static int lrc_ring_bb_offset(const struct intel_engine_cs *engine)
{
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return 0x80;
else if (GRAPHICS_VER(engine->i915) >= 12)
return 0x70;
@@ -748,7 +705,7 @@ static int lrc_ring_bb_offset(const struct intel_engine_cs *engine)
static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return 0x84;
else if (GRAPHICS_VER(engine->i915) >= 12)
return 0x74;
@@ -795,7 +752,7 @@ static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
/*
* Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
* simply to match the RCS context image layout.
@@ -863,8 +820,10 @@ static bool ctx_needs_runalone(const struct intel_context *ce)
bool ctx_is_protected = false;
/*
- * On MTL and newer platforms, protected contexts require setting
- * the LRC run-alone bit or else the encryption will not happen.
+ * Wa_14019159160 - Case 2.
+ * On some platforms, protected contexts require setting
+ * the LRC run-alone bit or else the encryption/decryption will not happen.
+ * NOTE: Case 2 only applies to PXP use-case of said workaround.
*/
if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 70) &&
(ce->engine->class == COMPUTE_CLASS || ce->engine->class == RENDER_CLASS)) {
@@ -893,6 +852,7 @@ static void init_common_regs(u32 * const regs,
if (GRAPHICS_VER(engine->i915) < 11)
ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
+ /* Wa_14019159160 - Case 2.*/
if (ctx_needs_runalone(ce))
ctl |= _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_RUNALONE_MODE);
regs[CTX_CONTEXT_CONTROL] = ctl;
@@ -1060,9 +1020,8 @@ void lrc_init_state(struct intel_context *ce,
set_redzone(state, engine);
- if (engine->default_state) {
- shmem_read(engine->default_state, 0,
- state, engine->context_size);
+ if (ce->default_state) {
+ shmem_read(ce->default_state, 0, state, engine->context_size);
__set_bit(CONTEXT_VALID_BIT, &ce->flags);
inhibit = false;
}
@@ -1174,6 +1133,9 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
GEM_BUG_ON(ce->state);
+ if (!intel_context_has_own_state(ce))
+ ce->default_state = engine->default_state;
+
vma = __lrc_alloc_state(ce, engine);
if (IS_ERR(vma))
return PTR_ERR(vma);
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 576e5ef0289b..6f7af4077135 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -35,9 +35,9 @@ static bool engine_supports_migration(struct intel_engine_cs *engine)
return true;
}
-static void xehpsdv_toggle_pdes(struct i915_address_space *vm,
- struct i915_page_table *pt,
- void *data)
+static void xehp_toggle_pdes(struct i915_address_space *vm,
+ struct i915_page_table *pt,
+ void *data)
{
struct insert_pte_data *d = data;
@@ -52,9 +52,9 @@ static void xehpsdv_toggle_pdes(struct i915_address_space *vm,
d->offset += SZ_2M;
}
-static void xehpsdv_insert_pte(struct i915_address_space *vm,
- struct i915_page_table *pt,
- void *data)
+static void xehp_insert_pte(struct i915_address_space *vm,
+ struct i915_page_table *pt,
+ void *data)
{
struct insert_pte_data *d = data;
@@ -120,7 +120,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
* 512 entry layout using 4K GTT pages. The other two windows just map
* lmem pages and must use the new compact 32 entry layout using 64K GTT
* pages, which ensures we can address any lmem object that the user
- * throws at us. We then also use the xehpsdv_toggle_pdes as a way of
+ * throws at us. We then also use the xehp_toggle_pdes as a way of
* just toggling the PDE bit(GEN12_PDE_64K) for us, to enable the
* compact layout for each of these page-tables, that fall within the
* [CHUNK_SIZE, 3 * CHUNK_SIZE) range.
@@ -209,12 +209,12 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
/* Now allow the GPU to rewrite the PTE via its own ppGTT */
if (HAS_64K_PAGES(gt->i915)) {
vm->vm.foreach(&vm->vm, base, d.offset - base,
- xehpsdv_insert_pte, &d);
+ xehp_insert_pte, &d);
d.offset = base + CHUNK_SZ;
vm->vm.foreach(&vm->vm,
d.offset,
2 * CHUNK_SZ,
- xehpsdv_toggle_pdes, &d);
+ xehp_toggle_pdes, &d);
} else {
vm->vm.foreach(&vm->vm, base, d.offset - base,
insert_pte, &d);
@@ -925,7 +925,7 @@ static int emit_clear(struct i915_request *rq, u32 offset, int size,
GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
ring_sz = XY_FAST_COLOR_BLT_DW;
else if (ver >= 8)
ring_sz = 8;
@@ -936,7 +936,7 @@ static int emit_clear(struct i915_request *rq, u32 offset, int size,
if (IS_ERR(cs))
return PTR_ERR(cs);
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) {
*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
(XY_FAST_COLOR_BLT_DW - 2);
*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 353f93baaca0..d791d63d49b4 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -53,7 +53,6 @@ struct drm_i915_mocs_table {
/* Helper defines */
#define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */
-#define PVC_NUM_MOCS_ENTRIES 3
#define MTL_NUM_MOCS_ENTRIES 16
/* (e)LLC caching options */
@@ -367,31 +366,6 @@ static const struct drm_i915_mocs_entry gen12_mocs_table[] = {
L3_3_WB),
};
-static const struct drm_i915_mocs_entry xehpsdv_mocs_table[] = {
- /* wa_1608975824 */
- MOCS_ENTRY(0, 0, L3_3_WB | L3_LKUP(1)),
-
- /* UC - Coherent; GO:L3 */
- MOCS_ENTRY(1, 0, L3_1_UC | L3_LKUP(1)),
- /* UC - Coherent; GO:Memory */
- MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
- /* UC - Non-Coherent; GO:Memory */
- MOCS_ENTRY(3, 0, L3_1_UC | L3_GLBGO(1)),
- /* UC - Non-Coherent; GO:L3 */
- MOCS_ENTRY(4, 0, L3_1_UC),
-
- /* WB */
- MOCS_ENTRY(5, 0, L3_3_WB | L3_LKUP(1)),
-
- /* HW Reserved - SW program but never use. */
- MOCS_ENTRY(48, 0, L3_3_WB | L3_LKUP(1)),
- MOCS_ENTRY(49, 0, L3_1_UC | L3_LKUP(1)),
- MOCS_ENTRY(60, 0, L3_1_UC),
- MOCS_ENTRY(61, 0, L3_1_UC),
- MOCS_ENTRY(62, 0, L3_1_UC),
- MOCS_ENTRY(63, 0, L3_1_UC),
-};
-
static const struct drm_i915_mocs_entry dg2_mocs_table[] = {
/* UC - Coherent; GO:L3 */
MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)),
@@ -404,17 +378,6 @@ static const struct drm_i915_mocs_entry dg2_mocs_table[] = {
MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
};
-static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
- /* Error */
- MOCS_ENTRY(0, 0, L3_3_WB),
-
- /* UC */
- MOCS_ENTRY(1, 0, L3_1_UC),
-
- /* WB */
- MOCS_ENTRY(2, 0, L3_3_WB),
-};
-
static const struct drm_i915_mocs_entry mtl_mocs_table[] = {
/* Error - Reserved for Non-Use */
MOCS_ENTRY(0,
@@ -495,31 +458,18 @@ static unsigned int get_mocs_settings(struct drm_i915_private *i915,
memset(table, 0, sizeof(struct drm_i915_mocs_table));
table->unused_entries_index = I915_MOCS_PTE;
- if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 71))) {
+ if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 74))) {
table->size = ARRAY_SIZE(mtl_mocs_table);
table->table = mtl_mocs_table;
table->n_entries = MTL_NUM_MOCS_ENTRIES;
table->uc_index = 9;
table->unused_entries_index = 1;
- } else if (IS_PONTEVECCHIO(i915)) {
- table->size = ARRAY_SIZE(pvc_mocs_table);
- table->table = pvc_mocs_table;
- table->n_entries = PVC_NUM_MOCS_ENTRIES;
- table->uc_index = 1;
- table->wb_index = 2;
- table->unused_entries_index = 2;
} else if (IS_DG2(i915)) {
table->size = ARRAY_SIZE(dg2_mocs_table);
table->table = dg2_mocs_table;
table->uc_index = 1;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->unused_entries_index = 3;
- } else if (IS_XEHPSDV(i915)) {
- table->size = ARRAY_SIZE(xehpsdv_mocs_table);
- table->table = xehpsdv_mocs_table;
- table->uc_index = 2;
- table->n_entries = GEN9_NUM_MOCS_ENTRIES;
- table->unused_entries_index = 5;
} else if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
@@ -670,7 +620,7 @@ static void init_l3cc_table(struct intel_gt *gt,
intel_gt_mcr_lock(gt, &flags);
for_each_l3cc(l3cc, table, i)
- if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 55))
intel_gt_mcr_multicast_write_fw(gt, XEHP_LNCFCMOCS(i), l3cc);
else
intel_uncore_write_fw(gt->uncore, GEN9_LNCFCMOCS(i), l3cc);
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 7090e4be29cb..9378d5901c49 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -109,7 +109,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
* thus allowing GuC to control RC6 entry/exit fully instead.
* We will not set the HW ENABLE and EI bits
*/
- if (!intel_guc_rc_enable(&gt->uc.guc))
+ if (!intel_guc_rc_enable(gt_to_guc(gt)))
rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE;
else
rc6->ctl_enable =
@@ -123,7 +123,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
* temporary wa and should be removed after fixing real cause
* of forcewake timeouts.
*/
- if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
pg_enable =
GEN9_MEDIA_PG_ENABLE |
GEN11_MEDIA_SAMPLER_PG_ENABLE;
@@ -133,7 +133,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
GEN9_MEDIA_PG_ENABLE |
GEN11_MEDIA_SAMPLER_PG_ENABLE;
- if (GRAPHICS_VER(gt->i915) >= 12) {
+ if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) {
for (i = 0; i < I915_MAX_VCS; i++)
if (HAS_ENGINE(gt, _VCS(i)))
pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) |
@@ -569,7 +569,7 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6)
struct intel_gt *gt = rc6_to_gt(rc6);
/* Take control of RC6 back from GuC */
- intel_guc_rc_disable(&gt->uc.guc);
+ intel_guc_rc_disable(gt_to_guc(gt));
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
if (GRAPHICS_VER(i915) >= 9)
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index f8512aee58a8..51bb27e10a4f 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -144,8 +144,8 @@ region_lmem_init(struct intel_memory_region *mem)
int ret;
if (!io_mapping_init_wc(&mem->iomap,
- mem->io_start,
- mem->io_size))
+ mem->io.start,
+ resource_size(&mem->io)))
return -EIO;
ret = intel_region_ttm_init(mem);
@@ -240,7 +240,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
lmem_size -= tile_stolen;
} else {
/* Stolen starts from GSMBASE without CCS */
- lmem_size = intel_uncore_read64(&i915->uncore, GEN12_GSMBASE);
+ lmem_size = intel_uncore_read64(&i915->uncore, GEN6_GSMBASE);
}
i915_resize_lmem_bar(i915, lmem_size);
@@ -273,14 +273,6 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
if (err)
goto err_region_put;
- drm_dbg(&i915->drm, "Local memory: %pR\n", &mem->region);
- drm_dbg(&i915->drm, "Local memory IO start: %pa\n",
- &mem->io_start);
- drm_info(&i915->drm, "Local memory IO size: %pa\n",
- &mem->io_size);
- drm_info(&i915->drm, "Local memory available: %pa\n",
- &lmem_size);
-
if (io_size < lmem_size)
drm_info(&i915->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' or similar, if available in the BIOS.\n",
(u64)io_size >> 20);
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 6801f8b95c53..aae5a081cb53 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -764,7 +764,7 @@ wa_14015076503_end(struct intel_gt *gt, intel_engine_mask_t engine_mask)
HECI_H_GS1_ER_PREP, 0);
}
-int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+static int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
reset_func reset;
@@ -879,8 +879,17 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
intel_engine_mask_t awake = 0;
enum intel_engine_id id;
- /* For GuC mode, ensure submission is disabled before stopping ring */
- intel_uc_reset_prepare(&gt->uc);
+ /**
+ * For GuC mode with submission enabled, ensure submission
+ * is disabled before stopping ring.
+ *
+ * For GuC mode with submission disabled, ensure that GuC is not
+ * sanitized, do that after engine reset. reset_prepare()
+ * is followed by engine reset which in this mode requires GuC to
+ * process any CSB FIFO entries generated by the resets.
+ */
+ if (intel_uc_uses_guc_submission(&gt->uc))
+ intel_uc_reset_prepare(&gt->uc);
for_each_engine(engine, gt, id) {
if (intel_engine_pm_get_if_awake(engine))
@@ -978,7 +987,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
/* Even if the GPU reset fails, it should still stop the engines */
if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- __intel_gt_reset(gt, ALL_ENGINES);
+ intel_gt_reset_all_engines(gt);
for_each_engine(engine, gt, id)
engine->submit_request = nop_submit_request;
@@ -1004,6 +1013,15 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
GT_TRACE(gt, "end\n");
}
+static void set_wedged_work(struct work_struct *w)
+{
+ struct intel_gt *gt = container_of(w, struct intel_gt, wedge);
+ intel_wakeref_t wf;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wf)
+ __intel_gt_set_wedged(gt);
+}
+
void intel_gt_set_wedged(struct intel_gt *gt)
{
intel_wakeref_t wakeref;
@@ -1015,7 +1033,8 @@ void intel_gt_set_wedged(struct intel_gt *gt)
mutex_lock(&gt->reset.mutex);
if (GEM_SHOW_DEBUG()) {
- struct drm_printer p = drm_debug_printer(__func__);
+ struct drm_printer p = drm_dbg_printer(&gt->i915->drm,
+ DRM_UT_DRIVER, NULL);
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -1088,12 +1107,13 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
/* We must reset pending GPU events before restoring our submission */
ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
+ ok = intel_gt_reset_all_engines(gt) == 0;
if (!ok) {
/*
* Warn CI about the unrecoverable wedged condition.
* Time for a reboot.
*/
+ gt_err(gt, "Unrecoverable wedged condition\n");
add_taint_for_CI(gt->i915, TAINT_WARN);
return false;
}
@@ -1132,10 +1152,10 @@ static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
{
int err, i;
- err = __intel_gt_reset(gt, ALL_ENGINES);
+ err = intel_gt_reset_all_engines(gt);
for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
msleep(10 * (i + 1));
- err = __intel_gt_reset(gt, ALL_ENGINES);
+ err = intel_gt_reset_all_engines(gt);
}
if (err)
return err;
@@ -1179,6 +1199,7 @@ void intel_gt_reset(struct intel_gt *gt,
intel_engine_mask_t stalled_mask,
const char *reason)
{
+ struct intel_display *display = &gt->i915->display;
intel_engine_mask_t awake;
int ret;
@@ -1214,7 +1235,7 @@ void intel_gt_reset(struct intel_gt *gt,
}
if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- intel_runtime_pm_disable_interrupts(gt->i915);
+ intel_irq_suspend(gt->i915);
if (do_reset(gt, stalled_mask)) {
gt_err(gt, "Failed to reset chip\n");
@@ -1222,10 +1243,13 @@ void intel_gt_reset(struct intel_gt *gt,
}
if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- intel_runtime_pm_enable_interrupts(gt->i915);
+ intel_irq_resume(gt->i915);
- intel_overlay_reset(gt->i915);
+ intel_overlay_reset(display);
+ /* sanitize uC after engine reset */
+ if (!intel_uc_uses_guc_submission(&gt->uc))
+ intel_uc_reset_prepare(&gt->uc);
/*
* Next we need to restore the context, but we don't use those
* yet either...
@@ -1241,8 +1265,10 @@ void intel_gt_reset(struct intel_gt *gt,
}
ret = resume(gt);
- if (ret)
+ if (ret) {
+ gt_err(gt, "Failed to resume (%d)\n", ret);
goto taint;
+ }
finish:
reset_finish(gt, awake);
@@ -1269,7 +1295,30 @@ error:
goto finish;
}
-static int intel_gt_reset_engine(struct intel_engine_cs *engine)
+/**
+ * intel_gt_reset_all_engines() - Reset all engines in the given gt.
+ * @gt: the GT to reset all engines for.
+ *
+ * This function resets all engines within the given gt.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int intel_gt_reset_all_engines(struct intel_gt *gt)
+{
+ return __intel_gt_reset(gt, ALL_ENGINES);
+}
+
+/**
+ * intel_gt_reset_engine() - Reset a specific engine within a gt.
+ * @engine: engine to be reset.
+ *
+ * This function resets the specified engine within a gt.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int intel_gt_reset_engine(struct intel_engine_cs *engine)
{
return __intel_gt_reset(engine->gt, engine->mask);
}
@@ -1562,6 +1611,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
/* Wedged on init is non-recoverable */
+ gt_err(gt, "Non-recoverable wedged on init\n");
add_taint_for_CI(gt->i915, TAINT_WARN);
}
@@ -1578,6 +1628,7 @@ void intel_gt_init_reset(struct intel_gt *gt)
init_waitqueue_head(&gt->reset.queue);
mutex_init(&gt->reset.mutex);
init_srcu_struct(&gt->reset.backoff_srcu);
+ INIT_WORK(&gt->wedge, set_wedged_work);
/*
* While undesirable to wait inside the shrinker, complain anyway.
@@ -1604,7 +1655,7 @@ static void intel_wedge_me(struct work_struct *work)
struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);
gt_err(w->gt, "%s timed out, cancelling all in-flight rendering.\n", w->name);
- intel_gt_set_wedged(w->gt);
+ set_wedged_work(&w->gt->wedge);
}
void __intel_init_wedge(struct intel_wedge_me *w,
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index f615b30b81c5..c00de353075c 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -54,7 +54,8 @@ int intel_gt_terminally_wedged(struct intel_gt *gt);
void intel_gt_set_wedged_on_init(struct intel_gt *gt);
void intel_gt_set_wedged_on_fini(struct intel_gt *gt);
-int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask);
+int intel_gt_reset_engine(struct intel_engine_cs *engine);
+int intel_gt_reset_all_engines(struct intel_gt *gt);
int intel_reset_guc(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 59da4b7bd262..b74d9205c0f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -308,30 +308,6 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
return cs;
}
-/* Align the ring tail to a cacheline boundary */
-int intel_ring_cacheline_align(struct i915_request *rq)
-{
- int num_dwords;
- void *cs;
-
- num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
- if (num_dwords == 0)
- return 0;
-
- num_dwords = CACHELINE_DWORDS - num_dwords;
- GEM_BUG_ON(num_dwords & 1);
-
- cs = intel_ring_begin(rq, num_dwords);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
- intel_ring_advance(rq, cs + num_dwords);
-
- GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
- return 0;
-}
-
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_ring.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
index 1b32dadfb8c3..64b322e25f36 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -16,7 +16,6 @@ struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
-int intel_ring_cacheline_align(struct i915_request *rq);
unsigned int intel_ring_update_space(struct intel_ring *ring);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 92085ffd23de..458e29d89978 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -26,6 +26,7 @@
#include "shmem_utils.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
+#include "intel_gt_print.h"
/* Rough estimate of the typical request size, performing a flush,
* set-context and then emitting the batch.
@@ -192,6 +193,7 @@ static bool stop_ring(struct intel_engine_cs *engine)
static int xcs_resume(struct intel_engine_cs *engine)
{
struct intel_ring *ring = engine->legacy.ring;
+ ktime_t kt;
ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
ring->head, ring->tail);
@@ -229,10 +231,33 @@ static int xcs_resume(struct intel_engine_cs *engine)
set_pp_dir(engine);
- /* First wake the ring up to an empty/idle ring */
- ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
+ /*
+ * First wake the ring up to an empty/idle ring.
+ * Use 50ms of delay to let the engine write successfully
+ * for all platforms. Experimented with different values and
+ * determined that 50ms works best based on testing.
+ */
+ for ((kt) = ktime_get() + (50 * NSEC_PER_MSEC);
+ ktime_before(ktime_get(), (kt)); cpu_relax()) {
+ /*
+ * In case of resets fails because engine resumes from
+ * incorrect RING_HEAD and then GPU may be then fed
+ * to invalid instrcutions, which may lead to unrecoverable
+ * hang. So at first write doesn't succeed then try again.
+ */
+ ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
+ if (ENGINE_READ_FW(engine, RING_HEAD) == ring->head)
+ break;
+ }
+
ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
- ENGINE_POSTING_READ(engine, RING_TAIL);
+ if (ENGINE_READ_FW(engine, RING_HEAD) != ENGINE_READ_FW(engine, RING_TAIL)) {
+ ENGINE_TRACE(engine, "failed to reset empty ring: [%x, %x]: %x\n",
+ ENGINE_READ_FW(engine, RING_HEAD),
+ ENGINE_READ_FW(engine, RING_TAIL),
+ ring->head);
+ goto err;
+ }
ENGINE_WRITE_FW(engine, RING_CTL,
RING_CTL_SIZE(ring->size) | RING_VALID);
@@ -241,12 +266,16 @@ static int xcs_resume(struct intel_engine_cs *engine)
if (__intel_wait_for_register_fw(engine->uncore,
RING_CTL(engine->mmio_base),
RING_VALID, RING_VALID,
- 5000, 0, NULL))
+ 5000, 0, NULL)) {
+ ENGINE_TRACE(engine, "failed to restart\n");
goto err;
+ }
- if (GRAPHICS_VER(engine->i915) > 2)
+ if (GRAPHICS_VER(engine->i915) > 2) {
ENGINE_WRITE_FW(engine,
RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
+ ENGINE_POSTING_READ(engine, RING_MI_MODE);
+ }
/* Now awake, let it get started */
if (ring->tail != ring->head) {
@@ -259,16 +288,16 @@ static int xcs_resume(struct intel_engine_cs *engine)
return 0;
err:
- drm_err(&engine->i915->drm,
- "%s initialization failed; "
- "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
- engine->name,
- ENGINE_READ(engine, RING_CTL),
- ENGINE_READ(engine, RING_CTL) & RING_VALID,
- ENGINE_READ(engine, RING_HEAD), ring->head,
- ENGINE_READ(engine, RING_TAIL), ring->tail,
- ENGINE_READ(engine, RING_START),
- i915_ggtt_offset(ring->vma));
+ gt_err(engine->gt, "%s initialization failed\n", engine->name);
+ ENGINE_TRACE(engine,
+ "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
+ ENGINE_READ(engine, RING_CTL),
+ ENGINE_READ(engine, RING_CTL) & RING_VALID,
+ ENGINE_READ(engine, RING_HEAD), ring->head,
+ ENGINE_READ(engine, RING_TAIL), ring->tail,
+ ENGINE_READ(engine, RING_START),
+ i915_ggtt_offset(ring->vma));
+ GEM_TRACE_DUMP();
return -EIO;
}
@@ -474,8 +503,7 @@ static int ring_context_init_default_state(struct intel_context *ce,
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
- shmem_read(ce->engine->default_state, 0,
- vaddr, ce->engine->context_size);
+ shmem_read(ce->default_state, 0, vaddr, ce->engine->context_size);
i915_gem_object_flush_map(obj);
__i915_gem_object_release_map(obj);
@@ -491,7 +519,7 @@ static int ring_context_pre_pin(struct intel_context *ce,
struct i915_address_space *vm;
int err = 0;
- if (ce->engine->default_state &&
+ if (ce->default_state &&
!test_bit(CONTEXT_VALID_BIT, &ce->flags)) {
err = ring_context_init_default_state(ce, ww);
if (err)
@@ -570,6 +598,9 @@ static int ring_context_alloc(struct intel_context *ce)
{
struct intel_engine_cs *engine = ce->engine;
+ if (!intel_context_has_own_state(ce))
+ ce->default_state = engine->default_state;
+
/* One ringbuffer to rule them all */
GEM_BUG_ON(!engine->legacy.ring);
ce->ring = engine->legacy.ring;
@@ -1088,9 +1119,6 @@ static void setup_irq(struct intel_engine_cs *engine)
} else if (GRAPHICS_VER(i915) >= 5) {
engine->irq_enable = gen5_irq_enable;
engine->irq_disable = gen5_irq_disable;
- } else if (GRAPHICS_VER(i915) >= 3) {
- engine->irq_enable = gen3_irq_enable;
- engine->irq_disable = gen3_irq_disable;
} else {
engine->irq_enable = gen2_irq_enable;
engine->irq_disable = gen2_irq_disable;
@@ -1144,7 +1172,7 @@ static void setup_common(struct intel_engine_cs *engine)
* equivalent to our next initial bread so we can elide
* engine->emit_init_breadcrumb().
*/
- engine->emit_fini_breadcrumb = gen3_emit_breadcrumb;
+ engine->emit_fini_breadcrumb = gen2_emit_breadcrumb;
if (GRAPHICS_VER(i915) == 5)
engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
@@ -1157,7 +1185,7 @@ static void setup_common(struct intel_engine_cs *engine)
else if (IS_I830(i915) || IS_I845G(i915))
engine->emit_bb_start = i830_emit_bb_start;
else
- engine->emit_bb_start = gen3_emit_bb_start;
+ engine->emit_bb_start = gen2_emit_bb_start;
}
static void setup_rcs(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 4feef874e6d6..fa304ea088e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -5,7 +5,7 @@
#include <linux/string_helpers.h>
-#include <drm/i915_drm.h>
+#include <drm/intel/i915_drm.h>
#include "display/intel_display.h"
#include "display/intel_display_irq.h"
@@ -52,7 +52,7 @@ static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
- return &gt->uc.guc.slpc;
+ return &gt_to_guc(gt)->slpc;
}
static bool rps_uses_slpc(struct intel_rps *rps)
@@ -265,10 +265,10 @@ static const struct cparams {
u16 c;
} cparams[] = {
{ 1, 1333, 301, 28664 },
- { 1, 1066, 294, 24460 },
+ { 1, 1067, 294, 24460 },
{ 1, 800, 294, 25192 },
{ 0, 1333, 276, 27605 },
- { 0, 1066, 276, 27605 },
+ { 0, 1067, 276, 27605 },
{ 0, 800, 231, 23784 },
};
@@ -280,15 +280,16 @@ static void gen5_rps_init(struct intel_rps *rps)
u32 rgvmodectl;
int c_m, i;
- if (i915->fsb_freq <= 3200)
+ if (i915->fsb_freq <= 3200000)
c_m = 0;
- else if (i915->fsb_freq <= 4800)
+ else if (i915->fsb_freq <= 4800000)
c_m = 1;
else
c_m = 2;
for (i = 0; i < ARRAY_SIZE(cparams); i++) {
- if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
+ if (cparams[i].i == c_m &&
+ cparams[i].t == DIV_ROUND_CLOSEST(i915->mem_freq, 1000)) {
rps->ips.m = cparams[i].m;
rps->ips.c = cparams[i].c;
break;
@@ -1013,6 +1014,10 @@ void intel_rps_boost(struct i915_request *rq)
if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
return;
+ /* Waitboost is not needed for contexts marked with a Freq hint */
+ if (test_bit(CONTEXT_LOW_LATENCY, &rq->context->flags))
+ return;
+
/* Serializes with i915_request_retire() */
if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
@@ -1086,11 +1091,7 @@ static u32 intel_rps_read_state_cap(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
- if (IS_PONTEVECCHIO(i915))
- return intel_uncore_read(uncore, PVC_RP_STATE_CAP);
- else if (IS_XEHPSDV(i915))
- return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP);
- else if (IS_GEN9_LP(i915))
+ if (IS_GEN9_LP(i915))
return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
else
return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 6a3246240e81..c8fadf58d836 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -214,13 +214,8 @@ static void xehp_sseu_info_init(struct intel_gt *gt)
int num_compute_regs, num_geometry_regs;
int eu;
- if (IS_PONTEVECCHIO(gt->i915)) {
- num_geometry_regs = 0;
- num_compute_regs = 2;
- } else {
- num_geometry_regs = 1;
- num_compute_regs = 1;
- }
+ num_geometry_regs = 1;
+ num_compute_regs = 1;
/*
* The concept of slice has been removed in Xe_HP. To be compatible
@@ -642,7 +637,7 @@ void intel_sseu_info_init(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
xehp_sseu_info_init(gt);
else if (GRAPHICS_VER(i915) >= 12)
gen12_sseu_info_init(gt);
@@ -851,7 +846,7 @@ void intel_sseu_print_topology(struct drm_i915_private *i915,
{
if (sseu->max_slices == 0)
drm_printf(p, "Unavailable\n");
- else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+ else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
sseu_print_xehp_topology(sseu, p);
else
sseu_print_hsw_topology(sseu, p);
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
index 4bb13d1890e3..2487768bc230 100644
--- a/drivers/gpu/drm/i915/gt/intel_tlb.c
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
@@ -122,7 +122,7 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
{
intel_wakeref_t wakeref;
- if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+ if (is_mock_gt(gt))
return;
if (intel_gt_is_wedged(gt))
@@ -132,7 +132,7 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
return;
with_intel_gt_pm_if_awake(gt, wakeref) {
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
mutex_lock(&gt->tlb.invalidate_lock);
if (tlb_seqno_passed(gt, seqno))
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 3eacbc50caf8..570c91878189 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -10,12 +10,15 @@
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
+#include "display/intel_fbc_regs.h"
+
/**
* DOC: Hardware workarounds
*
@@ -51,7 +54,8 @@
* registers belonging to BCS, VCS or VECS should be implemented in
* xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
* engine's MMIO range but that are part of of the common RCS/CCS reset domain
- * should be implemented in general_render_compute_wa_init().
+ * should be implemented in general_render_compute_wa_init(). The settings
+ * about the CCS load balancing should be added in ccs_engine_wa_mode().
*
* - GT workarounds: the list of these WAs is applied whenever these registers
* revert to their default values: on GPU reset, suspend/resume [1]_, etc.
@@ -107,9 +111,8 @@ static void wa_init_finish(struct i915_wa_list *wal)
{
/* Trim unused entries. */
if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
- struct i915_wa *list = kmemdup(wal->list,
- wal->count * sizeof(*list),
- GFP_KERNEL);
+ struct i915_wa *list = kmemdup_array(wal->list, wal->count,
+ sizeof(*list), GFP_KERNEL);
if (list) {
kfree(wal->list);
@@ -258,12 +261,6 @@ wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
}
static void
-wa_mcr_write(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
-{
- wa_mcr_write_clr_set(wal, reg, ~0, set);
-}
-
-static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
wa_write_clr_set(wal, reg, set, set);
@@ -421,7 +418,7 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
/* WaForceContextSaveRestoreNonCoherent:bdw */
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
/* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
- (IS_BROADWELL_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
+ (INTEL_INFO(i915)->gt == 3 ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}
static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -789,8 +786,13 @@ static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
dg2_ctx_gt_tuning_init(engine, wal);
- if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
- IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER))
+ /*
+ * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in
+ * gen12_emit_indirect_ctx_rcs() rather than here on some early
+ * steppings.
+ */
+ if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)))
wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false);
}
@@ -820,6 +822,9 @@ static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine,
/* Wa_18019271663 */
wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
+
+ /* Wa_14019877138 */
+ wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
}
static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
@@ -908,14 +913,10 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
if (engine->class != RENDER_CLASS)
goto done;
- if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71)))
+ if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
xelpg_ctx_workarounds_init(engine, wal);
- else if (IS_PONTEVECCHIO(i915))
- ; /* noop; none at this time */
else if (IS_DG2(i915))
dg2_ctx_workarounds_init(engine, wal);
- else if (IS_XEHPSDV(i915))
- ; /* noop; none at this time */
else if (IS_DG1(i915))
dg1_ctx_workarounds_init(engine, wal);
else if (GRAPHICS_VER(i915) == 12)
@@ -972,7 +973,12 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
if (ret)
return ret;
- cs = intel_ring_begin(rq, (wal->count * 2 + 2));
+ if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
+ IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS)
+ cs = intel_ring_begin(rq, (wal->count * 2 + 6));
+ else
+ cs = intel_ring_begin(rq, (wal->count * 2 + 2));
+
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -1002,6 +1008,15 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
}
*cs++ = MI_NOOP;
+ /* Wa_14019789679 */
+ if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
+ IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS) {
+ *cs++ = CMD_3DSTATE_MESH_CONTROL;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = MI_NOOP;
+ }
+
intel_uncore_forcewake_put__locked(uncore, fw);
spin_unlock(&uncore->lock);
intel_gt_mcr_unlock(wal->gt, flags);
@@ -1233,7 +1248,8 @@ static void __set_mcr_steering(struct i915_wa_list *wal,
static void debug_dump_steering(struct intel_gt *gt)
{
- struct drm_printer p = drm_debug_printer("MCR Steering:");
+ struct drm_printer p = drm_dbg_printer(&gt->i915->drm, DRM_UT_DRIVER,
+ "MCR Steering:");
if (drm_debug_enabled(DRM_UT_DRIVER))
intel_gt_mcr_report_steering(&p, gt, false);
@@ -1341,9 +1357,6 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
gt->steering_table[MSLICE] = NULL;
}
- if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0))
- gt->steering_table[GAM] = NULL;
-
slice = __ffs(slice_mask);
subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) %
GEN_DSS_PER_GSLICE;
@@ -1371,20 +1384,6 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
}
static void
-pvc_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
-{
- unsigned int dss;
-
- /*
- * Setup implicit steering for COMPUTE and DSS ranges to the first
- * non-fused-off DSS. All other types of MCR registers will be
- * explicitly steered.
- */
- dss = intel_sseu_find_first_xehp_dss(&gt->info.sseu, 0, 0);
- __add_mcr_wa(gt, wal, dss / GEN_DSS_PER_CSLICE, dss % GEN_DSS_PER_CSLICE);
-}
-
-static void
icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = gt->i915;
@@ -1511,76 +1510,6 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
}
static void
-xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
-{
- struct drm_i915_private *i915 = gt->i915;
-
- xehp_init_mcr(gt, wal);
-
- /* Wa_1409757795:xehpsdv */
- wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);
-
- /* Wa_18011725039:xehpsdv */
- if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
- wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
- wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
- }
-
- /* Wa_16011155590:xehpsdv */
- if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
- TSGUNIT_CLKGATE_DIS);
-
- /* Wa_14011780169:xehpsdv */
- if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) {
- wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
- GAMTLBVDBOX7_CLKGATE_DIS |
- GAMTLBVDBOX6_CLKGATE_DIS |
- GAMTLBVDBOX5_CLKGATE_DIS |
- GAMTLBVDBOX4_CLKGATE_DIS |
- GAMTLBVDBOX3_CLKGATE_DIS |
- GAMTLBVDBOX2_CLKGATE_DIS |
- GAMTLBVDBOX1_CLKGATE_DIS |
- GAMTLBVDBOX0_CLKGATE_DIS |
- GAMTLBKCR_CLKGATE_DIS |
- GAMTLBGUC_CLKGATE_DIS |
- GAMTLBBLT_CLKGATE_DIS);
- wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
- GAMTLBGFXA1_CLKGATE_DIS |
- GAMTLBCOMPA0_CLKGATE_DIS |
- GAMTLBCOMPA1_CLKGATE_DIS |
- GAMTLBCOMPB0_CLKGATE_DIS |
- GAMTLBCOMPB1_CLKGATE_DIS |
- GAMTLBCOMPC0_CLKGATE_DIS |
- GAMTLBCOMPC1_CLKGATE_DIS |
- GAMTLBCOMPD0_CLKGATE_DIS |
- GAMTLBCOMPD1_CLKGATE_DIS |
- GAMTLBMERT_CLKGATE_DIS |
- GAMTLBVEBOX3_CLKGATE_DIS |
- GAMTLBVEBOX2_CLKGATE_DIS |
- GAMTLBVEBOX1_CLKGATE_DIS |
- GAMTLBVEBOX0_CLKGATE_DIS);
- }
-
- /* Wa_16012725990:xehpsdv */
- if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);
-
- /* Wa_14011060649:xehpsdv */
- wa_14011060649(gt, wal);
-
- /* Wa_14012362059:xehpsdv */
- wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
-
- /* Wa_14014368820:xehpsdv */
- wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
- INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
-
- /* Wa_14010670810:xehpsdv */
- wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
-}
-
-static void
dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
xehp_init_mcr(gt, wal);
@@ -1623,27 +1552,10 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
}
static void
-pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
-{
- pvc_init_mcr(gt, wal);
-
- /* Wa_14015795083 */
- wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
-
- /* Wa_18018781329 */
- wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
- wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
- wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
- wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
-
- /* Wa_16016694945 */
- wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
-}
-
-static void
xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- /* Wa_14018778641 / Wa_18018781329 */
+ /* Wa_14018575942 / Wa_18018781329 */
+ wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
/* Wa_22016670082 */
@@ -1691,6 +1603,14 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
*/
wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);
+ /*
+ * Wa_14018575942
+ *
+ * Issue is seen on media KPI test running on VDBOX engine
+ * especially VP9 encoding WLs
+ */
+ wa_write_or(wal, XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+
/* Wa_22016670082 */
wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
@@ -1710,17 +1630,11 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
*/
static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
{
- if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) {
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
}
- if (IS_PONTEVECCHIO(gt->i915)) {
- wa_mcr_write(wal, XEHPC_L3SCRUB,
- SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
- wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
- }
-
if (IS_DG2(gt->i915)) {
wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
@@ -1743,14 +1657,10 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
return;
}
- if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
xelpg_gt_workarounds_init(gt, wal);
- else if (IS_PONTEVECCHIO(i915))
- pvc_gt_workarounds_init(gt, wal);
else if (IS_DG2(i915))
dg2_gt_workarounds_init(gt, wal);
- else if (IS_XEHPSDV(i915))
- xehpsdv_gt_workarounds_init(gt, wal);
else if (IS_DG1(i915))
dg1_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) == 12)
@@ -2161,37 +2071,13 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine)
case RENDER_CLASS:
/* Required by recommended tuning setting (not a workaround) */
whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
-
+ whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
break;
default:
break;
}
}
-static void blacklist_trtt(struct intel_engine_cs *engine)
-{
- struct i915_wa_list *w = &engine->whitelist;
-
- /*
- * Prevent read/write access to [0x4400, 0x4600) which covers
- * the TRTT range across all engines. Note that normally userspace
- * cannot access the other engines' trtt control, but for simplicity
- * we cover the entire range on each engine.
- */
- whitelist_reg_ext(w, _MMIO(0x4400),
- RING_FORCE_TO_NONPRIV_DENY |
- RING_FORCE_TO_NONPRIV_RANGE_64);
- whitelist_reg_ext(w, _MMIO(0x4500),
- RING_FORCE_TO_NONPRIV_DENY |
- RING_FORCE_TO_NONPRIV_RANGE_64);
-}
-
-static void pvc_whitelist_build(struct intel_engine_cs *engine)
-{
- /* Wa_16014440446:pvc */
- blacklist_trtt(engine);
-}
-
static void xelpg_whitelist_build(struct intel_engine_cs *engine)
{
struct i915_wa_list *w = &engine->whitelist;
@@ -2200,7 +2086,7 @@ static void xelpg_whitelist_build(struct intel_engine_cs *engine)
case RENDER_CLASS:
/* Required by recommended tuning setting (not a workaround) */
whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
-
+ whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
break;
default:
break;
@@ -2216,14 +2102,10 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
if (engine->gt->type == GT_MEDIA)
; /* none yet */
- else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71)))
+ else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
xelpg_whitelist_build(engine);
- else if (IS_PONTEVECCHIO(i915))
- pvc_whitelist_build(engine);
else if (IS_DG2(i915))
dg2_whitelist_build(engine);
- else if (IS_XEHPSDV(i915))
- ; /* none needed */
else if (GRAPHICS_VER(i915) == 12)
tgl_whitelist_build(engine);
else if (GRAPHICS_VER(i915) == 11)
@@ -2417,6 +2299,15 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN8_RC_SEMA_IDLE_MSG_DISABLE);
}
+ if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) {
+ /*
+ * "Disable Repacking for Compression (masked R/W access)
+ * before rendering compressed surfaces for display."
+ */
+ wa_masked_en(wal, CACHE_MODE_0_GEN7,
+ DISABLE_REPACKING_FOR_COMPRESSION);
+ }
+
if (GRAPHICS_VER(i915) == 11) {
/* This is not an Wa. Enable for better image quality */
wa_masked_en(wal,
@@ -2655,7 +2546,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN7_FF_DS_SCHED_HW);
/* WaDisablePSDDualDispatchEnable:ivb */
- if (IS_IVB_GT1(i915))
+ if (INTEL_INFO(i915)->gt == 1)
wa_masked_en(wal,
GEN7_HALF_SLICE_CHICKEN1,
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
@@ -2804,10 +2695,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
static void
ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
- if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
- /* Wa_14014999345:pvc */
- wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
- }
+ /* boilerplate for any CCS engine workaround */
}
/*
@@ -2828,7 +2716,7 @@ add_render_compute_tuning_settings(struct intel_gt *gt,
{
struct drm_i915_private *i915 = gt->i915;
- if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
/*
@@ -2840,10 +2728,34 @@ add_render_compute_tuning_settings(struct intel_gt *gt,
wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
THREAD_EX_ARB_MODE_RR_AFTER_DEP);
- if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
+ if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 55))
wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
}
+static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+ struct intel_gt *gt = engine->gt;
+ u32 mode;
+
+ if (!IS_DG2(gt->i915))
+ return;
+
+ /*
+ * Wa_14019159160: This workaround, along with others, leads to
+ * significant challenges in utilizing load balancing among the
+ * CCS slices. Consequently, an architectural decision has been
+ * made to completely disable automatic CCS load balancing.
+ */
+ wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
+
+ /*
+ * After having disabled automatic load balancing we need to
+ * assign all slices to a single CCS. We will call it CCS mode 1
+ */
+ mode = intel_gt_apply_ccs_mode(gt);
+ wa_masked_en(wal, XEHP_CCS_MODE, mode);
+}
+
/*
* The workarounds in this function apply to shared registers in
* the general render reset domain that aren't tied to a
@@ -2881,10 +2793,15 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
}
if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
- IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER))
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) ||
+ IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74))) {
/* Wa_14017856879 */
wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH);
+ /* Wa_14020495402 */
+ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, XELPG_DISABLE_TDL_SVHS_GATING);
+ }
+
if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
/*
@@ -2912,21 +2829,15 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
- IS_PONTEVECCHIO(i915) ||
IS_DG2(i915)) {
/* Wa_22014226127 */
wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
}
- if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) {
+ if (IS_DG2(i915)) {
/* Wa_14015227452:dg2,pvc */
wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
- /* Wa_16015675438:dg2,pvc */
- wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
- }
-
- if (IS_DG2(i915)) {
/*
* Wa_16011620976:dg2_g11
* Wa_22015475538:dg2
@@ -2962,22 +2873,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
0 /* write-only, so skip validation */,
true);
}
-
- if (IS_XEHPSDV(i915)) {
- /* Wa_1409954639 */
- wa_mcr_masked_en(wal,
- GEN8_ROW_CHICKEN,
- SYSTOLIC_DOP_CLOCK_GATING_DIS);
-
- /* Wa_1607196519 */
- wa_mcr_masked_en(wal,
- GEN9_ROW_CHICKEN4,
- GEN12_DISABLE_GRF_CLEAR);
-
- /* Wa_14010449647:xehpsdv */
- wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
- GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
- }
}
static void
@@ -2993,8 +2888,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
* to a single RCS/CCS engine's workaround list since
* they're reset as part of the general render domain reset.
*/
- if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
+ if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
general_render_compute_wa_init(engine, wal);
+ ccs_engine_wa_mode(engine, wal);
+ }
if (engine->class == COMPUTE_CLASS)
ccs_engine_wa_init(engine, wal);
@@ -3058,7 +2955,7 @@ static bool mcr_range(struct drm_i915_private *i915, u32 offset)
const struct i915_range *mcr_ranges;
int i;
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
mcr_ranges = mcr_ranges_xehp;
else if (GRAPHICS_VER(i915) >= 12)
mcr_ranges = mcr_ranges_gen12;
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 47070cba7eb1..5eb46700dc4e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -285,7 +285,8 @@ out_engine:
intel_engine_pm_flush(engine);
if (intel_engine_pm_is_awake(engine)) {
- struct drm_printer p = drm_debug_printer(__func__);
+ struct drm_printer p = drm_dbg_printer(&engine->i915->drm,
+ DRM_UT_DRIVER, NULL);
intel_engine_dump(engine, &p,
"%s is still awake:%d after idle-barriers\n",
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index bc441ce7b380..9e4f0e417b3b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -122,9 +122,9 @@ static int __live_idle_pulse(struct intel_engine_cs *engine,
GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
if (engine_sync_barrier(engine)) {
- struct drm_printer m = drm_err_printer("pulse");
+ struct drm_printer m = drm_err_printer(&engine->i915->drm, "pulse");
- pr_err("%s: no heartbeat pulse?\n", engine->name);
+ drm_printf(&m, "%s: no heartbeat pulse?\n", engine->name);
intel_engine_dump(engine, &m, "%s", engine->name);
err = -ETIME;
@@ -136,10 +136,10 @@ static int __live_idle_pulse(struct intel_engine_cs *engine,
pulse_unlock_wait(p); /* synchronize with the retirement callback */
if (!i915_active_is_idle(&p->active)) {
- struct drm_printer m = drm_err_printer("pulse");
+ struct drm_printer m = drm_err_printer(&engine->i915->drm, "pulse");
- pr_err("%s: heartbeat pulse did not flush idle tasks\n",
- engine->name);
+ drm_printf(&m, "%s: heartbeat pulse did not flush idle tasks\n",
+ engine->name);
i915_active_print(&p->active, &m);
err = -EINVAL;
@@ -193,115 +193,6 @@ static int live_idle_pulse(void *arg)
return err;
}
-static int cmp_u32(const void *_a, const void *_b)
-{
- const u32 *a = _a, *b = _b;
-
- return *a - *b;
-}
-
-static int __live_heartbeat_fast(struct intel_engine_cs *engine)
-{
- const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6));
- struct intel_context *ce;
- struct i915_request *rq;
- ktime_t t0, t1;
- u32 times[5];
- int err;
- int i;
-
- ce = intel_context_create(engine);
- if (IS_ERR(ce))
- return PTR_ERR(ce);
-
- intel_engine_pm_get(engine);
-
- err = intel_engine_set_heartbeat(engine, 1);
- if (err)
- goto err_pm;
-
- for (i = 0; i < ARRAY_SIZE(times); i++) {
- do {
- /* Manufacture a tick */
- intel_engine_park_heartbeat(engine);
- GEM_BUG_ON(engine->heartbeat.systole);
- engine->serial++; /* pretend we are not idle! */
- intel_engine_unpark_heartbeat(engine);
-
- flush_delayed_work(&engine->heartbeat.work);
- if (!delayed_work_pending(&engine->heartbeat.work)) {
- pr_err("%s: heartbeat %d did not start\n",
- engine->name, i);
- err = -EINVAL;
- goto err_pm;
- }
-
- rcu_read_lock();
- rq = READ_ONCE(engine->heartbeat.systole);
- if (rq)
- rq = i915_request_get_rcu(rq);
- rcu_read_unlock();
- } while (!rq);
-
- t0 = ktime_get();
- while (rq == READ_ONCE(engine->heartbeat.systole))
- yield(); /* work is on the local cpu! */
- t1 = ktime_get();
-
- i915_request_put(rq);
- times[i] = ktime_us_delta(t1, t0);
- }
-
- sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
-
- pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
- engine->name,
- times[ARRAY_SIZE(times) / 2],
- times[0],
- times[ARRAY_SIZE(times) - 1]);
-
- /*
- * Ideally, the upper bound on min work delay would be something like
- * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we
- * are, even with system_wq_highpri, at the mercy of the CPU scheduler
- * and may be stuck behind some slow work for many millisecond. Such
- * as our very own display workers.
- */
- if (times[ARRAY_SIZE(times) / 2] > error_threshold) {
- pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
- engine->name,
- times[ARRAY_SIZE(times) / 2],
- error_threshold);
- err = -EINVAL;
- }
-
- reset_heartbeat(engine);
-err_pm:
- intel_engine_pm_put(engine);
- intel_context_put(ce);
- return err;
-}
-
-static int live_heartbeat_fast(void *arg)
-{
- struct intel_gt *gt = arg;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err = 0;
-
- /* Check that the heartbeat ticks at the desired rate. */
- if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
- return 0;
-
- for_each_engine(engine, gt, id) {
- err = __live_heartbeat_fast(engine);
- if (err)
- break;
- }
-
- return err;
-}
-
static int __live_heartbeat_off(struct intel_engine_cs *engine)
{
int err;
@@ -372,7 +263,6 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
static const struct i915_subtest tests[] = {
SUBTEST(live_idle_flush),
SUBTEST(live_idle_pulse),
- SUBTEST(live_heartbeat_fast),
SUBTEST(live_heartbeat_off),
};
int saved_hangcheck;
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 4202df5b8c12..81c31396eceb 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -93,7 +93,7 @@ static int wait_for_reset(struct intel_engine_cs *engine,
return -EINVAL;
}
- /* Give the request a jiffie to complete after flushing the worker */
+ /* Give the request a jiffy to complete after flushing the worker */
if (i915_request_wait(rq, 0,
max(0l, (long)(timeout - jiffies)) + 1) < 0) {
pr_err("%s: hanging request %llx:%lld did not complete\n",
@@ -3426,7 +3426,7 @@ static int live_preempt_timeout(void *arg)
cpu_relax();
saved_timeout = engine->props.preempt_timeout_ms;
- engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
+ engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */
i915_request_get(rq);
i915_request_add(rq);
@@ -3574,7 +3574,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
arg[id].batch = NULL;
arg[id].count = 0;
- worker[id] = kthread_create_worker(0, "igt/smoke:%d", id);
+ worker[id] = kthread_run_worker(0, "igt/smoke:%d", id);
if (IS_ERR(worker[id])) {
err = PTR_ERR(worker[id]);
break;
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 0dd4d00ee894..9d3aeb237295 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -319,7 +319,7 @@ static int igt_hang_sanitycheck(void *arg)
i915_request_add(rq);
timeout = 0;
- intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */)
+ intel_wedge_on_timeout(&w, gt, HZ / 5 /* 200ms */)
timeout = i915_request_wait(rq, 0,
MAX_SCHEDULE_TIMEOUT);
if (intel_gt_is_wedged(gt))
@@ -1025,7 +1025,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
threads[tmp].engine = other;
threads[tmp].flags = flags;
- worker = kthread_create_worker(0, "igt/%s",
+ worker = kthread_run_worker(0, "igt/%s",
other->name);
if (IS_ERR(worker)) {
err = PTR_ERR(worker);
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index 3eff364ccf3a..1bf7b88d9a9d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
+++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
@@ -262,7 +262,7 @@ static int clear(struct intel_migrate *migrate,
{
struct drm_i915_private *i915 = migrate->context->engine->i915;
struct drm_i915_gem_object *obj;
- struct i915_request *rq;
+ struct i915_request *rq = NULL;
struct i915_gem_ww_ctx ww;
u32 *vaddr, val = 0;
bool ccs_cap = false;
@@ -336,7 +336,7 @@ static int clear(struct intel_migrate *migrate,
if (vaddr[x] != val) {
pr_err("%ps failed, (%u != %u), offset: %zu\n",
- fn, vaddr[x], val, x * sizeof(u32));
+ fn, vaddr[x], val, x * sizeof(u32));
igt_hexdump(vaddr + i * 1024, 4096);
err = -EINVAL;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
index a7189c2d660c..27b6d51ef145 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -8,6 +8,7 @@
#include "intel_gpu_commands.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"
+#include "intel_rps.h"
#include "selftest_rc6.h"
#include "selftests/i915_random.h"
@@ -38,6 +39,9 @@ int live_rc6_manual(void *arg)
ktime_t dt;
u64 res[2];
int err = 0;
+ u32 rc0_freq = 0;
+ u32 rc6_freq = 0;
+ struct intel_rps *rps = &gt->rps;
/*
* Our claim is that we can "encourage" the GPU to enter rc6 at will.
@@ -62,12 +66,13 @@ int live_rc6_manual(void *arg)
dt = ktime_get();
rc0_power = librapl_energy_uJ();
- msleep(250);
+ msleep(1000);
rc0_power = librapl_energy_uJ() - rc0_power;
dt = ktime_sub(ktime_get(), dt);
res[1] = rc6_residency(rc6);
+ rc0_freq = intel_rps_read_actual_frequency_fw(rps);
if ((res[1] - res[0]) >> 10) {
- pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n",
+ pr_err("RC6 residency increased by %lldus while disabled for 1000ms!\n",
(res[1] - res[0]) >> 10);
err = -EINVAL;
goto out_unlock;
@@ -77,7 +82,11 @@ int live_rc6_manual(void *arg)
rc0_power = div64_u64(NSEC_PER_SEC * rc0_power,
ktime_to_ns(dt));
if (!rc0_power) {
- pr_err("No power measured while in RC0\n");
+ if (rc0_freq)
+ pr_debug("No power measured while in RC0! GPU Freq: %u in RC0\n",
+ rc0_freq);
+ else
+ pr_err("No power and freq measured while in RC0\n");
err = -EINVAL;
goto out_unlock;
}
@@ -90,7 +99,8 @@ int live_rc6_manual(void *arg)
intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL);
dt = ktime_get();
rc6_power = librapl_energy_uJ();
- msleep(100);
+ msleep(1000);
+ rc6_freq = intel_rps_read_actual_frequency_fw(rps);
rc6_power = librapl_energy_uJ() - rc6_power;
dt = ktime_sub(ktime_get(), dt);
res[1] = rc6_residency(rc6);
@@ -108,7 +118,8 @@ int live_rc6_manual(void *arg)
pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n",
rc0_power, rc6_power);
if (2 * rc6_power > rc0_power) {
- pr_err("GPU leaked energy while in RC6!\n");
+ pr_err("GPU leaked energy while in RC6! GPU Freq: %u in RC6 and %u in RC0\n",
+ rc6_freq, rc0_freq);
err = -EINVAL;
goto out_unlock;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index f40de408cd3a..2cfc23c58e90 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -281,7 +281,7 @@ static int igt_atomic_reset(void *arg)
awake = reset_prepare(gt);
p->critical_section_begin();
- err = __intel_gt_reset(gt, ALL_ENGINES);
+ err = intel_gt_reset_all_engines(gt);
p->critical_section_end();
reset_finish(gt, awake);
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index dcef8d498919..c207a4fb03bf 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -1125,6 +1125,7 @@ static u64 measure_power(struct intel_rps *rps, int *freq)
static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
*freq = rps_set_check(rps, *freq);
+ msleep(100);
return measure_power(rps, freq);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index 302d0540295d..e218b229681f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -53,7 +53,7 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq)
static int slpc_set_freq(struct intel_gt *gt, u32 freq)
{
int err;
- struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+ struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
err = slpc_set_max_freq(slpc, freq);
if (err) {
@@ -182,7 +182,7 @@ static int vary_min_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
static int slpc_power(struct intel_gt *gt, struct intel_engine_cs *engine)
{
- struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+ struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
struct {
u64 power;
int freq;
@@ -262,7 +262,7 @@ static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
static int run_test(struct intel_gt *gt, int test_type)
{
- struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+ struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
struct intel_rps *rps = &gt->rps;
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -489,7 +489,7 @@ static int live_slpc_tile_interaction(void *arg)
return -ENOMEM;
for_each_gt(gt, i915, i) {
- threads[i].worker = kthread_create_worker(0, "igt/slpc_parallel:%d", gt->info.id);
+ threads[i].worker = kthread_run_worker(0, "igt/slpc_parallel:%d", gt->info.id);
if (IS_ERR(threads[i].worker)) {
ret = PTR_ERR(threads[i].worker);
diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c
index 00b872b6380b..3941f2d6fa47 100644
--- a/drivers/gpu/drm/i915/gt/selftest_tlb.c
+++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c
@@ -206,8 +206,8 @@ static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
* of pages. To succeed with both allocations, especially in case of Small
* BAR, try to allocate no more than quarter of mappable memory.
*/
- if (mr && size > mr->io_size / 4)
- size = mr->io_size / 4;
+ if (mr && size > resource_size(&mr->io) / 4)
+ size = resource_size(&mr->io) / 4;
return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
}
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
index bccc3a1200bc..bb696b29ee2c 100644
--- a/drivers/gpu/drm/i915/gt/shmem_utils.c
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -7,6 +7,7 @@
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/shmem_fs.h>
+#include <linux/vmalloc.h>
#include "i915_drv.h"
#include "gem/i915_gem_object.h"
@@ -39,7 +40,7 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
if (i915_gem_object_is_shmem(obj)) {
file = obj->base.filp;
- atomic_long_inc(&file->f_count);
+ get_file(file);
return file;
}
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
index 021f51d9b456..aab2759067d2 100644
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -530,9 +530,8 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
err_object:
kobject_put(kobj);
err_engine:
- dev_err(kdev, "Failed to add sysfs engine '%s'\n",
- engine->name);
- break;
+ dev_warn(kdev, "Failed to add sysfs engine '%s'\n",
+ engine->name);
}
}
}
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
index 811add10c30d..c34674e797c6 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
@@ -207,6 +207,27 @@ struct slpc_shared_data {
u8 reserved_mode_definition[4096];
} __packed;
+struct slpc_context_frequency_request {
+ u32 frequency_request:16;
+ u32 reserved:12;
+ u32 is_compute:1;
+ u32 ignore_busyness:1;
+ u32 is_minimum:1;
+ u32 is_predefined:1;
+} __packed;
+
+#define SLPC_CTX_FREQ_REQ_IS_COMPUTE REG_BIT(28)
+
+struct slpc_optimized_strategies {
+ u32 compute:1;
+ u32 async_flip:1;
+ u32 media:1;
+ u32 vsync_flip:1;
+ u32 reserved:28;
+} __packed;
+
+#define SLPC_OPTIMIZED_STRATEGY_COMPUTE REG_BIT(0)
+
/**
* DOC: SLPC H2G MESSAGE FORMAT
*
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
index dabeaf4f245f..00d6402333f8 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
@@ -36,6 +36,7 @@ enum intel_guc_load_status {
INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START,
INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73,
INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74,
+ INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR = 0x75,
INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END,
INTEL_GUC_LOAD_STATUS_READY = 0xF0,
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
index 58012edd4eb0..0c709e6c15be 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
@@ -29,9 +29,9 @@
*/
#define GUC_KLV_LEN_MIN 1u
-#define GUC_KLV_0_KEY (0xffff << 16)
-#define GUC_KLV_0_LEN (0xffff << 0)
-#define GUC_KLV_n_VALUE (0xffffffff << 0)
+#define GUC_KLV_0_KEY (0xffffu << 16)
+#define GUC_KLV_0_LEN (0xffffu << 0)
+#define GUC_KLV_n_VALUE (0xffffffffu << 0)
/**
* DOC: GuC Self Config KLVs
@@ -101,4 +101,13 @@ enum {
GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5,
};
+/*
+ * Workaround keys:
+ */
+enum {
+ GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE = 0x9001,
+ GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002,
+ GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE = 0x9006,
+};
+
#endif /* _ABI_GUC_KLVS_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
index e2e42b3e0d5d..5dc0ccd07636 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
@@ -80,6 +80,7 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s
const struct intel_gsc_cpd_header_v2 *cpd_header = NULL;
const struct intel_gsc_cpd_entry *cpd_entry = NULL;
const struct intel_gsc_manifest_header *manifest;
+ struct intel_uc_fw_ver min_ver = { 0 };
size_t min_size = sizeof(*layout);
int i;
@@ -212,6 +213,50 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s
}
}
+ /*
+ * ARL SKUs require newer firmwares, but the blob is actually common
+ * across all MTL and ARL SKUs, so we need to do an explicit version check
+ * here rather than using a separate table entry. If a too old version
+ * is found, then just don't use GSC rather than aborting the driver load.
+ * Note that the major number in the GSC FW version is used to indicate
+ * the platform, so we expect it to always be 102 for MTL/ARL binaries.
+ */
+ if (IS_ARROWLAKE_S(gt->i915))
+ min_ver = (struct intel_uc_fw_ver){ 102, 0, 10, 1878 };
+ else if (IS_ARROWLAKE_H(gt->i915) || IS_ARROWLAKE_U(gt->i915))
+ min_ver = (struct intel_uc_fw_ver){ 102, 1, 15, 1926 };
+
+ if (IS_METEORLAKE(gt->i915) && gsc->release.major != 102) {
+ gt_info(gt, "Invalid GSC firmware for MTL/ARL, got %d.%d.%d.%d but need 102.x.x.x",
+ gsc->release.major, gsc->release.minor,
+ gsc->release.patch, gsc->release.build);
+ return -EINVAL;
+ }
+
+ if (min_ver.major) {
+ bool too_old = false;
+
+ if (gsc->release.minor < min_ver.minor) {
+ too_old = true;
+ } else if (gsc->release.minor == min_ver.minor) {
+ if (gsc->release.patch < min_ver.patch) {
+ too_old = true;
+ } else if (gsc->release.patch == min_ver.patch) {
+ if (gsc->release.build < min_ver.build)
+ too_old = true;
+ }
+ }
+
+ if (too_old) {
+ gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least %d.%d.%d.%d",
+ gsc->release.major, gsc->release.minor,
+ gsc->release.patch, gsc->release.build,
+ min_ver.major, min_ver.minor,
+ min_ver.patch, min_ver.build);
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -298,7 +343,7 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc)
memcpy_toio(gsc->local_vaddr, src, gsc->fw.size);
memset_io(gsc->local_vaddr + gsc->fw.size, 0, gsc->local->size - gsc->fw.size);
- intel_guc_write_barrier(&gt->uc.guc);
+ intel_guc_write_barrier(gt_to_guc(gt));
i915_gem_object_unpin_map(gsc->fw.obj);
@@ -351,7 +396,7 @@ static int gsc_fw_query_compatibility_version(struct intel_gsc_uc *gsc)
void *vaddr;
int err;
- err = intel_guc_allocate_and_map_vma(&gt->uc.guc, GSC_VER_PKT_SZ * 2,
+ err = intel_guc_allocate_and_map_vma(gt_to_guc(gt), GSC_VER_PKT_SZ * 2,
&vma, &vaddr);
if (err) {
gt_err(gt, "failed to allocate vma for GSC version query\n");
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
index 40817ebcca71..d8edd7c054c8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
@@ -5,8 +5,8 @@
#include <linux/component.h>
-#include <drm/i915_component.h>
-#include <drm/i915_gsc_proxy_mei_interface.h>
+#include <drm/intel/i915_component.h>
+#include <drm/intel/i915_gsc_proxy_mei_interface.h>
#include "gt/intel_gt.h"
#include "gt/intel_gt_print.h"
@@ -358,7 +358,8 @@ static int proxy_channel_alloc(struct intel_gsc_uc *gsc)
void *vaddr;
int err;
- err = intel_guc_allocate_and_map_vma(&gt->uc.guc, GSC_PROXY_CHANNEL_SIZE,
+ err = intel_guc_allocate_and_map_vma(gt_to_guc(gt),
+ GSC_PROXY_CHANNEL_SIZE,
&vma, &vaddr);
if (err)
return err;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
index 453d855dd1de..3d3191deb0ab 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
@@ -302,7 +302,7 @@ void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc)
{
struct intel_gt *gt = gsc_uc_to_gt(gsc);
- if (!intel_uc_fw_is_loadable(&gsc->fw))
+ if (!intel_uc_fw_is_loadable(&gsc->fw) || intel_uc_fw_is_in_error(&gsc->fw))
return;
if (intel_gsc_uc_fw_init_done(gsc))
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 2b450c43bbd7..5949ff0b0161 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -239,8 +239,16 @@ static u32 guc_ctl_debug_flags(struct intel_guc *guc)
static u32 guc_ctl_feature_flags(struct intel_guc *guc)
{
+ struct intel_gt *gt = guc_to_gt(guc);
u32 flags = 0;
+ /*
+ * Enable PXP GuC autoteardown flow.
+ * NB: MTL does things differently.
+ */
+ if (HAS_PXP(gt->i915) && !IS_METEORLAKE(gt->i915))
+ flags |= GUC_CTL_ENABLE_GUC_PXP_CTL;
+
if (!intel_guc_submission_is_used(guc))
flags |= GUC_CTL_DISABLE_SCHEDULER;
@@ -286,7 +294,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
/* Wa_22012773006:gen11,gen12 < XeHP */
if (GRAPHICS_VER(gt->i915) >= 11 &&
- GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50))
+ GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 55))
flags |= GUC_WA_POLLCS;
/* Wa_14014475959 */
@@ -294,6 +302,11 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
IS_DG2(gt->i915))
flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
+ /* Wa_16019325821 */
+ /* Wa_14019159160 */
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
+ flags |= GUC_WA_RCS_CCS_SWITCHOUT;
+
/*
* Wa_14012197797
* Wa_22011391025
@@ -315,15 +328,12 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
if (IS_DG2_G11(gt->i915))
flags |= GUC_WA_CONTEXT_ISOLATION;
- /* Wa_16015675438 */
- if (!RCS_MASK(gt))
- flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
-
- /* Wa_14018913170 */
- if (GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 7, 0)) {
- if (IS_DG2(gt->i915) || IS_METEORLAKE(gt->i915) || IS_PONTEVECCHIO(gt->i915))
- flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6;
- }
+ /*
+ * Wa_14018913170: Applicable to all platforms supported by i915 so
+ * don't bother testing for all X/Y/Z platforms explicitly.
+ */
+ if (GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 7, 0))
+ flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6;
return flags;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 813cc888e6fa..57b903132776 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -204,10 +204,10 @@ struct intel_guc {
struct guc_mmio_reg *ads_regset;
/** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
u32 ads_golden_ctxt_size;
+ /** @ads_waklv_size: size of workaround KLVs */
+ u32 ads_waklv_size;
/** @ads_capture_size: size of register lists in the ADS used for error capture */
u32 ads_capture_size;
- /** @ads_engine_usage_size: size of engine usage in the ADS */
- u32 ads_engine_usage_size;
/** @lrc_desc_pool_v69: object allocated to hold the GuC LRC descriptor pool */
struct i915_vma *lrc_desc_pool_v69;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 63724e17829a..46fabbfc775e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -46,6 +46,10 @@
* +---------------------------------------+
* | padding |
* +---------------------------------------+ <== 4K aligned
+ * | w/a KLVs |
+ * +---------------------------------------+
+ * | padding |
+ * +---------------------------------------+ <== 4K aligned
* | capture lists |
* +---------------------------------------+
* | padding |
@@ -88,6 +92,11 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc)
return PAGE_ALIGN(guc->ads_golden_ctxt_size);
}
+static u32 guc_ads_waklv_size(struct intel_guc *guc)
+{
+ return PAGE_ALIGN(guc->ads_waklv_size);
+}
+
static u32 guc_ads_capture_size(struct intel_guc *guc)
{
return PAGE_ALIGN(guc->ads_capture_size);
@@ -113,7 +122,7 @@ static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc)
return PAGE_ALIGN(offset);
}
-static u32 guc_ads_capture_offset(struct intel_guc *guc)
+static u32 guc_ads_waklv_offset(struct intel_guc *guc)
{
u32 offset;
@@ -123,6 +132,16 @@ static u32 guc_ads_capture_offset(struct intel_guc *guc)
return PAGE_ALIGN(offset);
}
+static u32 guc_ads_capture_offset(struct intel_guc *guc)
+{
+ u32 offset;
+
+ offset = guc_ads_waklv_offset(guc) +
+ guc_ads_waklv_size(guc);
+
+ return PAGE_ALIGN(offset);
+}
+
static u32 guc_ads_private_data_offset(struct intel_guc *guc)
{
u32 offset;
@@ -377,8 +396,13 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
CCS_MASK(engine->gt))
ret |= GUC_MMIO_REG_ADD(gt, regset, GEN12_RCU_MODE, true);
+ /*
+ * some of the WA registers are MCR registers. As it is safe to
+ * use MCR form for non-MCR registers, for code simplicity, all
+ * WA registers are added with MCR form.
+ */
for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
- ret |= GUC_MMIO_REG_ADD(gt, regset, wa->reg, wa->masked_reg);
+ ret |= GUC_MCR_REG_ADD(gt, regset, wa->mcr_reg, wa->masked_reg);
/* Be extra paranoid and include all whitelist registers. */
for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
@@ -388,19 +412,19 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
/* add in local MOCS registers */
for (i = 0; i < LNCFCMOCS_REG_COUNT; i++)
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
ret |= GUC_MCR_REG_ADD(gt, regset, XEHP_LNCFCMOCS(i), false);
else
ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
if (GRAPHICS_VER(engine->i915) >= 12) {
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL0, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL1, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL2, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL3, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL4, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL5, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL6, false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL0)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL1)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL2)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL3)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL4)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL5)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL6)), false);
}
return ret ? -1 : 0;
@@ -498,7 +522,7 @@ static void fill_engine_enable_masks(struct intel_gt *gt,
#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32))
#define XEHP_LR_HW_CONTEXT_SIZE (96 * sizeof(u32))
-#define LR_HW_CONTEXT_SZ(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50) ? \
+#define LR_HW_CONTEXT_SZ(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55) ? \
XEHP_LR_HW_CONTEXT_SIZE : \
LR_HW_CONTEXT_SIZE)
#define LRC_SKIP_SIZE(i915) (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SZ(i915))
@@ -791,6 +815,73 @@ engine_instance_list:
return PAGE_ALIGN(total_size);
}
+static void guc_waklv_enable_simple(struct intel_guc *guc, u32 *offset, u32 *remain, u32 klv_id)
+{
+ u32 size;
+ u32 klv_entry[] = {
+ /* 16:16 key/length */
+ FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
+ FIELD_PREP(GUC_KLV_0_LEN, 0),
+ /* 0 dwords data */
+ };
+
+ size = sizeof(klv_entry);
+ GEM_BUG_ON(*remain < size);
+
+ iosys_map_memcpy_to(&guc->ads_map, *offset, klv_entry, size);
+ *offset += size;
+ *remain -= size;
+}
+
+static void guc_waklv_init(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ u32 offset, addr_ggtt, remain, size;
+
+ if (!intel_uc_uses_guc_submission(&gt->uc))
+ return;
+
+ if (GUC_FIRMWARE_VER(guc) < MAKE_GUC_VER(70, 10, 0))
+ return;
+
+ GEM_BUG_ON(iosys_map_is_null(&guc->ads_map));
+ offset = guc_ads_waklv_offset(guc);
+ remain = guc_ads_waklv_size(guc);
+
+ /* Wa_14019159160 */
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
+ guc_waklv_enable_simple(guc, &offset, &remain,
+ GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE);
+ guc_waklv_enable_simple(guc, &offset, &remain,
+ GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE);
+ }
+
+ /* Wa_16021333562 */
+ if ((GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 21, 1)) &&
+ (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) ||
+ IS_MEDIA_GT_IP_RANGE(gt, IP_VER(13, 0), IP_VER(13, 0)) ||
+ IS_DG2(gt->i915)))
+ guc_waklv_enable_simple(guc, &offset, &remain,
+ GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED);
+
+ size = guc_ads_waklv_size(guc) - remain;
+ if (!size)
+ return;
+
+ offset = guc_ads_waklv_offset(guc);
+ addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
+
+ ads_blob_write(guc, ads.wa_klv_addr_lo, addr_ggtt);
+ ads_blob_write(guc, ads.wa_klv_addr_hi, 0);
+ ads_blob_write(guc, ads.wa_klv_size, size);
+}
+
+static int guc_prep_waklv(struct intel_guc *guc)
+{
+ /* Fudge something chunky for now: */
+ return PAGE_SIZE;
+}
+
static void __guc_ads_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -838,6 +929,9 @@ static void __guc_ads_init(struct intel_guc *guc)
/* MMIO save/restore list */
guc_mmio_reg_state_init(guc);
+ /* Workaround KLV list */
+ guc_waklv_init(guc);
+
/* Private Data */
ads_blob_write(guc, ads.private_data, base +
guc_ads_private_data_offset(guc));
@@ -881,6 +975,12 @@ int intel_guc_ads_create(struct intel_guc *guc)
return ret;
guc->ads_capture_size = ret;
+ /* And don't forget the workaround KLVs: */
+ ret = guc_prep_waklv(guc);
+ if (ret < 0)
+ return ret;
+ guc->ads_waklv_size = ret;
+
/* Now the total size can be determined: */
size = guc_ads_blob_size(guc);
@@ -956,7 +1056,7 @@ u32 intel_guc_engine_usage_offset(struct intel_guc *guc)
struct iosys_map intel_guc_engine_usage_record_map(struct intel_engine_cs *engine)
{
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
u8 guc_class = engine_class_to_guc_class(engine->class);
size_t offset = offsetof(struct __guc_ads_blob,
engine_usage.engines[guc_class][ilog2(engine->logical_mask)]);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
index a1cd40d80517..9547fff672bd 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
@@ -51,6 +51,7 @@
{ RING_ESR(0), 0, 0, "ESR" }, \
{ RING_DMA_FADD(0), 0, 0, "RING_DMA_FADD_LDW" }, \
{ RING_DMA_FADD_UDW(0), 0, 0, "RING_DMA_FADD_UDW" }, \
+ { RING_EIR(0), 0, 0, "EIR" }, \
{ RING_IPEIR(0), 0, 0, "IPEIR" }, \
{ RING_IPEHR(0), 0, 0, "IPEHR" }, \
{ RING_INSTPS(0), 0, 0, "INSTPS" }, \
@@ -80,9 +81,6 @@
{ GEN8_RING_PDP_LDW(0, 3), 0, 0, "PDP3_LDW" }, \
{ GEN8_RING_PDP_UDW(0, 3), 0, 0, "PDP3_UDW" }
-#define COMMON_BASE_HAS_EU \
- { EIR, 0, 0, "EIR" }
-
#define COMMON_BASE_RENDER \
{ GEN7_SC_INSTDONE, 0, 0, "GEN7_SC_INSTDONE" }
@@ -105,7 +103,6 @@ static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
/* XE_LP Render / Compute Per-Class */
static const struct __guc_mmio_reg_descr xe_lp_rc_class_regs[] = {
- COMMON_BASE_HAS_EU,
COMMON_BASE_RENDER,
COMMON_GEN12BASE_RENDER,
};
@@ -148,7 +145,6 @@ static const struct __guc_mmio_reg_descr gen8_global_regs[] = {
};
static const struct __guc_mmio_reg_descr gen8_rc_class_regs[] = {
- COMMON_BASE_HAS_EU,
COMMON_BASE_RENDER,
};
@@ -1441,7 +1437,7 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf,
if (!cap || !ee->engine)
return -ENODEV;
- guc = &ee->engine->gt->uc.guc;
+ guc = gt_to_guc(ee->engine->gt);
i915_error_printf(ebuf, "global --- GuC Error Capture on %s command stream:\n",
ee->engine->name);
@@ -1543,7 +1539,7 @@ bool intel_guc_capture_is_matching_engine(struct intel_gt *gt,
if (!gt || !ce || !engine)
return false;
- guc = &gt->uc.guc;
+ guc = gt_to_guc(gt);
if (!guc->capture)
return false;
@@ -1573,7 +1569,7 @@ void intel_guc_capture_get_matching_node(struct intel_gt *gt,
if (!gt || !ee || !ce)
return;
- guc = &gt->uc.guc;
+ guc = gt_to_guc(gt);
if (!guc->capture)
return;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 0f79cb658518..fe53e8eccf4b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -26,7 +26,7 @@ static void guc_prepare_xfer(struct intel_gt *gt)
GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
GUC_ENABLE_MIA_CLOCK_GATING;
- if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 55))
shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES |
GUC_ENABLE_MIA_CACHING;
@@ -115,6 +115,7 @@ static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool
case INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID:
case INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID:
case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
+ case INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR:
*success = false;
return true;
}
@@ -144,7 +145,7 @@ static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool
* an end user should hit the timeout is in case of extreme thermal throttling.
* And a system that is that hot during boot is probably dead anyway!
*/
-#if defined(CONFIG_DRM_I915_DEBUG_GEM)
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
#define GUC_LOAD_RETRY_LIMIT 20
#else
#define GUC_LOAD_RETRY_LIMIT 3
@@ -184,7 +185,7 @@ static int guc_wait_ucode(struct intel_guc *guc)
* in the seconds range. However, there is a limit on how long an
* individual wait_for() can wait. So wrap it in a loop.
*/
- before_freq = intel_rps_read_actual_frequency(&uncore->gt->rps);
+ before_freq = intel_rps_read_actual_frequency(&gt->rps);
before = ktime_get();
for (count = 0; count < GUC_LOAD_RETRY_LIMIT; count++) {
ret = wait_for(guc_load_done(uncore, &status, &success), 1000);
@@ -192,7 +193,7 @@ static int guc_wait_ucode(struct intel_guc *guc)
break;
guc_dbg(guc, "load still in progress, count = %d, freq = %dMHz, status = 0x%08X [0x%02X/%02X]\n",
- count, intel_rps_read_actual_frequency(&uncore->gt->rps), status,
+ count, intel_rps_read_actual_frequency(&gt->rps), status,
REG_FIELD_GET(GS_BOOTROM_MASK, status),
REG_FIELD_GET(GS_UKERNEL_MASK, status));
}
@@ -204,7 +205,7 @@ static int guc_wait_ucode(struct intel_guc *guc)
u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status);
guc_info(guc, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz, ret = %d\n",
- status, delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), ret);
+ status, delta_ms, intel_rps_read_actual_frequency(&gt->rps), ret);
guc_info(guc, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
REG_FIELD_GET(GS_MIA_IN_RESET, status),
bootrom, ukernel,
@@ -241,6 +242,11 @@ static int guc_wait_ucode(struct intel_guc *guc)
ret = -EPERM;
break;
+ case INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR:
+ guc_info(guc, "invalid w/a KLV entry\n");
+ ret = -EINVAL;
+ break;
+
case INTEL_GUC_LOAD_STATUS_HWCONFIG_START:
guc_info(guc, "still extracting hwconfig table.\n");
ret = -ETIMEDOUT;
@@ -254,11 +260,11 @@ static int guc_wait_ucode(struct intel_guc *guc)
guc_warn(guc, "excessive init time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n",
delta_ms, status, count, ret);
guc_warn(guc, "excessive init time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n",
- intel_rps_read_actual_frequency(&uncore->gt->rps), before_freq,
+ intel_rps_read_actual_frequency(&gt->rps), before_freq,
intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt)));
} else {
guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n",
- delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps),
+ delta_ms, intel_rps_read_actual_frequency(&gt->rps),
before_freq, status, count, ret);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 8ae1846431da..4ce6e2332a63 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -96,14 +96,16 @@
#define GUC_WA_GAM_CREDITS BIT(10)
#define GUC_WA_DUAL_QUEUE BIT(11)
#define GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13)
-#define GUC_WA_CONTEXT_ISOLATION BIT(15)
#define GUC_WA_PRE_PARSER BIT(14)
+#define GUC_WA_CONTEXT_ISOLATION BIT(15)
+#define GUC_WA_RCS_CCS_SWITCHOUT BIT(16)
#define GUC_WA_HOLD_CCS_SWITCHOUT BIT(17)
#define GUC_WA_POLLCS BIT(18)
#define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21)
#define GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22)
#define GUC_CTL_FEATURE 2
+#define GUC_CTL_ENABLE_GUC_PXP_CTL BIT(1)
#define GUC_CTL_ENABLE_SLPC BIT(2)
#define GUC_CTL_DISABLE_SCHEDULER BIT(14)
@@ -294,7 +296,7 @@ struct guc_update_scheduling_policy_header {
} __packed;
/*
- * Can't dynmically allocate memory for the scheduling policy KLV because
+ * Can't dynamically allocate memory for the scheduling policy KLV because
* it will be sent from within the reset path. Need a fixed size lump on
* the stack instead :(.
*
@@ -430,7 +432,10 @@ struct guc_ads {
u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX];
- u32 reserved[14];
+ u32 wa_klv_addr_lo;
+ u32 wa_klv_addr_hi;
+ u32 wa_klv_size;
+ u32 reserved[11];
} __packed;
/* Engine usage stats */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
index cc9569af7f0c..b67a15f74276 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
@@ -111,7 +111,7 @@ static bool has_table(struct drm_i915_private *i915)
static int guc_hwconfig_init(struct intel_gt *gt)
{
struct intel_hwconfig *hwconfig = &gt->info.hwconfig;
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
int ret;
if (!has_table(gt->i915))
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index bf16351c9349..e8a04e476c57 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -14,11 +14,11 @@
#include "intel_guc_log.h"
#include "intel_guc_print.h"
-#if defined(CONFIG_DRM_I915_DEBUG_GUC)
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_2M
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_16M
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
-#elif defined(CONFIG_DRM_I915_DEBUG_GEM)
+#elif IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_1M
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_2M
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 3e681ab6fbf9..706fffca698b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -537,6 +537,20 @@ int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val)
return ret;
}
+int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val)
+{
+ struct drm_i915_private *i915 = slpc_to_i915(slpc);
+ intel_wakeref_t wakeref;
+ int ret = 0;
+
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ ret = slpc_set_param(slpc,
+ SLPC_PARAM_STRATEGIES,
+ val);
+
+ return ret;
+}
+
int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val)
{
struct drm_i915_private *i915 = slpc_to_i915(slpc);
@@ -711,6 +725,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
/* Set cached media freq ratio mode */
intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode);
+ /* Enable SLPC Optimized Strategy for compute */
+ intel_guc_slpc_set_strategy(slpc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 6ac6503c39d4..1cb5fd44f05c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -45,5 +45,6 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val);
+int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index a259f1118c5a..3fce5c000144 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -236,6 +236,13 @@ set_context_destroyed(struct intel_context *ce)
ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
}
+static inline void
+clr_context_destroyed(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state &= ~SCHED_STATE_DESTROYED;
+}
+
static inline bool context_pending_disable(struct intel_context *ce)
{
return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
@@ -391,7 +398,7 @@ static inline void set_context_guc_id_invalid(struct intel_context *ce)
static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
{
- return &ce->engine->gt->uc.guc;
+ return gt_to_guc(ce->engine->gt);
}
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
@@ -613,6 +620,8 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
u32 g2h_len_dw,
bool loop)
{
+ int ret;
+
/*
* We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
* so we don't handle the case where we don't get a reply because we
@@ -623,7 +632,11 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
if (g2h_len_dw)
atomic_inc(&guc->outstanding_submission_g2h);
- return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+ ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+ if (ret)
+ atomic_dec(&guc->outstanding_submission_g2h);
+
+ return ret;
}
int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
@@ -1230,10 +1243,25 @@ static void __get_engine_usage_record(struct intel_engine_cs *engine,
} while (++i < 6);
}
+static void __set_engine_usage_record(struct intel_engine_cs *engine,
+ u32 last_in, u32 id, u32 total)
+{
+ struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
+
+#define record_write(map_, field_, val_) \
+ iosys_map_wr_field(map_, 0, struct guc_engine_usage_record, field_, val_)
+
+ record_write(&rec_map, last_switch_in_stamp, last_in);
+ record_write(&rec_map, current_context_index, id);
+ record_write(&rec_map, total_runtime, total);
+
+#undef record_write
+}
+
static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
struct intel_engine_guc_stats *stats = &engine->stats.guc;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
u32 last_switch, ctx_id, total;
lockdep_assert_held(&guc->timestamp.lock);
@@ -1298,7 +1326,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
struct intel_gt *gt = engine->gt;
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
u64 total, gt_stamp_saved;
unsigned long flags;
u32 reset_count;
@@ -1326,7 +1354,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
* start_gt_clk is derived from GuC state. To get a consistent
* view of activity, we query the GuC state only if gt is awake.
*/
- wakeref = in_reset ? 0 : intel_gt_pm_get_if_awake(gt);
+ wakeref = in_reset ? NULL : intel_gt_pm_get_if_awake(gt);
if (wakeref) {
stats_saved = *stats;
gt_stamp_saved = guc->timestamp.gt_stamp;
@@ -1350,9 +1378,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
total += intel_gt_clock_interval_to_ns(gt, clk);
}
+ if (total > stats->total)
+ stats->total = total;
+
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
- return ns_to_ktime(total);
+ return ns_to_ktime(stats->total);
}
static void guc_enable_busyness_worker(struct intel_guc *guc)
@@ -1362,7 +1393,48 @@ static void guc_enable_busyness_worker(struct intel_guc *guc)
static void guc_cancel_busyness_worker(struct intel_guc *guc)
{
- cancel_delayed_work_sync(&guc->timestamp.work);
+ /*
+ * There are many different call stacks that can get here. Some of them
+ * hold the reset mutex. The busyness worker also attempts to acquire the
+ * reset mutex. Synchronously flushing a worker thread requires acquiring
+ * the worker mutex. Lockdep sees this as a conflict. It thinks that the
+ * flush can deadlock because it holds the worker mutex while waiting for
+ * the reset mutex, but another thread is holding the reset mutex and might
+ * attempt to use other worker functions.
+ *
+ * In practice, this scenario does not exist because the busyness worker
+ * does not block waiting for the reset mutex. It does a try-lock on it and
+ * immediately exits if the lock is already held. Unfortunately, the mutex
+ * in question (I915_RESET_BACKOFF) is an i915 implementation which has lockdep
+ * annotation but not to the extent of explaining the 'might lock' is also a
+ * 'does not need to lock'. So one option would be to add more complex lockdep
+ * annotations to ignore the issue (if at all possible). A simpler option is to
+ * just not flush synchronously when a rest in progress. Given that the worker
+ * will just early exit and re-schedule itself anyway, there is no advantage
+ * to running it immediately.
+ *
+ * If a reset is not in progress, then the synchronous flush may be required.
+ * As noted many call stacks lead here, some during suspend and driver unload
+ * which do require a synchronous flush to make sure the worker is stopped
+ * before memory is freed.
+ *
+ * Trying to pass a 'need_sync' or 'in_reset' flag all the way down through
+ * every possible call stack is unfeasible. It would be too intrusive to many
+ * areas that really don't care about the GuC backend. However, there is the
+ * I915_RESET_BACKOFF flag and the gt->reset.mutex can be tested for is_locked.
+ * So just use those. Note that testing both is required due to the hideously
+ * complex nature of the i915 driver's reset code paths.
+ *
+ * And note that in the case of a reset occurring during driver unload
+ * (wedged_on_fini), skipping the cancel in reset_prepare/reset_fini (when the
+ * reset flag/mutex are set) is fine because there is another explicit cancel in
+ * intel_guc_submission_fini (when the reset flag/mutex are not).
+ */
+ if (mutex_is_locked(&guc_to_gt(guc)->reset.mutex) ||
+ test_bit(I915_RESET_BACKOFF, &guc_to_gt(guc)->reset.flags))
+ cancel_delayed_work(&guc->timestamp.work);
+ else
+ cancel_delayed_work_sync(&guc->timestamp.work);
}
static void __reset_guc_busyness_stats(struct intel_guc *guc)
@@ -1373,19 +1445,43 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
unsigned long flags;
ktime_t unused;
- guc_cancel_busyness_worker(guc);
-
spin_lock_irqsave(&guc->timestamp.lock, flags);
guc_update_pm_timestamp(guc, &unused);
for_each_engine(engine, gt, id) {
+ struct intel_engine_guc_stats *stats = &engine->stats.guc;
+
guc_update_engine_gt_clks(engine);
- engine->stats.guc.prev_total = 0;
+
+ /*
+ * If resetting a running context, accumulate the active
+ * time as well since there will be no context switch.
+ */
+ if (stats->running) {
+ u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
+
+ stats->total_gt_clks += clk;
+ }
+ stats->prev_total = 0;
+ stats->running = 0;
}
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}
+static void __update_guc_busyness_running_state(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned long flags;
+
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+ for_each_engine(engine, gt, id)
+ engine->stats.guc.running = false;
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+
static void __update_guc_busyness_stats(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -1491,6 +1587,9 @@ err_trylock:
static int guc_action_enable_usage_stats(struct intel_guc *guc)
{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
u32 offset = intel_guc_engine_usage_offset(guc);
u32 action[] = {
INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
@@ -1498,6 +1597,9 @@ static int guc_action_enable_usage_stats(struct intel_guc *guc)
0,
};
+ for_each_engine(engine, gt, id)
+ __set_engine_usage_record(engine, 0, 0xffffffff, 0);
+
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
@@ -1525,11 +1627,14 @@ static void guc_fini_engine_stats(struct intel_guc *guc)
void intel_guc_busyness_park(struct intel_gt *gt)
{
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
if (!guc_submission_initialized(guc))
return;
+ /* Assume no engines are running and set running state to false */
+ __update_guc_busyness_running_state(guc);
+
/*
* There is a race with suspend flow where the worker runs after suspend
* and causes an unclaimed register access warning. Cancel the worker
@@ -1552,7 +1657,7 @@ void intel_guc_busyness_park(struct intel_gt *gt)
void intel_guc_busyness_unpark(struct intel_gt *gt)
{
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
unsigned long flags;
ktime_t unused;
@@ -1613,6 +1718,11 @@ static void guc_flush_submissions(struct intel_guc *guc)
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
+void intel_guc_submission_flush_work(struct intel_guc *guc)
+{
+ flush_work(&guc->submission_state.destroyed_worker);
+}
+
static void guc_flush_destroyed_contexts(struct intel_guc *guc);
void intel_guc_submission_reset_prepare(struct intel_guc *guc)
@@ -1631,6 +1741,10 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
spin_lock_irq(guc_to_gt(guc)->irq_lock);
spin_unlock_irq(guc_to_gt(guc)->irq_lock);
+ /* Flush tasklet */
+ tasklet_disable(&guc->ct.receive_tasklet);
+ tasklet_enable(&guc->ct.receive_tasklet);
+
guc_flush_submissions(guc);
guc_flush_destroyed_contexts(guc);
flush_work(&guc->ct.requests.worker);
@@ -1948,19 +2062,25 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
void intel_guc_submission_reset_finish(struct intel_guc *guc)
{
+ int outstanding;
+
/* Reset called during driver load or during wedge? */
if (unlikely(!guc_submission_initialized(guc) ||
+ !intel_guc_is_fw_running(guc) ||
intel_gt_is_wedged(guc_to_gt(guc)))) {
return;
}
/*
* Technically possible for either of these values to be non-zero here,
- * but very unlikely + harmless. Regardless let's add a warn so we can
+ * but very unlikely + harmless. Regardless let's add an error so we can
* see in CI if this happens frequently / a precursor to taking down the
* machine.
*/
- GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
+ outstanding = atomic_read(&guc->outstanding_submission_g2h);
+ if (outstanding)
+ guc_err(guc, "Unexpected outstanding GuC to Host response(s) in reset finish: %d\n",
+ outstanding);
atomic_set(&guc->outstanding_submission_g2h, 0);
intel_guc_global_policies_update(guc);
@@ -2072,6 +2192,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
if (!guc->submission_initialized)
return;
+ guc_fini_engine_stats(guc);
guc_flush_destroyed_contexts(guc);
guc_lrc_desc_pool_destroy_v69(guc);
i915_sched_engine_put(guc->sched_engine);
@@ -2130,7 +2251,7 @@ static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
static void guc_submit_request(struct i915_request *rq)
{
struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
- struct intel_guc *guc = &rq->engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(rq->engine->gt);
unsigned long flags;
/* Will be called from irq-context when using foreign fences. */
@@ -2156,11 +2277,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
order_base_2(ce->parallel.number_children
+ 1));
else
- ret = ida_simple_get(&guc->submission_state.guc_ids,
- NUMBER_MULTI_LRC_GUC_ID(guc),
- guc->submission_state.num_guc_ids,
- GFP_KERNEL | __GFP_RETRY_MAYFAIL |
- __GFP_NOWARN);
+ ret = ida_alloc_range(&guc->submission_state.guc_ids,
+ NUMBER_MULTI_LRC_GUC_ID(guc),
+ guc->submission_state.num_guc_ids - 1,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (unlikely(ret < 0))
return ret;
@@ -2183,8 +2303,8 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
+ 1));
} else {
--guc->submission_state.guc_ids_in_use;
- ida_simple_remove(&guc->submission_state.guc_ids,
- ce->guc_id.id);
+ ida_free(&guc->submission_state.guc_ids,
+ ce->guc_id.id);
}
clr_ctx_id_mapping(guc, ce->guc_id.id);
set_context_guc_id_invalid(ce);
@@ -2581,6 +2701,7 @@ MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
+MAKE_CONTEXT_POLICY_ADD(slpc_ctx_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_CONTEXT_POLICY_ADD
@@ -2596,10 +2717,11 @@ static int __guc_context_set_context_policies(struct intel_guc *guc,
static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
struct context_policy policy;
u32 execution_quantum;
u32 preemption_timeout;
+ u32 slpc_ctx_freq_req = 0;
unsigned long flags;
int ret;
@@ -2611,11 +2733,15 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
execution_quantum = engine->props.timeslice_duration_ms * 1000;
preemption_timeout = engine->props.preempt_timeout_ms * 1000;
+ if (ce->flags & BIT(CONTEXT_LOW_LATENCY))
+ slpc_ctx_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
+
__guc_context_policy_start_klv(&policy, ce->guc_id.id);
__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+ __guc_context_policy_add_slpc_ctx_freq_req(&policy, slpc_ctx_freq_req);
if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
__guc_context_policy_add_preempt_to_idle(&policy, 1);
@@ -2672,7 +2798,7 @@ static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
static void prepare_context_registration_info_v69(struct intel_context *ce)
{
struct intel_engine_cs *engine = ce->engine;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
u32 ctx_id = ce->guc_id.id;
struct guc_lrc_desc_v69 *desc;
struct intel_context *child;
@@ -2741,7 +2867,7 @@ static void prepare_context_registration_info_v70(struct intel_context *ce,
struct guc_ctxt_registration_info *info)
{
struct intel_engine_cs *engine = ce->engine;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
u32 ctx_id = ce->guc_id.id;
GEM_BUG_ON(!engine->mask);
@@ -2778,9 +2904,9 @@ static void prepare_context_registration_info_v70(struct intel_context *ce,
ce->parallel.guc.wqi_tail = 0;
ce->parallel.guc.wqi_head = 0;
- wq_desc_offset = i915_ggtt_offset(ce->state) +
+ wq_desc_offset = (u64)i915_ggtt_offset(ce->state) +
__get_parent_scratch_offset(ce);
- wq_base_offset = i915_ggtt_offset(ce->state) +
+ wq_base_offset = (u64)i915_ggtt_offset(ce->state) +
__get_wq_offset(ce);
info->wq_desc_lo = lower_32_bits(wq_desc_offset);
info->wq_desc_hi = upper_32_bits(wq_desc_offset);
@@ -2804,7 +2930,7 @@ static int try_context_registration(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
intel_wakeref_t wakeref;
u32 ctx_id = ce->guc_id.id;
bool context_registered;
@@ -3283,12 +3409,13 @@ static void guc_context_close(struct intel_context *ce)
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
}
-static inline void guc_lrc_desc_unpin(struct intel_context *ce)
+static inline int guc_lrc_desc_unpin(struct intel_context *ce)
{
struct intel_guc *guc = ce_to_guc(ce);
struct intel_gt *gt = guc_to_gt(guc);
unsigned long flags;
bool disabled;
+ int ret;
GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
@@ -3299,18 +3426,41 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
spin_lock_irqsave(&ce->guc_state.lock, flags);
disabled = submission_disabled(guc);
if (likely(!disabled)) {
+ /*
+ * Take a gt-pm ref and change context state to be destroyed.
+ * NOTE: a G2H IRQ that comes after will put this gt-pm ref back
+ */
__intel_gt_pm_get(gt);
set_context_destroyed(ce);
clr_context_registered(ce);
}
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
if (unlikely(disabled)) {
release_guc_id(guc, ce);
__guc_context_destroy(ce);
- return;
+ return 0;
}
- deregister_context(ce, ce->guc_id.id);
+ /*
+ * GuC is active, lets destroy this context, but at this point we can still be racing
+ * with suspend, so we undo everything if the H2G fails in deregister_context so
+ * that GuC reset will find this context during clean up.
+ */
+ ret = deregister_context(ce, ce->guc_id.id);
+ if (ret) {
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+ set_context_registered(ce);
+ clr_context_destroyed(ce);
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ /*
+ * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements
+ * the wakeref immediately but per function spec usage call this after unlock.
+ */
+ intel_wakeref_put_async(&gt->wakeref);
+ }
+
+ return ret;
}
static void __guc_context_destroy(struct intel_context *ce)
@@ -3378,7 +3528,22 @@ static void deregister_destroyed_contexts(struct intel_guc *guc)
if (!ce)
break;
- guc_lrc_desc_unpin(ce);
+ if (guc_lrc_desc_unpin(ce)) {
+ /*
+ * This means GuC's CT link severed mid-way which could happen
+ * in suspend-resume corner cases. In this case, put the
+ * context back into the destroyed_contexts list which will
+ * get picked up on the next context deregistration event or
+ * purged in a GuC sanitization event (reset/unload/wedged/...).
+ */
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ list_add_tail(&ce->destroyed_link,
+ &guc->submission_state.destroyed_contexts);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+ /* Bail now since the list might never be emptied if h2gs fail */
+ break;
+ }
+
}
}
@@ -3389,6 +3554,17 @@ static void destroyed_worker_func(struct work_struct *w)
struct intel_gt *gt = guc_to_gt(guc);
intel_wakeref_t wakeref;
+ /*
+ * In rare cases we can get here via async context-free fence-signals that
+ * come very late in suspend flow or very early in resume flows. In these
+ * cases, GuC won't be ready but just skipping it here is fine as these
+ * pending-destroy-contexts get destroyed totally at GuC reset time at the
+ * end of suspend.. OR.. this worker can be picked up later on the next
+ * context destruction trigger after resume-completes
+ */
+ if (!intel_guc_is_ready(guc))
+ return;
+
with_intel_gt_pm(gt, wakeref)
deregister_destroyed_contexts(guc);
}
@@ -4153,20 +4329,25 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq,
u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
/* Short circuit function */
- if (prio < I915_PRIORITY_NORMAL ||
- rq->guc_prio == GUC_PRIO_FINI ||
- (rq->guc_prio != GUC_PRIO_INIT &&
- !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
+ if (prio < I915_PRIORITY_NORMAL)
return;
spin_lock(&ce->guc_state.lock);
- if (rq->guc_prio != GUC_PRIO_FINI) {
- if (rq->guc_prio != GUC_PRIO_INIT)
- sub_context_inflight_prio(ce, rq->guc_prio);
- rq->guc_prio = new_guc_prio;
- add_context_inflight_prio(ce, rq->guc_prio);
- update_context_prio(ce);
- }
+
+ if (rq->guc_prio == GUC_PRIO_FINI)
+ goto exit;
+
+ if (!new_guc_prio_higher(rq->guc_prio, new_guc_prio))
+ goto exit;
+
+ if (rq->guc_prio != GUC_PRIO_INIT)
+ sub_context_inflight_prio(ce, rq->guc_prio);
+
+ rq->guc_prio = new_guc_prio;
+ add_context_inflight_prio(ce, rq->guc_prio);
+ update_context_prio(ce);
+
+exit:
spin_unlock(&ce->guc_state.lock);
}
@@ -4382,7 +4563,13 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
if (engine->class == COMPUTE_CLASS)
if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
IS_DG2(engine->i915))
- engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+ engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
+
+ /* Wa_16019325821 */
+ /* Wa_14019159160 */
+ if ((engine->class == COMPUTE_CLASS || engine->class == RENDER_CLASS) &&
+ IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
+ engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
/*
* TODO: GuC supports timeslicing and semaphores as well, but they're
@@ -4393,7 +4580,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
*/
engine->emit_bb_start = gen8_emit_bb_start;
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
engine->emit_bb_start = xehp_emit_bb_start;
}
@@ -4435,7 +4622,7 @@ static void guc_sched_engine_destroy(struct kref *kref)
int intel_guc_submission_setup(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
/*
* The setup relies on several assumptions (e.g. irqs always enabled)
@@ -5194,7 +5381,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
void intel_guc_find_hung_context(struct intel_engine_cs *engine)
{
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
struct intel_context *ce;
struct i915_request *rq;
unsigned long index;
@@ -5256,7 +5443,7 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
struct i915_request *hung_rq,
struct drm_printer *m)
{
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
struct intel_context *ce;
unsigned long index;
unsigned long flags;
@@ -5348,12 +5535,20 @@ static inline void guc_log_context(struct drm_printer *p,
{
drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
- drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
- ce->ring->head,
- ce->lrc_reg_state[CTX_RING_HEAD]);
- drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
- ce->ring->tail,
- ce->lrc_reg_state[CTX_RING_TAIL]);
+ if (intel_context_pin_if_active(ce)) {
+ drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
+ ce->ring->head,
+ ce->lrc_reg_state[CTX_RING_HEAD]);
+ drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
+ ce->ring->tail,
+ ce->lrc_reg_state[CTX_RING_TAIL]);
+ intel_context_unpin(ce);
+ } else {
+ drm_printf(p, "\t\tLRC Head: Internal %u, Memory not pinned\n",
+ ce->ring->head);
+ drm_printf(p, "\t\tLRC Tail: Internal %u, Memory not pinned\n",
+ ce->ring->tail);
+ }
drm_printf(p, "\t\tContext Pin Count: %u\n",
atomic_read(&ce->pin_count));
drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
@@ -5708,7 +5903,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
if (!ve)
return ERR_PTR(-ENOMEM);
- guc = &siblings[0]->gt->uc.guc;
+ guc = gt_to_guc(siblings[0]->gt);
ve->base.i915 = siblings[0]->i915;
ve->base.gt = siblings[0]->gt;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index c57b29cdb1a6..b6df75622d3b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -38,6 +38,8 @@ int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
bool interruptible,
long timeout);
+void intel_guc_submission_flush_work(struct intel_guc *guc);
+
static inline bool intel_guc_submission_is_supported(struct intel_guc *guc)
{
return guc->submission_supported;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index ba9e07fc2b57..b3cbf85c00cb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -6,6 +6,7 @@
#include <linux/types.h>
#include "gt/intel_gt.h"
+#include "gt/intel_rps.h"
#include "intel_guc_reg.h"
#include "intel_huc.h"
#include "intel_huc_print.h"
@@ -384,7 +385,7 @@ int intel_huc_init(struct intel_huc *huc)
if (HAS_ENGINE(gt, GSC0)) {
struct i915_vma *vma;
- vma = intel_guc_allocate_vma(&gt->uc.guc, PXP43_HUC_AUTH_INOUT_SIZE * 2);
+ vma = intel_guc_allocate_vma(gt_to_guc(gt), PXP43_HUC_AUTH_INOUT_SIZE * 2);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
huc_info(huc, "Failed to allocate heci pkt\n");
@@ -426,19 +427,6 @@ void intel_huc_fini(struct intel_huc *huc)
intel_uc_fw_fini(&huc->fw);
}
-void intel_huc_suspend(struct intel_huc *huc)
-{
- if (!intel_uc_fw_is_loadable(&huc->fw))
- return;
-
- /*
- * in the unlikely case that we're suspending before the GSC has
- * completed its loading sequence, just stop waiting. We'll restart
- * on resume.
- */
- delayed_huc_load_complete(huc);
-}
-
static const char *auth_mode_string(struct intel_huc *huc,
enum intel_huc_authentication_type type)
{
@@ -447,17 +435,68 @@ static const char *auth_mode_string(struct intel_huc *huc,
return partial ? "clear media" : "all workloads";
}
+/*
+ * Use a longer timeout for debug builds so that problems can be detected
+ * and analysed. But a shorter timeout for releases so that user's don't
+ * wait forever to find out there is a problem. Note that the only reason
+ * an end user should hit the timeout is in case of extreme thermal throttling.
+ * And a system that is that hot during boot is probably dead anyway!
+ */
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+#define HUC_LOAD_RETRY_LIMIT 20
+#else
+#define HUC_LOAD_RETRY_LIMIT 3
+#endif
+
int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
enum intel_huc_authentication_type type)
{
struct intel_gt *gt = huc_to_gt(huc);
- int ret;
+ struct intel_uncore *uncore = gt->uncore;
+ ktime_t before, after, delta;
+ int ret, count;
+ u64 delta_ms;
+ u32 before_freq;
- ret = __intel_wait_for_register(gt->uncore,
- huc->status[type].reg,
- huc->status[type].mask,
- huc->status[type].value,
- 2, 50, NULL);
+ /*
+ * The KMD requests maximum frequency during driver load, however thermal
+ * throttling can force the frequency down to minimum (although the board
+ * really should never get that hot in real life!). IFWI issues have been
+ * seen to cause sporadic failures to grant the higher frequency. And at
+ * minimum frequency, the authentication time can be in the seconds range.
+ * Note that there is a limit on how long an individual wait_for() can wait.
+ * So wrap it in a loop.
+ */
+ before_freq = intel_rps_read_actual_frequency(&gt->rps);
+ before = ktime_get();
+ for (count = 0; count < HUC_LOAD_RETRY_LIMIT; count++) {
+ ret = __intel_wait_for_register(gt->uncore,
+ huc->status[type].reg,
+ huc->status[type].mask,
+ huc->status[type].value,
+ 2, 1000, NULL);
+ if (!ret)
+ break;
+
+ huc_dbg(huc, "auth still in progress, count = %d, freq = %dMHz, status = 0x%08X\n",
+ count, intel_rps_read_actual_frequency(&gt->rps),
+ huc->status[type].reg.reg);
+ }
+ after = ktime_get();
+ delta = ktime_sub(after, before);
+ delta_ms = ktime_to_ms(delta);
+
+ if (delta_ms > 50) {
+ huc_warn(huc, "excessive auth time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n",
+ delta_ms, huc->status[type].reg.reg, count, ret);
+ huc_warn(huc, "excessive auth time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n",
+ intel_rps_read_actual_frequency(&gt->rps), before_freq,
+ intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt)));
+ } else {
+ huc_dbg(huc, "auth took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n",
+ delta_ms, intel_rps_read_actual_frequency(&gt->rps),
+ before_freq, huc->status[type].reg.reg, count, ret);
+ }
/* mark the load process as complete even if the wait failed */
delayed_huc_load_complete(huc);
@@ -488,7 +527,7 @@ int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type)
{
struct intel_gt *gt = huc_to_gt(huc);
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
int ret;
if (!intel_uc_fw_is_loaded(&huc->fw))
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index ba5cb08e9e7b..d5e441b9e08d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -57,7 +57,6 @@ int intel_huc_sanitize(struct intel_huc *huc);
void intel_huc_init_early(struct intel_huc *huc);
int intel_huc_init(struct intel_huc *huc);
void intel_huc_fini(struct intel_huc *huc);
-void intel_huc_suspend(struct intel_huc *huc);
int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type);
int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
enum intel_huc_authentication_type type);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 3872d309ed31..5b8080ec5315 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -50,10 +50,6 @@ static void uc_expand_default_options(struct intel_uc *uc)
/* Default: enable HuC authentication and GuC submission */
i915->params.enable_guc = ENABLE_GUC_LOAD_HUC | ENABLE_GUC_SUBMISSION;
-
- /* XEHPSDV and PVC do not use HuC */
- if (IS_XEHPSDV(i915) || IS_PONTEVECCHIO(i915))
- i915->params.enable_guc &= ~ENABLE_GUC_LOAD_HUC;
}
/* Reset GuC providing us with fresh state for both GuC and HuC.
@@ -103,7 +99,7 @@ static void __confirm_options(struct intel_uc *uc)
}
if (!intel_uc_supports_guc(uc))
- gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
+ gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
i915->params.enable_guc, "GuC is not supported!");
if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION &&
@@ -637,10 +633,14 @@ void intel_uc_reset_finish(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
+ /*
+ * NB: The wedge code path results in prepare -> prepare -> finish -> finish.
+ * So this function is sometimes called with the in-progress flag not set.
+ */
uc->reset_in_progress = false;
/* Firmware expected to be running when this function is called */
- if (intel_guc_is_fw_running(guc) && intel_uc_uses_guc_submission(uc))
+ if (intel_uc_uses_guc_submission(uc))
intel_guc_submission_reset_finish(guc);
}
@@ -690,6 +690,8 @@ void intel_uc_suspend(struct intel_uc *uc)
return;
}
+ intel_guc_submission_flush_work(guc);
+
with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) {
err = intel_guc_suspend(guc);
if (err)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 756093eaf2ad..ec33ad942115 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -698,12 +698,18 @@ static int check_gsc_manifest(struct intel_gt *gt,
const struct firmware *fw,
struct intel_uc_fw *uc_fw)
{
+ int ret;
+
switch (uc_fw->type) {
case INTEL_UC_FW_TYPE_HUC:
- intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size);
+ ret = intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size);
+ if (ret)
+ return ret;
break;
case INTEL_UC_FW_TYPE_GSC:
- intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size);
+ ret = intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size);
+ if (ret)
+ return ret;
break;
default:
MISSING_CASE(uc_fw->type);
@@ -807,7 +813,7 @@ static int try_firmware_load(struct intel_uc_fw *uc_fw, const struct firmware **
static int check_mtl_huc_guc_compatibility(struct intel_gt *gt,
struct intel_uc_fw_file *huc_selected)
{
- struct intel_uc_fw_file *guc_selected = &gt->uc.guc.fw.file_selected;
+ struct intel_uc_fw_file *guc_selected = &gt_to_guc(gt)->fw.file_selected;
struct intel_uc_fw_ver *huc_ver = &huc_selected->ver;
struct intel_uc_fw_ver *guc_ver = &guc_selected->ver;
bool new_huc, new_guc;
@@ -1209,7 +1215,7 @@ static int uc_fw_rsa_data_create(struct intel_uc_fw *uc_fw)
* since its GGTT offset will be GuC accessible.
*/
GEM_BUG_ON(uc_fw->rsa_size > PAGE_SIZE);
- vma = intel_guc_allocate_vma(&gt->uc.guc, PAGE_SIZE);
+ vma = intel_guc_allocate_vma(gt_to_guc(gt), PAGE_SIZE);
if (IS_ERR(vma))
return PTR_ERR(vma);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 9a431726c8d5..ac7b3aad2222 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -258,6 +258,11 @@ static inline bool intel_uc_fw_is_running(struct intel_uc_fw *uc_fw)
return __intel_uc_fw_status(uc_fw) == INTEL_UC_FIRMWARE_RUNNING;
}
+static inline bool intel_uc_fw_is_in_error(struct intel_uc_fw *uc_fw)
+{
+ return intel_uc_fw_status_to_error(__intel_uc_fw_status(uc_fw)) != 0;
+}
+
static inline bool intel_uc_fw_is_overridden(const struct intel_uc_fw *uc_fw)
{
return uc_fw->user_overridden;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index c900aac85adb..68feb55654f7 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -144,7 +144,7 @@ err:
static int intel_guc_steal_guc_ids(void *arg)
{
struct intel_gt *gt = arg;
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
int ret, sv, context_index = 0;
intel_wakeref_t wakeref;
struct intel_engine_cs *engine;