Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--  drivers/gpu/drm/i915/gt/gen2_engine_cs.c  35
-rw-r--r--  drivers/gpu/drm/i915/gt/gen2_engine_cs.h  6
-rw-r--r--  drivers/gpu/drm/i915/gt/gen6_ppgtt.c  10
-rw-r--r--  drivers/gpu/drm/i915/gt/gen7_renderclear.c  3
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_engine_cs.c  225
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_engine_cs.h  21
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_ppgtt.c  197
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_ppgtt.h  3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_breadcrumbs.c  30
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h  3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context.c  21
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context.h  45
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context_types.h  9
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine.h  36
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_cs.c  303
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c  11
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pm.c  25
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pm.h  1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_regs.h  11
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_types.h  58
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_user.c  60
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_execlists_submission.c  92
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ggtt.c  566
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c  13
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c  20
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gpu_commands.h  6
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gsc.c  28
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gsc.h  9
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt.c  425
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt.h  112
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c  11
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c  39
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h  13
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c  17
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_debugfs.c  9
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_defines.h  11
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c  2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_irq.c  68
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_mcr.c  118
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_mcr.h  3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm.c  78
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm.h  45
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c  33
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_print.h  6
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_regs.h  281
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_requests.c  20
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_sysfs.c  2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c  231
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_types.h  15
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.c  86
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.h  62
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c  273
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_migrate.c  83
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_migrate.h  13
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_mocs.c  144
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ppgtt.c  8
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rc6.c  233
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rc6.h  2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rc6_types.h  2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_region_lmem.c  69
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_renderstate.c  2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_reset.c  312
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_reset.h  7
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_reset_types.h  7
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring.c  33
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring.h  1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring_submission.c  96
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps.c  310
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps.h  8
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps_types.h  6
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sa_media.c  5
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sseu.c  83
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sseu.h  2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c  2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_timeline.c  1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_timeline.h  1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_tlb.c  173
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_tlb.h  29
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_wopcm.c  2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_wopcm.h  3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_workarounds.c  1134
-rw-r--r--  drivers/gpu/drm/i915/gt/mock_engine.c  6
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_context.c  10
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_engine_cs.c  24
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c  122
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_engine_pm.c  3
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_execlists.c  27
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_gt_pm.c  7
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_hangcheck.c  12
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_llc.c  1
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_lrc.c  82
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_migrate.c  70
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_mocs.c  5
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_rc6.c  63
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_reset.c  20
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_rps.c  44
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_slpc.c  72
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_timeline.c  18
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_tlb.c  405
-rw-r--r--  drivers/gpu/drm/i915/gt/shaders/README  6
-rw-r--r--  drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm  2
-rw-r--r--  drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm  2
-rw-r--r--  drivers/gpu/drm/i915/gt/shmem_utils.c  14
-rw-r--r--  drivers/gpu/drm/i915/gt/sysfs_engines.c  78
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  33
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h  26
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h  21
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h  19
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h  15
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h  30
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h  24
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h  135
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c  388
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h  6
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c  429
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h  18
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c  265
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h  40
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c  39
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h  14
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c  231
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h  93
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.c  120
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.h  125
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  160
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c  367
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h  3
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c  208
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h  19
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c  168
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h  57
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c  13
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_log.c  43
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_log.h  8
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_print.h  3
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c  21
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h  5
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c  302
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h  5
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h  4
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c  845
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h  4
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc.c  330
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc.h  39
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c  225
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h  6
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc_print.h  21
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc.c  105
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc.h  3
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c  4
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  612
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h  35
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h  6
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/selftest_guc.c  161
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c  25
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c  11
156 files changed, 9568 insertions, 3992 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
index 1c82caf525c3..8c01fb6d4e7b 100644
--- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
@@ -76,7 +76,7 @@ int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
cmd = MI_FLUSH;
if (mode & EMIT_INVALIDATE) {
cmd |= MI_EXE_FLUSH;
- if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5)
+ if (IS_G4X(rq->i915) || GRAPHICS_VER(rq->i915) == 5)
cmd |= MI_INVALIDATE_ISP;
}
@@ -169,7 +169,7 @@ static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
return cs;
}
-u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+u32 *gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}
@@ -179,7 +179,7 @@ u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
return __gen2_emit_breadcrumb(rq, cs, 8, 8);
}
-/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
+/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
@@ -248,7 +248,7 @@ int i830_emit_bb_start(struct i915_request *rq,
return 0;
}
-int gen3_emit_bb_start(struct i915_request *rq,
+int gen2_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
unsigned int dispatch_flags)
{
@@ -292,32 +292,15 @@ int gen4_emit_bb_start(struct i915_request *rq,
void gen2_irq_enable(struct intel_engine_cs *engine)
{
- struct drm_i915_private *i915 = engine->i915;
-
- i915->irq_mask &= ~engine->irq_enable_mask;
- intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
- ENGINE_POSTING_READ16(engine, RING_IMR);
-}
-
-void gen2_irq_disable(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
-
- i915->irq_mask |= engine->irq_enable_mask;
- intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
-}
-
-void gen3_irq_enable(struct intel_engine_cs *engine)
-{
- engine->i915->irq_mask &= ~engine->irq_enable_mask;
- intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
+ engine->i915->gen2_imr_mask &= ~engine->irq_enable_mask;
+ intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->gen2_imr_mask);
intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}
-void gen3_irq_disable(struct intel_engine_cs *engine)
+void gen2_irq_disable(struct intel_engine_cs *engine)
{
- engine->i915->irq_mask |= engine->irq_enable_mask;
- intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
+ engine->i915->gen2_imr_mask |= engine->irq_enable_mask;
+ intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->gen2_imr_mask);
}
void gen5_irq_enable(struct intel_engine_cs *engine)
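[The hunk above merges the old gen3 IRQ helpers into the gen2 entry points and moves the shadow mask into a dedicated gen2_imr_mask field. For readers unfamiliar with the idiom, here is a minimal, self-contained sketch of the shadowed interrupt-mask-register pattern being renamed; every name is invented for illustration and none of this is the i915 API:

	#include <stdint.h>

	struct fake_dev {
		uint32_t imr_shadow;     /* software copy of the mask register */
		volatile uint32_t *imr;  /* memory-mapped IMR; a set bit disables a source */
	};

	static void fake_irq_enable(struct fake_dev *d, uint32_t engine_bit)
	{
		d->imr_shadow &= ~engine_bit;  /* unmask: clear the engine's bit */
		*d->imr = d->imr_shadow;
		(void)*d->imr;                 /* posting read to flush the write */
	}

	static void fake_irq_disable(struct fake_dev *d, uint32_t engine_bit)
	{
		d->imr_shadow |= engine_bit;   /* mask: set the engine's bit */
		*d->imr = d->imr_shadow;
	}

The shadow copy exists because IMR is write-mostly: keeping the current mask in memory avoids a read-modify-write of the register on every enable/disable.]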
diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.h b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h
index a5cd64a65c9e..7b37560fc356 100644
--- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.h
+++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h
@@ -15,13 +15,13 @@ int gen2_emit_flush(struct i915_request *rq, u32 mode);
int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode);
int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode);
-u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs);
+u32 *gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs);
u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs);
int i830_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
unsigned int dispatch_flags);
-int gen3_emit_bb_start(struct i915_request *rq,
+int gen2_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
unsigned int dispatch_flags);
int gen4_emit_bb_start(struct i915_request *rq,
@@ -30,8 +30,6 @@ int gen4_emit_bb_start(struct i915_request *rq,
void gen2_irq_enable(struct intel_engine_cs *engine);
void gen2_irq_disable(struct intel_engine_cs *engine);
-void gen3_irq_enable(struct intel_engine_cs *engine);
-void gen3_irq_disable(struct intel_engine_cs *engine);
void gen5_irq_enable(struct intel_engine_cs *engine);
void gen5_irq_disable(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index 5aaacc53fa4c..c2bdc133c89a 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -109,7 +109,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 flags)
{
struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
@@ -117,7 +117,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
unsigned int first_entry = vma_res->start / I915_GTT_PAGE_SIZE;
unsigned int act_pt = first_entry / GEN6_PTES;
unsigned int act_pte = first_entry % GEN6_PTES;
- const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
+ const u32 pte_encode = vm->pte_encode(0, pat_index, flags);
struct sgt_dma iter = sgt_dma(vma_res);
gen6_pte_t *vaddr;
@@ -227,7 +227,9 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
vm->scratch[0]->encode =
vm->pte_encode(px_dma(vm->scratch[0]),
- I915_CACHE_NONE, PTE_READ_ONLY);
+ i915_gem_get_pat_index(vm->i915,
+ I915_CACHE_NONE),
+ PTE_READ_ONLY);
vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
if (IS_ERR(vm->scratch[1])) {
@@ -278,7 +280,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
static void pd_vma_bind(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 unused)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
index d38b914d1206..6e89112f68ae 100644
--- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
@@ -399,7 +399,8 @@ static void emit_batch(struct i915_vma * const vma,
batch_add(&cmds, MI_LOAD_REGISTER_IMM(2));
batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_0_GEN7));
batch_add(&cmds, 0xffff0000 |
- ((IS_IVB_GT1(i915) || IS_VALLEYVIEW(i915)) ?
+ (((IS_IVYBRIDGE(i915) && INTEL_INFO(i915)->gt == 1) ||
+ IS_VALLEYVIEW(i915)) ?
HIZ_RAW_STALL_OPT_DISABLE :
0));
batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_1));
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index e1c76e5bfa82..071c1cc45257 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -3,10 +3,12 @@
* Copyright © 2014 Intel Corporation
*/
+#include <drm/drm_print.h>
+
#include "gen8_engine_cs.h"
-#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
+#include "intel_gt.h"
#include "intel_lrc.h"
#include "intel_ring.h"
@@ -39,11 +41,11 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
* On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
* pipe control.
*/
- if (GRAPHICS_VER(rq->engine->i915) == 9)
+ if (GRAPHICS_VER(rq->i915) == 9)
vf_flush_wa = true;
/* WaForGAMHang:kbl */
- if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0))
+ if (IS_KABYLAKE(rq->i915) && IS_GRAPHICS_STEP(rq->i915, 0, STEP_C0))
dc_flush_wa = true;
}
@@ -165,58 +167,155 @@ static u32 preparser_disable(bool state)
return MI_ARB_CHECK | 1 << 8 | state;
}
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
+static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine)
+{
+ switch (engine->id) {
+ case RCS0:
+ return GEN12_CCS_AUX_INV;
+ case BCS0:
+ return GEN12_BCS0_AUX_INV;
+ case VCS0:
+ return GEN12_VD0_AUX_INV;
+ case VCS2:
+ return GEN12_VD2_AUX_INV;
+ case VECS0:
+ return GEN12_VE0_AUX_INV;
+ case CCS0:
+ return GEN12_CCS0_AUX_INV;
+ default:
+ return INVALID_MMIO_REG;
+ }
+}
+
+static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine)
{
- u32 gsi_offset = gt->uncore->gsi_offset;
+ i915_reg_t reg = gen12_get_aux_inv_reg(engine);
+
+ /*
+ * So far platforms supported by i915 having flat ccs do not require
+ * AUX invalidation. Check also whether the engine requires it.
+ */
+ return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915);
+}
+
+u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
+{
+ i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine);
+ u32 gsi_offset = engine->gt->uncore->gsi_offset;
+
+ if (!gen12_needs_ccs_aux_inv(engine))
+ return cs;
*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
*cs++ = AUX_INV;
- *cs++ = MI_NOOP;
+
+ *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
+ MI_SEMAPHORE_REGISTER_POLL |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
+ *cs++ = 0;
+ *cs++ = 0;
return cs;
}
+static int mtl_dummy_pipe_control(struct i915_request *rq)
+{
+ /* Wa_14016712196 */
+ if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
+ IS_DG2(rq->i915)) {
+ u32 *cs;
+
+ /* dummy PIPE_CONTROL + depth flush */
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+ cs = gen12_emit_pipe_control(cs,
+ 0,
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH,
+ LRC_PPHWSP_SCRATCH_ADDR);
+ intel_ring_advance(rq, cs);
+ }
+
+ return 0;
+}
+
int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
struct intel_engine_cs *engine = rq->engine;
- if (mode & EMIT_FLUSH) {
- u32 flags = 0;
+ /*
+ * On Aux CCS platforms the invalidation of the Aux
+ * table requires quiescing memory traffic beforehand
+ */
+ if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
+ u32 bit_group_0 = 0;
+ u32 bit_group_1 = 0;
+ int err;
u32 *cs;
- flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
- flags |= PIPE_CONTROL_FLUSH_L3;
- flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
- flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ err = mtl_dummy_pipe_control(rq);
+ if (err)
+ return err;
+
+ bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+
+ /*
+ * When required, in MTL and beyond platforms we
+ * need to set the CCS_FLUSH bit in the pipe control
+ */
+ if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
+ bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
+
+ /*
+ * L3 fabric flush is needed for AUX CCS invalidation
+ * which happens as part of pipe-control so we can
+ * ignore PIPE_CONTROL_FLUSH_L3. Also PIPE_CONTROL_FLUSH_L3
+ * deals with Protected Memory which is not needed for
+ * AUX CCS invalidation and lead to unwanted side effects.
+ */
+ if ((mode & EMIT_FLUSH) &&
+ GRAPHICS_VER_FULL(rq->i915) < IP_VER(12, 70))
+ bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
+
+ bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
/* Wa_1409600907:tgl,adl-p */
- flags |= PIPE_CONTROL_DEPTH_STALL;
- flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_FLUSH_ENABLE;
+ bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
+ bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+ bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_STORE_DATA_INDEX;
- flags |= PIPE_CONTROL_QW_WRITE;
+ bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX;
+ bit_group_1 |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_CS_STALL;
+ bit_group_1 |= PIPE_CONTROL_CS_STALL;
if (!HAS_3D_PIPELINE(engine->i915))
- flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
+ bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
else if (engine->class == COMPUTE_CLASS)
- flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
+ bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen12_emit_pipe_control(cs,
- PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
- flags, LRC_PPHWSP_SCRATCH_ADDR);
+ cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1,
+ LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(rq, cs);
}
if (mode & EMIT_INVALIDATE) {
u32 flags = 0;
u32 *cs, count;
+ int err;
+
+ err = mtl_dummy_pipe_control(rq);
+ if (err)
+ return err;
flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TLB_INVALIDATE;
@@ -236,10 +335,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
else if (engine->class == COMPUTE_CLASS)
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
- if (!HAS_FLAT_CCS(rq->engine->i915))
- count = 8 + 4;
- else
- count = 8;
+ count = 8;
+ if (gen12_needs_ccs_aux_inv(rq->engine))
+ count += 8;
cs = intel_ring_begin(rq, count);
if (IS_ERR(cs))
@@ -254,11 +352,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
- if (!HAS_FLAT_CCS(rq->engine->i915)) {
- /* hsdes: 1809175790 */
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_GFX_CCS_AUX_NV);
- }
+ cs = gen12_emit_aux_table_inv(engine, cs);
*cs++ = preparser_disable(false);
intel_ring_advance(rq, cs);
@@ -269,21 +363,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
- intel_engine_mask_t aux_inv = 0;
- u32 cmd, *cs;
+ u32 cmd = 4;
+ u32 *cs;
- cmd = 4;
if (mode & EMIT_INVALIDATE) {
cmd += 2;
- if (!HAS_FLAT_CCS(rq->engine->i915) &&
- (rq->engine->class == VIDEO_DECODE_CLASS ||
- rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
- aux_inv = rq->engine->mask &
- ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
- if (aux_inv)
- cmd += 4;
- }
+ if (gen12_needs_ccs_aux_inv(rq->engine))
+ cmd += 8;
}
cs = intel_ring_begin(rq, cmd);
@@ -307,6 +394,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
cmd |= MI_INVALIDATE_TLB;
if (rq->engine->class == VIDEO_DECODE_CLASS)
cmd |= MI_INVALIDATE_BSD;
+
+ if (gen12_needs_ccs_aux_inv(rq->engine) &&
+ rq->engine->class == COPY_ENGINE_CLASS)
+ cmd |= MI_FLUSH_DW_CCS;
}
*cs++ = cmd;
@@ -314,14 +405,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
*cs++ = 0; /* upper addr */
*cs++ = 0; /* value */
- if (aux_inv) { /* hsdes: 1809175790 */
- if (rq->engine->class == VIDEO_DECODE_CLASS)
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_VD0_AUX_NV);
- else
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_VE0_AUX_NV);
- }
+ cs = gen12_emit_aux_table_inv(rq->engine, cs);
if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(false);
@@ -658,21 +742,25 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
}
/* Wa_14014475959:dg2 */
-#define CCS_SEMAPHORE_PPHWSP_OFFSET 0x540
-static u32 ccs_semaphore_offset(struct i915_request *rq)
+/* Wa_16019325821 */
+/* Wa_14019159160 */
+#define HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET 0x540
+static u32 hold_switchout_semaphore_offset(struct i915_request *rq)
{
return i915_ggtt_offset(rq->context->state) +
- (LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
+ (LRC_PPHWSP_PN * PAGE_SIZE) + HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET;
}
/* Wa_14014475959:dg2 */
-static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
+/* Wa_16019325821 */
+/* Wa_14019159160 */
+static u32 *hold_switchout_emit_wa_busywait(struct i915_request *rq, u32 *cs)
{
int i;
*cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
MI_ATOMIC_MOVE;
- *cs++ = ccs_semaphore_offset(rq);
+ *cs++ = hold_switchout_semaphore_offset(rq);
*cs++ = 0;
*cs++ = 1;
@@ -688,7 +776,7 @@ static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_EQ_SDD;
*cs++ = 0;
- *cs++ = ccs_semaphore_offset(rq);
+ *cs++ = hold_switchout_semaphore_offset(rq);
*cs++ = 0;
return cs;
@@ -705,8 +793,10 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
cs = gen12_emit_preempt_busywait(rq, cs);
/* Wa_14014475959:dg2 */
- if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
- cs = ccs_emit_wa_busywait(rq, cs);
+ /* Wa_16019325821 */
+ /* Wa_14019159160 */
+ if (intel_engine_uses_wa_hold_switchout(rq->engine))
+ cs = hold_switchout_emit_wa_busywait(rq, cs);
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
@@ -723,21 +813,30 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
- struct drm_i915_private *i915 = rq->engine->i915;
+ struct drm_i915_private *i915 = rq->i915;
+ struct intel_gt *gt = rq->engine->gt;
u32 flags = (PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TLB_INVALIDATE |
PIPE_CONTROL_TILE_CACHE_FLUSH |
- PIPE_CONTROL_FLUSH_L3 |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
- if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(rq->i915) < IP_VER(12, 70))
+ flags |= PIPE_CONTROL_FLUSH_L3;
+
+ /* Wa_14016712196 */
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
+ /* dummy PIPE_CONTROL + depth flush */
+ cs = gen12_emit_pipe_control(cs, 0,
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
+
+ if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 55))
/* Wa_1409600907 */
flags |= PIPE_CONTROL_DEPTH_STALL;
- if (!HAS_3D_PIPELINE(rq->engine->i915))
+ if (!HAS_3D_PIPELINE(rq->i915))
flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
else if (rq->engine->class == COMPUTE_CLASS)
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
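[Note how the rewritten gen12_emit_aux_table_inv above pairs the MI_LOAD_REGISTER_IMM write of AUX_INV with an MI_SEMAPHORE_WAIT in register-poll mode, so the command streamer stalls until the invalidation bit self-clears instead of racing ahead. As a rough CPU-side analogue of that write-then-poll-until-clear handshake (hypothetical register and bit, not the actual command-stream encoding):

	#include <stdint.h>

	#define FAKE_AUX_INV_BIT (1u << 0)

	static void fake_aux_invalidate(volatile uint32_t *fake_aux_inv_reg)
	{
		*fake_aux_inv_reg = FAKE_AUX_INV_BIT;        /* kick the invalidation */
		while (*fake_aux_inv_reg & FAKE_AUX_INV_BIT)
			;                                    /* hardware clears the bit when done */
	}
]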
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
index 655e5c00ddc2..867ba697aceb 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
@@ -13,6 +13,7 @@
#include "intel_gt_regs.h"
#include "intel_gpu_commands.h"
+struct intel_engine_cs;
struct intel_gt;
struct i915_request;
@@ -46,28 +47,32 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg);
+u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs);
static inline u32 *
-__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+__gen8_emit_pipe_control(u32 *batch, u32 bit_group_0,
+ u32 bit_group_1, u32 offset)
{
memset(batch, 0, 6 * sizeof(u32));
- batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0;
- batch[1] = flags1;
+ batch[0] = GFX_OP_PIPE_CONTROL(6) | bit_group_0;
+ batch[1] = bit_group_1;
batch[2] = offset;
return batch + 6;
}
-static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+static inline u32 *gen8_emit_pipe_control(u32 *batch,
+ u32 bit_group_1, u32 offset)
{
- return __gen8_emit_pipe_control(batch, 0, flags, offset);
+ return __gen8_emit_pipe_control(batch, 0, bit_group_1, offset);
}
-static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 bit_group_0,
+ u32 bit_group_1, u32 offset)
{
- return __gen8_emit_pipe_control(batch, flags0, flags1, offset);
+ return __gen8_emit_pipe_control(batch, bit_group_0,
+ bit_group_1, offset);
}
static inline u32 *
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4daaa6f55668..398d60a66410 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -5,6 +5,7 @@
#include <linux/log2.h>
+#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "gen8_ppgtt.h"
@@ -29,7 +30,7 @@ static u64 gen8_pde_encode(const dma_addr_t addr,
}
static u64 gen8_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
@@ -37,10 +38,12 @@ static u64 gen8_pte_encode(dma_addr_t addr,
if (unlikely(flags & PTE_READ_ONLY))
pte &= ~GEN8_PAGE_RW;
- if (flags & PTE_LM)
- pte |= GEN12_PPGTT_PTE_LM;
-
- switch (level) {
+ /*
+ * For pre-gen12 platforms pat_index is the same as enum
+ * i915_cache_level, so the switch-case here is still valid.
+ * See translation table defined by LEGACY_CACHELEVEL.
+ */
+ switch (pat_index) {
case I915_CACHE_NONE:
pte |= PPAT_UNCACHED;
break;
@@ -55,6 +58,33 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
}
+static u64 gen12_pte_encode(dma_addr_t addr,
+ unsigned int pat_index,
+ u32 flags)
+{
+ gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+ if (unlikely(flags & PTE_READ_ONLY))
+ pte &= ~GEN8_PAGE_RW;
+
+ if (flags & PTE_LM)
+ pte |= GEN12_PPGTT_PTE_LM;
+
+ if (pat_index & BIT(0))
+ pte |= GEN12_PPGTT_PTE_PAT0;
+
+ if (pat_index & BIT(1))
+ pte |= GEN12_PPGTT_PTE_PAT1;
+
+ if (pat_index & BIT(2))
+ pte |= GEN12_PPGTT_PTE_PAT2;
+
+ if (pat_index & BIT(3))
+ pte |= MTL_PPGTT_PTE_PAT3;
+
+ return pte;
+}
+
static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -193,6 +223,9 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+ if (vm->rsvd.obj)
+ i915_gem_object_put(vm->rsvd.obj);
+
if (intel_vgpu_active(vm->i915))
gen8_ppgtt_notify_vgt(ppgtt, false);
@@ -213,9 +246,9 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
len = gen8_pd_range(start, end, lvl--, &idx);
- DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
- __func__, vm, lvl + 1, start, end,
- idx, len, atomic_read(px_used(pd)));
+ GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+ __func__, vm, lvl + 1, start, end,
+ idx, len, atomic_read(px_used(pd)));
GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
do {
@@ -223,8 +256,8 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
gen8_pd_contains(start, end, lvl)) {
- DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
- __func__, vm, lvl + 1, idx, start, end);
+ GTT_TRACE("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
+ __func__, vm, lvl + 1, idx, start, end);
clear_pd_entry(pd, idx, scratch);
__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
start += (u64)I915_PDES << gen8_pd_shift(lvl);
@@ -241,10 +274,10 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
u64 *vaddr;
count = gen8_pt_count(start, end);
- DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
- __func__, vm, lvl, start, end,
- gen8_pd_index(start, 0), count,
- atomic_read(&pt->used));
+ GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
+ __func__, vm, lvl, start, end,
+ gen8_pd_index(start, 0), count,
+ atomic_read(&pt->used));
GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
num_ptes = count;
@@ -296,9 +329,9 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
len = gen8_pd_range(*start, end, lvl--, &idx);
- DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
- __func__, vm, lvl + 1, *start, end,
- idx, len, atomic_read(px_used(pd)));
+ GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+ __func__, vm, lvl + 1, *start, end,
+ idx, len, atomic_read(px_used(pd)));
GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
spin_lock(&pd->lock);
@@ -309,8 +342,8 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
if (!pt) {
spin_unlock(&pd->lock);
- DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
- __func__, vm, lvl + 1, idx);
+ GTT_TRACE("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
+ __func__, vm, lvl + 1, idx);
pt = stash->pt[!!lvl];
__i915_gem_object_pin_pages(pt->base);
@@ -340,10 +373,10 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
} else {
unsigned int count = gen8_pt_count(*start, end);
- DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
- __func__, vm, lvl, *start, end,
- gen8_pd_index(*start, 0), count,
- atomic_read(&pt->used));
+ GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
+ __func__, vm, lvl, *start, end,
+ gen8_pd_index(*start, 0), count,
+ atomic_read(&pt->used));
atomic_add(count, &pt->used);
/* All other pdes may be simultaneously removed */
@@ -423,11 +456,11 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
struct i915_page_directory *pdp,
struct sgt_dma *iter,
u64 idx,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 flags)
{
struct i915_page_directory *pd;
- const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+ const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, pat_index, flags);
gen8_pte_t *vaddr;
pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
@@ -467,13 +500,13 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
}
static void
-xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
- struct i915_vma_resource *vma_res,
- struct sgt_dma *iter,
- enum i915_cache_level cache_level,
- u32 flags)
+xehp_ppgtt_insert_huge(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res,
+ struct sgt_dma *iter,
+ unsigned int pat_index,
+ u32 flags)
{
- const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
+ const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma_res->start;
u64 end = start + vma_res->vma_size;
@@ -570,6 +603,7 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
}
} while (rem >= page_size && index < max);
+ drm_clflush_virt_range(vaddr, PAGE_SIZE);
vma_res->page_sizes_gtt |= page_size;
} while (iter->sg && sg_dma_len(iter->sg));
}
@@ -577,10 +611,10 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
struct sgt_dma *iter,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 flags)
{
- const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+ const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma_res->start;
@@ -700,17 +734,17 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
static void gen8_ppgtt_insert(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 flags)
{
struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
struct sgt_dma iter = sgt_dma(vma_res);
if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
- if (HAS_64K_PAGES(vm->i915))
- xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
+ if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 55))
+ xehp_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
else
- gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
+ gen8_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
} else {
u64 idx = vma_res->start >> GEN8_PTE_SHIFT;
@@ -719,7 +753,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
gen8_pdp_for_page_index(vm, idx);
idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
- cache_level, flags);
+ pat_index, flags);
} while (idx);
vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
@@ -729,7 +763,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
u64 idx = offset >> GEN8_PTE_SHIFT;
@@ -743,15 +777,15 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
GEM_BUG_ON(pt->is_compact);
vaddr = px_vaddr(pt);
- vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
+ vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, pat_index, flags);
drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
}
-static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
- dma_addr_t addr,
- u64 offset,
- enum i915_cache_level level,
- u32 flags)
+static void xehp_ppgtt_insert_entry_lm(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ unsigned int pat_index,
+ u32 flags)
{
u64 idx = offset >> GEN8_PTE_SHIFT;
struct i915_page_directory * const pdp =
@@ -773,20 +807,20 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
}
vaddr = px_vaddr(pt);
- vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+ vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, pat_index, flags);
}
-static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
- dma_addr_t addr,
- u64 offset,
- enum i915_cache_level level,
- u32 flags)
+static void xehp_ppgtt_insert_entry(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ unsigned int pat_index,
+ u32 flags)
{
if (flags & PTE_LM)
- return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
- level, flags);
+ return xehp_ppgtt_insert_entry_lm(vm, addr, offset,
+ pat_index, flags);
- return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags);
+ return gen8_ppgtt_insert_entry(vm, addr, offset, pat_index, flags);
}
static int gen8_init_scratch(struct i915_address_space *vm)
@@ -820,8 +854,10 @@ static int gen8_init_scratch(struct i915_address_space *vm)
pte_flags |= PTE_LM;
vm->scratch[0]->encode =
- gen8_pte_encode(px_dma(vm->scratch[0]),
- I915_CACHE_NONE, pte_flags);
+ vm->pte_encode(px_dma(vm->scratch[0]),
+ i915_gem_get_pat_index(vm->i915,
+ I915_CACHE_NONE),
+ pte_flags);
for (i = 1; i <= vm->top; i++) {
struct drm_i915_gem_object *obj;
@@ -918,6 +954,44 @@ err_pd:
return ERR_PTR(err);
}
+static int gen8_init_rsvd(struct i915_address_space *vm)
+{
+ struct drm_i915_private *i915 = vm->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int ret;
+
+ if (!intel_gt_needs_wa_16018031267(vm->gt))
+ return 0;
+
+ /* The memory will be used only by GPU. */
+ obj = i915_gem_object_create_lmem(i915, PAGE_SIZE,
+ I915_BO_ALLOC_VOLATILE |
+ I915_BO_ALLOC_GPU_ONLY);
+ if (IS_ERR(obj))
+ obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto unref;
+ }
+
+ ret = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+ if (ret)
+ goto unref;
+
+ vm->rsvd.vma = i915_vma_make_unshrinkable(vma);
+ vm->rsvd.obj = obj;
+ vm->total -= vma->node.size;
+ return 0;
+unref:
+ i915_gem_object_put(obj);
+ return ret;
+}
+
/*
* GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
* with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -963,12 +1037,15 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
*/
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
- ppgtt->vm.pte_encode = gen8_pte_encode;
+ if (GRAPHICS_VER(gt->i915) >= 12)
+ ppgtt->vm.pte_encode = gen12_pte_encode;
+ else
+ ppgtt->vm.pte_encode = gen8_pte_encode;
ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
ppgtt->vm.insert_entries = gen8_ppgtt_insert;
if (HAS_64K_PAGES(gt->i915))
- ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
+ ppgtt->vm.insert_page = xehp_ppgtt_insert_entry;
else
ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
@@ -996,6 +1073,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
if (intel_vgpu_active(gt->i915))
gen8_ppgtt_notify_vgt(ppgtt, true);
+ err = gen8_init_rsvd(&ppgtt->vm);
+ if (err)
+ goto err_put;
+
return ppgtt;
err_put:
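[gen12_pte_encode above stops interpreting a cache-level enum and instead scatters the raw pat_index bits into the PAT bit positions of the PTE, which are not contiguous in the entry. A compact sketch of that bit-scatter, with placeholder bit positions chosen purely for illustration (the real GEN12/MTL positions differ):

	#include <assert.h>
	#include <stdint.h>

	#define FAKE_BIT(n)   (1ull << (n))
	#define FAKE_PTE_PAT0 FAKE_BIT(3)
	#define FAKE_PTE_PAT1 FAKE_BIT(4)
	#define FAKE_PTE_PAT2 FAKE_BIT(7)
	#define FAKE_PTE_PAT3 FAKE_BIT(62)

	static uint64_t fake_encode_pat(uint64_t pte, unsigned int pat_index)
	{
		if (pat_index & FAKE_BIT(0)) pte |= FAKE_PTE_PAT0;
		if (pat_index & FAKE_BIT(1)) pte |= FAKE_PTE_PAT1;
		if (pat_index & FAKE_BIT(2)) pte |= FAKE_PTE_PAT2;
		if (pat_index & FAKE_BIT(3)) pte |= FAKE_PTE_PAT3;
		return pte;
	}

	int main(void)
	{
		/* pat_index 0b0101 sets exactly PAT0 and PAT2 */
		assert(fake_encode_pat(0, 0x5) == (FAKE_PTE_PAT0 | FAKE_PTE_PAT2));
		return 0;
	}
]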
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
index f541d19264b4..19c635441642 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
@@ -10,13 +10,12 @@
struct i915_address_space;
struct intel_gt;
-enum i915_cache_level;
struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
unsigned long lmem_pt_obj_flags);
u64 gen8_ggtt_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags);
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index ecc990ec1b95..bf6117d5fc57 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -8,6 +8,8 @@
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>
+#include <drm/drm_print.h>
+
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
@@ -28,11 +30,14 @@ static void irq_disable(struct intel_breadcrumbs *b)
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
+ intel_wakeref_t wakeref;
+
/*
* Since we are waiting on a request, the GPU should be busy
* and should have its own rpm reference.
*/
- if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
+ wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt);
+ if (GEM_WARN_ON(!wakeref))
return;
/*
@@ -41,7 +46,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
* which we can add a new waiter and avoid the cost of re-enabling
* the irq.
*/
- WRITE_ONCE(b->irq_armed, true);
+ WRITE_ONCE(b->irq_armed, wakeref);
/* Requests may have completed before we could enable the interrupt. */
if (!b->irq_enabled++ && b->irq_enable(b))
@@ -61,12 +66,14 @@ static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
+ intel_wakeref_t wakeref = b->irq_armed;
+
GEM_BUG_ON(!b->irq_enabled);
if (!--b->irq_enabled)
b->irq_disable(b);
- WRITE_ONCE(b->irq_armed, false);
- intel_gt_pm_put_async(b->irq_engine->gt);
+ WRITE_ONCE(b->irq_armed, NULL);
+ intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
}
static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
@@ -258,8 +265,13 @@ static void signal_irq_work(struct irq_work *work)
i915_request_put(rq);
}
+ /* Lazy irq enabling after HW submission */
if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
intel_breadcrumbs_arm_irq(b);
+
+ /* And confirm that we still want irqs enabled before we yield */
+ if (READ_ONCE(b->irq_armed) && !atomic_read(&b->active))
+ intel_breadcrumbs_disarm_irq(b);
}
struct intel_breadcrumbs *
@@ -310,13 +322,7 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
return;
/* Kick the work once more to drain the signalers, and disarm the irq */
- irq_work_sync(&b->irq_work);
- while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
- local_irq_disable();
- signal_irq_work(&b->irq_work);
- local_irq_enable();
- cond_resched();
- }
+ irq_work_queue(&b->irq_work);
}
void intel_breadcrumbs_free(struct kref *kref)
@@ -399,7 +405,7 @@ static void insert_breadcrumb(struct i915_request *rq)
* the request as it may have completed and raised the interrupt as
* we were attaching it into the lists.
*/
- if (!b->irq_armed || __i915_request_is_complete(rq))
+ if (!READ_ONCE(b->irq_armed) || __i915_request_is_complete(rq))
irq_work_queue(&b->irq_work);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
index 72dfd3748c4c..bdf09fd67b6e 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -13,6 +13,7 @@
#include <linux/types.h>
#include "intel_engine_types.h"
+#include "intel_wakeref.h"
/*
* Rather than have every client wait upon all user interrupts,
@@ -43,7 +44,7 @@ struct intel_breadcrumbs {
spinlock_t irq_lock; /* protects the interrupt from hardirq context */
struct irq_work irq_work; /* for use from inside irq_lock */
unsigned int irq_enabled;
- bool irq_armed;
+ intel_wakeref_t irq_armed;
/* Not all breadcrumbs are attached to physical HW */
intel_engine_mask_t engine_mask;
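[The type change above turns irq_armed from a plain flag into the wakeref cookie itself: the reference obtained when arming is the exact value handed back to the asynchronous put when disarming, and a non-zero cookie doubles as the armed test. A minimal sketch of that cookie-carrying pattern, with all names invented:

	#include <stdint.h>

	typedef uintptr_t fake_wakeref_t;   /* non-zero while a reference is held */

	struct fake_breadcrumbs {
		fake_wakeref_t irq_armed;   /* doubles as armed-flag and cookie */
	};

	static fake_wakeref_t fake_pm_get_if_awake(void) { return 1; } /* stub */
	static void fake_pm_put_async(fake_wakeref_t wf) { (void)wf; } /* stub */

	static void fake_arm(struct fake_breadcrumbs *b)
	{
		fake_wakeref_t wf = fake_pm_get_if_awake();
		if (!wf)
			return;          /* device asleep: nothing to arm */
		b->irq_armed = wf;       /* remember the cookie we must return */
	}

	static void fake_disarm(struct fake_breadcrumbs *b)
	{
		fake_wakeref_t wf = b->irq_armed;
		b->irq_armed = 0;
		fake_pm_put_async(wf);   /* release exactly the reference we took */
	}
]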
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 2aa63ec521b8..b1b8695ba7c9 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -6,6 +6,7 @@
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"
+#include "i915_drm_client.h"
#include "i915_drv.h"
#include "i915_trace.h"
@@ -26,6 +27,8 @@ static void rcu_context_free(struct rcu_head *rcu)
struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);
trace_intel_context_free(ce);
+ if (intel_context_has_own_state(ce))
+ fput(ce->default_state);
kmem_cache_free(slab_ce, ce);
}
@@ -50,6 +53,7 @@ intel_context_create(struct intel_engine_cs *engine)
int intel_context_alloc_state(struct intel_context *ce)
{
+ struct i915_gem_context *ctx;
int err = 0;
if (mutex_lock_interruptible(&ce->pin_mutex))
@@ -66,6 +70,18 @@ int intel_context_alloc_state(struct intel_context *ce)
goto unlock;
set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+
+ rcu_read_lock();
+ ctx = rcu_dereference(ce->gem_context);
+ if (ctx && !kref_get_unless_zero(&ctx->ref))
+ ctx = NULL;
+ rcu_read_unlock();
+ if (ctx) {
+ if (ctx->client)
+ i915_drm_client_add_context_objects(ctx->client,
+ ce);
+ i915_gem_context_put(ctx);
+ }
}
unlock:
@@ -578,10 +594,13 @@ void intel_context_bind_parent_child(struct intel_context *parent,
child->parallel.parent = parent;
}
-u64 intel_context_get_total_runtime_ns(const struct intel_context *ce)
+u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
{
u64 total, active;
+ if (ce->ops->update_stats)
+ ce->ops->update_stats(ce);
+
total = ce->stats.runtime.total;
if (ce->ops->flags & COPS_RUNTIME_CYCLES)
total *= ce->engine->gt->clock_period_ns;
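[With COPS_RUNTIME_CYCLES set, the total accumulated above is kept in GT timestamp cycles and converted to nanoseconds by the single multiply shown: for example, assuming a clock_period_ns of 52 (roughly a 19.2 MHz timestamp clock), a stored total of 1,000,000 cycles reads back as 52,000,000 ns, i.e. 52 ms. The new ops->update_stats hook simply lets the submission backend refresh those counters before the conversion runs, which is also why the context argument can no longer be const.]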
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 0a8d553da3f4..9f523999acd1 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -14,6 +14,7 @@
#include "i915_drv.h"
#include "intel_context_types.h"
#include "intel_engine_types.h"
+#include "intel_gt_pm.h"
#include "intel_ring_types.h"
#include "intel_timeline_types.h"
#include "i915_trace.h"
@@ -96,7 +97,7 @@ void intel_context_bind_parent_child(struct intel_context *parent,
/**
* intel_context_lock_pinned - Stablises the 'pinned' status of the HW context
- * @ce - the context
+ * @ce: the context
*
* Acquire a lock on the pinned status of the HW context, such that the context
* can neither be bound to the GPU or unbound whilst the lock is held, i.e.
@@ -110,7 +111,7 @@ static inline int intel_context_lock_pinned(struct intel_context *ce)
/**
* intel_context_is_pinned - Reports the 'pinned' status
- * @ce - the context
+ * @ce: the context
*
* While in use by the GPU, the context, along with its ring and page
* tables is pinned into memory and the GTT.
@@ -132,7 +133,7 @@ static inline void intel_context_cancel_request(struct intel_context *ce,
/**
* intel_context_unlock_pinned - Releases the earlier locking of 'pinned' status
- * @ce - the context
+ * @ce: the context
*
* Releases the lock earlier acquired by intel_context_unlock_pinned().
*/
@@ -207,8 +208,11 @@ void intel_context_exit_engine(struct intel_context *ce);
static inline void intel_context_enter(struct intel_context *ce)
{
lockdep_assert_held(&ce->timeline->mutex);
- if (!ce->active_count++)
- ce->ops->enter(ce);
+ if (ce->active_count++)
+ return;
+
+ ce->ops->enter(ce);
+ ce->wakeref = intel_gt_pm_get(ce->vm->gt);
}
static inline void intel_context_mark_active(struct intel_context *ce)
@@ -222,8 +226,11 @@ static inline void intel_context_exit(struct intel_context *ce)
{
lockdep_assert_held(&ce->timeline->mutex);
GEM_BUG_ON(!ce->active_count);
- if (!--ce->active_count)
- ce->ops->exit(ce);
+ if (--ce->active_count)
+ return;
+
+ intel_gt_pm_put_async(ce->vm->gt, ce->wakeref);
+ ce->ops->exit(ce);
}
static inline struct intel_context *intel_context_get(struct intel_context *ce)
@@ -368,7 +375,29 @@ intel_context_clear_nopreempt(struct intel_context *ce)
clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
}
-u64 intel_context_get_total_runtime_ns(const struct intel_context *ce);
+#if IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API)
+static inline bool intel_context_has_own_state(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_OWN_STATE, &ce->flags);
+}
+
+static inline bool intel_context_set_own_state(struct intel_context *ce)
+{
+ return test_and_set_bit(CONTEXT_OWN_STATE, &ce->flags);
+}
+#else
+static inline bool intel_context_has_own_state(const struct intel_context *ce)
+{
+ return false;
+}
+
+static inline bool intel_context_set_own_state(struct intel_context *ce)
+{
+ return true;
+}
+#endif
+
+u64 intel_context_get_total_runtime_ns(struct intel_context *ce);
u64 intel_context_get_avg_runtime_ns(struct intel_context *ce);
static inline u64 intel_context_clock(void)
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e36670f2e626..10070ee4d74c 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -14,9 +14,9 @@
#include "i915_active_types.h"
#include "i915_sw_fence.h"
-#include "i915_utils.h"
#include "intel_engine_types.h"
#include "intel_sseu.h"
+#include "intel_wakeref.h"
#include "uc/intel_guc_fwif.h"
@@ -58,6 +58,8 @@ struct intel_context_ops {
void (*sched_disable)(struct intel_context *ce);
+ void (*update_stats)(struct intel_context *ce);
+
void (*reset)(struct intel_context *ce);
void (*destroy)(struct kref *kref);
@@ -96,6 +98,8 @@ struct intel_context {
struct i915_address_space *vm;
struct i915_gem_context __rcu *gem_context;
+ struct file *default_state;
+
/*
* @signal_lock protects the list of requests that need signaling,
* @signals. While there are any requests that need signaling,
@@ -110,6 +114,7 @@ struct intel_context {
u32 ring_size;
struct intel_ring *ring;
struct intel_timeline *timeline;
+ intel_wakeref_t wakeref;
unsigned long flags;
#define CONTEXT_BARRIER_BIT 0
@@ -126,6 +131,8 @@ struct intel_context {
#define CONTEXT_PERMA_PIN 11
#define CONTEXT_IS_PARKING 12
#define CONTEXT_EXITING 13
+#define CONTEXT_LOW_LATENCY 14
+#define CONTEXT_OWN_STATE 15
struct {
u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index b58c30ac8ef0..f6a98cf1e5a5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -79,6 +79,29 @@ struct lock_class_key;
#define ENGINE_WRITE(...) __ENGINE_WRITE_OP(write, __VA_ARGS__)
#define ENGINE_WRITE_FW(...) __ENGINE_WRITE_OP(write_fw, __VA_ARGS__)
+#define __HAS_ENGINE(engine_mask, id) ((engine_mask) & BIT(id))
+#define HAS_ENGINE(gt, id) __HAS_ENGINE((gt)->info.engine_mask, id)
+
+#define __ENGINE_INSTANCES_MASK(mask, first, count) ({ \
+ unsigned int first__ = (first); \
+ unsigned int count__ = (count); \
+ ((mask) & GENMASK(first__ + count__ - 1, first__)) >> first__; \
+})
+
+#define ENGINE_INSTANCES_MASK(gt, first, count) \
+ __ENGINE_INSTANCES_MASK((gt)->info.engine_mask, first, count)
+
+#define RCS_MASK(gt) \
+ ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS)
+#define BCS_MASK(gt) \
+ ENGINE_INSTANCES_MASK(gt, BCS0, I915_MAX_BCS)
+#define VDBOX_MASK(gt) \
+ ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS)
+#define VEBOX_MASK(gt) \
+ ENGINE_INSTANCES_MASK(gt, VECS0, I915_MAX_VECS)
+#define CCS_MASK(gt) \
+ ENGINE_INSTANCES_MASK(gt, CCS0, I915_MAX_CCS)
+
#define GEN6_RING_FAULT_REG_READ(engine__) \
intel_uncore_read((engine__)->uncore, RING_FAULT_REG(engine__))
@@ -126,9 +149,6 @@ execlists_active(const struct intel_engine_execlists *execlists)
return active;
}
-struct i915_request *
-execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
-
static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
@@ -170,6 +190,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
#define I915_GEM_HWS_SEQNO 0x40
#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32))
#define I915_GEM_HWS_MIGRATE (0x42 * sizeof(u32))
+#define I915_GEM_HWS_GGTT_BIND 0x46
+#define I915_GEM_HWS_GGTT_BIND_ADDR (I915_GEM_HWS_GGTT_BIND * sizeof(u32))
#define I915_GEM_HWS_PXP 0x60
#define I915_GEM_HWS_PXP_ADDR (I915_GEM_HWS_PXP * sizeof(u32))
#define I915_GEM_HWS_GSC 0x62
@@ -356,4 +378,12 @@ u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value);
u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value);
u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value);
+#define rb_to_uabi_engine(rb) \
+ rb_entry_safe(rb, struct intel_engine_cs, uabi_node)
+
+#define for_each_uabi_engine(engine__, i915__) \
+ for ((engine__) = rb_to_uabi_engine(rb_first(&(i915__)->uabi_engines));\
+ (engine__); \
+ (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node)))
+
#endif /* _INTEL_RINGBUFFER_H_ */
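[The mask macros moved into this header extract a per-class instance bitmap from the GT's global engine mask. A worked example of the __ENGINE_INSTANCES_MASK arithmetic under assumed bit assignments (VCS0 at bit 4, up to four VCS instances):

	#include <assert.h>

	#define FAKE_GENMASK(h, l) ((~0u << (l)) & (~0u >> (31 - (h))))

	static unsigned int fake_instances_mask(unsigned int mask,
						unsigned int first,
						unsigned int count)
	{
		return (mask & FAKE_GENMASK(first + count - 1, first)) >> first;
	}

	int main(void)
	{
		/* engines present at bits 4 and 5: VCS0 and VCS1 in this example */
		unsigned int engine_mask = 0x30;
		assert(fake_instances_mask(engine_mask, 4, 4) == 0x3);
		return 0;
	}
]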
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index d4e29da74612..b721bbd23356 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -9,6 +9,7 @@
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_internal.h"
+#include "gt/intel_gt_print.h"
#include "gt/intel_gt_regs.h"
#include "i915_cmd_parser.h"
@@ -46,7 +47,7 @@
#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
#define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE)
-#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE)
+#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
#define MAX_MMIO_BASES 3
struct engine_info {
@@ -307,7 +308,7 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
/*
* There is a discrepancy here between the size reported
* by the register and the size of the context layout
- * in the docs. Both are described as authorative!
+ * in the docs. Both are described as authoritative!
*
* The discrepancy is on the order of a few cachelines,
* but the total is under one page (4k), which is our
@@ -315,10 +316,9 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
* out in the wash.
*/
cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
- drm_dbg(&gt->i915->drm,
- "graphics_ver = %d CXT_SIZE = %d bytes [0x%08x]\n",
- GRAPHICS_VER(gt->i915), cxt_size * 64,
- cxt_size - 1);
+ gt_dbg(gt, "graphics_ver = %d CXT_SIZE = %d bytes [0x%08x]\n",
+ GRAPHICS_VER(gt->i915), cxt_size * 64,
+ cxt_size - 1);
return round_up(cxt_size * 64, PAGE_SIZE);
case 3:
case 2:
@@ -497,9 +497,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
engine->logical_mask = BIT(logical_instance);
__sprint_engine_name(engine);
- if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
- __ffs(CCS_MASK(engine->gt)) == engine->instance) ||
- engine->class == RENDER_CLASS)
+ if ((engine->class == COMPUTE_CLASS || engine->class == RENDER_CLASS) &&
+ __ffs(CCS_MASK(engine->gt) | RCS_MASK(engine->gt)) == engine->instance)
engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
/* features common between engines sharing EUs */
@@ -557,7 +556,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
DRIVER_CAPS(i915)->has_logical_contexts = true;
ewma__engine_latency_init(&engine->latency);
- seqcount_init(&engine->stats.execlists.lock);
ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
@@ -590,7 +588,7 @@ u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value)
* NB: The GuC API only supports 32bit values. However, the limit is further
* reduced due to internal calculations which would otherwise overflow.
*/
- if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
+ if (intel_guc_submission_is_wanted(gt_to_guc(engine->gt)))
value = min_t(u64, value, guc_policy_max_preempt_timeout_ms());
value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
@@ -611,7 +609,7 @@ u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value)
* NB: The GuC API only supports 32bit values. However, the limit is further
* reduced due to internal calculations which would otherwise overflow.
*/
- if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
+ if (intel_guc_submission_is_wanted(gt_to_guc(engine->gt)))
value = min_t(u64, value, guc_policy_max_exec_quantum_ms());
value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
@@ -679,8 +677,8 @@ void intel_engines_release(struct intel_gt *gt)
* in case we aborted before completely initialising the engines.
*/
GEM_BUG_ON(intel_gt_pm_is_awake(gt));
- if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- __intel_gt_reset(gt, ALL_ENGINES);
+ if (!intel_gt_gpu_reset_clobbers_display(gt))
+ intel_gt_reset_all_engines(gt);
/* Decouple the backend; but keep the layout for late GPU resets */
for_each_engine(engine, gt, id) {
@@ -695,6 +693,8 @@ void intel_engines_release(struct intel_gt *gt)
memset(&engine->reset, 0, sizeof(engine->reset));
}
+
+ llist_del_all(&gt->i915->uabi_engines_llist);
}
void intel_engine_free_request_pool(struct intel_engine_cs *engine)
@@ -766,14 +766,13 @@ static void engine_mask_apply_media_fuses(struct intel_gt *gt)
* and bits have disable semantics.
*/
media_fuse = intel_uncore_read(gt->uncore, GEN11_GT_VEBOX_VDBOX_DISABLE);
- if (MEDIA_VER_FULL(i915) < IP_VER(12, 50))
+ if (MEDIA_VER_FULL(i915) < IP_VER(12, 55))
media_fuse = ~media_fuse;
- vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
- vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
- GEN11_GT_VEBOX_DISABLE_SHIFT;
+ vdbox_mask = REG_FIELD_GET(GEN11_GT_VDBOX_DISABLE_MASK, media_fuse);
+ vebox_mask = REG_FIELD_GET(GEN11_GT_VEBOX_DISABLE_MASK, media_fuse);
- if (MEDIA_VER_FULL(i915) >= IP_VER(12, 50)) {
+ if (MEDIA_VER_FULL(i915) >= IP_VER(12, 55)) {
fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1);
} else {
@@ -788,7 +787,7 @@ static void engine_mask_apply_media_fuses(struct intel_gt *gt)
if (!(BIT(i) & vdbox_mask)) {
gt->info.engine_mask &= ~BIT(_VCS(i));
- drm_dbg(&i915->drm, "vcs%u fused off\n", i);
+ gt_dbg(gt, "vcs%u fused off\n", i);
continue;
}
@@ -796,8 +795,7 @@ static void engine_mask_apply_media_fuses(struct intel_gt *gt)
gt->info.vdbox_sfc_access |= BIT(i);
logical_vdbox++;
}
- drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n",
- vdbox_mask, VDBOX_MASK(gt));
+ gt_dbg(gt, "vdbox enable: %04x, instances: %04lx\n", vdbox_mask, VDBOX_MASK(gt));
GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt));
for (i = 0; i < I915_MAX_VECS; i++) {
@@ -808,11 +806,10 @@ static void engine_mask_apply_media_fuses(struct intel_gt *gt)
if (!(BIT(i) & vebox_mask)) {
gt->info.engine_mask &= ~BIT(_VECS(i));
- drm_dbg(&i915->drm, "vecs%u fused off\n", i);
+ gt_dbg(gt, "vecs%u fused off\n", i);
}
}
- drm_dbg(&i915->drm, "vebox enable: %04x, instances: %04lx\n",
- vebox_mask, VEBOX_MASK(gt));
+ gt_dbg(gt, "vebox enable: %04x, instances: %04lx\n", vebox_mask, VEBOX_MASK(gt));
GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));
}
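The REG_FIELD_GET() conversions above replace open-coded mask-and-shift pairs. As a sanity model (not the driver macro itself), field extraction is (val & mask) >> ctz(mask); the register layouts and fuse value below are invented for the demo:

#include <stdint.h>
#include <stdio.h>

static uint32_t field_get(uint32_t mask, uint32_t val)
{
	return (val & mask) >> __builtin_ctz(mask);	/* shift implied by mask */
}

int main(void)
{
	uint32_t vdbox_disable_mask = 0x000000ff;	/* assumed layout */
	uint32_t vebox_disable_mask = 0x000f0000;	/* assumed layout */
	uint32_t media_fuse = 0x000a00f0;		/* fabricated fuse value */

	printf("vdbox bits: %#x\n", field_get(vdbox_disable_mask, media_fuse));
	printf("vebox bits: %#x\n", field_get(vebox_disable_mask, media_fuse));
	return 0;
}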
@@ -838,39 +835,7 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
*/
for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) {
info->engine_mask &= ~BIT(_CCS(i));
- drm_dbg(&i915->drm, "ccs%u fused off\n", i);
- }
-}
-
-static void engine_mask_apply_copy_fuses(struct intel_gt *gt)
-{
- struct drm_i915_private *i915 = gt->i915;
- struct intel_gt_info *info = &gt->info;
- unsigned long meml3_mask;
- unsigned long quad;
-
- if (!(GRAPHICS_VER_FULL(i915) >= IP_VER(12, 60) &&
- GRAPHICS_VER_FULL(i915) < IP_VER(12, 70)))
- return;
-
- meml3_mask = intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3);
- meml3_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, meml3_mask);
-
- /*
- * Link Copy engines may be fused off according to meml3_mask. Each
- * bit is a quad that houses 2 Link Copy and two Sub Copy engines.
- */
- for_each_clear_bit(quad, &meml3_mask, GEN12_MAX_MSLICES) {
- unsigned int instance = quad * 2 + 1;
- intel_engine_mask_t mask = GENMASK(_BCS(instance + 1),
- _BCS(instance));
-
- if (mask & info->engine_mask) {
- drm_dbg(&i915->drm, "bcs%u fused off\n", instance);
- drm_dbg(&i915->drm, "bcs%u fused off\n", instance + 1);
-
- info->engine_mask &= ~mask;
- }
+ gt_dbg(gt, "ccs%u fused off\n", i);
}
}
@@ -879,7 +844,7 @@ static void engine_mask_apply_copy_fuses(struct intel_gt *gt)
* Note that we have a catch-22 situation where we need to be able to access
* the blitter forcewake domain to read the engine fuses, but at the same time
* we need to know which engines are available on the system to know which
- * forcewake domains are present. We solve this by intializing the forcewake
+ * forcewake domains are present. We solve this by initializing the forcewake
* domains based on the full engine mask in the platform capabilities before
* calling this function and pruning the domains for fused-off engines
* afterwards.
@@ -892,7 +857,6 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
engine_mask_apply_media_fuses(gt);
engine_mask_apply_compute_fuses(gt);
- engine_mask_apply_copy_fuses(gt);
/*
* The only use of the GSC CS is to load and communicate with the GSC
@@ -907,11 +871,33 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
* submission, which will wake up the GSC power well.
*/
if (__HAS_ENGINE(info->engine_mask, GSC0) && !intel_uc_wants_gsc_uc(&gt->uc)) {
- drm_notice(&gt->i915->drm,
- "No GSC FW selected, disabling GSC CS and media C6\n");
+ gt_notice(gt, "No GSC FW selected, disabling GSC CS and media C6\n");
info->engine_mask &= ~BIT(GSC0);
}
+ /*
+ * Do not create the command streamer for CCS slices beyond the first.
+	 * All the workloads submitted to the first engine will be shared among
+	 * all the slices.
+	 *
+	 * Once the user is allowed to customize the CCS mode, this check will
+	 * need to be removed.
+ */
+ if (IS_DG2(gt->i915)) {
+ u8 first_ccs = __ffs(CCS_MASK(gt));
+
+ /*
+		 * Store the mask of active cslices before
+		 * changing the CCS engine configuration
+ */
+ gt->ccs.cslices = CCS_MASK(gt);
+
+		/* Mask off all the CCS engines */
+ info->engine_mask &= ~GENMASK(CCS3, CCS0);
+ /* Put back in the first CCS engine */
+ info->engine_mask |= BIT(_CCS(first_ccs));
+ }
+
return info->engine_mask;
}
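The DG2 masking step above in isolation: clear every CCS bit from the engine mask, then restore only the lowest-numbered one. A self-contained model, with hypothetical bit positions (the real ones come from _CCS() and the engine-mask layout):

#include <stdio.h>

#define CCS0_BIT 8	/* hypothetical engine-mask position of ccs0 */

int main(void)
{
	unsigned long engine_mask = 0xf01;	/* rcs0 + ccs0..ccs3, made up */
	unsigned long ccs_mask = (engine_mask >> CCS0_BIT) & 0xf;
	unsigned int first_ccs = __builtin_ctzl(ccs_mask);

	engine_mask &= ~(0xfUL << CCS0_BIT);		/* mask off all CCS engines */
	engine_mask |= 1UL << (CCS0_BIT + first_ccs);	/* put back the first */

	printf("engine_mask = %#lx (kept ccs%u)\n", engine_mask, first_ccs);
	return 0;
}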
@@ -1097,8 +1083,7 @@ static int init_status_page(struct intel_engine_cs *engine)
*/
obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
if (IS_ERR(obj)) {
- drm_err(&engine->i915->drm,
- "Failed to allocate status page\n");
+ gt_err(engine->gt, "Failed to allocate status page\n");
return PTR_ERR(obj);
}
@@ -1143,12 +1128,130 @@ err_put:
return ret;
}
+static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
+{
+ static const union intel_engine_tlb_inv_reg gen8_regs[] = {
+ [RENDER_CLASS].reg = GEN8_RTCR,
+ [VIDEO_DECODE_CLASS].reg = GEN8_M1TCR, /* , GEN8_M2TCR */
+ [VIDEO_ENHANCEMENT_CLASS].reg = GEN8_VTCR,
+ [COPY_ENGINE_CLASS].reg = GEN8_BTCR,
+ };
+ static const union intel_engine_tlb_inv_reg gen12_regs[] = {
+ [RENDER_CLASS].reg = GEN12_GFX_TLB_INV_CR,
+ [VIDEO_DECODE_CLASS].reg = GEN12_VD_TLB_INV_CR,
+ [VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR,
+ [COPY_ENGINE_CLASS].reg = GEN12_BLT_TLB_INV_CR,
+ [COMPUTE_CLASS].reg = GEN12_COMPCTX_TLB_INV_CR,
+ };
+ static const union intel_engine_tlb_inv_reg xehp_regs[] = {
+ [RENDER_CLASS].mcr_reg = XEHP_GFX_TLB_INV_CR,
+ [VIDEO_DECODE_CLASS].mcr_reg = XEHP_VD_TLB_INV_CR,
+ [VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
+ [COPY_ENGINE_CLASS].mcr_reg = XEHP_BLT_TLB_INV_CR,
+ [COMPUTE_CLASS].mcr_reg = XEHP_COMPCTX_TLB_INV_CR,
+ };
+ static const union intel_engine_tlb_inv_reg xelpmp_regs[] = {
+ [VIDEO_DECODE_CLASS].reg = GEN12_VD_TLB_INV_CR,
+ [VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR,
+ [OTHER_CLASS].reg = XELPMP_GSC_TLB_INV_CR,
+ };
+ struct drm_i915_private *i915 = engine->i915;
+ const unsigned int instance = engine->instance;
+ const unsigned int class = engine->class;
+ const union intel_engine_tlb_inv_reg *regs;
+ union intel_engine_tlb_inv_reg reg;
+ unsigned int num = 0;
+ u32 val;
+
+ /*
+ * New platforms should not be added with catch-all-newer (>=)
+ * condition so that any later platform added triggers the below warning
+ * and in turn mandates a human cross-check of whether the invalidation
+ * flows have compatible semantics.
+ *
+ * For instance with the 11.00 -> 12.00 transition three out of five
+ * respective engine registers were moved to masked type. Then after the
+	 * 12.00 -> 12.50 transition multicast handling is required too.
+ */
+
+ if (engine->gt->type == GT_MEDIA) {
+ if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) {
+ regs = xelpmp_regs;
+ num = ARRAY_SIZE(xelpmp_regs);
+ }
+ } else {
+ if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 74) ||
+ GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) ||
+ GRAPHICS_VER_FULL(i915) == IP_VER(12, 70) ||
+ GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
+ regs = xehp_regs;
+ num = ARRAY_SIZE(xehp_regs);
+ } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
+ GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
+ regs = gen12_regs;
+ num = ARRAY_SIZE(gen12_regs);
+ } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
+ regs = gen8_regs;
+ num = ARRAY_SIZE(gen8_regs);
+ } else if (GRAPHICS_VER(i915) < 8) {
+ return 0;
+ }
+ }
+
+ if (gt_WARN_ONCE(engine->gt, !num,
+ "Platform does not implement TLB invalidation!"))
+ return -ENODEV;
+
+ if (gt_WARN_ON_ONCE(engine->gt,
+ class >= num ||
+ (!regs[class].reg.reg &&
+ !regs[class].mcr_reg.reg)))
+ return -ERANGE;
+
+ reg = regs[class];
+
+ if (regs == xelpmp_regs && class == OTHER_CLASS) {
+ /*
+ * There's only a single GSC instance, but it uses register bit
+ * 1 instead of either 0 or OTHER_GSC_INSTANCE.
+ */
+ GEM_WARN_ON(instance != OTHER_GSC_INSTANCE);
+ val = 1;
+ } else if (regs == gen8_regs && class == VIDEO_DECODE_CLASS && instance == 1) {
+ reg.reg = GEN8_M2TCR;
+ val = 0;
+ } else {
+ val = instance;
+ }
+
+ val = BIT(val);
+
+ engine->tlb_inv.mcr = regs == xehp_regs;
+ engine->tlb_inv.reg = reg;
+ engine->tlb_inv.done = val;
+
+ if (GRAPHICS_VER(i915) >= 12 &&
+ (engine->class == VIDEO_DECODE_CLASS ||
+ engine->class == VIDEO_ENHANCEMENT_CLASS ||
+ engine->class == COMPUTE_CLASS ||
+ engine->class == OTHER_CLASS))
+ engine->tlb_inv.request = _MASKED_BIT_ENABLE(val);
+ else
+ engine->tlb_inv.request = val;
+
+ return 0;
+}
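To illustrate how the computed tlb_inv fields are meant to be consumed: ->request is written to the class register (a masked write on gen12+) and ->done is the bit to poll for completion. A hedged sketch with register I/O stubbed out; masked_bit_enable() mirrors the _MASKED_BIT_ENABLE convention (the bit in the high 16 bits arms the write of the same bit in the low 16):

#include <stdint.h>
#include <stdio.h>

static uint32_t masked_bit_enable(uint32_t bit)
{
	return (bit << 16) | bit;	/* arm + set, as in _MASKED_BIT_ENABLE */
}

int main(void)
{
	unsigned int instance = 1;		/* e.g. vcs1 */
	uint32_t val = 1u << instance;		/* engine's invalidation bit */
	uint32_t request = masked_bit_enable(val); /* gen12+ masked register */
	uint32_t done = val;

	printf("write %#010x to the class register, poll for done bit %#x\n",
	       request, done);
	return 0;
}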
+
static int engine_setup_common(struct intel_engine_cs *engine)
{
int err;
init_llist_head(&engine->barrier_tasks);
+ err = intel_engine_init_tlb_invalidation(engine);
+ if (err)
+ return err;
+
err = init_status_page(engine);
if (err)
return err;
@@ -1214,6 +1317,7 @@ static int measure_breadcrumb_dw(struct intel_context *ce)
if (!frame)
return -ENOMEM;
+ frame->rq.i915 = engine->i915;
frame->rq.engine = engine;
frame->rq.context = ce;
rcu_assign_pointer(frame->rq.timeline, ce->timeline);
@@ -1300,6 +1404,20 @@ void intel_engine_destroy_pinned_context(struct intel_context *ce)
}
static struct intel_context *
+create_ggtt_bind_context(struct intel_engine_cs *engine)
+{
+ static struct lock_class_key kernel;
+
+ /*
+	 * MI_UPDATE_GTT can insert up to 511 PTEs at a time, and there could be
+	 * multiple bind requests in flight, so get a bigger ring.
+ */
+ return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_512K,
+ I915_GEM_HWS_GGTT_BIND_ADDR,
+ &kernel, "ggtt_bind_context");
+}
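Back-of-envelope arithmetic for the SZ_512K ring choice, assuming each MI_UPDATE_GTT of 511 PTEs emits 2 header dwords plus 2 dwords per PTE (the numbers below only reproduce that arithmetic, they are not taken from the patch):

#include <stdio.h>

int main(void)
{
	unsigned int n_ptes = 511;
	unsigned int dwords = 2 + 2 * n_ptes;	/* header + qword per PTE */
	unsigned int bytes = dwords * 4;	/* 4096 bytes per command */
	unsigned int ring = 512 * 1024;

	printf("%u bytes per 511-PTE update, ~%u updates fit in the ring\n",
	       bytes, ring / bytes);
	return 0;
}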
+
+static struct intel_context *
create_kernel_context(struct intel_engine_cs *engine)
{
static struct lock_class_key kernel;
@@ -1309,8 +1427,8 @@ create_kernel_context(struct intel_engine_cs *engine)
&kernel, "kernel_context");
}
-/**
- * intel_engines_init_common - initialize cengine state which might require hw access
+/*
+ * engine_init_common - initialize engine state which might require hw access
* @engine: Engine to initialize.
*
* Initializes @engine@ structure members shared between legacy and execlists
@@ -1322,7 +1440,7 @@ create_kernel_context(struct intel_engine_cs *engine)
*/
static int engine_init_common(struct intel_engine_cs *engine)
{
- struct intel_context *ce;
+ struct intel_context *ce, *bce = NULL;
int ret;
engine->set_default_submission(engine);
@@ -1338,17 +1456,34 @@ static int engine_init_common(struct intel_engine_cs *engine)
ce = create_kernel_context(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
+ /*
+	 * Create a separate pinned context for GGTT updates with the blitter
+	 * engine if the platform requires such a service. MI_UPDATE_GTT works
+	 * on other engines as well, but BCS should be the least busy engine,
+	 * so pick it for GGTT updates.
+ */
+ if (i915_ggtt_require_binder(engine->i915) && engine->id == BCS0) {
+ bce = create_ggtt_bind_context(engine);
+ if (IS_ERR(bce)) {
+ ret = PTR_ERR(bce);
+ goto err_ce_context;
+ }
+ }
ret = measure_breadcrumb_dw(ce);
if (ret < 0)
- goto err_context;
+ goto err_bce_context;
engine->emit_fini_breadcrumb_dw = ret;
engine->kernel_context = ce;
+ engine->bind_context = bce;
return 0;
-err_context:
+err_bce_context:
+ if (bce)
+ intel_engine_destroy_pinned_context(bce);
+err_ce_context:
intel_engine_destroy_pinned_context(ce);
return ret;
}
@@ -1396,8 +1531,8 @@ int intel_engines_init(struct intel_gt *gt)
}
/**
- * intel_engines_cleanup_common - cleans up the engine state created by
- * the common initiailizers.
+ * intel_engine_cleanup_common - cleans up the engine state created by
+ * the common initializers.
* @engine: Engine to cleanup.
*
* This cleans up everything created by the common helpers.
@@ -1418,6 +1553,10 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
if (engine->kernel_context)
intel_engine_destroy_pinned_context(engine->kernel_context);
+ if (engine->bind_context)
+ intel_engine_destroy_pinned_context(engine->bind_context);
+
GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
cleanup_status_page(engine);
@@ -1497,9 +1636,7 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
* Wa_22011802037: Prior to doing a reset, ensure CS is
* stopped, set ring stop bit and prefetch disable bit to halt CS
*/
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(engine->i915) >= 11 &&
- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70)))
+ if (intel_engine_reset_needs_wa_22011802037(engine->gt))
intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
_MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
@@ -1939,13 +2076,13 @@ static const char *repr_timer(const struct timer_list *t)
static void intel_engine_print_registers(struct intel_engine_cs *engine,
struct drm_printer *m)
{
- struct drm_i915_private *dev_priv = engine->i915;
+ struct drm_i915_private *i915 = engine->i915;
struct intel_engine_execlists * const execlists = &engine->execlists;
u64 addr;
- if (engine->id == RENDER_CLASS && IS_GRAPHICS_VER(dev_priv, 4, 7))
+ if (engine->id == RENDER_CLASS && IS_GRAPHICS_VER(i915, 4, 7))
drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
- if (HAS_EXECLISTS(dev_priv)) {
+ if (HAS_EXECLISTS(i915)) {
drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
@@ -1966,7 +2103,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
}
- if (GRAPHICS_VER(dev_priv) >= 6) {
+ if (GRAPHICS_VER(i915) >= 6) {
drm_printf(m, "\tRING_IMR: 0x%08x\n",
ENGINE_READ(engine, RING_IMR));
drm_printf(m, "\tRING_ESR: 0x%08x\n",
@@ -1983,15 +2120,15 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
addr = intel_engine_get_last_batch_head(engine);
drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
upper_32_bits(addr), lower_32_bits(addr));
- if (GRAPHICS_VER(dev_priv) >= 8)
+ if (GRAPHICS_VER(i915) >= 8)
addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
- else if (GRAPHICS_VER(dev_priv) >= 4)
+ else if (GRAPHICS_VER(i915) >= 4)
addr = ENGINE_READ(engine, RING_DMA_FADD);
else
addr = ENGINE_READ(engine, DMA_FADD_I8XX);
drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
upper_32_bits(addr), lower_32_bits(addr));
- if (GRAPHICS_VER(dev_priv) >= 4) {
+ if (GRAPHICS_VER(i915) >= 4) {
drm_printf(m, "\tIPEIR: 0x%08x\n",
ENGINE_READ(engine, RING_IPEIR));
drm_printf(m, "\tIPEHR: 0x%08x\n",
@@ -2001,7 +2138,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
}
- if (HAS_EXECLISTS(dev_priv) && !intel_engine_uses_guc(engine)) {
+ if (HAS_EXECLISTS(i915) && !intel_engine_uses_guc(engine)) {
struct i915_request * const *port, *rq;
const u32 *hws =
&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
@@ -2067,7 +2204,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
}
rcu_read_unlock();
i915_sched_engine_active_unlock_bh(engine->sched_engine);
- } else if (GRAPHICS_VER(dev_priv) > 6) {
+ } else if (GRAPHICS_VER(i915) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
ENGINE_READ(engine, RING_PP_DIR_BASE));
drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 9a527e1f5be6..b279878dca29 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -3,7 +3,10 @@
* Copyright © 2019 Intel Corporation
*/
+#include <drm/drm_print.h>
+
#include "i915_drv.h"
+#include "i915_jiffies.h"
#include "i915_request.h"
#include "intel_context.h"
@@ -96,7 +99,8 @@ static void heartbeat_commit(struct i915_request *rq,
static void show_heartbeat(const struct i915_request *rq,
struct intel_engine_cs *engine)
{
- struct drm_printer p = drm_debug_printer("heartbeat");
+ struct drm_printer p =
+ drm_dbg_printer(&engine->i915->drm, DRM_UT_DRIVER, "heartbeat");
if (!rq) {
intel_engine_dump(engine, &p,
@@ -188,7 +192,7 @@ static void heartbeat(struct work_struct *wrk)
* low latency and no jitter] the chance to naturally
* complete before being preempted.
*/
- attr.priority = 0;
+ attr.priority = I915_PRIORITY_NORMAL;
if (rq->sched.attr.priority >= attr.priority)
attr.priority = I915_PRIORITY_HEARTBEAT;
if (rq->sched.attr.priority >= attr.priority)
@@ -290,6 +294,9 @@ static int __intel_engine_pulse(struct intel_engine_cs *engine)
heartbeat_commit(rq, &attr);
GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
+ /* Ensure the forced pulse gets a full period to execute */
+ next_heartbeat(engine);
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index e971b153fda9..fb7bff27b45a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -21,7 +21,7 @@ static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
- if (IS_METEORLAKE(i915) && engine->id == GSC0) {
+ if (MEDIA_VER(i915) >= 13 && engine->id == GSC0) {
intel_uncore_write(engine->gt->uncore,
RC_PSMI_CTRL_GSCCS,
_MASKED_BIT_DISABLE(IDLE_MSG_DISABLE));
@@ -39,7 +39,7 @@ static void dbg_poison_ce(struct intel_context *ce)
if (ce->state) {
struct drm_i915_gem_object *obj = ce->state->obj;
- int type = i915_coherent_map_type(ce->engine->i915, obj, true);
+ int type = intel_gt_coherent_map_type(ce->engine->gt, obj, true);
void *map;
if (!i915_gem_object_trylock(obj, NULL))
@@ -63,7 +63,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
ENGINE_TRACE(engine, "\n");
- intel_gt_pm_get(engine->gt);
+ engine->wakeref_track = intel_gt_pm_get(engine->gt);
/* Discard stale context state from across idling */
ce = engine->kernel_context;
@@ -115,6 +115,16 @@ __queue_and_release_pm(struct i915_request *rq,
ENGINE_TRACE(engine, "parking\n");
/*
+	 * Open code one half of intel_context_enter, which we have to omit
+	 * here (see the large comment below). The other half must not be
+	 * called, because constructing directly with __i915_request_create
+	 * already increments the active count via intel_context_mark_active.
+ */
+ GEM_BUG_ON(rq->context->active_count != 1);
+ __intel_gt_pm_get(engine->gt);
+ rq->context->wakeref = intel_wakeref_track(&engine->gt->wakeref);
+
+ /*
* We have to serialise all potential retirement paths with our
* submission, as we don't want to underflow either the
* engine->wakeref.counter or our timeline->active_count.
@@ -269,14 +279,11 @@ static int __engine_park(struct intel_wakeref *wf)
intel_engine_park_heartbeat(engine);
intel_breadcrumbs_park(engine->breadcrumbs);
- /* Must be reset upon idling, or we may miss the busy wakeup. */
- GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN);
-
if (engine->park)
engine->park(engine);
/* While gt calls i915_vma_parked(), we have to break the lock cycle */
- intel_gt_pm_put_async(engine->gt);
+ intel_gt_pm_put_async(engine->gt, engine->wakeref_track);
return 0;
}
@@ -287,9 +294,7 @@ static const struct intel_wakeref_ops wf_ops = {
void intel_engine_init__pm(struct intel_engine_cs *engine)
{
- struct intel_runtime_pm *rpm = engine->uncore->rpm;
-
- intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
+ intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops, engine->name);
intel_engine_init_heartbeat(engine);
intel_gsc_idle_msg_enable(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index d68675925b79..1d97c435a015 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -10,6 +10,7 @@
#include "i915_request.h"
#include "intel_engine_types.h"
#include "intel_wakeref.h"
+#include "intel_gt.h"
#include "intel_gt_pm.h"
static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index 6b9d9f837669..1c4784cb296c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -15,6 +15,7 @@
#define HEAD_WRAP_COUNT 0xFFE00000
#define HEAD_WRAP_ONE 0x00200000
#define HEAD_ADDR 0x001FFFFC
+#define HEAD_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */
#define RING_START(base) _MMIO((base) + 0x38)
#define RING_CTL(base) _MMIO((base) + 0x3c)
#define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */
@@ -26,7 +27,6 @@
#define RING_VALID_MASK 0x00000001
#define RING_VALID 0x00000001
#define RING_INVALID 0x00000000
-#define RING_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */
#define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */
#define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */
#define RING_SYNC_0(base) _MMIO((base) + 0x40)
@@ -118,9 +118,15 @@
#define CCID_EXTENDED_STATE_RESTORE BIT(2)
#define CCID_EXTENDED_STATE_SAVE BIT(3)
#define RING_BB_PER_CTX_PTR(base) _MMIO((base) + 0x1c0) /* gen8+ */
+#define PER_CTX_BB_FORCE BIT(2)
+#define PER_CTX_BB_VALID BIT(0)
+
#define RING_INDIRECT_CTX(base) _MMIO((base) + 0x1c4) /* gen8+ */
#define RING_INDIRECT_CTX_OFFSET(base) _MMIO((base) + 0x1c8) /* gen8+ */
#define ECOSKPD(base) _MMIO((base) + 0x1d0)
+#define XEHP_BLITTER_SCHEDULING_MODE_MASK REG_GENMASK(12, 11)
+#define XEHP_BLITTER_ROUND_ROBIN_MODE \
+ REG_FIELD_PREP(XEHP_BLITTER_SCHEDULING_MODE_MASK, 1)
#define ECO_CONSTANT_BUFFER_SR_DISABLE REG_BIT(4)
#define ECO_GATING_CX_ONLY REG_BIT(3)
#define GEN6_BLITTER_FBC_NOTIFY REG_BIT(3)
@@ -177,6 +183,7 @@
#define CTX_CTRL_RS_CTX_ENABLE REG_BIT(1)
#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT REG_BIT(2)
#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3)
+#define GEN12_CTX_CTRL_RUNALONE_MODE REG_BIT(7)
#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE REG_BIT(8)
#define RING_CTX_SR_CTL(base) _MMIO((base) + 0x244)
#define RING_SEMA_WAIT_POLL(base) _MMIO((base) + 0x24c)
@@ -256,5 +263,7 @@
#define VDBOX_CGCTL3F18(base) _MMIO((base) + 0x3f18)
#define ALNUNIT_CLKGATE_DIS REG_BIT(13)
+#define VDBOX_CGCTL3F1C(base) _MMIO((base) + 0x3f1c)
+#define MFXPIPE_CLKGATE_DIS REG_BIT(3)
#endif /* __INTEL_ENGINE_REGS__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 4fd54fb8810f..155b6255a63e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -53,9 +53,12 @@ struct intel_gt;
struct intel_ring;
struct intel_uncore;
struct intel_breadcrumbs;
+struct intel_engine_cs;
+struct i915_perf_group;
typedef u32 intel_engine_mask_t;
#define ALL_ENGINES ((intel_engine_mask_t)~0ul)
+#define VIRTUAL_ENGINES BIT(BITS_PER_TYPE(intel_engine_mask_t) - 1)
struct intel_hw_status_page {
struct list_head timelines;
@@ -234,7 +237,7 @@ struct intel_engine_execlists {
*/
struct i915_request * const *active;
/**
- * @inflight: the set of contexts submitted and acknowleged by HW
+ * @inflight: the set of contexts submitted and acknowledged by HW
*
* The set of inflight contexts is managed by reading CS events
* from the HW. On a context-switch event (not preemption), we
@@ -257,7 +260,7 @@ struct intel_engine_execlists {
unsigned int port_mask;
/**
- * @virtual: Queue of requets on a virtual engine, sorted by priority.
+ * @virtual: Queue of requests on a virtual engine, sorted by priority.
* Each RB entry is a struct i915_priolist containing a list of requests
* of the same priority.
*/
@@ -287,6 +290,7 @@ struct intel_engine_execlists {
*/
u8 csb_head;
+ /* private: selftest */
I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};
@@ -339,6 +343,23 @@ struct intel_engine_guc_stats {
* @start_gt_clk: GT clock time of last idle to active transition.
*/
u64 start_gt_clk;
+
+ /**
+ * @total: The last value of total returned
+ */
+ u64 total;
+};
+
+union intel_engine_tlb_inv_reg {
+ i915_reg_t reg;
+ i915_mcr_reg_t mcr_reg;
+};
+
+struct intel_engine_tlb_inv {
+ bool mcr;
+ union intel_engine_tlb_inv_reg reg;
+ u32 request;
+ u32 done;
};
struct intel_engine_cs {
@@ -372,6 +393,8 @@ struct intel_engine_cs {
u32 context_size;
u32 mmio_base;
+ struct intel_engine_tlb_inv tlb_inv;
+
/*
* Some w/a require forcewake to be held (which prevents RC6) while
* a particular engine is active. If so, we set fw_domain to which
@@ -384,7 +407,15 @@ struct intel_engine_cs {
unsigned long context_tag;
- struct rb_node uabi_node;
+ /*
+ * The type evolves during initialization, see related comment for
+ * struct drm_i915_private's uabi_engines member.
+ */
+ union {
+ struct llist_node uabi_llist;
+ struct list_head uabi_list;
+ struct rb_node uabi_node;
+ };
struct intel_sseu sseu;
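The union above reuses one slot of storage across three initialization phases: a lock-free llist node while engines are produced, a list_head while they are sorted, and an rb_node once registered for uabi lookup. A toy model of that pattern with plain pointer structs standing in for the kernel types:

#include <stdio.h>

struct engine {
	const char *name;
	union {			/* storage reused across init phases */
		struct { void *next; } llist_node;		/* phase 1: lock-free add */
		struct { void *prev, *next; } list_head;	/* phase 2: sort */
		struct { void *left, *right, *parent; } rb;	/* phase 3: uabi tree */
	} uabi;
};

int main(void)
{
	struct engine e = { .name = "rcs0" };

	printf("%s: one %zu-byte slot serves all three phases\n",
	       e.name, sizeof(e.uabi));
	return 0;
}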
@@ -398,6 +429,9 @@ struct intel_engine_cs {
struct llist_head barrier_tasks;
struct intel_context *kernel_context; /* pinned */
+ struct intel_context *bind_context; /* pinned, only for BCS0 */
+ /* mark the bind context's availability status */
+ bool bind_context_ready;
/**
* pinned_contexts_list: List of pinned contexts. This list is only
@@ -417,7 +451,9 @@ struct intel_engine_cs {
unsigned long serial;
unsigned long wakeref_serial;
+ intel_wakeref_t wakeref_track;
struct intel_wakeref wakeref;
+
struct file *default_state;
struct {
@@ -555,7 +591,7 @@ struct intel_engine_cs {
#define I915_ENGINE_HAS_RCS_REG_STATE BIT(9)
#define I915_ENGINE_HAS_EU_PRIORITY BIT(10)
#define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11)
-#define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT BIT(12)
+#define I915_ENGINE_USES_WA_HOLD_SWITCHOUT BIT(12)
unsigned int flags;
/*
@@ -603,6 +639,14 @@ struct intel_engine_cs {
} props, defaults;
I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
+
+ /*
+ * The perf group maps to one OA unit which controls one OA buffer. All
+ * reports corresponding to this engine will be reported to this OA
+ * buffer. An engine will map to a single OA unit, but a single OA unit
+ * can generate reports for multiple engines.
+ */
+ struct i915_perf_group *oa_group;
};
static inline bool
@@ -657,10 +701,12 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
}
/* Wa_14014475959:dg2 */
+/* Wa_16019325821 */
+/* Wa_14019159160 */
static inline bool
-intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine)
+intel_engine_uses_wa_hold_switchout(struct intel_engine_cs *engine)
{
- return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+ return engine->flags & I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
}
#endif /* __INTEL_ENGINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index cd4f1b126f75..be4bbff1a57c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -7,6 +7,8 @@
#include <linux/list_sort.h>
#include <linux/llist.h>
+#include <drm/drm_print.h>
+
#include "i915_drv.h"
#include "intel_engine.h"
#include "intel_engine_user.h"
@@ -38,25 +40,27 @@ intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
void intel_engine_add_user(struct intel_engine_cs *engine)
{
- llist_add((struct llist_node *)&engine->uabi_node,
- (struct llist_head *)&engine->i915->uabi_engines);
+ llist_add(&engine->uabi_llist, &engine->i915->uabi_engines_llist);
}
-static const u8 uabi_classes[] = {
+#define I915_NO_UABI_CLASS ((u16)(-1))
+
+static const u16 uabi_classes[] = {
[RENDER_CLASS] = I915_ENGINE_CLASS_RENDER,
[COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
[VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
[VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
[COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
+ [OTHER_CLASS] = I915_NO_UABI_CLASS, /* Not exposed to users, no uabi class. */
};
static int engine_cmp(void *priv, const struct list_head *A,
const struct list_head *B)
{
const struct intel_engine_cs *a =
- container_of((struct rb_node *)A, typeof(*a), uabi_node);
+ container_of(A, typeof(*a), uabi_list);
const struct intel_engine_cs *b =
- container_of((struct rb_node *)B, typeof(*b), uabi_node);
+ container_of(B, typeof(*b), uabi_list);
if (uabi_classes[a->class] < uabi_classes[b->class])
return -1;
@@ -73,7 +77,7 @@ static int engine_cmp(void *priv, const struct list_head *A,
static struct llist_node *get_engines(struct drm_i915_private *i915)
{
- return llist_del_all((struct llist_head *)&i915->uabi_engines);
+ return llist_del_all(&i915->uabi_engines_llist);
}
static void sort_engines(struct drm_i915_private *i915,
@@ -83,9 +87,8 @@ static void sort_engines(struct drm_i915_private *i915,
llist_for_each_safe(pos, next, get_engines(i915)) {
struct intel_engine_cs *engine =
- container_of((struct rb_node *)pos, typeof(*engine),
- uabi_node);
- list_add((struct list_head *)&engine->uabi_node, engines);
+ container_of(pos, typeof(*engine), uabi_llist);
+ list_add(&engine->uabi_list, engines);
}
list_sort(NULL, engines, engine_cmp);
}
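The comparator contract used by the list_sort() call above, in isolation: order by uabi class first, then by instance. A user-space qsort() model with fabricated entries:

#include <stdio.h>
#include <stdlib.h>

struct eng { int klass, instance; const char *name; };

static int engine_cmp(const void *a, const void *b)
{
	const struct eng *ea = a, *eb = b;

	if (ea->klass != eb->klass)		/* class is the primary key */
		return ea->klass - eb->klass;
	return ea->instance - eb->instance;	/* instance breaks ties */
}

int main(void)
{
	struct eng e[] = {
		{ 1, 1, "vcs1" }, { 0, 0, "rcs0" }, { 1, 0, "vcs0" },
	};

	qsort(e, 3, sizeof(e[0]), engine_cmp);
	for (int i = 0; i < 3; i++)
		printf("%s\n", e[i].name);	/* rcs0, vcs0, vcs1 */
	return 0;
}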
@@ -117,7 +120,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
disabled |= (I915_SCHEDULER_CAP_ENABLED |
I915_SCHEDULER_CAP_PRIORITY);
- if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
+ if (intel_uc_uses_guc_submission(&engine->gt->uc))
enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP;
for (i = 0; i < ARRAY_SIZE(map); i++) {
@@ -202,6 +205,7 @@ static void engine_rename(struct intel_engine_cs *engine, const char *name, u16
void intel_engines_driver_register(struct drm_i915_private *i915)
{
+ u16 name_instance, other_instance = 0;
struct legacy_ring ring = {};
struct list_head *it, *next;
struct rb_node **p, *prev;
@@ -213,33 +217,33 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
p = &i915->uabi_engines.rb_node;
list_for_each_safe(it, next, &engines) {
struct intel_engine_cs *engine =
- container_of((struct rb_node *)it, typeof(*engine),
- uabi_node);
+ container_of(it, typeof(*engine), uabi_list);
if (intel_gt_has_unrecoverable_error(engine->gt))
continue; /* ignore incomplete engines */
- /*
- * We don't want to expose the GSC engine to the users, but we
- * still rename it so it is easier to identify in the debug logs
- */
- if (engine->id == GSC0) {
- engine_rename(engine, "gsc", 0);
- continue;
- }
-
GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
engine->uabi_class = uabi_classes[engine->class];
+ if (engine->uabi_class == I915_NO_UABI_CLASS) {
+ name_instance = other_instance++;
+ } else {
+ GEM_BUG_ON(engine->uabi_class >=
+ ARRAY_SIZE(i915->engine_uabi_class_count));
+ name_instance =
+ i915->engine_uabi_class_count[engine->uabi_class]++;
+ }
+ engine->uabi_instance = name_instance;
- GEM_BUG_ON(engine->uabi_class >=
- ARRAY_SIZE(i915->engine_uabi_class_count));
- engine->uabi_instance =
- i915->engine_uabi_class_count[engine->uabi_class]++;
-
- /* Replace the internal name with the final user facing name */
+ /*
+		 * Replace the internal name with the final user- and log-facing
+		 * name.
+ */
engine_rename(engine,
intel_engine_class_repr(engine->class),
- engine->uabi_instance);
+ name_instance);
+
+ if (engine->uabi_class == I915_NO_UABI_CLASS)
+ continue;
rb_link_node(&engine->uabi_node, prev, p);
rb_insert_color(&engine->uabi_node, &i915->uabi_engines);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 3c573d41d404..3df683b0402a 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -106,14 +106,20 @@
* preemption, but just sampling the new tail pointer).
*
*/
+
#include <linux/interrupt.h>
#include <linux/string_helpers.h>
+#include <drm/drm_print.h>
+
+#include "gen8_engine_cs.h"
#include "i915_drv.h"
+#include "i915_list_util.h"
#include "i915_reg.h"
+#include "i915_timer_util.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
-#include "gen8_engine_cs.h"
+#include "i915_wait_util.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_heartbeat.h"
@@ -405,15 +411,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
return active;
}
-struct i915_request *
-execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
-{
- struct intel_engine_cs *engine =
- container_of(execlists, typeof(*engine), execlists);
-
- return __unwind_incomplete_requests(engine);
-}
-
static void
execlists_context_status_change(struct i915_request *rq, unsigned long status)
{
@@ -493,7 +490,7 @@ __execlists_schedule_in(struct i915_request *rq)
/* Use a fixed tag for OA and friends */
GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
ce->lrc.ccid = ce->tag;
- } else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
+ } else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) {
/* We don't need a strict matching tag, just different values */
unsigned int tag = ffs(READ_ONCE(engine->context_tag));
@@ -613,7 +610,7 @@ static void __execlists_schedule_out(struct i915_request * const rq,
intel_engine_add_retire(engine, ce->timeline);
ccid = ce->lrc.ccid;
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) {
ccid >>= XEHP_SW_CTX_ID_SHIFT - 32;
ccid &= XEHP_MAX_CONTEXT_HW_ID;
} else {
@@ -630,7 +627,7 @@ static void __execlists_schedule_out(struct i915_request * const rq,
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
if (engine->fw_domain && !--engine->fw_active)
intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
- intel_gt_pm_put_async(engine->gt);
+ intel_gt_pm_put_async_untracked(engine->gt);
/*
* If this is part of a virtual engine, its next request may
@@ -1907,7 +1904,7 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
head, upper_32_bits(csb), lower_32_bits(csb));
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
promote = xehp_csb_parse(csb);
else if (GRAPHICS_VER(engine->i915) >= 12)
promote = gen12_csb_parse(csb);
@@ -2018,6 +2015,8 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
* inspecting the queue to see if we need to resubmit.
*/
if (*prev != *execlists->active) { /* elide lite-restores */
+ struct intel_context *prev_ce = NULL, *active_ce = NULL;
+
/*
* Note the inherent discrepancy between the HW runtime,
* recorded as part of the context switch, and the CPU
@@ -2029,9 +2028,15 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
* and correct ourselves later when updating from HW.
*/
if (*prev)
- lrc_runtime_stop((*prev)->context);
+ prev_ce = (*prev)->context;
if (*execlists->active)
- lrc_runtime_start((*execlists->active)->context);
+ active_ce = (*execlists->active)->context;
+ if (prev_ce != active_ce) {
+ if (prev_ce)
+ lrc_runtime_stop(prev_ce);
+ if (active_ce)
+ lrc_runtime_start(active_ce);
+ }
new_timeslice(execlists);
}
@@ -2319,6 +2324,7 @@ static u32 active_ccid(struct intel_engine_cs *engine)
static void execlists_capture(struct intel_engine_cs *engine)
{
+ struct drm_i915_private *i915 = engine->i915;
struct execlists_capture *cap;
if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
@@ -2367,7 +2373,7 @@ static void execlists_capture(struct intel_engine_cs *engine)
goto err_rq;
INIT_WORK(&cap->work, execlists_capture_work);
- schedule_work(&cap->work);
+ queue_work(i915->unordered_wq, &cap->work);
return;
err_rq:
@@ -2502,7 +2508,7 @@ static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir)
ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
ENGINE_TRACE(engine, "semaphore yield: %08x\n",
engine->execlists.yield);
- if (del_timer(&engine->execlists.timer))
+ if (timer_delete(&engine->execlists.timer))
tasklet = true;
}
@@ -2709,7 +2715,7 @@ static int emit_pdps(struct i915_request *rq)
int err, i;
u32 *cs;
- GEM_BUG_ON(intel_vgpu_active(rq->engine->i915));
+ GEM_BUG_ON(intel_vgpu_active(rq->i915));
/*
* Beware ye of the dragons, this sequence is magic!
@@ -2889,7 +2895,7 @@ static void enable_error_interrupt(struct intel_engine_cs *engine)
drm_err(&engine->i915->drm,
"engine '%s' resumed still in error: %08x\n",
engine->name, status);
- __intel_gt_reset(engine->gt, engine->mask);
+ intel_gt_reset_engine(engine);
}
/*
@@ -2992,9 +2998,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
* Wa_22011802037: In addition to stopping the cs, we need
* to wait for any pending mi force wakeups
*/
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(engine->i915) >= 11 &&
- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70)))
+ if (intel_engine_reset_needs_wa_22011802037(engine->gt))
intel_engine_wait_for_pending_mi_fw(engine);
engine->execlists.reset_ccid = active_ccid(engine);
@@ -3265,6 +3269,9 @@ static void execlists_park(struct intel_engine_cs *engine)
{
cancel_timer(&engine->execlists.timer);
cancel_timer(&engine->execlists.preempt);
+
+ /* Reset upon idling, or we may delay the busy wakeup. */
+ WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN);
}
static void add_to_engine(struct i915_request *rq)
@@ -3305,11 +3312,7 @@ static void remove_from_engine(struct i915_request *rq)
static bool can_preempt(struct intel_engine_cs *engine)
{
- if (GRAPHICS_VER(engine->i915) > 8)
- return true;
-
- /* GPGPU on bdw requires extra w/a; not implemented */
- return engine->class != RENDER_CLASS;
+ return GRAPHICS_VER(engine->i915) > 8;
}
static void kick_execlists(const struct i915_request *rq, int prio)
@@ -3373,8 +3376,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
static void execlists_shutdown(struct intel_engine_cs *engine)
{
/* Synchronise with residual timers and any softirq they raise */
- del_timer_sync(&engine->execlists.timer);
- del_timer_sync(&engine->execlists.preempt);
+ timer_delete_sync(&engine->execlists.timer);
+ timer_delete_sync(&engine->execlists.preempt);
tasklet_kill(&engine->sched_engine->tasklet);
}
@@ -3472,7 +3475,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
}
}
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) {
if (intel_engine_has_preemption(engine))
engine->emit_bb_start = xehp_emit_bb_start;
else
@@ -3541,22 +3544,24 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
logical_ring_default_vfuncs(engine);
logical_ring_default_irqs(engine);
+ seqcount_init(&engine->stats.execlists.lock);
+
if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
rcs_submission_override(engine);
lrc_init_wa_ctx(engine);
if (HAS_LOGICAL_RING_ELSQ(i915)) {
- execlists->submit_reg = uncore->regs +
+ execlists->submit_reg = intel_uncore_regs(uncore) +
i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
- execlists->ctrl_reg = uncore->regs +
+ execlists->ctrl_reg = intel_uncore_regs(uncore) +
i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
engine->fw_domain = intel_uncore_forcewake_for_reg(engine->uncore,
RING_EXECLIST_CONTROL(engine->mmio_base),
FW_REG_WRITE);
} else {
- execlists->submit_reg = uncore->regs +
+ execlists->submit_reg = intel_uncore_regs(uncore) +
i915_mmio_reg_offset(RING_ELSP(base));
}
@@ -3573,7 +3578,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
if (GRAPHICS_VER(engine->i915) >= 11 &&
- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 50)) {
+ GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 55)) {
execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
}
@@ -3672,7 +3677,7 @@ static void virtual_context_destroy(struct kref *kref)
* lock, we can delegate the free of the engine to an RCU worker.
*/
INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
- queue_rcu_work(system_wq, &ve->rcu);
+ queue_rcu_work(ve->context.engine->i915->unordered_wq, &ve->rcu);
}
static void virtual_engine_initial_hint(struct virtual_engine *ve)
@@ -4150,17 +4155,6 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
-static unsigned long list_count(struct list_head *list)
-{
- struct list_head *pos;
- unsigned long count = 0;
-
- list_for_each(pos, list)
- count++;
-
- return count;
-}
-
void intel_execlists_dump_active_requests(struct intel_engine_cs *engine,
struct i915_request *hung_rq,
struct drm_printer *m)
@@ -4171,8 +4165,8 @@ void intel_execlists_dump_active_requests(struct intel_engine_cs *engine,
intel_engine_dump_active_requests(&engine->sched_engine->requests, hung_rq, m);
- drm_printf(m, "\tOn hold?: %lu\n",
- list_count(&engine->sched_engine->hold));
+ drm_printf(m, "\tOn hold?: %zu\n",
+ list_count_nodes(&engine->sched_engine->hold));
spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 842e69c7b21e..08c4e735481b 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -9,24 +9,30 @@
#include <linux/stop_machine.h>
#include <drm/drm_managed.h>
-#include <drm/i915_drm.h>
-#include <drm/intel-gtt.h>
+#include <drm/drm_print.h>
+#include <drm/intel/i915_drm.h>
+#include <drm/intel/intel-gtt.h>
-#include "display/intel_display.h"
#include "gem/i915_gem_lmem.h"
+#include "intel_context.h"
#include "intel_ggtt_gmch.h"
+#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_regs.h"
#include "intel_pci_config.h"
+#include "intel_ring.h"
#include "i915_drv.h"
#include "i915_pci.h"
+#include "i915_reg.h"
+#include "i915_request.h"
#include "i915_scatterlist.h"
#include "i915_utils.h"
#include "i915_vgpu.h"
#include "intel_gtt.h"
#include "gen8_ppgtt.h"
+#include "intel_engine_pm.h"
static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
unsigned long color,
@@ -102,11 +108,12 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915)
/**
* i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
* @vm: The VM to suspend the mappings for
+ * @evict_all: Evict all VMAs
*
* Suspend the memory mappings for all objects mapped to HW via the GGTT or a
* DPT page table.
*/
-void i915_ggtt_suspend_vm(struct i915_address_space *vm)
+void i915_ggtt_suspend_vm(struct i915_address_space *vm, bool evict_all)
{
struct i915_vma *vma, *vn;
int save_skip_rewrite;
@@ -152,7 +159,7 @@ retry:
goto retry;
}
- if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
+ if (evict_all || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
i915_vma_wait_for_bind(vma);
__i915_vma_evict(vma, false);
@@ -167,13 +174,15 @@ retry:
vm->skip_pte_rewrite = save_skip_rewrite;
mutex_unlock(&vm->mutex);
+
+ drm_WARN_ON(&vm->i915->drm, evict_all && !list_empty(&vm->bound_list));
}
void i915_ggtt_suspend(struct i915_ggtt *ggtt)
{
struct intel_gt *gt;
- i915_ggtt_suspend_vm(&ggtt->vm);
+ i915_ggtt_suspend_vm(&ggtt->vm, false);
ggtt->invalidate(ggtt);
list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
@@ -190,6 +199,21 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
spin_unlock_irq(&uncore->lock);
}
+static bool needs_wc_ggtt_mapping(struct drm_i915_private *i915)
+{
+ /*
+	 * On BXT+/ICL+ writes larger than 64 bits to the GTT pagetable range
+	 * will be dropped. For WC mappings in general we have 64 byte burst
+	 * writes when the WC buffer is flushed, so we can't use it, but have to
+	 * resort to an uncached mapping. The WC issue is easily caught by the
+	 * readback check when writing GTT PTEs.
+ */
+ if (!IS_GEN9_LP(i915) && GRAPHICS_VER(i915) < 11)
+ return true;
+
+ return false;
+}
+
static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
{
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
@@ -197,31 +221,65 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
/*
* Note that as an uncached mmio write, this will flush the
* WCB of the writes into the GGTT before it triggers the invalidate.
+ *
+ * Only perform this when GGTT is mapped as WC, see ggtt_probe_common().
*/
- intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+ if (needs_wc_ggtt_mapping(ggtt->vm.i915))
+ intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6,
+ GFX_FLSH_CNTL_EN);
+}
+
+static void guc_ggtt_ct_invalidate(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm_if_active(uncore->rpm, wakeref)
+ intel_guc_invalidate_tlb_guc(gt_to_guc(gt));
}
static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
{
struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct intel_gt *gt;
gen8_ggtt_invalidate(ggtt);
- if (GRAPHICS_VER(i915) >= 12) {
- struct intel_gt *gt;
-
- list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
+ list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
+ if (intel_guc_tlb_invalidation_is_available(gt_to_guc(gt)))
+ guc_ggtt_ct_invalidate(gt);
+ else if (GRAPHICS_VER(i915) >= 12)
intel_uncore_write_fw(gt->uncore,
GEN12_GUC_TLB_INV_CR,
GEN12_GUC_TLB_INV_CR_INVALIDATE);
- } else {
- intel_uncore_write_fw(ggtt->vm.gt->uncore,
- GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+ else
+ intel_uncore_write_fw(gt->uncore,
+ GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}
}
+static u64 mtl_ggtt_pte_encode(dma_addr_t addr,
+ unsigned int pat_index,
+ u32 flags)
+{
+ gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
+
+ WARN_ON_ONCE(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
+
+ if (flags & PTE_LM)
+ pte |= GEN12_GGTT_PTE_LM;
+
+ if (pat_index & BIT(0))
+ pte |= MTL_GGTT_PTE_PAT0;
+
+ if (pat_index & BIT(1))
+ pte |= MTL_GGTT_PTE_PAT1;
+
+ return pte;
+}
+
u64 gen8_ggtt_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
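A model of the PTE assembly done by mtl_ggtt_pte_encode() above: address bits plus present/LM flags and two PAT index bits. The bit positions below are assumptions for the demo, not the hardware values from the i915 headers:

#include <stdint.h>
#include <stdio.h>

#define PAGE_PRESENT	(1ull << 0)
#define PTE_LM_BIT	(1ull << 1)	/* assumed position */
#define PTE_PAT0	(1ull << 3)	/* assumed position */
#define PTE_PAT1	(1ull << 4)	/* assumed position */

static uint64_t pte_encode(uint64_t addr, unsigned int pat_index, int lm)
{
	uint64_t pte = (addr & ~0xfffull) | PAGE_PRESENT;

	if (lm)
		pte |= PTE_LM_BIT;	/* page lives in local memory */
	if (pat_index & 1)
		pte |= PTE_PAT0;	/* low PAT index bit */
	if (pat_index & 2)
		pte |= PTE_PAT1;	/* high PAT index bit */
	return pte;
}

int main(void)
{
	printf("pte = %#llx\n",
	       (unsigned long long)pte_encode(0x1000, 3, 1));
	return 0;
}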
@@ -232,33 +290,212 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
return pte;
}
+static dma_addr_t gen8_ggtt_pte_decode(u64 pte, bool *is_present, bool *is_local)
+{
+ *is_present = pte & GEN8_PAGE_PRESENT;
+ *is_local = pte & GEN12_GGTT_PTE_LM;
+
+ return pte & GEN12_GGTT_PTE_ADDR_MASK;
+}
+
+static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
+{
+ struct intel_gt *gt = ggtt->vm.gt;
+
+ return intel_gt_is_bind_context_ready(gt);
+}
+
+static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt, intel_wakeref_t *wakeref)
+{
+ struct intel_context *ce;
+ struct intel_gt *gt = ggtt->vm.gt;
+
+ if (intel_gt_is_wedged(gt))
+ return NULL;
+
+ ce = gt->engine[BCS0]->bind_context;
+ GEM_BUG_ON(!ce);
+
+ /*
+	 * If the GT is not already awake at this stage, then fall back to the
+	 * PCI-based GGTT update; otherwise __intel_wakeref_get_first() would
+	 * conflict with fs_reclaim trying to allocate memory while doing
+	 * rpm_resume().
+ */
+ *wakeref = intel_gt_pm_get_if_awake(gt);
+ if (!*wakeref)
+ return NULL;
+
+ intel_engine_pm_get(ce->engine);
+
+ return ce;
+}
+
+static void gen8_ggtt_bind_put_ce(struct intel_context *ce, intel_wakeref_t wakeref)
+{
+ intel_engine_pm_put(ce->engine);
+ intel_gt_pm_put(ce->engine->gt, wakeref);
+}
+
+static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
+ struct sg_table *pages, u32 num_entries,
+ const gen8_pte_t pte)
+{
+ struct i915_sched_attr attr = {};
+ struct intel_gt *gt = ggtt->vm.gt;
+ const gen8_pte_t scratch_pte = ggtt->vm.scratch[0]->encode;
+ struct sgt_iter iter;
+ struct i915_request *rq;
+ struct intel_context *ce;
+ intel_wakeref_t wakeref;
+ u32 *cs;
+
+ if (!num_entries)
+ return true;
+
+ ce = gen8_ggtt_bind_get_ce(ggtt, &wakeref);
+ if (!ce)
+ return false;
+
+ if (pages)
+ iter = __sgt_iter(pages->sgl, true);
+
+ while (num_entries) {
+ int count = 0;
+ dma_addr_t addr;
+ /*
+		 * MI_UPDATE_GTT can update 512 entries in a single command, but
+		 * doing so ends up triggering an engine reset; 511 works.
+ */
+ u32 n_ptes = min_t(u32, 511, num_entries);
+
+ if (mutex_lock_interruptible(&ce->timeline->mutex))
+ goto put_ce;
+
+ intel_context_enter(ce);
+ rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC);
+ intel_context_exit(ce);
+ if (IS_ERR(rq)) {
+ GT_TRACE(gt, "Failed to get bind request\n");
+ mutex_unlock(&ce->timeline->mutex);
+ goto put_ce;
+ }
+
+ cs = intel_ring_begin(rq, 2 * n_ptes + 2);
+ if (IS_ERR(cs)) {
+ GT_TRACE(gt, "Failed to ring space for GGTT bind\n");
+ i915_request_set_error_once(rq, PTR_ERR(cs));
+ /* once a request is created, it must be queued */
+ goto queue_err_rq;
+ }
+
+ *cs++ = MI_UPDATE_GTT | (2 * n_ptes);
+ *cs++ = offset << 12;
+
+ if (pages) {
+ for_each_sgt_daddr_next(addr, iter) {
+ if (count == n_ptes)
+ break;
+ *cs++ = lower_32_bits(pte | addr);
+ *cs++ = upper_32_bits(pte | addr);
+ count++;
+ }
+			/* Fill the remainder with scratch PTEs, if any */
+ if (count < n_ptes) {
+ memset64((u64 *)cs, scratch_pte,
+ n_ptes - count);
+ cs += (n_ptes - count) * 2;
+ }
+ } else {
+ memset64((u64 *)cs, pte, n_ptes);
+ cs += n_ptes * 2;
+ }
+
+ intel_ring_advance(rq, cs);
+queue_err_rq:
+ i915_request_get(rq);
+ __i915_request_commit(rq);
+ __i915_request_queue(rq, &attr);
+
+ mutex_unlock(&ce->timeline->mutex);
+ /* This will break if the request is complete or after engine reset */
+ i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+ if (rq->fence.error)
+ goto err_rq;
+
+ i915_request_put(rq);
+
+ num_entries -= n_ptes;
+ offset += n_ptes;
+ }
+
+ gen8_ggtt_bind_put_ce(ce, wakeref);
+ return true;
+
+err_rq:
+ i915_request_put(rq);
+put_ce:
+ gen8_ggtt_bind_put_ce(ce, wakeref);
+ return false;
+}
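The chunking policy of gen8_ggtt_bind_ptes() above, stripped of the request plumbing: split num_entries into batches of at most 511 PTEs per MI_UPDATE_GTT. A sketch with fabricated sizes:

#include <stdio.h>

int main(void)
{
	unsigned int num_entries = 1300, offset = 0;	/* made-up workload */

	while (num_entries) {
		unsigned int n = num_entries < 511 ? num_entries : 511;

		printf("bind %u PTEs at page offset %u\n", n, offset);
		offset += n;
		num_entries -= n;
	}
	return 0;
}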
+
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
writeq(pte, addr);
}
+static gen8_pte_t gen8_get_pte(void __iomem *addr)
+{
+ return readq(addr);
+}
+
static void gen8_ggtt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
gen8_pte_t __iomem *pte =
(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
- gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
+ gen8_set_pte(pte, ggtt->vm.pte_encode(addr, pat_index, flags));
ggtt->invalidate(ggtt);
}
+static dma_addr_t gen8_ggtt_read_entry(struct i915_address_space *vm,
+ u64 offset, bool *is_present, bool *is_local)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen8_pte_t __iomem *pte =
+ (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+ return ggtt->vm.pte_decode(gen8_get_pte(pte), is_present, is_local);
+}
+
+static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm,
+ dma_addr_t addr, u64 offset,
+ unsigned int pat_index, u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen8_pte_t pte;
+
+ pte = ggtt->vm.pte_encode(addr, pat_index, flags);
+ if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
+ gen8_ggtt_bind_ptes(ggtt, offset, NULL, 1, pte))
+ return ggtt->invalidate(ggtt);
+
+ gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags);
+}
+
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
- const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
gen8_pte_t __iomem *gte;
gen8_pte_t __iomem *end;
struct sgt_iter iter;
@@ -291,21 +528,118 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
ggtt->invalidate(ggtt);
}
+static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res,
+ unsigned int pat_index, u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen8_pte_t scratch_pte = vm->scratch[0]->encode;
+ gen8_pte_t pte_encode;
+ u64 start, end;
+
+ pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
+ start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
+ end = start + vma_res->guard / I915_GTT_PAGE_SIZE;
+ if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
+ goto err;
+
+ start = end;
+ end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
+ if (!gen8_ggtt_bind_ptes(ggtt, start, vma_res->bi.pages,
+ vma_res->node_size / I915_GTT_PAGE_SIZE, pte_encode))
+ goto err;
+
+ start += vma_res->node_size / I915_GTT_PAGE_SIZE;
+ if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
+ goto err;
+
+ return true;
+
+err:
+ return false;
+}
+
+static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res,
+ unsigned int pat_index, u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+
+ if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
+ __gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags))
+ return ggtt->invalidate(ggtt);
+
+ gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags);
+}
+
+static void gen8_ggtt_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+ unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+ const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
+ gen8_pte_t __iomem *gtt_base =
+ (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
+ const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+ int i;
+
+ if (WARN(num_entries > max_entries,
+ "First entry = %d; Num entries = %d (max=%d)\n",
+ first_entry, num_entries, max_entries))
+ num_entries = max_entries;
+
+ for (i = 0; i < num_entries; i++)
+ gen8_set_pte(&gtt_base[i], scratch_pte);
+}
+
+static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+ unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+ const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
+ const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+
+ if (WARN(num_entries > max_entries,
+ "First entry = %d; Num entries = %d (max=%d)\n",
+ first_entry, num_entries, max_entries))
+ num_entries = max_entries;
+
+ if (should_update_ggtt_with_bind(ggtt) && gen8_ggtt_bind_ptes(ggtt, first_entry,
+ NULL, num_entries, scratch_pte))
+ return ggtt->invalidate(ggtt);
+
+ gen8_ggtt_clear_range(vm, start, length);
+}
+
static void gen6_ggtt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
gen6_pte_t __iomem *pte =
(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
- iowrite32(vm->pte_encode(addr, level, flags), pte);
+ iowrite32(vm->pte_encode(addr, pat_index, flags), pte);
ggtt->invalidate(ggtt);
}
+static dma_addr_t gen6_ggtt_read_entry(struct i915_address_space *vm,
+ u64 offset,
+ bool *is_present, bool *is_local)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen6_pte_t __iomem *pte =
+ (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+ return vm->pte_decode(ioread32(pte), is_present, is_local);
+}
+
/*
* Binds an object into the global gtt with the specified cache level.
* The object will be accessible to the GPU via commands whose operands
@@ -314,7 +648,7 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
*/
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
@@ -331,7 +665,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
iowrite32(vm->scratch[0]->encode, gte++);
end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
- iowrite32(vm->pte_encode(addr, level, flags), gte++);
+ iowrite32(vm->pte_encode(addr, pat_index, flags), gte++);
GEM_BUG_ON(gte > end);
/* Fill the allocated but "unused" space beyond the end of the buffer */
@@ -366,14 +700,15 @@ struct insert_page {
struct i915_address_space *vm;
dma_addr_t addr;
u64 offset;
- enum i915_cache_level level;
+ unsigned int pat_index;
};
static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
struct insert_page *arg = _arg;
- gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
+ gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset,
+ arg->pat_index, 0);
bxt_vtd_ggtt_wa(arg->vm);
return 0;
@@ -382,10 +717,10 @@ static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 unused)
{
- struct insert_page arg = { vm, addr, offset, level };
+ struct insert_page arg = { vm, addr, offset, pat_index };
stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}
@@ -393,7 +728,7 @@ static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
struct insert_entries {
struct i915_address_space *vm;
struct i915_vma_resource *vma_res;
- enum i915_cache_level level;
+ unsigned int pat_index;
u32 flags;
};
@@ -401,7 +736,8 @@ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
struct insert_entries *arg = _arg;
- gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags);
+ gen8_ggtt_insert_entries(arg->vm, arg->vma_res,
+ arg->pat_index, arg->flags);
bxt_vtd_ggtt_wa(arg->vm);
return 0;
@@ -409,10 +745,10 @@ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
- struct insert_entries arg = { vm, vma_res, level, flags };
+ struct insert_entries arg = { vm, vma_res, pat_index, flags };
stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}
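The bxt_vtd_* wrappers above all use the same stop_machine() pattern: pack the arguments into a struct and run the real update inside a callback while every other CPU is parked, so GGTT PTE writes cannot race with aperture access under VT-d. A generic sketch of the pattern, with do_pte_write() as a hypothetical stand-in for the actual update:

	#include <linux/stop_machine.h>

	struct serialised_update {
		struct i915_address_space *vm;
		dma_addr_t addr;
		u64 offset;
	};

	static int serialised_update_cb(void *_arg)
	{
		struct serialised_update *arg = _arg;

		/* Runs while all other CPUs spin in a quiescent state. */
		do_pte_write(arg->vm, arg->addr, arg->offset);
		return 0;
	}

	static void serialised_update(struct i915_address_space *vm,
				      dma_addr_t addr, u64 offset)
	{
		struct serialised_update arg = { vm, addr, offset };

		/* NULL cpumask: the callback may run on any one CPU. */
		stop_machine(serialised_update_cb, &arg, NULL);
	}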
@@ -441,7 +777,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
void intel_ggtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 flags)
{
u32 pte_flags;
@@ -458,7 +794,7 @@ void intel_ggtt_bind_vma(struct i915_address_space *vm,
if (vma_res->bi.lmem)
pte_flags |= PTE_LM;
- vm->insert_entries(vm, vma_res, cache_level, pte_flags);
+ vm->insert_entries(vm, vma_res, pat_index, pte_flags);
vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
}
@@ -468,20 +804,39 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm,
vm->clear_range(vm, vma_res->start, vma_res->vma_size);
}
+dma_addr_t intel_ggtt_read_entry(struct i915_address_space *vm,
+ u64 offset, bool *is_present, bool *is_local)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+
+ return ggtt->vm.read_entry(vm, offset, is_present, is_local);
+}
+
+/*
+ * Reserve the top of the GuC address space for firmware images. Addresses
+ * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
+ * which makes for a suitable range to hold GuC/HuC firmware images if the
+ * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT
+ * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk
+ * of the same size anyway, which is far more than needed, to keep the logic
+ * in uc_fw_ggtt_offset() simple.
+ */
+#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)
+
static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
- u64 size;
+ u64 offset;
int ret;
if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
return 0;
- GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
- size = ggtt->vm.total - GUC_GGTT_TOP;
+ GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE);
+ offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE;
- ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size,
- GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
- PIN_NOEVICT);
+ ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw,
+ GUC_TOP_RESERVE_SIZE, offset,
+ I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
if (ret)
drm_dbg(&ggtt->vm.i915->drm,
"Failed to reserve top of GGTT for GuC\n");
@@ -551,8 +906,12 @@ static int init_ggtt(struct i915_ggtt *ggtt)
* paths, and we trust that 0 will remain reserved. However,
* the only likely reason for failure to insert is a driver
* bug, which we expect to cause other failures...
+ *
+ * Since the CPU can perform speculative reads on the error capture
+ * region (write-combining allows it), add a scratch page after the
+ * error capture node to avoid DMAR errors.
*/
- ggtt->error_capture.size = I915_GTT_PAGE_SIZE;
+ ggtt->error_capture.size = 2 * I915_GTT_PAGE_SIZE;
ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
drm_mm_insert_node_in_range(&ggtt->vm.mm,
@@ -562,11 +921,15 @@ static int init_ggtt(struct i915_ggtt *ggtt)
0, ggtt->mappable_end,
DRM_MM_INSERT_LOW);
}
- if (drm_mm_node_allocated(&ggtt->error_capture))
+ if (drm_mm_node_allocated(&ggtt->error_capture)) {
+ u64 start = ggtt->error_capture.start;
+ u64 size = ggtt->error_capture.size;
+
+ ggtt->vm.scratch_range(&ggtt->vm, start, size);
drm_dbg(&ggtt->vm.i915->drm,
"Reserved GGTT:[%llx, %llx] for use by error capture\n",
- ggtt->error_capture.start,
- ggtt->error_capture.start + ggtt->error_capture.size);
+ start, start + size);
+ }
/*
* The upper portion of the GuC address space has a sizeable hole
@@ -599,7 +962,7 @@ err:
static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 flags)
{
u32 pte_flags;
@@ -611,10 +974,10 @@ static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
if (flags & I915_VMA_LOCAL_BIND)
ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
- stash, vma_res, cache_level, flags);
+ stash, vma_res, pat_index, flags);
if (flags & I915_VMA_GLOBAL_BIND)
- vm->insert_entries(vm, vma_res, cache_level, pte_flags);
+ vm->insert_entries(vm, vma_res, pat_index, pte_flags);
vma_res->bound_flags |= flags;
}
@@ -832,25 +1195,26 @@ static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
phys_addr_t phys_addr;
u32 pte_flags;
int ret;
GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
- phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
- /*
- * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
- * will be dropped. For WC mappings in general we have 64 byte burst
- * writes when the WC buffer is flushed, so we can't use it, but have to
- * resort to an uncached mapping. The WC issue is easily caught by the
- * readback check when writing GTT PTE entries.
- */
- if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11)
- ggtt->gsm = ioremap(phys_addr, size);
- else
+ if (i915_direct_stolen_access(i915)) {
+ drm_dbg(&i915->drm, "Using direct GSM access\n");
+ phys_addr = intel_uncore_read64(uncore, GEN6_GSMBASE) & GEN11_BDSM_MASK;
+ } else {
+ phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
+ }
+
+ if (needs_wc_ggtt_mapping(i915))
ggtt->gsm = ioremap_wc(phys_addr, size);
+ else
+ ggtt->gsm = ioremap(phys_addr, size);
+
if (!ggtt->gsm) {
drm_err(&i915->drm, "Failed to map the ggtt page table\n");
return -ENOMEM;
@@ -871,7 +1235,9 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
ggtt->vm.scratch[0]->encode =
ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
- I915_CACHE_NONE, pte_flags);
+ i915_gem_get_pat_index(i915,
+ I915_CACHE_NONE),
+ pte_flags);
return 0;
}
@@ -919,8 +1285,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.cleanup = gen6_gmch_remove;
ggtt->vm.insert_page = gen8_ggtt_insert_page;
ggtt->vm.clear_range = nop_clear_range;
+ ggtt->vm.scratch_range = gen8_ggtt_clear_range;
ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
+ ggtt->vm.read_entry = gen8_ggtt_read_entry;
/*
* Serialize GTT updates with aperture access on BXT if VT-d is on,
@@ -943,7 +1311,18 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
}
- if (intel_uc_wants_guc(&ggtt->vm.gt->uc))
+ if (i915_ggtt_require_binder(i915)) {
+ ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind;
+ ggtt->vm.insert_page = gen8_ggtt_insert_page_bind;
+ ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind;
+ /*
+ * When the GPU is hung, we might bind VMAs for error capture.
+ * Fall back to CPU GGTT updates in that case.
+ */
+ ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
+ }
+
+ if (intel_uc_wants_guc_submission(&ggtt->vm.gt->uc))
ggtt->invalidate = guc_ggtt_invalidate;
else
ggtt->invalidate = gen8_ggtt_invalidate;
@@ -951,18 +1330,28 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
- ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+ ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
+ else
+ ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
+
+ ggtt->vm.pte_decode = gen8_ggtt_pte_decode;
return ggtt_probe_common(ggtt, size);
}
+/*
+ * For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
+ * so the switch-case statements in these PTE encode functions are still valid.
+ * See translation table LEGACY_CACHELEVEL.
+ */
static u64 snb_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
- switch (level) {
+ switch (pat_index) {
case I915_CACHE_L3_LLC:
case I915_CACHE_LLC:
pte |= GEN6_PTE_CACHE_LLC;
@@ -971,19 +1360,19 @@ static u64 snb_pte_encode(dma_addr_t addr,
pte |= GEN6_PTE_UNCACHED;
break;
default:
- MISSING_CASE(level);
+ MISSING_CASE(pat_index);
}
return pte;
}
static u64 ivb_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
- switch (level) {
+ switch (pat_index) {
case I915_CACHE_L3_LLC:
pte |= GEN7_PTE_CACHE_L3_LLC;
break;
@@ -994,14 +1383,14 @@ static u64 ivb_pte_encode(dma_addr_t addr,
pte |= GEN6_PTE_UNCACHED;
break;
default:
- MISSING_CASE(level);
+ MISSING_CASE(pat_index);
}
return pte;
}
static u64 byt_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
@@ -1009,31 +1398,31 @@ static u64 byt_pte_encode(dma_addr_t addr,
if (!(flags & PTE_READ_ONLY))
pte |= BYT_PTE_WRITEABLE;
- if (level != I915_CACHE_NONE)
+ if (pat_index != I915_CACHE_NONE)
pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
return pte;
}
static u64 hsw_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
- if (level != I915_CACHE_NONE)
+ if (pat_index != I915_CACHE_NONE)
pte |= HSW_WB_LLC_AGE3;
return pte;
}
static u64 iris_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags)
{
gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
- switch (level) {
+ switch (pat_index) {
case I915_CACHE_NONE:
break;
case I915_CACHE_WT:
@@ -1047,6 +1436,14 @@ static u64 iris_pte_encode(dma_addr_t addr,
return pte;
}
+static dma_addr_t gen6_pte_decode(u64 pte, bool *is_present, bool *is_local)
+{
+ *is_present = pte & GEN6_PTE_VALID;
+ *is_local = false;
+
+ return ((pte & 0xff0) << 28) | (pte & ~0xfff);
+}
+
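gen6_pte_decode() above undoes the gen6 address packing: a 40-bit physical address is stored with addr[31:12] in pte[31:12] and addr[39:32] in pte[11:4]. A self-contained round-trip sketch of just the address bits (the real encode also ORs in validity and cache control bits):

	static u64 sketch_gen6_addr_encode(u64 addr)
	{
		/* addr[31:12] stays in place; addr[39:32] lands in pte[11:4] */
		return (addr & 0xfffff000ULL) | ((addr >> 32) & 0xff) << 4;
	}

	static u64 sketch_gen6_addr_decode(u64 pte)
	{
		return ((pte & 0xff0) << 28) | (pte & ~0xfffULL);
	}

	/* e.g. 0x1234567000 encodes to 0x34567120 and decodes back again */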
static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
struct drm_i915_private *i915 = ggtt->vm.i915;
@@ -1082,8 +1479,10 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.clear_range = nop_clear_range;
if (!HAS_FULL_PPGTT(i915))
ggtt->vm.clear_range = gen6_ggtt_clear_range;
+ ggtt->vm.scratch_range = gen6_ggtt_clear_range;
ggtt->vm.insert_page = gen6_ggtt_insert_page;
ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
+ ggtt->vm.read_entry = gen6_ggtt_read_entry;
ggtt->vm.cleanup = gen6_gmch_remove;
ggtt->invalidate = gen6_ggtt_invalidate;
@@ -1099,6 +1498,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
else
ggtt->vm.pte_encode = snb_pte_encode;
+ ggtt->vm.pte_decode = gen6_pte_decode;
+
ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
@@ -1204,6 +1605,7 @@ int i915_ggtt_enable_hw(struct drm_i915_private *i915)
/**
* i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM
* @vm: The VM to restore the mappings for
+ * @all_evicted: Were all VMAs expected to be evicted on suspend?
*
* Restore the memory mappings for all objects mapped to HW via the GGTT or a
* DPT page table.
@@ -1211,13 +1613,18 @@ int i915_ggtt_enable_hw(struct drm_i915_private *i915)
* Returns %true if restoring the mapping for any object that was in a write
* domain before suspend.
*/
-bool i915_ggtt_resume_vm(struct i915_address_space *vm)
+bool i915_ggtt_resume_vm(struct i915_address_space *vm, bool all_evicted)
{
struct i915_vma *vma;
bool write_domain_objs = false;
drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
+ if (all_evicted) {
+ drm_WARN_ON(&vm->i915->drm, !list_empty(&vm->bound_list));
+ return false;
+ }
+
/* First fill our portion of the GTT with scratch pages */
vm->clear_range(vm, 0, vm->total);
@@ -1235,7 +1642,9 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
*/
vma->resource->bound_flags = 0;
vma->ops->bind_vma(vm, NULL, vma->resource,
- obj ? obj->cache_level : 0,
+ obj ? obj->pat_index :
+ i915_gem_get_pat_index(vm->i915,
+ I915_CACHE_NONE),
was_bound);
if (obj) { /* only used during resume => exclusive access */
@@ -1255,7 +1664,14 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
intel_gt_check_and_clear_faults(gt);
- flush = i915_ggtt_resume_vm(&ggtt->vm);
+ flush = i915_ggtt_resume_vm(&ggtt->vm, false);
+
+ if (drm_mm_node_allocated(&ggtt->error_capture))
+ ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
+ ggtt->error_capture.size);
+
+ list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
+ intel_uc_resume_mappings(&gt->uc);
ggtt->invalidate(ggtt);
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index 37d0b0fe791d..5eda98ebc1ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -5,6 +5,8 @@
#include <linux/highmem.h>
+#include <drm/drm_print.h>
+
#include "display/intel_display.h"
#include "i915_drv.h"
#include "i915_reg.h"
@@ -298,6 +300,7 @@ void i915_vma_revoke_fence(struct i915_vma *vma)
return;
GEM_BUG_ON(fence->vma != vma);
+ i915_active_wait(&fence->active);
GEM_BUG_ON(!i915_active_is_idle(&fence->active));
GEM_BUG_ON(atomic_read(&fence->pin_count));
@@ -327,6 +330,7 @@ static bool fence_is_active(const struct i915_fence_reg *fence)
static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
{
+ struct intel_display *display = ggtt->vm.i915->display;
struct i915_fence_reg *active = NULL;
struct i915_fence_reg *fence, *fn;
@@ -352,7 +356,7 @@ static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
}
/* Wait for completion of pending flips which consume fences */
- if (intel_has_pending_fb_unpin(ggtt->vm.i915))
+ if (intel_has_pending_fb_unpin(display))
return ERR_PTR(-EAGAIN);
return ERR_PTR(-ENOBUFS);
@@ -417,7 +421,6 @@ out_unpin:
* For an untiled surface, this removes any existing fence.
*
* Returns:
- *
* 0 on success, negative error code on failure.
*/
int i915_vma_pin_fence(struct i915_vma *vma)
@@ -749,7 +752,7 @@ static void swizzle_page(struct page *page)
char *vaddr;
int i;
- vaddr = kmap(page);
+ vaddr = kmap_local_page(page);
for (i = 0; i < PAGE_SIZE; i += 128) {
memcpy(temp, &vaddr[i], 64);
@@ -757,7 +760,7 @@ static void swizzle_page(struct page *page)
memcpy(&vaddr[i + 64], temp, 64);
}
- kunmap(page);
+ kunmap_local(vaddr);
}
/**
@@ -818,7 +821,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
if (obj->bit_17 == NULL) {
obj->bit_17 = bitmap_zalloc(page_count, GFP_KERNEL);
if (obj->bit_17 == NULL) {
- drm_err(&to_i915(obj->base.dev)->drm,
+ drm_err(obj->base.dev,
"Failed to allocate memory for bit 17 record\n");
return;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
index 77c793812eb4..cc5d345c5e29 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
@@ -5,7 +5,8 @@
#include "intel_ggtt_gmch.h"
-#include <drm/intel-gtt.h>
+#include <drm/drm_print.h>
+#include <drm/intel/intel-gtt.h>
#include <linux/agp_backend.h>
@@ -18,21 +19,28 @@
static void gmch_ggtt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 unused)
{
- unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+ unsigned int flags = (pat_index == I915_CACHE_NONE) ?
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
}
+static dma_addr_t gmch_ggtt_read_entry(struct i915_address_space *vm,
+ u64 offset, bool *is_present, bool *is_local)
+{
+ return intel_gmch_gtt_read_entry(offset >> PAGE_SHIFT,
+ is_present, is_local);
+}
+
static void gmch_ggtt_insert_entries(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 unused)
{
- unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+ unsigned int flags = (pat_index == I915_CACHE_NONE) ?
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
@@ -102,6 +110,8 @@ int intel_ggtt_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.insert_page = gmch_ggtt_insert_page;
ggtt->vm.insert_entries = gmch_ggtt_insert_entries;
ggtt->vm.clear_range = gmch_ggtt_clear_range;
+ ggtt->vm.scratch_range = gmch_ggtt_clear_range;
+ ggtt->vm.read_entry = gmch_ggtt_read_entry;
ggtt->vm.cleanup = gmch_ggtt_remove;
ggtt->invalidate = gmch_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 2af1ae3831df..5394bc7d4daf 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -121,6 +121,7 @@
#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */
+#define MI_SEMAPHORE_REGISTER_POLL (1 << 16)
#define MI_SEMAPHORE_POLL (1 << 15)
#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
@@ -219,6 +220,7 @@
#define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1)
#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2)
+#define CMD_3DSTATE_MESH_CONTROL ((0x3 << 29) | (0x3 << 27) | (0x0 << 24) | (0x77 << 16) | (0x3))
#define XY_CTRL_SURF_INSTR_SIZE 5
#define MI_FLUSH_DW_SIZE 3
@@ -299,6 +301,7 @@
#define PIPE_CONTROL_QW_WRITE (1<<14)
#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
#define PIPE_CONTROL_DEPTH_STALL (1<<13)
+#define PIPE_CONTROL_CCS_FLUSH (1<<13) /* MTL+ */
#define PIPE_CONTROL_WRITE_FLUSH (1<<12)
#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
@@ -394,6 +397,7 @@
#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0)
#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0)
#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0)
+#define MI_DO_COMPARE REG_BIT(21)
#define STATE_BASE_ADDRESS \
((0x3 << 29) | (0x0 << 27) | (0x1 << 24) | (0x1 << 16))
@@ -439,6 +443,8 @@
#define GSC_FW_LOAD GSC_INSTR(1, 0, 2)
#define HECI1_FW_LIMIT_VALID (1 << 31)
+#define GSC_HECI_CMD_PKT GSC_INSTR(0, 0, 6)
+
/*
* Used to convert any address to canonical form.
* Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c
index bcc3605158db..c43febc862dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gsc.c
+++ b/drivers/gpu/drm/i915/gt/intel_gsc.c
@@ -11,6 +11,7 @@
#include "gem/i915_gem_region.h"
#include "gt/intel_gsc.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gt_print.h"
#define GSC_BAR_LENGTH 0x00000FFC
@@ -49,13 +50,13 @@ gsc_ext_om_alloc(struct intel_gsc *gsc, struct intel_gsc_intf *intf, size_t size
I915_BO_ALLOC_CONTIGUOUS |
I915_BO_ALLOC_CPU_CLEAR);
if (IS_ERR(obj)) {
- drm_err(&gt->i915->drm, "Failed to allocate gsc memory\n");
+ gt_err(gt, "Failed to allocate gsc memory\n");
return PTR_ERR(obj);
}
err = i915_gem_object_pin_pages_unlocked(obj);
if (err) {
- drm_err(&gt->i915->drm, "Failed to pin pages for gsc memory\n");
+ gt_err(gt, "Failed to pin pages for gsc memory\n");
goto out_put;
}
@@ -102,19 +103,6 @@ static const struct gsc_def gsc_def_dg1[] = {
}
};
-static const struct gsc_def gsc_def_xehpsdv[] = {
- {
- /* HECI1 not enabled on the device. */
- },
- {
- .name = "mei-gscfi",
- .bar = DG1_GSC_HECI2_BASE,
- .bar_size = GSC_BAR_LENGTH,
- .use_polling = true,
- .slow_firmware = true,
- }
-};
-
static const struct gsc_def gsc_def_dg2[] = {
{
.name = "mei-gsc",
@@ -187,8 +175,6 @@ static void gsc_init_one(struct drm_i915_private *i915, struct intel_gsc *gsc,
if (IS_DG1(i915)) {
def = &gsc_def_dg1[intf_id];
- } else if (IS_XEHPSDV(i915)) {
- def = &gsc_def_xehpsdv[intf_id];
} else if (IS_DG2(i915)) {
def = &gsc_def_dg2[intf_id];
} else {
@@ -286,21 +272,21 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id)
int ret;
if (intf_id >= INTEL_GSC_NUM_INTERFACES) {
- drm_warn_once(&gt->i915->drm, "GSC irq: intf_id %d is out of range", intf_id);
+ gt_warn_once(gt, "GSC irq: intf_id %d is out of range", intf_id);
return;
}
if (!HAS_HECI_GSC(gt->i915)) {
- drm_warn_once(&gt->i915->drm, "GSC irq: not supported");
+ gt_warn_once(gt, "GSC irq: not supported");
return;
}
if (gt->gsc.intf[intf_id].irq < 0)
return;
- ret = generic_handle_irq(gt->gsc.intf[intf_id].irq);
+ ret = generic_handle_irq_safe(gt->gsc.intf[intf_id].irq);
if (ret)
- drm_err_ratelimited(&gt->i915->drm, "error handling GSC irq: %d\n", ret);
+ gt_err_ratelimited(gt, "error handling GSC irq: %d\n", ret);
}
void intel_gsc_irq_handler(struct intel_gt *gt, u32 iir)
diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.h b/drivers/gpu/drm/i915/gt/intel_gsc.h
index fcac1775e9c3..013c64251448 100644
--- a/drivers/gpu/drm/i915/gt/intel_gsc.h
+++ b/drivers/gpu/drm/i915/gt/intel_gsc.h
@@ -21,8 +21,11 @@ struct mei_aux_device;
/**
* struct intel_gsc - graphics security controller
*
- * @gem_obj: scratch memory GSC operations
- * @intf : gsc interface
+ * @intf: gsc interface
+ * @intf.adev: MEI aux. device for this @intf
+ * @intf.gem_obj: scratch memory GSC operations
+ * @intf.irq: IRQ for this device (%-1 for no IRQ)
+ * @intf.id: this interface's id number/index
*/
struct intel_gsc {
struct intel_gsc_intf {
@@ -33,7 +36,7 @@ struct intel_gsc {
} intf[INTEL_GSC_NUM_INTERFACES];
};
-void intel_gsc_init(struct intel_gsc *gsc, struct drm_i915_private *dev_priv);
+void intel_gsc_init(struct intel_gsc *gsc, struct drm_i915_private *i915);
void intel_gsc_fini(struct intel_gsc *gsc);
void intel_gsc_irq_handler(struct intel_gt *gt, u32 iir);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index f0dbfc434e07..3d3b1ba76e2b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -4,7 +4,7 @@
*/
#include <drm/drm_managed.h>
-#include <drm/intel-gtt.h>
+#include <drm/intel/intel-gtt.h>
#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
@@ -28,12 +28,12 @@
#include "intel_migrate.h"
#include "intel_mocs.h"
#include "intel_pci_config.h"
-#include "intel_pm.h"
#include "intel_rc6.h"
#include "intel_renderstate.h"
#include "intel_rps.h"
#include "intel_sa_media.h"
#include "intel_gt_sysfs.h"
+#include "intel_tlb.h"
#include "intel_uncore.h"
#include "shmem_utils.h"
@@ -51,8 +51,7 @@ void intel_gt_common_init_early(struct intel_gt *gt)
intel_gt_init_reset(gt);
intel_gt_init_requests(gt);
intel_gt_init_timelines(gt);
- mutex_init(&gt->tlb.invalidate_lock);
- seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
+ intel_gt_init_tlb(gt);
intel_gt_pm_init_early(gt);
intel_wopcm_init_early(&gt->wopcm);
@@ -63,7 +62,13 @@ void intel_gt_common_init_early(struct intel_gt *gt)
/* Preliminary initialization of Tile 0 */
int intel_root_gt_init_early(struct drm_i915_private *i915)
{
- struct intel_gt *gt = to_gt(i915);
+ struct intel_gt *gt;
+
+ gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL);
+ if (!gt)
+ return -ENOMEM;
+
+ i915->gt[0] = gt;
gt->i915 = i915;
gt->uncore = &i915->uncore;
@@ -180,7 +185,7 @@ int intel_gt_init_hw(struct intel_gt *gt)
if (IS_HASWELL(i915))
intel_uncore_write(uncore,
HSW_MI_PREDICATE_RESULT_2,
- IS_HSW_GT3(i915) ?
+ INTEL_INFO(i915)->gt == 3 ?
LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
/* Apply the GT workarounds... */
@@ -263,10 +268,21 @@ intel_gt_clear_error_registers(struct intel_gt *gt,
I915_MASTER_ERROR_INTERRUPT);
}
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ /*
+ * For the media GT, this ring fault register is not replicated,
+ * so don't do multicast/replicated register read/write operation on it.
+ */
+ if (MEDIA_VER(i915) >= 13 && gt->type == GT_MEDIA) {
+ intel_uncore_rmw(uncore, XELPMP_RING_FAULT_REG,
+ RING_FAULT_VALID, 0);
+ intel_uncore_posting_read(uncore,
+ XELPMP_RING_FAULT_REG);
+
+ } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) {
intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG,
RING_FAULT_VALID, 0);
intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
+
} else if (GRAPHICS_VER(i915) >= 12) {
intel_uncore_rmw(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID, 0);
intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
@@ -286,25 +302,48 @@ static void gen6_check_faults(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- u32 fault;
for_each_engine(engine, gt, id) {
+ u32 fault;
+
fault = GEN6_RING_FAULT_REG_READ(engine);
+
if (fault & RING_FAULT_VALID) {
gt_dbg(gt, "Unexpected fault\n"
- "\tAddr: 0x%08lx\n"
+ "\tAddr: 0x%08x\n"
"\tAddress space: %s\n"
"\tSource ID: %d\n"
"\tType: %d\n",
- fault & PAGE_MASK,
+ fault & RING_FAULT_VADDR_MASK,
fault & RING_FAULT_GTTSEL_MASK ?
"GGTT" : "PPGTT",
- RING_FAULT_SRCID(fault),
- RING_FAULT_FAULT_TYPE(fault));
+ REG_FIELD_GET(RING_FAULT_SRCID_MASK, fault),
+ REG_FIELD_GET(RING_FAULT_FAULT_TYPE_MASK, fault));
}
}
}
+static void gen8_report_fault(struct intel_gt *gt, u32 fault,
+ u32 fault_data0, u32 fault_data1)
+{
+ u64 fault_addr;
+
+ fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
+ ((u64)fault_data0 << 12);
+
+ gt_dbg(gt, "Unexpected fault\n"
+ "\tAddr: 0x%08x_%08x\n"
+ "\tAddress space: %s\n"
+ "\tEngine ID: %d\n"
+ "\tSource ID: %d\n"
+ "\tType: %d\n",
+ upper_32_bits(fault_addr), lower_32_bits(fault_addr),
+ fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
+ REG_FIELD_GET(RING_FAULT_ENGINE_ID_MASK, fault),
+ REG_FIELD_GET(RING_FAULT_SRCID_MASK, fault),
+ REG_FIELD_GET(RING_FAULT_FAULT_TYPE_MASK, fault));
+}
+
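For the factored-out helper above: FAULT_TLB_DATA0 carries VA[43:12] and the FAULT_VA_HIGH_BITS of FAULT_TLB_DATA1 carry VA[47:44]. A worked example of the reassembly:

	/* fault_data0 = 0x00abcdef, fault_data1 & FAULT_VA_HIGH_BITS = 0x3:
	 *   fault_addr = (0x3ULL << 44) | (0xabcdefULL << 12) = 0x300abcdef000,
	 *   logged as "Addr: 0x0000300a_bcdef000".
	 */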
static void xehp_check_faults(struct intel_gt *gt)
{
u32 fault;
@@ -317,28 +356,10 @@ static void xehp_check_faults(struct intel_gt *gt)
* toward the primary instance.
*/
fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
- if (fault & RING_FAULT_VALID) {
- u32 fault_data0, fault_data1;
- u64 fault_addr;
-
- fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0);
- fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1);
-
- fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
- ((u64)fault_data0 << 12);
-
- gt_dbg(gt, "Unexpected fault\n"
- "\tAddr: 0x%08x_%08x\n"
- "\tAddress space: %s\n"
- "\tEngine ID: %d\n"
- "\tSource ID: %d\n"
- "\tType: %d\n",
- upper_32_bits(fault_addr), lower_32_bits(fault_addr),
- fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
- GEN8_RING_FAULT_ENGINE_ID(fault),
- RING_FAULT_SRCID(fault),
- RING_FAULT_FAULT_TYPE(fault));
- }
+ if (fault & RING_FAULT_VALID)
+ gen8_report_fault(gt, fault,
+ intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0),
+ intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1));
}
static void gen8_check_faults(struct intel_gt *gt)
@@ -358,28 +379,10 @@ static void gen8_check_faults(struct intel_gt *gt)
}
fault = intel_uncore_read(uncore, fault_reg);
- if (fault & RING_FAULT_VALID) {
- u32 fault_data0, fault_data1;
- u64 fault_addr;
-
- fault_data0 = intel_uncore_read(uncore, fault_data0_reg);
- fault_data1 = intel_uncore_read(uncore, fault_data1_reg);
-
- fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
- ((u64)fault_data0 << 12);
-
- gt_dbg(gt, "Unexpected fault\n"
- "\tAddr: 0x%08x_%08x\n"
- "\tAddress space: %s\n"
- "\tEngine ID: %d\n"
- "\tSource ID: %d\n"
- "\tType: %d\n",
- upper_32_bits(fault_addr), lower_32_bits(fault_addr),
- fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
- GEN8_RING_FAULT_ENGINE_ID(fault),
- RING_FAULT_SRCID(fault),
- RING_FAULT_FAULT_TYPE(fault));
- }
+ if (fault & RING_FAULT_VALID)
+ gen8_report_fault(gt, fault,
+ intel_uncore_read(uncore, fault_data0_reg),
+ intel_uncore_read(uncore, fault_data1_reg));
}
void intel_gt_check_and_clear_faults(struct intel_gt *gt)
@@ -387,7 +390,7 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915;
/* From GEN8 onwards we only have one 'All Engine Fault Register' */
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
xehp_check_faults(gt);
else if (GRAPHICS_VER(i915) >= 8)
gen8_check_faults(gt);
@@ -435,7 +438,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
spin_lock_irqsave(&uncore->lock, flags);
intel_uncore_posting_read_fw(uncore,
- RING_HEAD(RENDER_RING_BASE));
+ RING_TAIL(RENDER_RING_BASE));
spin_unlock_irqrestore(&uncore->lock, flags);
}
}
@@ -467,7 +470,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
obj = i915_gem_object_create_lmem(i915, size,
I915_BO_ALLOC_VOLATILE |
I915_BO_ALLOC_GPU_ONLY);
- if (IS_ERR(obj))
+ if (IS_ERR(obj) && !IS_METEORLAKE(i915)) /* Wa_22018444074 */
obj = i915_gem_object_create_stolen(i915, size);
if (IS_ERR(obj))
obj = i915_gem_object_create_internal(i915, size);
@@ -737,12 +740,12 @@ int intel_gt_init(struct intel_gt *gt)
if (err)
goto err_gt;
- intel_uc_init_late(&gt->uc);
-
err = i915_inject_probe_error(gt->i915, -EIO);
if (err)
goto err_gt;
+ intel_uc_init_late(&gt->uc);
+
intel_migrate_init(&gt->migrate, gt);
goto out_fw;
@@ -785,6 +788,29 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
intel_gsc_fini(&gt->gsc);
/*
+ * If we unload the driver and wedge before the GSC worker is complete,
+ * the worker will hit an error on its submission to the GSC engine and
+ * then exit. This is hard to hit for a user, but it is reproducible
+ * when selftests are skipped. The error is handled gracefully by the
+ * worker, so there are no functional issues, but we still end up with
+ * an error message in dmesg, which is something we want to avoid as
+ * this is a supported scenario. We could modify the worker to better
+ * handle a wedging occurring during its execution, but that gets
+ * complicated for a couple of reasons:
+ * - We do want the error on runtime wedging, because there are
+ * implications for subsystems outside of GT (e.g., PXP, HDCP); it's
+ * only the error on driver unload that we want to silence.
+ * - The worker is responsible for multiple submissions (GSC FW load,
+ * HuC auth, SW proxy), so all of those will have to be adapted to
+ * handle the wedged_on_fini scenario.
+ * Therefore, it's much simpler to just wait for the worker to be done
+ * before wedging on driver removal, also considering that the worker
+ * will likely already be idle in the great majority of non-selftest
+ * scenarios.
+ */
+ intel_gsc_uc_flush_work(&gt->uc.gsc);
+
+ /*
* Upon unregistering the device to prevent any new users, cancel
* all in-flight requests so that we can quickly unbind the active
* resources.
@@ -793,7 +819,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
/* Scrub all HW state upon release */
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
- __intel_gt_reset(gt, ALL_ENGINES);
+ intel_gt_reset_all_engines(gt);
}
void intel_gt_driver_release(struct intel_gt *gt)
@@ -824,7 +850,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
intel_gt_fini_requests(gt);
intel_gt_fini_reset(gt);
intel_gt_fini_timelines(gt);
- mutex_destroy(&gt->tlb.invalidate_lock);
+ intel_gt_fini_tlb(gt);
intel_engines_free(gt);
}
}
@@ -865,7 +891,7 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
int intel_gt_probe_all(struct drm_i915_private *i915)
{
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
- struct intel_gt *gt = &i915->gt0;
+ struct intel_gt *gt = to_gt(i915);
const struct intel_gt_definition *gtdef;
phys_addr_t phys_addr;
unsigned int mmio_bar;
@@ -882,15 +908,13 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
*/
gt->i915 = i915;
gt->name = "Primary GT";
- gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
+ gt->info.engine_mask = INTEL_INFO(i915)->platform_engine_mask;
gt_dbg(gt, "Setting up %s\n", gt->name);
ret = intel_gt_tile_setup(gt, phys_addr);
if (ret)
return ret;
- i915->gt[0] = gt;
-
if (!HAS_EXTRA_GT_LIST(i915))
return 0;
@@ -945,8 +969,6 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
err:
i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret);
- intel_gt_release_all(i915);
-
return ret;
}
@@ -965,15 +987,6 @@ int intel_gt_tiles_init(struct drm_i915_private *i915)
return 0;
}
-void intel_gt_release_all(struct drm_i915_private *i915)
-{
- struct intel_gt *gt;
- unsigned int id;
-
- for_each_gt(gt, i915, id)
- i915->gt[id] = NULL;
-}
-
void intel_gt_info_print(const struct intel_gt_info *info,
struct drm_printer *p)
{
@@ -982,226 +995,78 @@ void intel_gt_info_print(const struct intel_gt_info *info,
intel_sseu_dump(&info->sseu, p);
}
-struct reg_and_bit {
- union {
- i915_reg_t reg;
- i915_mcr_reg_t mcr_reg;
- };
- u32 bit;
-};
-
-static struct reg_and_bit
-get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
- const i915_reg_t *regs, const unsigned int num)
+enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
+ struct drm_i915_gem_object *obj,
+ bool always_coherent)
{
- const unsigned int class = engine->class;
- struct reg_and_bit rb = { };
-
- if (gt_WARN_ON_ONCE(engine->gt, class >= num || !regs[class].reg))
- return rb;
-
- rb.reg = regs[class];
- if (gen8 && class == VIDEO_DECODE_CLASS)
- rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
+ /*
+ * Wa_22016122933: always return I915_MAP_WC for Media
+ * version 13.0 when the object is on the Media GT
+ */
+ if (i915_gem_object_is_lmem(obj) || intel_gt_needs_wa_22016122933(gt))
+ return I915_MAP_WC;
+ if (HAS_LLC(gt->i915) || always_coherent)
+ return I915_MAP_WB;
else
- rb.bit = engine->instance;
-
- rb.bit = BIT(rb.bit);
-
- return rb;
+ return I915_MAP_WC;
}
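A sketch of the expected caller pattern for the helper above (illustrative; real call sites differ in locking and error handling): pick the coherent map type for this GT, then pin-map the object with it.

	enum i915_map_type type;
	void *vaddr;

	type = intel_gt_coherent_map_type(gt, obj, true /* always_coherent */);
	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);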
-/*
- * HW architecture suggest typical invalidation time at 40us,
- * with pessimistic cases up to 100us and a recommendation to
- * cap at 1ms. We go a bit higher just in case.
- */
-#define TLB_INVAL_TIMEOUT_US 100
-#define TLB_INVAL_TIMEOUT_MS 4
-
-/*
- * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
- * but are now considered MCR registers. Since they exist within a GAM range,
- * the primary instance of the register rolls up the status from each unit.
- */
-static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt)
{
- if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
- return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0,
- TLB_INVAL_TIMEOUT_US,
- TLB_INVAL_TIMEOUT_MS);
- else
- return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
- TLB_INVAL_TIMEOUT_US,
- TLB_INVAL_TIMEOUT_MS,
- NULL);
+ /* Wa_16018031267, Wa_16018063123 */
+ return IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 55), IP_VER(12, 71));
}
-static void mmio_invalidate_full(struct intel_gt *gt)
+bool intel_gt_needs_wa_22016122933(struct intel_gt *gt)
{
- static const i915_reg_t gen8_regs[] = {
- [RENDER_CLASS] = GEN8_RTCR,
- [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */
- [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR,
- [COPY_ENGINE_CLASS] = GEN8_BTCR,
- };
- static const i915_reg_t gen12_regs[] = {
- [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR,
- [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR,
- [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR,
- [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
- [COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR,
- };
- static const i915_mcr_reg_t xehp_regs[] = {
- [RENDER_CLASS] = XEHP_GFX_TLB_INV_CR,
- [VIDEO_DECODE_CLASS] = XEHP_VD_TLB_INV_CR,
- [VIDEO_ENHANCEMENT_CLASS] = XEHP_VE_TLB_INV_CR,
- [COPY_ENGINE_CLASS] = XEHP_BLT_TLB_INV_CR,
- [COMPUTE_CLASS] = XEHP_COMPCTX_TLB_INV_CR,
- };
- struct drm_i915_private *i915 = gt->i915;
- struct intel_uncore *uncore = gt->uncore;
- struct intel_engine_cs *engine;
- intel_engine_mask_t awake, tmp;
- enum intel_engine_id id;
- const i915_reg_t *regs;
- unsigned int num = 0;
- unsigned long flags;
-
- /*
- * New platforms should not be added with catch-all-newer (>=)
- * condition so that any later platform added triggers the below warning
- * and in turn mandates a human cross-check of whether the invalidation
- * flows have compatible semantics.
- *
- * For instance with the 11.00 -> 12.00 transition three out of five
- * respective engine registers were moved to masked type. Then after the
- * 12.00 -> 12.50 transition multi cast handling is required too.
- */
-
- if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
- GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
- regs = NULL;
- num = ARRAY_SIZE(xehp_regs);
- } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
- GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
- regs = gen12_regs;
- num = ARRAY_SIZE(gen12_regs);
- } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
- regs = gen8_regs;
- num = ARRAY_SIZE(gen8_regs);
- } else if (GRAPHICS_VER(i915) < 8) {
- return;
- }
-
- if (gt_WARN_ONCE(gt, !num, "Platform does not implement TLB invalidation!"))
- return;
-
- intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
-
- intel_gt_mcr_lock(gt, &flags);
- spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
-
- awake = 0;
- for_each_engine(engine, gt, id) {
- struct reg_and_bit rb;
-
- if (!intel_engine_pm_is_awake(engine))
- continue;
-
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
- u32 val = BIT(engine->instance);
-
- if (engine->class == VIDEO_DECODE_CLASS ||
- engine->class == VIDEO_ENHANCEMENT_CLASS ||
- engine->class == COMPUTE_CLASS)
- val = _MASKED_BIT_ENABLE(val);
- intel_gt_mcr_multicast_write_fw(gt,
- xehp_regs[engine->class],
- val);
- } else {
- rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
- if (!i915_mmio_reg_offset(rb.reg))
- continue;
-
- if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS ||
- engine->class == VIDEO_ENHANCEMENT_CLASS ||
- engine->class == COMPUTE_CLASS))
- rb.bit = _MASKED_BIT_ENABLE(rb.bit);
-
- intel_uncore_write_fw(uncore, rb.reg, rb.bit);
- }
- awake |= engine->mask;
- }
-
- GT_TRACE(gt, "invalidated engines %08x\n", awake);
-
- /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
- if (awake &&
- (IS_TIGERLAKE(i915) ||
- IS_DG1(i915) ||
- IS_ROCKETLAKE(i915) ||
- IS_ALDERLAKE_S(i915) ||
- IS_ALDERLAKE_P(i915)))
- intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
-
- spin_unlock(&uncore->lock);
- intel_gt_mcr_unlock(gt, flags);
-
- for_each_engine_masked(engine, gt, awake, tmp) {
- struct reg_and_bit rb;
-
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
- rb.mcr_reg = xehp_regs[engine->class];
- rb.bit = BIT(engine->instance);
- } else {
- rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
- }
-
- if (wait_for_invalidate(gt, rb))
- gt_err_ratelimited(gt, "%s TLB invalidation did not complete in %ums!\n",
- engine->name, TLB_INVAL_TIMEOUT_MS);
- }
-
- /*
- * Use delayed put since a) we mostly expect a flurry of TLB
- * invalidations so it is good to avoid paying the forcewake cost and
- * b) it works around a bug in Icelake which cannot cope with too rapid
- * transitions.
- */
- intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
+ return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA;
}
-static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
+static void __intel_gt_bind_context_set_ready(struct intel_gt *gt, bool ready)
{
- u32 cur = intel_gt_tlb_seqno(gt);
+ struct intel_engine_cs *engine = gt->engine[BCS0];
- /* Only skip if a *full* TLB invalidate barrier has passed */
- return (s32)(cur - ALIGN(seqno, 2)) > 0;
+ if (engine && engine->bind_context)
+ engine->bind_context_ready = ready;
}
-void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
+/**
+ * intel_gt_bind_context_set_ready - Set the context binding as ready
+ * @gt: GT structure
+ *
+ * This function marks the binder context as ready.
+ */
+void intel_gt_bind_context_set_ready(struct intel_gt *gt)
{
- intel_wakeref_t wakeref;
-
- if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
- return;
+ __intel_gt_bind_context_set_ready(gt, true);
+}
- if (intel_gt_is_wedged(gt))
- return;
+/**
+ * intel_gt_bind_context_set_unready - Set the context binding as not ready
+ * @gt: GT structure
+ *
+ * This function marks the binder context as not ready.
+ */
- if (tlb_seqno_passed(gt, seqno))
- return;
+void intel_gt_bind_context_set_unready(struct intel_gt *gt)
+{
+ __intel_gt_bind_context_set_ready(gt, false);
+}
- with_intel_gt_pm_if_awake(gt, wakeref) {
- mutex_lock(&gt->tlb.invalidate_lock);
- if (tlb_seqno_passed(gt, seqno))
- goto unlock;
+/**
+ * intel_gt_is_bind_context_ready - Check if context binding is ready
+ * @gt: GT structure
+ *
+ * This function returns binder context's ready status.
+ */
+bool intel_gt_is_bind_context_ready(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine = gt->engine[BCS0];
- mmio_invalidate_full(gt);
+ if (engine)
+ return engine->bind_context_ready;
- write_seqcount_invalidate(&gt->tlb.seqno);
-unlock:
- mutex_unlock(&gt->tlb.invalidate_lock);
- }
+ return false;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index d2f4fbde5f9f..998ca029b73a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -13,6 +13,69 @@
struct drm_i915_private;
struct drm_printer;
+/*
+ * Check that the GT is a graphics GT and has an IP version within the
+ * specified range (inclusive).
+ */
+#define IS_GFX_GT_IP_RANGE(gt, from, until) ( \
+ BUILD_BUG_ON_ZERO((from) < IP_VER(2, 0)) + \
+ BUILD_BUG_ON_ZERO((until) < (from)) + \
+ ((gt)->type != GT_MEDIA && \
+ GRAPHICS_VER_FULL((gt)->i915) >= (from) && \
+ GRAPHICS_VER_FULL((gt)->i915) <= (until)))
+
+/*
+ * Check that the GT is a media GT and has an IP version within the
+ * specified range (inclusive).
+ *
+ * Only usable on platforms with a standalone media design (i.e., IP version 13
+ * and higher).
+ */
+#define IS_MEDIA_GT_IP_RANGE(gt, from, until) ( \
+ BUILD_BUG_ON_ZERO((from) < IP_VER(13, 0)) + \
+ BUILD_BUG_ON_ZERO((until) < (from)) + \
+ ((gt) && (gt)->type == GT_MEDIA && \
+ MEDIA_VER_FULL((gt)->i915) >= (from) && \
+ MEDIA_VER_FULL((gt)->i915) <= (until)))
+
+/*
+ * Check that the GT is a graphics GT with a specific IP version and has
+ * a stepping in the range [from, until). The lower stepping bound is
+ * inclusive, the upper bound is exclusive. The most common use-case of this
+ * macro is for checking bounds for workarounds, which usually have a stepping
+ * ("from") at which the hardware issue is first present and another stepping
+ * ("until") at which a hardware fix is present and the software workaround is
+ * no longer necessary. E.g.,
+ *
+ * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0)
+ * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B1, STEP_FOREVER)
+ *
+ * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper
+ * stepping bound for the specified IP version.
+ */
+#define IS_GFX_GT_IP_STEP(gt, ipver, from, until) ( \
+ BUILD_BUG_ON_ZERO((until) <= (from)) + \
+ (IS_GFX_GT_IP_RANGE((gt), (ipver), (ipver)) && \
+ IS_GRAPHICS_STEP((gt)->i915, (from), (until))))
+
+/*
+ * Check that the GT is a media GT with a specific IP version and has
+ * a stepping in the range [from, until). The lower stepping bound is
+ * inclusive, the upper bound is exclusive. The most common use-case of this
+ * macro is for checking bounds for workarounds, which usually have a stepping
+ * ("from") at which the hardware issue is first present and another stepping
+ * ("until") at which a hardware fix is present and the software workaround is
+ * no longer necessary. "STEP_FOREVER" can be passed as "until" for
+ * workarounds that have no upper stepping bound for the specified IP version.
+ *
+ * This macro may only be used to match on platforms that have a standalone
+ * media design (i.e., media version 13 or higher).
+ */
+#define IS_MEDIA_GT_IP_STEP(gt, ipver, from, until) ( \
+ BUILD_BUG_ON_ZERO((until) <= (from)) + \
+ (IS_MEDIA_GT_IP_RANGE((gt), (ipver), (ipver)) && \
+ IS_MEDIA_STEP((gt)->i915, (from), (until))))
+
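A usage sketch for the helpers above, with illustrative version/stepping bounds and hypothetical apply_*() callees (not tied to any real workaround):

	/* graphics IP 12.70, steppings [A0, B0) only */
	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0))
		apply_gfx_wa(gt);

	/* any media IP 13.00 GT, all steppings */
	if (IS_MEDIA_GT_IP_RANGE(gt, IP_VER(13, 0), IP_VER(13, 0)))
		apply_media_wa(gt);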
#define GT_TRACE(gt, fmt, ...) do { \
const struct intel_gt *gt__ __maybe_unused = (gt); \
GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \
@@ -24,6 +87,13 @@ static inline bool gt_is_root(struct intel_gt *gt)
return !gt->info.id;
}
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt);
+bool intel_gt_needs_wa_22016122933(struct intel_gt *gt);
+
+#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
+ intel_gt_needs_wa_16018031267(engine->gt) && \
+ engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
+
static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
{
return container_of(uc, struct intel_gt, uc);
@@ -49,6 +119,16 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc)
return container_of(gsc, struct intel_gt, gsc);
}
+static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
+{
+ return guc_to_gt(guc)->i915;
+}
+
+static inline struct intel_guc *gt_to_guc(struct intel_gt *gt)
+{
+ return &gt->uc.guc;
+}
+
void intel_gt_common_init_early(struct intel_gt *gt);
int intel_root_gt_init_early(struct drm_i915_private *i915);
int intel_gt_assign_ggtt(struct intel_gt *gt);
@@ -94,7 +174,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
int intel_gt_probe_all(struct drm_i915_private *i915);
int intel_gt_tiles_init(struct drm_i915_private *i915);
-void intel_gt_release_all(struct drm_i915_private *i915);
#define for_each_gt(gt__, i915__, id__) \
for ((id__) = 0; \
@@ -102,21 +181,36 @@ void intel_gt_release_all(struct drm_i915_private *i915);
(id__)++) \
for_each_if(((gt__) = (i915__)->gt[(id__)]))
+/* Simple iterator over all initialised engines */
+#define for_each_engine(engine__, gt__, id__) \
+ for ((id__) = 0; \
+ (id__) < I915_NUM_ENGINES; \
+ (id__)++) \
+ for_each_if ((engine__) = (gt__)->engine[(id__)])
+
+/* Iterator over subset of engines selected by mask */
+#define for_each_engine_masked(engine__, gt__, mask__, tmp__) \
+ for ((tmp__) = (mask__) & (gt__)->info.engine_mask; \
+ (tmp__) ? \
+ ((engine__) = (gt__)->engine[__mask_next_bit(tmp__)]), 1 : \
+ 0;)
+
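Usage sketch for the two iterators above; for_each_engine_masked() consumes tmp__ as scratch state, so pass a dedicated variable (wake_engine() and wait_for_engine() are hypothetical):

	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp, awake = gt->info.engine_mask;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id)		/* every initialised engine */
		wake_engine(engine);

	for_each_engine_masked(engine, gt, awake, tmp)	/* only engines in 'awake' */
		wait_for_engine(engine);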
void intel_gt_info_print(const struct intel_gt_info *info,
struct drm_printer *p);
void intel_gt_watchdog_work(struct work_struct *work);
-static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
-{
- return seqprop_sequence(&gt->tlb.seqno);
-}
+enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
+ struct drm_i915_gem_object *obj,
+ bool always_coherent);
+
+void intel_gt_bind_context_set_ready(struct intel_gt *gt);
+void intel_gt_bind_context_set_unready(struct intel_gt *gt);
+bool intel_gt_is_bind_context_ready(struct intel_gt *gt);
-static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
+static inline void intel_gt_set_wedged_async(struct intel_gt *gt)
{
- return intel_gt_tlb_seqno(gt) | 1;
+ queue_work(system_highpri_wq, &gt->wedge);
}
-void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno);
-
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index cadfd85785b1..c7befc5c20d0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -7,6 +7,7 @@
#include "gem/i915_gem_object.h"
#include "i915_drv.h"
+#include "i915_list_util.h"
#include "intel_engine_pm.h"
#include "intel_gt_buffer_pool.h"
@@ -88,10 +89,11 @@ static void pool_free_work(struct work_struct *wrk)
{
struct intel_gt_buffer_pool *pool =
container_of(wrk, typeof(*pool), work.work);
+ struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool);
if (pool_free_older_than(pool, HZ))
- schedule_delayed_work(&pool->work,
- round_jiffies_up_relative(HZ));
+ queue_delayed_work(gt->i915->unordered_wq, &pool->work,
+ round_jiffies_up_relative(HZ));
}
static void pool_retire(struct i915_active *ref)
@@ -99,6 +101,7 @@ static void pool_retire(struct i915_active *ref)
struct intel_gt_buffer_pool_node *node =
container_of(ref, typeof(*node), active);
struct intel_gt_buffer_pool *pool = node->pool;
+ struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool);
struct list_head *list = bucket_for_size(pool, node->obj->base.size);
unsigned long flags;
@@ -116,8 +119,8 @@ static void pool_retire(struct i915_active *ref)
WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */
spin_unlock_irqrestore(&pool->lock, flags);
- schedule_delayed_work(&pool->work,
- round_jiffies_up_relative(HZ));
+ queue_delayed_work(gt->i915->unordered_wq, &pool->work,
+ round_jiffies_up_relative(HZ));
}
void intel_gt_buffer_pool_mark_used(struct intel_gt_buffer_pool_node *node)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
new file mode 100644
index 000000000000..3c62a44e9106
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
+#include "intel_gt_regs.h"
+
+unsigned int intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+ int cslice;
+ u32 mode = 0;
+ int first_ccs = __ffs(CCS_MASK(gt));
+
+ if (!IS_DG2(gt->i915))
+ return 0;
+
+ /* Build the value for the fixed CCS load balancing */
+ for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
+ if (gt->ccs.cslices & BIT(cslice))
+ /*
+ * If available, assign the cslice
+ * to the first available engine...
+ */
+ mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
+
+ else
+ /*
+ * ... otherwise, mark the cslice as
+ * unavailable if no CCS dispatches here
+ */
+ mode |= XEHP_CCS_MODE_CSLICE(cslice,
+ XEHP_CCS_MODE_CSLICE_MASK);
+ }
+
+ return mode;
+}
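The helper above computes DG2's fixed CCS load-balancing value: every available compute slice is routed to the first present CCS engine and the remainder are marked unavailable. A sketch of how the returned value might be consumed; the real caller applies it via the GT workaround machinery rather than a bare register write, so treat this as illustrative:

	u32 mode = intel_gt_apply_ccs_mode(gt);

	if (mode)
		intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);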
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
new file mode 100644
index 000000000000..55547f2ff426
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CCS_MODE_H__
+#define __INTEL_GT_CCS_MODE_H__
+
+struct intel_gt;
+
+unsigned int intel_gt_apply_ccs_mode(struct intel_gt *gt);
+
+#endif /* __INTEL_GT_CCS_MODE_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
index 7c9be4fd1c8c..c90b35881a26 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
@@ -9,6 +9,7 @@
#include "intel_gt_clock_utils.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
+#include "soc/intel_dram.h"
static u32 read_reference_ts_freq(struct intel_uncore *uncore)
{
@@ -34,9 +35,7 @@ static u32 gen11_get_crystal_clock_freq(struct intel_uncore *uncore,
u32 f24_mhz = 24000000;
u32 f25_mhz = 25000000;
u32 f38_4_mhz = 38400000;
- u32 crystal_clock =
- (rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
- GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
+ u32 crystal_clock = rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK;
switch (crystal_clock) {
case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
@@ -79,8 +78,7 @@ static u32 gen11_read_clock_frequency(struct intel_uncore *uncore)
* register increments from this frequency (it might
* increment only every few clock cycle).
*/
- freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
- GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
+ freq >>= 3 - REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0);
}
return freq;
@@ -101,8 +99,7 @@ static u32 gen9_read_clock_frequency(struct intel_uncore *uncore)
* register increments from this frequency (it might
* increment only every few clock cycle).
*/
- freq >>= 3 - ((ctc_reg & CTC_SHIFT_PARAMETER_MASK) >>
- CTC_SHIFT_PARAMETER_SHIFT);
+ freq >>= 3 - REG_FIELD_GET(CTC_SHIFT_PARAMETER_MASK, ctc_reg);
}
return freq;
@@ -151,7 +148,7 @@ static u32 gen4_read_clock_frequency(struct intel_uncore *uncore)
*
* Testing on actual hardware has shown there is no /16.
*/
- return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000;
+ return DIV_ROUND_CLOSEST(intel_fsb_freq(uncore->i915), 4) * 1000;
}
static u32 read_clock_frequency(struct intel_uncore *uncore)
@@ -208,7 +205,7 @@ static u64 div_u64_roundup(u64 nom, u32 den)
u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count)
{
- return div_u64_roundup(count * NSEC_PER_SEC, gt->clock_frequency);
+ return mul_u64_u32_div(count, NSEC_PER_SEC, gt->clock_frequency);
}
u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count)
@@ -218,7 +215,7 @@ u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count)
u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns)
{
- return div_u64_roundup(gt->clock_frequency * ns, NSEC_PER_SEC);
+ return mul_u64_u32_div(ns, gt->clock_frequency, NSEC_PER_SEC);
}
u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns)
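The mul_u64_u32_div() conversions above are straight ratio arithmetic with a wide intermediate product: the open-coded count * NSEC_PER_SEC overflows u64 once count exceeds roughly 18.4 billion ticks, while mul_u64_u32_div() does not. A worked example, assuming a 19.2 MHz GT timestamp clock:

	/* 19200 ticks at 19.2 MHz: 19200 * 1e9 / 19200000 = 1000000 ns (1 ms) */
	u64 ns = mul_u64_u32_div(19200, NSEC_PER_SEC, 19200000);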
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
index 5fc2df01aa0d..bd9abbd6d3d4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
@@ -5,6 +5,8 @@
#include <linux/debugfs.h>
+#include <drm/drm_print.h>
+
#include "i915_drv.h"
#include "intel_gt.h"
#include "intel_gt_debugfs.h"
@@ -82,12 +84,15 @@ static void gt_debugfs_register(struct intel_gt *gt, struct dentry *root)
void intel_gt_debugfs_register(struct intel_gt *gt)
{
+ struct dentry *debugfs_root = gt->i915->drm.debugfs_root;
struct dentry *root;
+ char gtname[4];
- if (!gt->i915->drm.primary->debugfs_root)
+ if (!debugfs_root)
return;
- root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root);
+ snprintf(gtname, sizeof(gtname), "gt%u", gt->info.id);
+ root = debugfs_create_dir(gtname, debugfs_root);
if (IS_ERR(root))
return;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_defines.h b/drivers/gpu/drm/i915/gt/intel_gt_defines.h
new file mode 100644
index 000000000000..5017788bac8f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_defines.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_DEFINES__
+#define __INTEL_GT_DEFINES__
+
+#define I915_MAX_GT 2
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c
index 8f9b874fdc9c..3aa1d014c14d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c
@@ -6,8 +6,8 @@
#include <drm/drm_print.h>
-#include "i915_drv.h" /* for_each_engine! */
#include "intel_engine.h"
+#include "intel_gt.h"
#include "intel_gt_debugfs.h"
#include "intel_gt_engines_debugfs.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 1b25a6039152..75e802e10be2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -7,6 +7,7 @@
#include "i915_drv.h"
#include "i915_irq.h"
+#include "i915_reg.h"
#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
@@ -15,6 +16,7 @@
#include "intel_uncore.h"
#include "intel_rps.h"
#include "pxp/intel_pxp_irq.h"
+#include "uc/intel_gsc_proxy.h"
static void guc_irq_handler(struct intel_guc *guc, u16 iir)
{
@@ -29,7 +31,7 @@ static u32
gen11_gt_engine_identity(struct intel_gt *gt,
const unsigned int bank, const unsigned int bit)
{
- void __iomem * const regs = gt->uncore->regs;
+ void __iomem * const regs = intel_uncore_regs(gt->uncore);
u32 timeout_ts;
u32 ident;
@@ -66,9 +68,9 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance,
struct intel_gt *media_gt = gt->i915->media_gt;
if (instance == OTHER_GUC_INSTANCE)
- return guc_irq_handler(&gt->uc.guc, iir);
+ return guc_irq_handler(gt_to_guc(gt), iir);
if (instance == OTHER_MEDIA_GUC_INSTANCE && media_gt)
- return guc_irq_handler(&media_gt->uc.guc, iir);
+ return guc_irq_handler(gt_to_guc(media_gt), iir);
if (instance == OTHER_GTPM_INSTANCE)
return gen11_rps_irq_handler(&gt->rps, iir);
@@ -81,6 +83,9 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance,
if (instance == OTHER_GSC_INSTANCE)
return intel_gsc_irq_handler(gt, iir);
+ if (instance == OTHER_GSC_HECI_2_INSTANCE)
+ return intel_gsc_proxy_irq_handler(&gt->uc.gsc, iir);
+
WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
instance, iir);
}
@@ -100,7 +105,10 @@ static struct intel_gt *pick_gt(struct intel_gt *gt, u8 class, u8 instance)
case VIDEO_ENHANCEMENT_CLASS:
return media_gt;
case OTHER_CLASS:
- if (instance == OTHER_GSC_INSTANCE && HAS_ENGINE(media_gt, GSC0))
+ if (instance == OTHER_GSC_HECI_2_INSTANCE)
+ return media_gt;
+ if ((instance == OTHER_GSC_INSTANCE || instance == OTHER_KCR_INSTANCE) &&
+ HAS_ENGINE(media_gt, GSC0))
return media_gt;
fallthrough;
default:
@@ -140,7 +148,7 @@ gen11_gt_identity_handler(struct intel_gt *gt, const u32 identity)
static void
gen11_gt_bank_handler(struct intel_gt *gt, const unsigned int bank)
{
- void __iomem * const regs = gt->uncore->regs;
+ void __iomem * const regs = intel_uncore_regs(gt->uncore);
unsigned long intr_dw;
unsigned int bit;
@@ -175,7 +183,7 @@ void gen11_gt_irq_handler(struct intel_gt *gt, const u32 master_ctl)
bool gen11_gt_reset_one_iir(struct intel_gt *gt,
const unsigned int bank, const unsigned int bit)
{
- void __iomem * const regs = gt->uncore->regs;
+ void __iomem * const regs = intel_uncore_regs(gt->uncore);
u32 dw;
lockdep_assert_held(gt->irq_lock);
@@ -256,6 +264,7 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
u32 irqs = GT_RENDER_USER_INTERRUPT;
u32 guc_mask = intel_uc_wants_guc(&gt->uc) ? GUC_INTR_GUC2HOST : 0;
u32 gsc_mask = 0;
+ u32 heci_mask = 0;
u32 dmask;
u32 smask;
@@ -267,10 +276,16 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
dmask = irqs << 16 | irqs;
smask = irqs << 16;
- if (HAS_ENGINE(gt, GSC0))
+ if (HAS_ENGINE(gt, GSC0)) {
+ /*
+ * The HECI2 interrupt is enabled via the same register as the
+ * GSC interrupt, but it has its own mask register.
+ */
gsc_mask = irqs;
- else if (HAS_HECI_GSC(gt->i915))
+ heci_mask = GSC_IRQ_INTF(1); /* HECI2 IRQ for SW Proxy */
+ } else if (HAS_HECI_GSC(gt->i915)) {
gsc_mask = GSC_IRQ_INTF(0) | GSC_IRQ_INTF(1);
+ }
BUILD_BUG_ON(irqs & 0xffff0000);
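The comment above is the key to this hunk: enabling and masking are split across registers, so the HECI2 source is switched on through the shared GSC enable register but unmasked through its own mask register. The net programming on a GSC0-capable GT ends up as follows (sketch assembled from the writes later in this function):

        intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE,
                           gsc_mask | heci_mask);       /* shared enable */
        intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, ~gsc_mask);
        intel_uncore_write(uncore, GEN12_HECI2_RSVD_INTR_MASK,
                           ~REG_FIELD_PREP(ENGINE1_MASK, heci_mask));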
@@ -280,7 +295,7 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
if (CCS_MASK(gt))
intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, smask);
if (gsc_mask)
- intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, gsc_mask);
+ intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, gsc_mask | heci_mask);
/* Unmask irqs on RCS, BCS, VCS and VECS engines. */
intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask);
@@ -308,6 +323,9 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~dmask);
if (gsc_mask)
intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, ~gsc_mask);
+ if (heci_mask)
+ intel_uncore_write(uncore, GEN12_HECI2_RSVD_INTR_MASK,
+ ~REG_FIELD_PREP(ENGINE1_MASK, heci_mask));
if (guc_mask) {
/* the enable bit is common for both GTs but the masks are separate */
@@ -358,7 +376,7 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT)
gt->i915->l3_parity.which_slice |= 1 << 0;
- schedule_work(&gt->i915->l3_parity.error_work);
+ queue_work(gt->i915->unordered_wq, &gt->i915->l3_parity.error_work);
}
void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
@@ -386,7 +404,7 @@ void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
{
- void __iomem * const regs = gt->uncore->regs;
+ void __iomem * const regs = intel_uncore_regs(gt->uncore);
u32 iir;
if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
@@ -424,7 +442,7 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
iir = raw_reg_read(regs, GEN8_GT_IIR(2));
if (likely(iir)) {
gen6_rps_irq_handler(&gt->rps, iir);
- guc_irq_handler(&gt->uc.guc, iir >> 16);
+ guc_irq_handler(gt_to_guc(gt), iir >> 16);
raw_reg_write(regs, GEN8_GT_IIR(2), iir);
}
}
@@ -434,10 +452,10 @@ void gen8_gt_irq_reset(struct intel_gt *gt)
{
struct intel_uncore *uncore = gt->uncore;
- GEN8_IRQ_RESET_NDX(uncore, GT, 0);
- GEN8_IRQ_RESET_NDX(uncore, GT, 1);
- GEN8_IRQ_RESET_NDX(uncore, GT, 2);
- GEN8_IRQ_RESET_NDX(uncore, GT, 3);
+ gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(0));
+ gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(1));
+ gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(2));
+ gen2_irq_reset(uncore, GEN8_GT_IRQ_REGS(3));
}
void gen8_gt_irq_postinstall(struct intel_gt *gt)
@@ -458,14 +476,14 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
gt->pm_ier = 0x0;
gt->pm_imr = ~gt->pm_ier;
- GEN8_IRQ_INIT_NDX(uncore, GT, 0, ~gt_interrupts[0], gt_interrupts[0]);
- GEN8_IRQ_INIT_NDX(uncore, GT, 1, ~gt_interrupts[1], gt_interrupts[1]);
+ gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(0), ~gt_interrupts[0], gt_interrupts[0]);
+ gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(1), ~gt_interrupts[1], gt_interrupts[1]);
/*
* RPS interrupts will get enabled/disabled on demand when RPS itself
- * is enabled/disabled. Same wil be the case for GuC interrupts.
+ * is enabled/disabled. Same will be the case for GuC interrupts.
*/
- GEN8_IRQ_INIT_NDX(uncore, GT, 2, gt->pm_imr, gt->pm_ier);
- GEN8_IRQ_INIT_NDX(uncore, GT, 3, ~gt_interrupts[3], gt_interrupts[3]);
+ gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(2), gt->pm_imr, gt->pm_ier);
+ gen2_irq_init(uncore, GEN8_GT_IRQ_REGS(3), ~gt_interrupts[3], gt_interrupts[3]);
}
static void gen5_gt_update_irq(struct intel_gt *gt,
@@ -496,9 +514,9 @@ void gen5_gt_irq_reset(struct intel_gt *gt)
{
struct intel_uncore *uncore = gt->uncore;
- GEN3_IRQ_RESET(uncore, GT);
+ gen2_irq_reset(uncore, GT_IRQ_REGS);
if (GRAPHICS_VER(gt->i915) >= 6)
- GEN3_IRQ_RESET(uncore, GEN6_PM);
+ gen2_irq_reset(uncore, GEN6_PM_IRQ_REGS);
}
void gen5_gt_irq_postinstall(struct intel_gt *gt)
@@ -520,7 +538,7 @@ void gen5_gt_irq_postinstall(struct intel_gt *gt)
else
gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT;
- GEN3_IRQ_INIT(uncore, GT, gt->gt_imr, gt_irqs);
+ gen2_irq_init(uncore, GT_IRQ_REGS, gt->gt_imr, gt_irqs);
if (GRAPHICS_VER(gt->i915) >= 6) {
/*
@@ -533,6 +551,6 @@ void gen5_gt_irq_postinstall(struct intel_gt *gt)
}
gt->pm_imr = 0xffffffff;
- GEN3_IRQ_INIT(uncore, GEN6_PM, gt->pm_imr, pm_irqs);
+ gen2_irq_init(uncore, GEN6_PM_IRQ_REGS, gt->pm_imr, pm_irqs);
}
}
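These conversions replace the GEN3_IRQ_RESET()/GEN8_IRQ_RESET_NDX() macro zoo with gen2_irq_reset()/gen2_irq_init() helpers that take an explicit IMR/IER/IIR triplet (see the GEN6_PM_IRQ_REGS and GEN8_GT_IRQ_REGS definitions in intel_gt_regs.h). A hedged sketch of what such a reset presumably does with the triplet, mirroring the classic i915 sequence; the .imr/.ier/.iir field names are assumed from the I915_IRQ_REGS() argument order, and posting reads are elided:

        static void irq_reset_sketch(struct intel_uncore *uncore,
                                     struct i915_irq_regs regs)
        {
                intel_uncore_write(uncore, regs.imr, 0xffffffff); /* mask all */
                intel_uncore_write(uncore, regs.ier, 0);          /* disable */
                /* IIR can queue one extra event behind the latch: clear twice */
                intel_uncore_write(uncore, regs.iir, 0xffffffff);
                intel_uncore_write(uncore, regs.iir, 0xffffffff);
        }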
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 169393a7ad88..c3afa321fe30 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -4,7 +4,8 @@
*/
#include "i915_drv.h"
-
+#include "i915_wait_util.h"
+#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
@@ -35,7 +36,7 @@
* ignored.
*/
-#define HAS_MSLICE_STEERING(dev_priv) (INTEL_INFO(dev_priv)->has_mslice_steering)
+#define HAS_MSLICE_STEERING(i915) (INTEL_INFO(i915)->has_mslice_steering)
static const char * const intel_steering_types[] = {
"L3BANK",
@@ -57,51 +58,18 @@ static const struct intel_mmio_range icl_l3bank_steering_table[] = {
* are of a "GAM" subclass that has special rules. Thus we use a separate
* GAM table farther down for those.
*/
-static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
+static const struct intel_mmio_range dg2_mslice_steering_table[] = {
{ 0x00DD00, 0x00DDFF },
{ 0x00E900, 0x00FFFF }, /* 0xEA00 - 0xEFFF is unused */
{},
};
-static const struct intel_mmio_range xehpsdv_gam_steering_table[] = {
- { 0x004000, 0x004AFF },
- { 0x00C800, 0x00CFFF },
- {},
-};
-
-static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
- { 0x00B000, 0x00B0FF },
- { 0x00D800, 0x00D8FF },
- {},
-};
-
static const struct intel_mmio_range dg2_lncf_steering_table[] = {
{ 0x00B000, 0x00B0FF },
{ 0x00D880, 0x00D8FF },
{},
};
-/*
- * We have several types of MCR registers on PVC where steering to (0,0)
- * will always provide us with a non-terminated value. We'll stick them
- * all in the same table for simplicity.
- */
-static const struct intel_mmio_range pvc_instance0_steering_table[] = {
- { 0x004000, 0x004AFF }, /* HALF-BSLICE */
- { 0x008800, 0x00887F }, /* CC */
- { 0x008A80, 0x008AFF }, /* TILEPSMI */
- { 0x00B000, 0x00B0FF }, /* HALF-BSLICE */
- { 0x00B100, 0x00B3FF }, /* L3BANK */
- { 0x00C800, 0x00CFFF }, /* HALF-BSLICE */
- { 0x00D800, 0x00D8FF }, /* HALF-BSLICE */
- { 0x00DD00, 0x00DDFF }, /* BSLICE */
- { 0x00E900, 0x00E9FF }, /* HALF-BSLICE */
- { 0x00EC00, 0x00EEFF }, /* HALF-BSLICE */
- { 0x00F000, 0x00FFFF }, /* HALF-BSLICE */
- { 0x024180, 0x0241FF }, /* HALF-BSLICE */
- {},
-};
-
static const struct intel_mmio_range xelpg_instance0_steering_table[] = {
{ 0x000B00, 0x000BFF }, /* SQIDI */
{ 0x001000, 0x001FFF }, /* SQIDI */
@@ -154,9 +122,8 @@ void intel_gt_mcr_init(struct intel_gt *gt)
gt->info.mslice_mask =
intel_slicemask_from_xehp_dssmask(gt->info.sseu.subslice_mask,
GEN_DSS_PER_MSLICE);
- gt->info.mslice_mask |=
- (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
- GEN12_MEML3_EN_MASK);
+ gt->info.mslice_mask |= REG_FIELD_GET(GEN12_MEML3_EN_MASK,
+ intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3));
if (!gt->info.mslice_mask) /* should be impossible! */
gt_warn(gt, "mslice mask all zero!\n");
@@ -166,8 +133,8 @@ void intel_gt_mcr_init(struct intel_gt *gt)
gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table;
} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
/* Wa_14016747170 */
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
fuse = REG_FIELD_GET(MTL_GT_L3_EXC_MASK,
intel_uncore_read(gt->uncore,
MTL_GT_ACTIVITY_FACTOR));
@@ -185,22 +152,16 @@ void intel_gt_mcr_init(struct intel_gt *gt)
gt->steering_table[INSTANCE0] = xelpg_instance0_steering_table;
gt->steering_table[L3BANK] = xelpg_l3bank_steering_table;
gt->steering_table[DSS] = xelpg_dss_steering_table;
- } else if (IS_PONTEVECCHIO(i915)) {
- gt->steering_table[INSTANCE0] = pvc_instance0_steering_table;
} else if (IS_DG2(i915)) {
- gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
+ gt->steering_table[MSLICE] = dg2_mslice_steering_table;
gt->steering_table[LNCF] = dg2_lncf_steering_table;
/*
* No need to hook up the GAM table since it has a dedicated
* steering control register on DG2 and can use implicit
* steering.
*/
- } else if (IS_XEHPSDV(i915)) {
- gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
- gt->steering_table[LNCF] = xehpsdv_lncf_steering_table;
- gt->steering_table[GAM] = xehpsdv_gam_steering_table;
} else if (GRAPHICS_VER(i915) >= 11 &&
- GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
+ GRAPHICS_VER_FULL(i915) < IP_VER(12, 55)) {
gt->steering_table[L3BANK] = icl_l3bank_steering_table;
gt->info.l3bank_mask =
~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
@@ -278,7 +239,7 @@ static u32 rw_with_mcr_steering_fw(struct intel_gt *gt,
* to remain in multicast mode for reads. There's no real
* downside to this, so we'll just go ahead and do so on all
* platforms; we'll only clear the multicast bit from the mask
- * when exlicitly doing a write operation.
+ * when explicitly doing a write operation.
*/
if (rw_flag == FW_REG_WRITE)
mcr_mask |= GEN11_MCR_MULTICAST;
@@ -364,6 +325,7 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt,
* function call.
*/
void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags)
+ __acquires(&gt->mcr_lock)
{
unsigned long __flags;
int err = 0;
@@ -375,9 +337,25 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags)
* driver threads, but also with hardware/firmware agents. A dedicated
* locking register is used.
*/
- if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) {
+ /*
+ * The steering control and semaphore registers are inside an
+ * "always on" power domain with respect to RC6. However there
+ * are some issues if higher-level platform sleep states are
+ * entering/exiting at the same time these registers are
+ * accessed. Grabbing GT forcewake and holding it over the
+ * entire lock/steer/unlock cycle ensures that those sleep
+ * states have been fully exited before we access these
+ * registers. This wakeref will be released in the unlock
+ * routine.
+ *
+ * Wa_22018931422
+ */
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_GT);
+
err = wait_for(intel_uncore_read_fw(gt->uncore,
MTL_STEER_SEMAPHORE) == 0x1, 100);
+ }
/*
* Even on platforms with a hardware lock, we'll continue to grab
@@ -410,9 +388,35 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags)
* Context: Releases gt->mcr_lock
*/
void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long flags)
+ __releases(&gt->mcr_lock)
{
spin_unlock_irqrestore(&gt->mcr_lock, flags);
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) {
+ intel_uncore_write_fw(gt->uncore, MTL_STEER_SEMAPHORE, 0x1);
+
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_GT);
+ }
+}
+
+/**
+ * intel_gt_mcr_lock_sanitize - Sanitize MCR steering lock
+ * @gt: GT structure
+ *
+ * This will be used to sanitize the initial status of the hardware lock
+ * during driver load and resume since there won't be any concurrent access
+ * from other agents at those times, but it's possible that boot firmware
+ * may have left the lock in a bad state.
+ */
+void intel_gt_mcr_lock_sanitize(struct intel_gt *gt)
+{
+ /*
+ * This gets called at load/resume time, so we shouldn't be
+ * racing with other driver threads grabbing the mcr lock.
+ */
+ lockdep_assert_not_held(&gt->mcr_lock);
+
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
intel_uncore_write_fw(gt->uncore, MTL_STEER_SEMAPHORE, 0x1);
}
@@ -559,12 +563,15 @@ static bool reg_needs_read_steering(struct intel_gt *gt,
i915_mcr_reg_t reg,
enum intel_steering_type type)
{
- const u32 offset = i915_mmio_reg_offset(reg);
+ u32 offset = i915_mmio_reg_offset(reg);
const struct intel_mmio_range *entry;
if (likely(!gt->steering_table[type]))
return false;
+ if (IS_GSI_REG(offset))
+ offset += gt->uncore->gsi_offset;
+
for (entry = gt->steering_table[type]; entry->end; entry++) {
if (offset >= entry->start && offset <= entry->end)
return true;
@@ -775,8 +782,6 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
for (int i = 0; i < NUM_STEERING_TYPES; i++)
if (gt->steering_table[i])
report_steering_type(p, gt, i, dump_table);
- } else if (IS_PONTEVECCHIO(gt->i915)) {
- report_steering_type(p, gt, INSTANCE0, dump_table);
} else if (HAS_MSLICE_STEERING(gt->i915)) {
report_steering_type(p, gt, MSLICE, dump_table);
report_steering_type(p, gt, LNCF, dump_table);
@@ -796,10 +801,7 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss,
unsigned int *group, unsigned int *instance)
{
- if (IS_PONTEVECCHIO(gt->i915)) {
- *group = dss / GEN_DSS_PER_CSLICE;
- *instance = dss % GEN_DSS_PER_CSLICE;
- } else if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) {
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 55)) {
*group = dss / GEN_DSS_PER_GSLICE;
*instance = dss % GEN_DSS_PER_GSLICE;
} else {
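The Wa_22018931422 change above means a lock/steer/unlock cycle on 12.70+ now also brackets the window with GT forcewake, so callers need no extra handling; the shape of a steered access stays the same (sketch, with the steering-selector programming elided):

        unsigned long flags;

        intel_gt_mcr_lock(gt, &flags);          /* semaphore + forcewake on 12.70+ */
        /* ... program the steering selector, access the MCR register ... */
        intel_gt_mcr_unlock(gt, flags);         /* release semaphore + forcewake */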
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
index 41684495b7da..a67a4c35a4fa 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
@@ -11,6 +11,7 @@
void intel_gt_mcr_init(struct intel_gt *gt);
void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags);
void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long flags);
+void intel_gt_mcr_lock_sanitize(struct intel_gt *gt);
u32 intel_gt_mcr_read(struct intel_gt *gt,
i915_mcr_reg_t reg,
@@ -53,7 +54,7 @@ int intel_gt_mcr_wait_for_reg(struct intel_gt *gt,
* the topology, so we look up the DSS ID directly in "slice 0."
*/
#define _HAS_SS(ss_, gt_, group_, instance_) ( \
- GRAPHICS_VER_FULL(gt_->i915) >= IP_VER(12, 50) ? \
+ GRAPHICS_VER_FULL(gt_->i915) >= IP_VER(12, 55) ? \
intel_sseu_has_subslice(&(gt_)->info.sseu, 0, ss_) : \
intel_sseu_has_subslice(&(gt_)->info.sseu, group_, instance_))
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index cef3d6f5c34e..c7f59d60fac6 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -6,6 +6,8 @@
#include <linux/string_helpers.h>
#include <linux/suspend.h>
+#include "display/intel_display_power.h"
+
#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_params.h"
@@ -13,55 +15,35 @@
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
+#include "intel_gt_mcr.h"
#include "intel_gt_pm.h"
#include "intel_gt_print.h"
#include "intel_gt_requests.h"
#include "intel_llc.h"
-#include "intel_pm.h"
#include "intel_rc6.h"
#include "intel_rps.h"
#include "intel_wakeref.h"
-#include "intel_pcode.h"
#include "pxp/intel_pxp_pm.h"
#define I915_GT_SUSPEND_IDLE_TIMEOUT (HZ / 2)
-static void mtl_media_busy(struct intel_gt *gt)
-{
- /* Wa_14017073508: mtl */
- if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
- gt->type == GT_MEDIA)
- snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE,
- PCODE_MBOX_GT_STATE_MEDIA_BUSY,
- PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
-}
-
-static void mtl_media_idle(struct intel_gt *gt)
-{
- /* Wa_14017073508: mtl */
- if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
- gt->type == GT_MEDIA)
- snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE,
- PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY,
- PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
-}
-
static void user_forcewake(struct intel_gt *gt, bool suspend)
{
int count = atomic_read(&gt->user_wakeref);
+ intel_wakeref_t wakeref;
/* Inside suspend/resume so single threaded, no races to worry about. */
if (likely(!count))
return;
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
if (suspend) {
GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
atomic_sub(count, &gt->wakeref.count);
} else {
atomic_add(count, &gt->wakeref.count);
}
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
}
static void runtime_begin(struct intel_gt *gt)
@@ -90,12 +72,10 @@ static int __gt_unpark(struct intel_wakeref *wf)
{
struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
struct drm_i915_private *i915 = gt->i915;
+ struct intel_display *display = i915->display;
GT_TRACE(gt, "\n");
- /* Wa_14017073508: mtl */
- mtl_media_busy(gt);
-
/*
* It seems that the DMC likes to transition between the DC states a lot
* when there are no connected displays (no active power domains) during
@@ -107,12 +87,12 @@ static int __gt_unpark(struct intel_wakeref *wf)
* Work around it by grabbing a GT IRQ power domain whilst there is any
* GT activity, preventing any DC state transitions.
*/
- gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+ gt->awake = intel_display_power_get(display, POWER_DOMAIN_GT_IRQ);
GEM_BUG_ON(!gt->awake);
intel_rc6_unpark(&gt->rc6);
intel_rps_unpark(&gt->rps);
- i915_pmu_gt_unparked(i915);
+ i915_pmu_gt_unparked(gt);
intel_guc_busyness_unpark(gt);
intel_gt_unpark_requests(gt);
@@ -126,6 +106,7 @@ static int __gt_park(struct intel_wakeref *wf)
struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
intel_wakeref_t wakeref = fetch_and_zero(&gt->awake);
struct drm_i915_private *i915 = gt->i915;
+ struct intel_display *display = i915->display;
GT_TRACE(gt, "\n");
@@ -134,7 +115,7 @@ static int __gt_park(struct intel_wakeref *wf)
intel_guc_busyness_park(gt);
i915_vma_parked(gt);
- i915_pmu_gt_parked(i915);
+ i915_pmu_gt_parked(gt);
intel_rps_park(&gt->rps);
intel_rc6_park(&gt->rc6);
@@ -143,10 +124,7 @@ static int __gt_park(struct intel_wakeref *wf)
/* Defer dropping the display power well for 100ms, it's slow! */
GEM_BUG_ON(!wakeref);
- intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
-
- /* Wa_14017073508: mtl */
- mtl_media_idle(gt);
+ intel_display_power_put_async(display, POWER_DOMAIN_GT_IRQ, wakeref);
return 0;
}
@@ -165,7 +143,7 @@ void intel_gt_pm_init_early(struct intel_gt *gt)
* runtime_pm is per-device rather than per-tile, so this is still the
* correct structure.
*/
- intel_wakeref_init(&gt->wakeref, &gt->i915->runtime_pm, &wf_ops);
+ intel_wakeref_init(&gt->wakeref, gt->i915, &wf_ops, "GT");
seqcount_mutex_init(&gt->stats.lock, &gt->wakeref.mutex);
}
@@ -182,10 +160,10 @@ void intel_gt_pm_init(struct intel_gt *gt)
static bool reset_engines(struct intel_gt *gt)
{
- if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
+ if (intel_gt_gpu_reset_clobbers_display(gt))
return false;
- return __intel_gt_reset(gt, ALL_ENGINES) == 0;
+ return intel_gt_reset_all_engines(gt) == 0;
}
static void gt_sanitize(struct intel_gt *gt, bool force)
@@ -194,7 +172,7 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
enum intel_engine_id id;
intel_wakeref_t wakeref;
- GT_TRACE(gt, "force:%s", str_yes_no(force));
+ GT_TRACE(gt, "force:%s\n", str_yes_no(force));
/* Use a raw wakeref to avoid calling intel_display_power_get early */
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
@@ -244,10 +222,26 @@ void intel_gt_pm_fini(struct intel_gt *gt)
intel_rc6_fini(&gt->rc6);
}
+void intel_gt_resume_early(struct intel_gt *gt)
+{
+ /*
+ * Sanitize steer semaphores during driver resume, since they have been
+ * observed to be left held across a suspend cycle. The hardware team
+ * has confirmed this is safe, as no other agent acquires the lock
+ * during driver load or resume.
+ */
+ intel_gt_mcr_lock_sanitize(gt);
+
+ intel_uncore_resume_early(gt->uncore);
+ intel_gt_check_and_clear_faults(gt);
+}
+
int intel_gt_resume(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err;
err = intel_gt_has_unrecoverable_error(gt);
@@ -264,7 +258,7 @@ int intel_gt_resume(struct intel_gt *gt)
*/
gt_sanitize(gt, true);
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
intel_rc6_sanitize(&gt->rc6);
@@ -307,7 +301,8 @@ int intel_gt_resume(struct intel_gt *gt)
out_fw:
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
+ intel_gt_bind_context_set_ready(gt);
return err;
err_wedged:
@@ -334,6 +329,7 @@ static void wait_for_suspend(struct intel_gt *gt)
void intel_gt_suspend_prepare(struct intel_gt *gt)
{
+ intel_gt_bind_context_set_unready(gt);
user_forcewake(gt, true);
wait_for_suspend(gt);
}
@@ -387,6 +383,7 @@ void intel_gt_suspend_late(struct intel_gt *gt)
void intel_gt_runtime_suspend(struct intel_gt *gt)
{
+ intel_gt_bind_context_set_unready(gt);
intel_uc_runtime_suspend(&gt->uc);
GT_TRACE(gt, "\n");
@@ -404,6 +401,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
if (ret)
return ret;
+ intel_gt_bind_context_set_ready(gt);
return 0;
}
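The new entry point slots in ahead of the main resume path: sanitize the MCR semaphore while no other agent can race, bring the uncore back, then clear stale faults. A hedged ordering sketch (the caller wiring lives outside this file):

        intel_gt_resume_early(gt);      /* MCR lock sanitize, uncore, faults */
        err = intel_gt_resume(gt);      /* engines/GuC under a tracked wakeref */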
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index 6c9a46452364..6f25c747bc29 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -16,19 +16,28 @@ static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt)
return intel_wakeref_is_active(&gt->wakeref);
}
-static inline void intel_gt_pm_get(struct intel_gt *gt)
+static inline void intel_gt_pm_get_untracked(struct intel_gt *gt)
{
intel_wakeref_get(&gt->wakeref);
}
+static inline intel_wakeref_t intel_gt_pm_get(struct intel_gt *gt)
+{
+ intel_gt_pm_get_untracked(gt);
+ return intel_wakeref_track(&gt->wakeref);
+}
+
static inline void __intel_gt_pm_get(struct intel_gt *gt)
{
__intel_wakeref_get(&gt->wakeref);
}
-static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
+static inline intel_wakeref_t intel_gt_pm_get_if_awake(struct intel_gt *gt)
{
- return intel_wakeref_get_if_active(&gt->wakeref);
+ if (!intel_wakeref_get_if_active(&gt->wakeref))
+ return NULL;
+
+ return intel_wakeref_track(&gt->wakeref);
}
static inline void intel_gt_pm_might_get(struct intel_gt *gt)
@@ -36,12 +45,18 @@ static inline void intel_gt_pm_might_get(struct intel_gt *gt)
intel_wakeref_might_get(&gt->wakeref);
}
-static inline void intel_gt_pm_put(struct intel_gt *gt)
+static inline void intel_gt_pm_put_untracked(struct intel_gt *gt)
{
intel_wakeref_put(&gt->wakeref);
}
-static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+static inline void intel_gt_pm_put(struct intel_gt *gt, intel_wakeref_t handle)
+{
+ intel_wakeref_untrack(&gt->wakeref, handle);
+ intel_gt_pm_put_untracked(gt);
+}
+
+static inline void intel_gt_pm_put_async_untracked(struct intel_gt *gt)
{
intel_wakeref_put_async(&gt->wakeref);
}
@@ -51,9 +66,14 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
intel_wakeref_might_put(&gt->wakeref);
}
-#define with_intel_gt_pm(gt, tmp) \
- for (tmp = 1, intel_gt_pm_get(gt); tmp; \
- intel_gt_pm_put(gt), tmp = 0)
+static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t handle)
+{
+ intel_wakeref_untrack(&gt->wakeref, handle);
+ intel_gt_pm_put_async_untracked(gt);
+}
+
+#define with_intel_gt_pm(gt, wf) \
+ for ((wf) = intel_gt_pm_get(gt); (wf); intel_gt_pm_put((gt), (wf)), (wf) = NULL)
/**
* with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent
@@ -64,7 +84,7 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
* @wf: pointer to a temporary wakeref.
*/
#define with_intel_gt_pm_if_awake(gt, wf) \
- for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
+ for ((wf) = intel_gt_pm_get_if_awake(gt); (wf); intel_gt_pm_put_async((gt), (wf)), (wf) = NULL)
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
@@ -78,15 +98,20 @@ void intel_gt_pm_fini(struct intel_gt *gt);
void intel_gt_suspend_prepare(struct intel_gt *gt);
void intel_gt_suspend_late(struct intel_gt *gt);
int intel_gt_resume(struct intel_gt *gt);
+void intel_gt_resume_early(struct intel_gt *gt);
void intel_gt_runtime_suspend(struct intel_gt *gt);
int intel_gt_runtime_resume(struct intel_gt *gt);
ktime_t intel_gt_get_awake_time(const struct intel_gt *gt);
+#define INTEL_WAKEREF_MOCK_GT ERR_PTR(-ENODEV)
+
static inline bool is_mock_gt(const struct intel_gt *gt)
{
- return I915_SELFTEST_ONLY(gt->awake == -ENODEV);
+ BUILD_BUG_ON(INTEL_WAKEREF_DEF == INTEL_WAKEREF_MOCK_GT);
+
+ return I915_SELFTEST_ONLY(gt->awake == INTEL_WAKEREF_MOCK_GT);
}
#endif /* INTEL_GT_PM_H */
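With the tracked-wakeref conversion, the handle returned by intel_gt_pm_get() doubles as the loop condition, so the macro reads naturally at call sites:

        intel_wakeref_t wf;

        with_intel_gt_pm(gt, wf) {
                /* GT is guaranteed awake in here */
        }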
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 83df4cd5e06c..96411f357f5d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -7,6 +7,8 @@
#include <linux/seq_file.h>
#include <linux/string_helpers.h>
+#include <drm/drm_print.h>
+
#include "i915_drv.h"
#include "i915_reg.h"
#include "intel_gt.h"
@@ -22,12 +24,12 @@
#include "intel_rps.h"
#include "intel_runtime_pm.h"
#include "intel_uncore.h"
-#include "vlv_sideband.h"
+#include "vlv_iosf_sb.h"
void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
{
atomic_inc(&gt->user_wakeref);
- intel_gt_pm_get(gt);
+ intel_gt_pm_get_untracked(gt);
if (GRAPHICS_VER(gt->i915) >= 6)
intel_uncore_forcewake_user_get(gt->uncore);
}
@@ -36,7 +38,7 @@ void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
{
if (GRAPHICS_VER(gt->i915) >= 6)
intel_uncore_forcewake_user_put(gt->uncore);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put_untracked(gt);
atomic_dec(&gt->user_wakeref);
}
@@ -71,6 +73,8 @@ static int fw_domains_show(struct seq_file *m, void *data)
struct intel_uncore_forcewake_domain *fw_domain;
unsigned int tmp;
+ spin_lock_irq(&uncore->lock);
+
seq_printf(m, "user.bypass_count = %u\n",
uncore->user_forcewake_count);
@@ -79,6 +83,8 @@ static int fw_domains_show(struct seq_file *m, void *data)
intel_uncore_forcewake_domain_to_str(fw_domain->id),
READ_ONCE(fw_domain->wake_count));
+ spin_unlock_irq(&uncore->lock);
+
return 0;
}
DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains);
@@ -290,7 +296,6 @@ static int mtl_drpc(struct seq_file *m)
seq_puts(m, "RC6\n");
break;
default:
- MISSING_CASE(REG_FIELD_GET(MTL_CC_MASK, gt_core_status));
seq_puts(m, "Unknown\n");
break;
}
@@ -363,12 +368,11 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p)
drm_printf(p, "SW control enabled: %s\n",
str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE));
- vlv_punit_get(i915);
- freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
- vlv_punit_put(i915);
+ vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
+ freq_sts = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_FREQ_STS);
+ vlv_iosf_sb_put(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
drm_printf(p, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
- drm_printf(p, "DDR freq: %d MHz\n", i915->mem_freq);
drm_printf(p, "actual GPU freq: %d MHz\n",
intel_gpu_freq(rps, (freq_sts >> 8) & 0xff));
@@ -393,10 +397,6 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p)
drm_puts(p, "no P-state info available\n");
}
- drm_printf(p, "Current CD clock frequency: %d kHz\n", i915->display.cdclk.hw.cdclk);
- drm_printf(p, "Max CD clock frequency: %d kHz\n", i915->display.cdclk.max_cdclk_freq);
- drm_printf(p, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq);
-
intel_runtime_pm_put(uncore->rpm, wakeref);
}
@@ -433,7 +433,7 @@ static int llc_show(struct seq_file *m, void *data)
max_gpu_freq /= GEN9_FREQ_SCALER;
}
- seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
+ seq_puts(m, "GPU freq (MHz)\tEffective GPU freq (MHz)\tEffective Ring freq (MHz)\n");
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
@@ -539,7 +539,10 @@ static bool rps_eval(void *data)
{
struct intel_gt *gt = data;
- return HAS_RPS(gt->i915);
+ if (intel_guc_slpc_is_used(gt_to_guc(gt)))
+ return false;
+
+ return HAS_RPS(gt->i915);
}
DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost);
@@ -580,7 +583,7 @@ static bool perf_limit_reasons_eval(void *data)
}
DEFINE_SIMPLE_ATTRIBUTE(perf_limit_reasons_fops, perf_limit_reasons_get,
- perf_limit_reasons_clear, "%llu\n");
+ perf_limit_reasons_clear, "0x%llx\n");
void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_print.h b/drivers/gpu/drm/i915/gt/intel_gt_print.h
index 5d9da355ce24..7fdc78c79273 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_print.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_print.h
@@ -16,6 +16,9 @@
#define gt_warn(_gt, _fmt, ...) \
drm_warn(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+#define gt_warn_once(_gt, _fmt, ...) \
+ drm_warn_once(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
#define gt_notice(_gt, _fmt, ...) \
drm_notice(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
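gt_warn_once() follows the same format contract as gt_warn() but emits only once per module load, e.g. (message illustrative):

        gt_warn_once(gt, "reducing compute capability due to fused-off slices\n");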
@@ -28,6 +31,9 @@
#define gt_err_ratelimited(_gt, _fmt, ...) \
drm_err_ratelimited(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+#define gt_notice_ratelimited(_gt, _fmt, ...) \
+ dev_notice_ratelimited((_gt)->i915->drm.dev, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
#define gt_probe_error(_gt, _fmt, ...) \
do { \
if (i915_error_injected()) \
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index be0f6e305c88..7421ed18d8d1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -7,9 +7,8 @@
#define __INTEL_GT_REGS__
#include "i915_reg_defs.h"
-#include "display/intel_display_reg_defs.h" /* VLV_DISPLAY_BASE */
-#define MCR_REG(offset) ((const i915_mcr_reg_t){ .reg = (offset) })
+#define VLV_GUNIT_BASE 0x180000
/*
* The perf control registers are technically multicast registers, but the
@@ -27,22 +26,19 @@
#define MTL_CAGF_MASK REG_GENMASK(8, 0)
#define MTL_CC0 0x0
#define MTL_CC6 0x3
-#define MTL_CC_MASK REG_GENMASK(12, 9)
+#define MTL_CC_MASK REG_GENMASK(10, 9)
/* RPM unit config (Gen8+) */
#define RPM_CONFIG0 _MMIO(0xd00)
-#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3
-#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
-#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 0
-#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 1
-#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3
-#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (0x7 << GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
-#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 0
-#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 1
-#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ 2
-#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ 3
-#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1
-#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
+#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK REG_GENMASK(5, 3)
+#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 0)
+#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 1)
+#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 2)
+#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ REG_FIELD_PREP(GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 3)
+#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK REG_BIT(3)
+#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ REG_FIELD_PREP(GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 0)
+#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ REG_FIELD_PREP(GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, 1)
+#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1)
#define RPM_CONFIG1 _MMIO(0xd04)
#define GEN10_GT_NOA_ENABLE (1 << 9)
@@ -165,6 +161,8 @@
#define GEN9_CSFE_CHICKEN1_RCS _MMIO(0x20d4)
#define GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2)
#define GEN11_ENABLE_32_PLANE_MODE (1 << 7)
+#define GEN12_CS_DEBUG_MODE2 _MMIO(0x20d8)
+#define INSTRUCTION_STATE_CACHE_INVALIDATE REG_BIT(6)
#define GEN7_FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0)
#define GEN9_FFSC_PERCTX_PREEMPT_CTRL (1 << 14)
@@ -325,6 +323,12 @@
_RING_FAULT_REG_VCS, \
_RING_FAULT_REG_VECS, \
_RING_FAULT_REG_BCS))
+#define RING_FAULT_VADDR_MASK REG_GENMASK(31, 12) /* pre-bdw */
+#define RING_FAULT_ENGINE_ID_MASK REG_GENMASK(16, 12) /* bdw+ */
+#define RING_FAULT_GTTSEL_MASK REG_BIT(11) /* pre-bdw */
+#define RING_FAULT_SRCID_MASK REG_GENMASK(10, 3)
+#define RING_FAULT_FAULT_TYPE_MASK REG_GENMASK(2, 1) /* ivb+ */
+#define RING_FAULT_VALID REG_BIT(0)
#define ERROR_GEN6 _MMIO(0x40a0)
@@ -333,9 +337,11 @@
#define GEN8_PRIVATE_PAT_HI _MMIO(0x40e0 + 4)
#define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4)
#define BSD_HWS_PGA_GEN7 _MMIO(0x4180)
-#define GEN12_GFX_CCS_AUX_NV _MMIO(0x4208)
-#define GEN12_VD0_AUX_NV _MMIO(0x4218)
-#define GEN12_VD1_AUX_NV _MMIO(0x4228)
+
+#define GEN12_CCS_AUX_INV _MMIO(0x4208)
+#define GEN12_VD0_AUX_INV _MMIO(0x4218)
+#define GEN12_VE0_AUX_INV _MMIO(0x4238)
+#define GEN12_BCS0_AUX_INV _MMIO(0x4248)
#define GEN8_RTCR _MMIO(0x4260)
#define GEN8_M1TCR _MMIO(0x4264)
@@ -343,14 +349,12 @@
#define GEN8_BTCR _MMIO(0x426c)
#define GEN8_VTCR _MMIO(0x4270)
-#define GEN12_VD2_AUX_NV _MMIO(0x4298)
-#define GEN12_VD3_AUX_NV _MMIO(0x42a8)
-#define GEN12_VE0_AUX_NV _MMIO(0x4238)
-
#define BLT_HWS_PGA_GEN7 _MMIO(0x4280)
-#define GEN12_VE1_AUX_NV _MMIO(0x42b8)
+#define GEN12_VD2_AUX_INV _MMIO(0x4298)
+#define GEN12_CCS0_AUX_INV _MMIO(0x42c8)
#define AUX_INV REG_BIT(0)
+
#define VEBOX_HWS_PGA_GEN7 _MMIO(0x4380)
#define GEN12_AUX_ERR_DBG _MMIO(0x43f4)
@@ -358,7 +362,11 @@
#define GEN7_TLB_RD_ADDR _MMIO(0x4700)
#define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4)
-#define XEHP_PAT_INDEX(index) MCR_REG(0x4800 + (index) * 4)
+#define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \
+ 0x4800, 0x4804, \
+ 0x4848, 0x484c)
+#define XEHP_PAT_INDEX(index) MCR_REG(_PAT_INDEX(index))
+#define XELPMP_PAT_INDEX(index) _MMIO(_PAT_INDEX(index))
#define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900)
#define XEHP_TILE_LMEM_RANGE_SHIFT 8
@@ -380,6 +388,8 @@
#define GEN8_FAULT_TLB_DATA0 _MMIO(0x4b10)
#define GEN8_FAULT_TLB_DATA1 _MMIO(0x4b14)
+#define FAULT_GTT_SEL REG_BIT(4)
+#define FAULT_VA_HIGH_BITS REG_GENMASK(3, 0)
#define GEN11_GACB_PERF_CTRL _MMIO(0x4b80)
#define GEN11_HASH_CTRL_MASK (0x3 << 12 | 0xf << 0)
@@ -404,14 +414,14 @@
#define GEN7_SO_PRIM_STORAGE_NEEDED(n) _MMIO(0x5240 + (n) * 8)
#define GEN7_SO_PRIM_STORAGE_NEEDED_UDW(n) _MMIO(0x5240 + (n) * 8 + 4)
+#define GEN8_WM_CHICKEN2 MCR_REG(0x5584)
+#define WAIT_ON_DEPTH_STALL_DONE_DISABLE REG_BIT(5)
+
#define GEN9_WM_CHICKEN3 _MMIO(0x5588)
#define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9)
#define XEHP_CULLBIT1 MCR_REG(0x6100)
-#define CHICKEN_RASTER_1 MCR_REG(0x6204)
-#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8)
-
#define CHICKEN_RASTER_2 MCR_REG(0x6208)
#define TBIMR_FAST_CLIP REG_BIT(5)
@@ -430,6 +440,7 @@
#define XEHPG_INSTDONE_GEOM_SVG MCR_REG(0x666c)
#define CACHE_MODE_0_GEN7 _MMIO(0x7000) /* IVB+ */
+#define DISABLE_REPACKING_FOR_COMPRESSION REG_BIT(15) /* jsl+ */
#define RC_OP_FLUSH_ENABLE (1 << 0)
#define HIZ_RAW_STALL_OPT_DISABLE (1 << 2)
#define CACHE_MODE_1 _MMIO(0x7004) /* IVB+ */
@@ -467,6 +478,9 @@
#define XEHP_PSS_MODE2 MCR_REG(0x703c)
#define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5)
+#define XEHP_PSS_CHICKEN MCR_REG(0x7044)
+#define FD_END_COLLECT REG_BIT(5)
+
#define GEN7_SC_INSTDONE _MMIO(0x7100)
#define GEN12_SC_INSTDONE_EXTRA _MMIO(0x7104)
#define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108)
@@ -480,6 +494,9 @@
#define HDC_FORCE_NON_COHERENT (1 << 4)
#define HDC_BARRIER_PERFORMANCE_DISABLE (1 << 10)
+#define COMMON_SLICE_CHICKEN4 _MMIO(0x7300)
+#define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6)
+
#define GEN8_HDC_CHICKEN1 _MMIO(0x7304)
#define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304)
@@ -495,11 +512,12 @@
#define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11)
#define GEN9_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + (slice) * 0x4)
+#define GEN9_PGCTL_SS_ACK(subslice) REG_BIT(2 + (subslice) * 2)
+#define GEN9_PGCTL_SLICE_ACK REG_BIT(0)
+
#define GEN10_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + ((slice) / 3) * 0x34 + \
((slice) % 3) * 0x4)
-#define GEN9_PGCTL_SLICE_ACK (1 << 0)
-#define GEN9_PGCTL_SS_ACK(subslice) (1 << (2 + (subslice) * 2))
-#define GEN10_PGCTL_VALID_SS_MASK(slice) ((slice) == 0 ? 0x7F : 0x1F)
+#define GEN10_PGCTL_VALID_SS_MASK(slice) ((slice) == 0 ? REG_GENMASK(6, 0) : REG_GENMASK(4, 0))
#define GEN9_SS01_EU_PGCTL_ACK(slice) _MMIO(0x805c + (slice) * 0x8)
#define GEN10_SS01_EU_PGCTL_ACK(slice) _MMIO(0x805c + ((slice) / 3) * 0x30 + \
@@ -507,14 +525,14 @@
#define GEN9_SS23_EU_PGCTL_ACK(slice) _MMIO(0x8060 + (slice) * 0x8)
#define GEN10_SS23_EU_PGCTL_ACK(slice) _MMIO(0x8060 + ((slice) / 3) * 0x30 + \
((slice) % 3) * 0x8)
-#define GEN9_PGCTL_SSA_EU08_ACK (1 << 0)
-#define GEN9_PGCTL_SSA_EU19_ACK (1 << 2)
-#define GEN9_PGCTL_SSA_EU210_ACK (1 << 4)
-#define GEN9_PGCTL_SSA_EU311_ACK (1 << 6)
-#define GEN9_PGCTL_SSB_EU08_ACK (1 << 8)
-#define GEN9_PGCTL_SSB_EU19_ACK (1 << 10)
-#define GEN9_PGCTL_SSB_EU210_ACK (1 << 12)
-#define GEN9_PGCTL_SSB_EU311_ACK (1 << 14)
+#define GEN9_PGCTL_SSB_EU311_ACK REG_BIT(14)
+#define GEN9_PGCTL_SSB_EU210_ACK REG_BIT(12)
+#define GEN9_PGCTL_SSB_EU19_ACK REG_BIT(10)
+#define GEN9_PGCTL_SSB_EU08_ACK REG_BIT(8)
+#define GEN9_PGCTL_SSA_EU311_ACK REG_BIT(6)
+#define GEN9_PGCTL_SSA_EU210_ACK REG_BIT(4)
+#define GEN9_PGCTL_SSA_EU19_ACK REG_BIT(2)
+#define GEN9_PGCTL_SSA_EU08_ACK REG_BIT(0)
#define VF_PREEMPTION _MMIO(0x83a4)
#define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0)
@@ -524,9 +542,17 @@
#define GEN8_RC6_CTX_INFO _MMIO(0x8504)
+#define GEN12_SQCNT1 _MMIO(0x8718)
+#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30)
+#define GEN12_SQCNT1_OABPC REG_BIT(29)
+#define GEN12_STRICT_RAR_ENABLE REG_BIT(23)
+
#define XEHP_SQCM MCR_REG(0x8724)
#define EN_32B_ACCESS REG_BIT(30)
+#define MTL_GSCPSMI_BASEADDR_LSB _MMIO(0x880c)
+#define MTL_GSCPSMI_BASEADDR_MSB _MMIO(0x8810)
+
#define HSW_IDICR _MMIO(0x9008)
#define IDIHASHMSK(x) (((x) & 0x3f) << 16)
@@ -563,7 +589,7 @@
#define GEN10_L3BANK_MASK 0x0F
/* on Xe_HP the same fuses indicates mslices instead of L3 banks */
#define GEN12_MAX_MSLICES 4
-#define GEN12_MEML3_EN_MASK 0x0F
+#define GEN12_MEML3_EN_MASK REG_GENMASK(3, 0)
#define HSW_PAVP_FUSE1 _MMIO(0x911c)
#define XEHP_SFC_ENABLE_MASK REG_GENMASK(27, 24)
@@ -573,37 +599,30 @@
#define HSW_F1_EU_DIS_6EUS 2
#define GEN8_FUSE2 _MMIO(0x9120)
-#define GEN8_F2_SS_DIS_SHIFT 21
-#define GEN8_F2_SS_DIS_MASK (0x7 << GEN8_F2_SS_DIS_SHIFT)
-#define GEN8_F2_S_ENA_SHIFT 25
-#define GEN8_F2_S_ENA_MASK (0x7 << GEN8_F2_S_ENA_SHIFT)
-#define GEN9_F2_SS_DIS_SHIFT 20
-#define GEN9_F2_SS_DIS_MASK (0xf << GEN9_F2_SS_DIS_SHIFT)
-#define GEN10_F2_S_ENA_SHIFT 22
-#define GEN10_F2_S_ENA_MASK (0x3f << GEN10_F2_S_ENA_SHIFT)
-#define GEN10_F2_SS_DIS_SHIFT 18
-#define GEN10_F2_SS_DIS_MASK (0xf << GEN10_F2_SS_DIS_SHIFT)
+#define GEN10_F2_S_ENA_MASK REG_GENMASK(27, 22)
+#define GEN10_F2_SS_DIS_MASK REG_GENMASK(21, 18)
+#define GEN8_F2_S_ENA_MASK REG_GENMASK(27, 25)
+#define GEN9_F2_SS_DIS_MASK REG_GENMASK(23, 20)
+#define GEN8_F2_SS_DIS_MASK REG_GENMASK(23, 21)
#define GEN8_EU_DISABLE0 _MMIO(0x9134)
#define GEN9_EU_DISABLE(slice) _MMIO(0x9134 + (slice) * 0x4)
#define GEN11_EU_DISABLE _MMIO(0x9134)
-#define GEN8_EU_DIS0_S0_MASK 0xffffff
-#define GEN8_EU_DIS0_S1_SHIFT 24
-#define GEN8_EU_DIS0_S1_MASK (0xff << GEN8_EU_DIS0_S1_SHIFT)
-#define GEN11_EU_DIS_MASK 0xFF
+#define GEN8_EU_DIS0_S1_MASK REG_GENMASK(31, 24)
+#define GEN8_EU_DIS0_S0_MASK REG_GENMASK(23, 0)
+#define GEN11_EU_DIS_MASK REG_GENMASK(7, 0)
#define XEHP_EU_ENABLE _MMIO(0x9134)
-#define XEHP_EU_ENA_MASK 0xFF
+#define XEHP_EU_ENA_MASK REG_GENMASK(7, 0)
#define GEN8_EU_DISABLE1 _MMIO(0x9138)
-#define GEN8_EU_DIS1_S1_MASK 0xffff
-#define GEN8_EU_DIS1_S2_SHIFT 16
-#define GEN8_EU_DIS1_S2_MASK (0xffff << GEN8_EU_DIS1_S2_SHIFT)
+#define GEN8_EU_DIS1_S2_MASK REG_GENMASK(31, 16)
+#define GEN8_EU_DIS1_S1_MASK REG_GENMASK(15, 0)
#define GEN11_GT_SLICE_ENABLE _MMIO(0x9138)
-#define GEN11_GT_S_ENA_MASK 0xFF
+#define GEN11_GT_S_ENA_MASK REG_GENMASK(7, 0)
#define GEN8_EU_DISABLE2 _MMIO(0x913c)
-#define GEN8_EU_DIS2_S2_MASK 0xff
+#define GEN8_EU_DIS2_S2_MASK REG_GENMASK(7, 0)
#define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913c)
#define GEN12_GT_GEOMETRY_DSS_ENABLE _MMIO(0x913c)
@@ -611,9 +630,8 @@
#define GEN10_EU_DISABLE3 _MMIO(0x9140)
#define GEN10_EU_DIS_SS_MASK 0xff
#define GEN11_GT_VEBOX_VDBOX_DISABLE _MMIO(0x9140)
-#define GEN11_GT_VDBOX_DISABLE_MASK 0xff
-#define GEN11_GT_VEBOX_DISABLE_SHIFT 16
-#define GEN11_GT_VEBOX_DISABLE_MASK (0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT)
+#define GEN11_GT_VEBOX_DISABLE_MASK REG_GENMASK(19, 16)
+#define GEN11_GT_VDBOX_DISABLE_MASK REG_GENMASK(7, 0)
#define GEN12_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144)
#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT _MMIO(0x9148)
@@ -702,44 +720,11 @@
#define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434)
#define VFUNIT_CLKGATE_DIS REG_BIT(20)
-#define TSGUNIT_CLKGATE_DIS REG_BIT(17) /* XEHPSDV */
#define CG3DDISCFEG_CLKGATE_DIS REG_BIT(17) /* DG2 */
#define GAMEDIA_CLKGATE_DIS REG_BIT(11)
#define HSUNIT_CLKGATE_DIS REG_BIT(8)
#define VSUNIT_CLKGATE_DIS REG_BIT(3)
-#define UNSLCGCTL9440 _MMIO(0x9440)
-#define GAMTLBOACS_CLKGATE_DIS REG_BIT(28)
-#define GAMTLBVDBOX5_CLKGATE_DIS REG_BIT(27)
-#define GAMTLBVDBOX6_CLKGATE_DIS REG_BIT(26)
-#define GAMTLBVDBOX3_CLKGATE_DIS REG_BIT(24)
-#define GAMTLBVDBOX4_CLKGATE_DIS REG_BIT(23)
-#define GAMTLBVDBOX7_CLKGATE_DIS REG_BIT(22)
-#define GAMTLBVDBOX2_CLKGATE_DIS REG_BIT(21)
-#define GAMTLBVDBOX0_CLKGATE_DIS REG_BIT(17)
-#define GAMTLBKCR_CLKGATE_DIS REG_BIT(16)
-#define GAMTLBGUC_CLKGATE_DIS REG_BIT(15)
-#define GAMTLBBLT_CLKGATE_DIS REG_BIT(14)
-#define GAMTLBVDBOX1_CLKGATE_DIS REG_BIT(6)
-
-#define UNSLCGCTL9444 _MMIO(0x9444)
-#define GAMTLBGFXA0_CLKGATE_DIS REG_BIT(30)
-#define GAMTLBGFXA1_CLKGATE_DIS REG_BIT(29)
-#define GAMTLBCOMPA0_CLKGATE_DIS REG_BIT(28)
-#define GAMTLBCOMPA1_CLKGATE_DIS REG_BIT(27)
-#define GAMTLBCOMPB0_CLKGATE_DIS REG_BIT(26)
-#define GAMTLBCOMPB1_CLKGATE_DIS REG_BIT(25)
-#define GAMTLBCOMPC0_CLKGATE_DIS REG_BIT(24)
-#define GAMTLBCOMPC1_CLKGATE_DIS REG_BIT(23)
-#define GAMTLBCOMPD0_CLKGATE_DIS REG_BIT(22)
-#define GAMTLBCOMPD1_CLKGATE_DIS REG_BIT(21)
-#define GAMTLBMERT_CLKGATE_DIS REG_BIT(20)
-#define GAMTLBVEBOX3_CLKGATE_DIS REG_BIT(19)
-#define GAMTLBVEBOX2_CLKGATE_DIS REG_BIT(18)
-#define GAMTLBVEBOX1_CLKGATE_DIS REG_BIT(17)
-#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16)
-#define LTCDD_CLKGATE_DIS REG_BIT(10)
-
#define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4)
#define XEHP_SLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x94d4)
#define SARBUNIT_CLKGATE_DIS (1 << 5)
@@ -749,9 +734,6 @@
#define L3_CLKGATE_DIS REG_BIT(16)
#define L3_CR2X_CLKGATE_DIS REG_BIT(17)
-#define SCCGCTL94DC MCR_REG(0x94dc)
-#define CG3DDISURB REG_BIT(14)
-
#define UNSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x94e4)
#define VSUNIT_CLKGATE_DIS_TGL REG_BIT(19)
#define PSDUNIT_CLKGATE_DIS REG_BIT(5)
@@ -769,9 +751,6 @@
#define GEN10_DFR_RATIO_EN_AND_CHICKEN MCR_REG(0x9550)
#define DFR_DISABLE (1 << 9)
-#define INF_UNIT_LEVEL_CLKGATE MCR_REG(0x9560)
-#define CGPSF_CLKGATE_DIS (1 << 3)
-
#define MICRO_BP0_0 _MMIO(0x9800)
#define MICRO_BP0_2 _MMIO(0x9804)
#define MICRO_BP0_1 _MMIO(0x9808)
@@ -900,11 +879,10 @@
/* GPM unit config (Gen9+) */
#define CTC_MODE _MMIO(0xa26c)
-#define CTC_SOURCE_PARAMETER_MASK 1
-#define CTC_SOURCE_CRYSTAL_CLOCK 0
-#define CTC_SOURCE_DIVIDE_LOGIC 1
-#define CTC_SHIFT_PARAMETER_SHIFT 1
-#define CTC_SHIFT_PARAMETER_MASK (0x3 << CTC_SHIFT_PARAMETER_SHIFT)
+#define CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1)
+#define CTC_SOURCE_PARAMETER_MASK REG_BIT(0)
+#define CTC_SOURCE_CRYSTAL_CLOCK REG_FIELD_PREP(CTC_SOURCE_PARAMETER_MASK, 0)
+#define CTC_SOURCE_DIVIDE_LOGIC REG_FIELD_PREP(CTC_SOURCE_PARAMETER_MASK, 1)
/* GPM MSG_IDLE */
#define MSG_IDLE_CS _MMIO(0x8000)
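The CTC_MODE cleanup above is representative of the whole file: raw shift/mask pairs become REG_GENMASK() fields read with REG_FIELD_GET() and composed with REG_FIELD_PREP(). A sketch of the consuming side, matching the read_clock_frequency() change earlier in this series:

        u32 ctc = intel_uncore_read(uncore, CTC_MODE);

        if (ctc & CTC_SOURCE_DIVIDE_LOGIC)      /* bit 0 selects divide logic */
                freq >>= 3 - REG_FIELD_GET(CTC_SHIFT_PARAMETER_MASK, ctc); /* bits 2:1 */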
@@ -948,12 +926,12 @@
#define CHV_POWER_SS0_SIG1 _MMIO(0xa720)
#define CHV_POWER_SS0_SIG2 _MMIO(0xa724)
#define CHV_POWER_SS1_SIG1 _MMIO(0xa728)
-#define CHV_SS_PG_ENABLE (1 << 1)
-#define CHV_EU08_PG_ENABLE (1 << 9)
-#define CHV_EU19_PG_ENABLE (1 << 17)
-#define CHV_EU210_PG_ENABLE (1 << 25)
+#define CHV_EU210_PG_ENABLE REG_BIT(25)
+#define CHV_EU19_PG_ENABLE REG_BIT(17)
+#define CHV_EU08_PG_ENABLE REG_BIT(9)
+#define CHV_SS_PG_ENABLE REG_BIT(1)
#define CHV_POWER_SS1_SIG2 _MMIO(0xa72c)
-#define CHV_EU311_PG_ENABLE (1 << 1)
+#define CHV_EU311_PG_ENABLE REG_BIT(1)
#define GEN7_SARCHKMD _MMIO(0xb000)
#define GEN7_DISABLE_DEMAND_PREFETCH (1 << 31)
@@ -976,10 +954,6 @@
#define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C47FF8C
#define GEN7_L3AGDIS (1 << 19)
-#define XEHPC_LNCFMISCCFGREG0 MCR_REG(0xb01c)
-#define XEHPC_HOSTCACHEEN REG_BIT(1)
-#define XEHPC_OVRLSCCC REG_BIT(0)
-
#define GEN7_L3CNTLREG2 _MMIO(0xb020)
/* MOCS (Memory Object Control State) registers */
@@ -1033,20 +1007,9 @@
#define XEHP_L3SQCREG5 MCR_REG(0xb158)
#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0)
-#define MLTICTXCTL MCR_REG(0xb170)
-#define TDONRENDER REG_BIT(2)
-
#define XEHP_L3SCQREG7 MCR_REG(0xb188)
#define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3)
-#define XEHPC_L3SCRUB MCR_REG(0xb18c)
-#define SCRUB_CL_DWNGRADE_SHARED REG_BIT(12)
-#define SCRUB_RATE_PER_BANK_MASK REG_GENMASK(2, 0)
-#define SCRUB_RATE_4B_PER_CLK REG_FIELD_PREP(SCRUB_RATE_PER_BANK_MASK, 0x6)
-
-#define L3SQCREG1_CCS0 MCR_REG(0xb200)
-#define FLUSHALLNONCOH REG_BIT(5)
-
#define GEN11_GLBLINVL _MMIO(0xb404)
#define GEN11_BANK_HASH_ADDR_EXCL_MASK (0x7f << 5)
#define GEN11_BANK_HASH_ADDR_EXCL_BIT0 (1 << 5)
@@ -1072,16 +1035,12 @@
#define XEHP_FAULT_TLB_DATA0 MCR_REG(0xceb8)
#define GEN12_FAULT_TLB_DATA1 _MMIO(0xcebc)
#define XEHP_FAULT_TLB_DATA1 MCR_REG(0xcebc)
-#define FAULT_VA_HIGH_BITS (0xf << 0)
-#define FAULT_GTT_SEL (1 << 4)
+/* see GEN8_FAULT_TLB_DATA0/1 */
#define GEN12_RING_FAULT_REG _MMIO(0xcec4)
#define XEHP_RING_FAULT_REG MCR_REG(0xcec4)
-#define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7)
-#define RING_FAULT_GTTSEL_MASK (1 << 11)
-#define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff)
-#define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3)
-#define RING_FAULT_VALID (1 << 0)
+#define XELPMP_RING_FAULT_REG _MMIO(0xcec4)
+/* see GEN8_RING_FAULT_REG */
#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8)
#define XEHP_GFX_TLB_INV_CR MCR_REG(0xced8)
@@ -1093,8 +1052,8 @@
#define XEHP_BLT_TLB_INV_CR MCR_REG(0xcee4)
#define GEN12_COMPCTX_TLB_INV_CR _MMIO(0xcf04)
#define XEHP_COMPCTX_TLB_INV_CR MCR_REG(0xcf04)
+#define XELPMP_GSC_TLB_INV_CR _MMIO(0xcf04) /* media GT only */
-#define XEHP_MERT_MOD_CTRL MCR_REG(0xcf28)
#define RENDER_MOD_CTRL MCR_REG(0xcf2c)
#define COMP_MOD_CTRL MCR_REG(0xcf30)
#define XELPMP_GSC_MOD_CTRL _MMIO(0xcf30) /* media GT only */
@@ -1145,6 +1104,8 @@
#define ENABLE_SMALLPL REG_BIT(15)
#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9)
#define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5)
+#define MTL_DISABLE_SAMPLER_SC_OOO REG_BIT(3)
+#define GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE REG_BIT(0)
#define GEN9_HALF_SLICE_CHICKEN7 MCR_REG(0xe194)
#define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15)
@@ -1156,13 +1117,18 @@
#define ENABLE_EU_COUNT_FOR_TDL_FLUSH REG_BIT(10)
#define DISABLE_ECC REG_BIT(5)
#define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4)
+/*
+ * We have both ENABLE and DISABLE defines below using the same bit because the
+ * meaning depends on the target platform. There is no platform prefix for
+ * them because different steppings of DG2 pick one semantic or the other.
+ */
#define ENABLE_PREFETCH_INTO_IC REG_BIT(3)
+#define DISABLE_PREFETCH_INTO_IC REG_BIT(3)
#define EU_PERF_CNTL0 PERF_REG(0xe458)
#define EU_PERF_CNTL4 PERF_REG(0xe45c)
#define GEN9_ROW_CHICKEN4 MCR_REG(0xe48c)
-#define GEN12_DISABLE_GRF_CLEAR REG_BIT(13)
#define XEHP_DIS_BBL_SYSPIPE REG_BIT(11)
#define GEN12_DISABLE_TDL_PUSH REG_BIT(9)
#define GEN11_DIS_PICK_2ND_EU REG_BIT(7)
@@ -1171,13 +1137,14 @@
#define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
#define HSW_ROW_CHICKEN3 _MMIO(0xe49c)
+#define GEN9_ROW_CHICKEN3 MCR_REG(0xe49c)
#define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6)
+#define MTL_DISABLE_FIX_FOR_EOT_FLUSH REG_BIT(9)
#define GEN8_ROW_CHICKEN MCR_REG(0xe4f0)
#define FLOW_CONTROL_ENABLE REG_BIT(15)
#define UGM_BACKUP_MODE REG_BIT(13)
#define MDQ_ARBITRATION_MODE REG_BIT(12)
-#define SYSTOLIC_DOP_CLOCK_GATING_DIS REG_BIT(10)
#define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE REG_BIT(8)
#define STALL_DOP_GATING_DISABLE REG_BIT(5)
#define THROTTLE_12_5 REG_GENMASK(4, 2)
@@ -1190,6 +1157,7 @@
#define GEN12_DISABLE_EARLY_READ REG_BIT(14)
#define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12)
#define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8)
+#define XELPG_DISABLE_TDL_SVHS_GATING REG_BIT(1)
#define GEN12_DISABLE_DOP_GATING REG_BIT(0)
#define RT_CTRL MCR_REG(0xe530)
@@ -1202,6 +1170,8 @@
#define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0)
#define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11)
+#define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3)
+
#define ICL_HDC_MODE MCR_REG(0xe5f4)
#define EU_PERF_CNTL2 PERF_REG(0xe658)
@@ -1212,6 +1182,7 @@
#define DISABLE_D8_D16_COASLESCE REG_BIT(30)
#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15)
#define LSC_CHICKEN_BIT_0_UDW MCR_REG(0xe7c8 + 4)
+#define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32)
#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32)
#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32)
#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32)
@@ -1449,19 +1420,21 @@
#define ECOBITS_PPGTT_CACHE4B (0 << 8)
#define GEN12_RCU_MODE _MMIO(0x14800)
+#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1)
#define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0)
-#define CHV_FUSE_GT _MMIO(VLV_DISPLAY_BASE + 0x2168)
-#define CHV_FGT_DISABLE_SS0 (1 << 10)
-#define CHV_FGT_DISABLE_SS1 (1 << 11)
-#define CHV_FGT_EU_DIS_SS0_R0_SHIFT 16
-#define CHV_FGT_EU_DIS_SS0_R0_MASK (0xf << CHV_FGT_EU_DIS_SS0_R0_SHIFT)
-#define CHV_FGT_EU_DIS_SS0_R1_SHIFT 20
-#define CHV_FGT_EU_DIS_SS0_R1_MASK (0xf << CHV_FGT_EU_DIS_SS0_R1_SHIFT)
-#define CHV_FGT_EU_DIS_SS1_R0_SHIFT 24
-#define CHV_FGT_EU_DIS_SS1_R0_MASK (0xf << CHV_FGT_EU_DIS_SS1_R0_SHIFT)
-#define CHV_FGT_EU_DIS_SS1_R1_SHIFT 28
-#define CHV_FGT_EU_DIS_SS1_R1_MASK (0xf << CHV_FGT_EU_DIS_SS1_R1_SHIFT)
+#define XEHP_CCS_MODE _MMIO(0x14804)
+#define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */
+#define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1)
+#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH))
+
+#define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168)
+#define CHV_FGT_EU_DIS_SS1_R1_MASK REG_GENMASK(31, 28)
+#define CHV_FGT_EU_DIS_SS1_R0_MASK REG_GENMASK(27, 24)
+#define CHV_FGT_EU_DIS_SS0_R1_MASK REG_GENMASK(23, 20)
+#define CHV_FGT_EU_DIS_SS0_R0_MASK REG_GENMASK(19, 16)
+#define CHV_FGT_DISABLE_SS1 REG_BIT(11)
+#define CHV_FGT_DISABLE_SS0 REG_BIT(10)
#define BCS_SWCTRL _MMIO(0x22200)
#define BCS_SRC_Y REG_BIT(0)
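XEHP_CCS_MODE packs one 3-bit field per compute slice (XEHP_CCS_MODE_CSLICE_WIDTH is ilog2 of the mask width) naming which CCS engine services that cslice. A hedged sketch of how the fields compose; the actual assignment policy lives in intel_gt_ccs_mode.c, and first_ccs here is a hypothetical engine id:

        u32 mode = 0;
        int cslice;

        for (cslice = 0; cslice < 4; cslice++)
                mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs); /* all to one CCS */
        intel_uncore_write(uncore, XEHP_CCS_MODE, mode);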
@@ -1491,6 +1464,10 @@
GEN6_PM_RP_DOWN_THRESHOLD | \
GEN6_PM_RP_DOWN_TIMEOUT)
+#define GEN6_PM_IRQ_REGS I915_IRQ_REGS(GEN6_PMIMR, \
+ GEN6_PMIER, \
+ GEN6_PMIIR)
+
#define GEN7_GT_SCRATCH(i) _MMIO(0x4f100 + (i) * 4)
#define GEN7_GT_SCRATCH_REG_NUM 8
@@ -1572,12 +1549,15 @@
#define VLV_RENDER_C0_COUNT _MMIO(0x138118)
#define VLV_MEDIA_C0_COUNT _MMIO(0x13811c)
+#define PCU_PWM_FAN_SPEED _MMIO(0x138140)
+
#define GEN12_RPSTAT1 _MMIO(0x1381b4)
#define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0)
#define GEN12_CAGF_MASK REG_GENMASK(19, 11)
#define GEN11_GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4))
#define GEN11_CSME (31)
+#define GEN12_HECI_2 (30)
#define GEN11_GUNIT (28)
#define GEN11_GUC (25)
#define MTL_MGUC (24)
@@ -1619,6 +1599,7 @@
/* irq instances for OTHER_CLASS */
#define OTHER_GUC_INSTANCE 0
#define OTHER_GTPM_INSTANCE 1
+#define OTHER_GSC_HECI_2_INSTANCE 3
#define OTHER_KCR_INSTANCE 4
#define OTHER_GSC_INSTANCE 6
#define OTHER_MEDIA_GUC_INSTANCE 16
@@ -1634,6 +1615,7 @@
#define GEN12_VCS6_VCS7_INTR_MASK _MMIO(0x1900b4)
#define GEN11_VECS0_VECS1_INTR_MASK _MMIO(0x1900d0)
#define GEN12_VECS2_VECS3_INTR_MASK _MMIO(0x1900d4)
+#define GEN12_HECI2_RSVD_INTR_MASK _MMIO(0x1900e4)
#define GEN11_GUC_SG_INTR_MASK _MMIO(0x1900e8)
#define MTL_GUC_MGUC_INTR_MASK _MMIO(0x1900e8) /* MTL+ */
#define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec)
@@ -1648,11 +1630,6 @@
#define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000)
-#define GT0_PACKAGE_ENERGY_STATUS _MMIO(0x250004)
-#define GT0_PACKAGE_RAPL_LIMIT _MMIO(0x250008)
-#define GT0_PACKAGE_POWER_SKU_UNIT _MMIO(0x250068)
-#define GT0_PLATFORM_ENERGY_STATUS _MMIO(0x25006c)
-
/*
* Standalone Media's non-engine GT registers are located at their regular GT
* offsets plus 0x380000. This extra offset is stored inside the intel_uncore
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 1dfd01668c79..93298820bee2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -116,7 +116,7 @@ void intel_engine_add_retire(struct intel_engine_cs *engine,
GEM_BUG_ON(intel_engine_is_virtual(engine));
if (add_retire(engine, tl))
- schedule_work(&engine->retire_work);
+ queue_work(engine->i915->unordered_wq, &engine->retire_work);
}
void intel_engine_init_retire(struct intel_engine_cs *engine)
@@ -207,8 +207,8 @@ static void retire_work_handler(struct work_struct *work)
struct intel_gt *gt =
container_of(work, typeof(*gt), requests.retire_work.work);
- schedule_delayed_work(&gt->requests.retire_work,
- round_jiffies_up_relative(HZ));
+ queue_delayed_work(gt->i915->unordered_wq, &gt->requests.retire_work,
+ round_jiffies_up_relative(HZ));
intel_gt_retire_requests(gt);
}
@@ -224,8 +224,8 @@ void intel_gt_park_requests(struct intel_gt *gt)
void intel_gt_unpark_requests(struct intel_gt *gt)
{
- schedule_delayed_work(&gt->requests.retire_work,
- round_jiffies_up_relative(HZ));
+ queue_delayed_work(gt->i915->unordered_wq, &gt->requests.retire_work,
+ round_jiffies_up_relative(HZ));
}
void intel_gt_fini_requests(struct intel_gt *gt)
@@ -250,11 +250,17 @@ void intel_gt_watchdog_work(struct work_struct *work)
llist_for_each_entry_safe(rq, rn, first, watchdog.link) {
if (!i915_request_completed(rq)) {
struct dma_fence *f = &rq->fence;
+ const char __rcu *timeline;
+ const char __rcu *driver;
+ rcu_read_lock();
+ driver = dma_fence_driver_name(f);
+ timeline = dma_fence_timeline_name(f);
pr_notice("Fence expiration time out i915-%s:%s:%llx!\n",
- f->ops->get_driver_name(f),
- f->ops->get_timeline_name(f),
+ rcu_dereference(driver),
+ rcu_dereference(timeline),
f->seqno);
+ rcu_read_unlock();
i915_request_cancel(rq, -EINTR);
}
i915_request_put(rq);
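
The watchdog hunk above stops dereferencing fence->ops directly and instead uses the RCU-protected dma_fence_driver_name()/dma_fence_timeline_name() accessors, since those strings may be freed once the fence signals and its driver tears down the timeline. A minimal sketch of the same pattern, assuming only the dma-fence API used in the hunk (report_stuck_fence() is hypothetical):

#include <linux/dma-fence.h>
#include <linux/printk.h>
#include <linux/rcupdate.h>

static void report_stuck_fence(struct dma_fence *f)
{
	const char __rcu *driver;
	const char __rcu *timeline;

	rcu_read_lock();
	driver = dma_fence_driver_name(f);
	timeline = dma_fence_timeline_name(f);
	pr_notice("fence %s:%s:%llx did not signal\n",
		  rcu_dereference(driver), rcu_dereference(timeline),
		  f->seqno);
	rcu_read_unlock();
}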
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
index 6629e4c72b6b..33cba406b569 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
@@ -72,7 +72,7 @@ static void kobj_gt_release(struct kobject *kobj)
{
}
-static struct kobj_type kobj_gt_type = {
+static const struct kobj_type kobj_gt_type = {
.release = kobj_gt_release,
.sysfs_ops = &kobj_sysfs_ops,
.default_groups = id_groups,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index 28f27091cd3b..1154cd2b7c34 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -176,27 +176,13 @@ static u32 get_residency(struct intel_gt *gt, enum intel_rc6_res_type id)
return DIV_ROUND_CLOSEST_ULL(res, 1000);
}
-static u8 get_rc6_mask(struct intel_gt *gt)
-{
- u8 mask = 0;
-
- if (HAS_RC6(gt->i915))
- mask |= BIT(0);
- if (HAS_RC6p(gt->i915))
- mask |= BIT(1);
- if (HAS_RC6pp(gt->i915))
- mask |= BIT(2);
-
- return mask;
-}
-
static ssize_t rc6_enable_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buff)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
- return sysfs_emit(buff, "%x\n", get_rc6_mask(gt));
+ return sysfs_emit(buff, "%x\n", gt->rc6.enabled);
}
static ssize_t rc6_enable_dev_show(struct device *dev,
@@ -205,7 +191,7 @@ static ssize_t rc6_enable_dev_show(struct device *dev,
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(&dev->kobj, attr->attr.name);
- return sysfs_emit(buff, "%x\n", get_rc6_mask(gt));
+ return sysfs_emit(buff, "%x\n", gt->rc6.enabled);
}
static u32 __rc6_residency_ms_show(struct intel_gt *gt)
@@ -451,6 +437,72 @@ static ssize_t punit_req_freq_mhz_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", preq);
}
+static ssize_t slpc_ignore_eff_freq_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buff)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+ struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
+
+ return sysfs_emit(buff, "%u\n", slpc->ignore_eff_freq);
+}
+
+static ssize_t slpc_ignore_eff_freq_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buff, size_t count)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+ struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
+ int err;
+ u32 val;
+
+ err = kstrtou32(buff, 0, &val);
+ if (err)
+ return err;
+
+ err = intel_guc_slpc_set_ignore_eff_freq(slpc, val);
+ return err ?: count;
+}
+
+static ssize_t slpc_power_profile_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buff)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+ struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+
+ switch (slpc->power_profile) {
+ case SLPC_POWER_PROFILES_BASE:
+ return sysfs_emit(buff, "[%s] %s\n", "base", "power_saving");
+ case SLPC_POWER_PROFILES_POWER_SAVING:
+ return sysfs_emit(buff, "%s [%s]\n", "base", "power_saving");
+ }
+
+ return sysfs_emit(buff, "%u\n", slpc->power_profile);
+}
+
+static ssize_t slpc_power_profile_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buff, size_t count)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+ struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+ char power_saving[] = "power_saving";
+ char base[] = "base";
+ int err;
+ u32 val;
+
+ if (!strncmp(buff, power_saving, sizeof(power_saving) - 1))
+ val = SLPC_POWER_PROFILES_POWER_SAVING;
+ else if (!strncmp(buff, base, sizeof(base) - 1))
+ val = SLPC_POWER_PROFILES_BASE;
+ else
+ return -EINVAL;
+
+ err = intel_guc_slpc_set_power_profile(slpc, val);
+ return err ?: count;
+}
+
struct intel_gt_bool_throttle_attr {
struct attribute attr;
ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,
@@ -560,7 +612,6 @@ static ssize_t media_freq_factor_show(struct kobject *kobj,
char *buff)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
- struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
intel_wakeref_t wakeref;
u32 mode;
@@ -568,20 +619,12 @@ static ssize_t media_freq_factor_show(struct kobject *kobj,
* Retrieve media_ratio_mode from GEN6_RPNSWREQ bit 13 set by
* GuC. GEN6_RPNSWREQ:13 value 0 represents 1:2 and 1 represents 1:1
*/
- if (IS_XEHPSDV(gt->i915) &&
- slpc->media_ratio_mode == SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL) {
- /*
- * For XEHPSDV dynamic mode GEN6_RPNSWREQ:13 does not contain
- * the media_ratio_mode, just return the cached media ratio
- */
- mode = slpc->media_ratio_mode;
- } else {
- with_intel_runtime_pm(gt->uncore->rpm, wakeref)
- mode = intel_uncore_read(gt->uncore, GEN6_RPNSWREQ);
- mode = REG_FIELD_GET(GEN12_MEDIA_FREQ_RATIO, mode) ?
- SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE :
- SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO;
- }
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ mode = intel_uncore_read(gt->uncore, GEN6_RPNSWREQ);
+
+ mode = REG_FIELD_GET(GEN12_MEDIA_FREQ_RATIO, mode) ?
+ SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE :
+ SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO;
return sysfs_emit(buff, "%u\n", media_ratio_mode_to_factor(mode));
}
@@ -591,7 +634,7 @@ static ssize_t media_freq_factor_store(struct kobject *kobj,
const char *buff, size_t count)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
- struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+ struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
u32 factor, mode;
int err;
@@ -663,6 +706,9 @@ static struct kobj_attribute attr_media_freq_factor_scale =
INTEL_GT_ATTR_RO(media_RP0_freq_mhz);
INTEL_GT_ATTR_RO(media_RPn_freq_mhz);
+INTEL_GT_ATTR_RW(slpc_ignore_eff_freq);
+INTEL_GT_ATTR_RW(slpc_power_profile);
+
static const struct attribute *media_perf_power_attrs[] = {
&attr_media_freq_factor.attr,
&attr_media_freq_factor_scale.attr,
@@ -672,6 +718,80 @@ static const struct attribute *media_perf_power_attrs[] = {
};
static ssize_t
+rps_up_threshold_pct_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+ struct intel_rps *rps = &gt->rps;
+
+ return sysfs_emit(buf, "%u\n", intel_rps_get_up_threshold(rps));
+}
+
+static ssize_t
+rps_up_threshold_pct_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+ struct intel_rps *rps = &gt->rps;
+ int ret;
+ u8 val;
+
+ ret = kstrtou8(buf, 10, &val);
+ if (ret)
+ return ret;
+
+ ret = intel_rps_set_up_threshold(rps, val);
+
+ return ret == 0 ? count : ret;
+}
+
+static struct kobj_attribute rps_up_threshold_pct =
+ __ATTR(rps_up_threshold_pct,
+ 0664,
+ rps_up_threshold_pct_show,
+ rps_up_threshold_pct_store);
+
+static ssize_t
+rps_down_threshold_pct_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+ struct intel_rps *rps = &gt->rps;
+
+ return sysfs_emit(buf, "%u\n", intel_rps_get_down_threshold(rps));
+}
+
+static ssize_t
+rps_down_threshold_pct_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+ struct intel_rps *rps = &gt->rps;
+ int ret;
+ u8 val;
+
+ ret = kstrtou8(buf, 10, &val);
+ if (ret)
+ return ret;
+
+ ret = intel_rps_set_down_threshold(rps, val);
+
+ return ret == 0 ? count : ret;
+}
+
+static struct kobj_attribute rps_down_threshold_pct =
+ __ATTR(rps_down_threshold_pct,
+ 0664,
+ rps_down_threshold_pct_show,
+ rps_down_threshold_pct_store);
+
+static const struct attribute * const gen6_gt_rps_attrs[] = {
+ &rps_up_threshold_pct.attr,
+ &rps_down_threshold_pct.attr,
+ NULL
+};
+
+static ssize_t
default_min_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct intel_gt *gt = kobj_to_gt(kobj->parent);
@@ -693,9 +813,37 @@ default_max_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, cha
static struct kobj_attribute default_max_freq_mhz =
__ATTR(rps_max_freq_mhz, 0444, default_max_freq_mhz_show, NULL);
+static ssize_t
+default_rps_up_threshold_pct_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ struct intel_gt *gt = kobj_to_gt(kobj->parent);
+
+ return sysfs_emit(buf, "%u\n", gt->defaults.rps_up_threshold);
+}
+
+static struct kobj_attribute default_rps_up_threshold_pct =
+__ATTR(rps_up_threshold_pct, 0444, default_rps_up_threshold_pct_show, NULL);
+
+static ssize_t
+default_rps_down_threshold_pct_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ struct intel_gt *gt = kobj_to_gt(kobj->parent);
+
+ return sysfs_emit(buf, "%u\n", gt->defaults.rps_down_threshold);
+}
+
+static struct kobj_attribute default_rps_down_threshold_pct =
+__ATTR(rps_down_threshold_pct, 0444, default_rps_down_threshold_pct_show, NULL);
+
static const struct attribute * const rps_defaults_attrs[] = {
&default_min_freq_mhz.attr,
&default_max_freq_mhz.attr,
+ &default_rps_up_threshold_pct.attr,
+ &default_rps_down_threshold_pct.attr,
NULL
};
@@ -723,6 +871,12 @@ static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj)
if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
ret = sysfs_create_file(kobj, vlv_attr);
+ if (is_object_gt(kobj) && !intel_uc_uses_guc_slpc(&gt->uc)) {
+ ret = sysfs_create_files(kobj, gen6_gt_rps_attrs);
+ if (ret)
+ return ret;
+ }
+
return ret;
}
@@ -744,6 +898,19 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
if (ret)
gt_warn(gt, "failed to create punit_req_freq_mhz sysfs (%pe)", ERR_PTR(ret));
+ if (intel_uc_uses_guc_slpc(&gt->uc)) {
+ ret = sysfs_create_file(kobj, &attr_slpc_ignore_eff_freq.attr);
+ if (ret)
+ gt_warn(gt, "failed to create ignore_eff_freq sysfs (%pe)", ERR_PTR(ret));
+ }
+
+ if (intel_uc_uses_guc_slpc(&gt->uc)) {
+ ret = sysfs_create_file(kobj, &attr_slpc_power_profile.attr);
+ if (ret)
+ gt_warn(gt, "failed to create slpc_power_profile sysfs (%pe)",
+ ERR_PTR(ret));
+ }
+
if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) {
ret = sysfs_create_files(kobj, throttle_reason_attrs);
if (ret)
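
The new slpc_power_profile attribute accepts the literal strings "base" and "power_saving" and marks the active profile with brackets on read. A hedged userspace sketch, assuming the per-GT sysfs layout /sys/class/drm/cardN/gt/gtM under which these files are created:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* gt_dir is e.g. "/sys/class/drm/card0/gt/gt0"; returns 0 on success */
static int set_slpc_power_profile(const char *gt_dir, const char *profile)
{
	char path[256];
	ssize_t ret;
	int fd;

	snprintf(path, sizeof(path), "%s/slpc_power_profile", gt_dir);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;

	ret = write(fd, profile, strlen(profile));
	close(fd);

	return ret < 0 ? -1 : 0;
}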
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index f08c2556aa25..bcee084b1f27 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -83,6 +83,9 @@ enum intel_submission_method {
struct gt_defaults {
u32 min_freq;
u32 max_freq;
+
+ u8 rps_up_threshold;
+ u8 rps_down_threshold;
};
enum intel_gt_type {
@@ -204,6 +207,14 @@ struct intel_gt {
[MAX_ENGINE_INSTANCE + 1];
enum intel_submission_method submission_method;
+ struct {
+ /*
+ * Mask of the non-fused CCS slices
+ * to be used for load balancing
+ */
+ intel_engine_mask_t cslices;
+ } ccs;
+
/*
* Default address space (either GGTT or ppGTT depending on arch).
*
@@ -281,6 +292,8 @@ struct intel_gt {
struct gt_defaults defaults;
struct kobject *sysfs_defaults;
+ struct work_struct wedge;
+
struct i915_perf_gt perf;
/** link: &ggtt.gt_list */
@@ -306,4 +319,6 @@ enum intel_gt_scratch_field {
INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
};
+#define intel_gt_support_legacy_fencing(gt) ((gt)->ggtt->num_fences > 0)
+
#endif /* __INTEL_GT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 4f436ba7a3c8..afbc5c769308 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -21,6 +21,12 @@
#include "intel_gt_regs.h"
#include "intel_gtt.h"
+bool i915_ggtt_require_binder(struct drm_i915_private *i915)
+{
+ /* Wa_13010847436 & Wa_14019519902 */
+ return !i915_direct_stolen_access(i915) &&
+ MEDIA_VER_FULL(i915) == IP_VER(13, 0);
+}
static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
{
@@ -58,6 +64,9 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
if (!IS_ERR(obj)) {
obj->base.resv = i915_vm_resv_get(vm);
obj->shares_resv_from = vm;
+
+ if (vm->fpriv)
+ i915_drm_client_add_object(vm->fpriv->client, obj);
}
return obj;
@@ -79,6 +88,9 @@ struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
if (!IS_ERR(obj)) {
obj->base.resv = i915_vm_resv_get(vm);
obj->shares_resv_from = vm;
+
+ if (vm->fpriv)
+ i915_drm_client_add_object(vm->fpriv->client, obj);
}
return obj;
@@ -89,7 +101,17 @@ int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
enum i915_map_type type;
void *vaddr;
- type = i915_coherent_map_type(vm->i915, obj, true);
+ type = intel_gt_coherent_map_type(vm->gt, obj, true);
+ /*
+ * FIXME: It is suspected that some Address Translation Service (ATS)
+ * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
+ * Applying a write barrier to the ppgtt set entry functions appeared
+ * to have no effect, so we must temporarily use I915_MAP_WC here on
+ * MTL until a proper ATS solution is found.
+ */
+ if (IS_METEORLAKE(vm->i915))
+ type = I915_MAP_WC;
+
vaddr = i915_gem_object_pin_map_unlocked(obj, type);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
@@ -103,7 +125,17 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object
enum i915_map_type type;
void *vaddr;
- type = i915_coherent_map_type(vm->i915, obj, true);
+ type = intel_gt_coherent_map_type(vm->gt, obj, true);
+ /*
+ * FIXME: It is suspected that some Address Translation Service (ATS)
+ * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
+ * Applying a write barrier to the ppgtt set entry functions appeared
+ * to have no effect, so we must temporarily use I915_MAP_WC here on
+ * MTL until a proper ATS solution is found.
+ */
+ if (IS_METEORLAKE(vm->i915))
+ type = I915_MAP_WC;
+
vaddr = i915_gem_object_pin_map(obj, type);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
@@ -144,7 +176,6 @@ static void clear_vm_list(struct list_head *list)
i915_vma_destroy_locked(vma);
i915_gem_object_put(obj);
}
-
}
}
@@ -468,6 +499,44 @@ void gtt_write_workarounds(struct intel_gt *gt)
}
}
+static void xelpmp_setup_private_ppat(struct intel_uncore *uncore)
+{
+ intel_uncore_write(uncore, XELPMP_PAT_INDEX(0),
+ MTL_PPAT_L4_0_WB);
+ intel_uncore_write(uncore, XELPMP_PAT_INDEX(1),
+ MTL_PPAT_L4_1_WT);
+ intel_uncore_write(uncore, XELPMP_PAT_INDEX(2),
+ MTL_PPAT_L4_3_UC);
+ intel_uncore_write(uncore, XELPMP_PAT_INDEX(3),
+ MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
+ intel_uncore_write(uncore, XELPMP_PAT_INDEX(4),
+ MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
+
+ /*
+ * Remaining PAT entries are left at the hardware-default
+ * fully-cached setting
+ */
+}
+
+static void xelpg_setup_private_ppat(struct intel_gt *gt)
+{
+ intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0),
+ MTL_PPAT_L4_0_WB);
+ intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1),
+ MTL_PPAT_L4_1_WT);
+ intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2),
+ MTL_PPAT_L4_3_UC);
+ intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3),
+ MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
+ intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4),
+ MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
+
+ /*
+ * Remaining PAT entries are left at the hardware-default
+ * fully-cached setting
+ */
+}
+
static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
/* TGL doesn't support LLC or AGE settings */
@@ -603,7 +672,14 @@ void setup_private_pat(struct intel_gt *gt)
GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+ if (gt->type == GT_MEDIA) {
+ xelpmp_setup_private_ppat(gt->uncore);
+ return;
+ }
+
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+ xelpg_setup_private_ppat(gt);
+ else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
xehp_setup_private_ppat(gt);
else if (GRAPHICS_VER(i915) >= 12)
tgl_setup_private_ppat(uncore);
@@ -625,7 +701,7 @@ __vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
if (IS_ERR(obj))
return ERR_CAST(obj);
- i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 5a775310d3fc..9d3a3ad567a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -35,9 +35,9 @@
#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
-#define DBG(...) trace_printk(__VA_ARGS__)
+#define GTT_TRACE(...) trace_printk(__VA_ARGS__)
#else
-#define DBG(...)
+#define GTT_TRACE(...)
#endif
#define NALLOC 3 /* 1 normal, 1 for concurrent threads, 1 for preallocation */
@@ -88,9 +88,17 @@ typedef u64 gen8_pte_t;
#define BYT_PTE_SNOOPED_BY_CPU_CACHES REG_BIT(2)
#define BYT_PTE_WRITEABLE REG_BIT(1)
+#define MTL_PPGTT_PTE_PAT3 BIT_ULL(62)
#define GEN12_PPGTT_PTE_LM BIT_ULL(11)
+#define GEN12_PPGTT_PTE_PAT2 BIT_ULL(7)
+#define GEN12_PPGTT_PTE_PAT1 BIT_ULL(4)
+#define GEN12_PPGTT_PTE_PAT0 BIT_ULL(3)
-#define GEN12_GGTT_PTE_LM BIT_ULL(1)
+#define GEN12_GGTT_PTE_LM BIT_ULL(1)
+#define MTL_GGTT_PTE_PAT0 BIT_ULL(52)
+#define MTL_GGTT_PTE_PAT1 BIT_ULL(53)
+#define GEN12_GGTT_PTE_ADDR_MASK GENMASK_ULL(45, 12)
+#define MTL_GGTT_PTE_PAT_MASK GENMASK_ULL(53, 52)
#define GEN12_PDE_64K BIT(6)
#define GEN12_PTE_PS64 BIT(8)
@@ -147,7 +155,13 @@ typedef u64 gen8_pte_t;
#define GEN8_PDE_IPS_64K BIT(11)
#define GEN8_PDE_PS_2M BIT(7)
-enum i915_cache_level;
+#define MTL_PPAT_L4_CACHE_POLICY_MASK REG_GENMASK(3, 2)
+#define MTL_PAT_INDEX_COH_MODE_MASK REG_GENMASK(1, 0)
+#define MTL_PPAT_L4_3_UC REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
+#define MTL_PPAT_L4_1_WT REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
+#define MTL_PPAT_L4_0_WB REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
+#define MTL_3_COH_2W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
+#define MTL_2_COH_1W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
struct drm_i915_gem_object;
struct i915_fence_reg;
@@ -157,6 +171,9 @@ struct intel_gt;
#define for_each_sgt_daddr(__dp, __iter, __sgt) \
__for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE)
+#define for_each_sgt_daddr_next(__dp, __iter) \
+ __for_each_daddr_next(__dp, __iter, I915_GTT_PAGE_SIZE)
+
struct i915_page_table {
struct drm_i915_gem_object *base;
union {
@@ -216,7 +233,7 @@ struct i915_vma_ops {
void (*bind_vma)(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 flags);
/*
* Unmap an object from an address space. This usually consists of
@@ -232,8 +249,13 @@ struct i915_address_space {
struct work_struct release_work;
struct drm_mm mm;
+ struct {
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ } rsvd;
struct intel_gt *gt;
struct drm_i915_private *i915;
+ struct drm_i915_file_private *fpriv;
struct device *dma;
u64 total; /* size addr space maps (ex. 2GB for ggtt) */
u64 reserved; /* size addr space reserved */
@@ -288,8 +310,9 @@ struct i915_address_space {
(*alloc_scratch_dma)(struct i915_address_space *vm, int sz);
u64 (*pte_encode)(dma_addr_t addr,
- enum i915_cache_level level,
+ unsigned int pat_index,
u32 flags); /* Create a valid PTE */
+ dma_addr_t (*pte_decode)(u64 pte, bool *is_present, bool *is_local);
#define PTE_READ_ONLY BIT(0)
#define PTE_LM BIT(1)
@@ -298,24 +321,28 @@ struct i915_address_space {
u64 start, u64 length);
void (*clear_range)(struct i915_address_space *vm,
u64 start, u64 length);
+ void (*scratch_range)(struct i915_address_space *vm,
+ u64 start, u64 length);
void (*insert_page)(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 flags);
void (*insert_entries)(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 flags);
void (*raw_insert_page)(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 flags);
void (*raw_insert_entries)(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 flags);
+ dma_addr_t (*read_entry)(struct i915_address_space *vm,
+ u64 offset, bool *is_present, bool *is_local);
void (*cleanup)(struct i915_address_space *vm);
void (*foreach)(struct i915_address_space *vm,
@@ -491,7 +518,7 @@ static inline void i915_vm_put(struct i915_address_space *vm)
/**
* i915_vm_resv_put - Release a reference on the vm's reservation lock
- * @resv: Pointer to a reservation lock obtained from i915_vm_resv_get()
+ * @vm: The vm whose reservation lock reference we want to release
*/
static inline void i915_vm_resv_put(struct i915_address_space *vm)
{
@@ -561,11 +588,14 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
void intel_ggtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 flags);
void intel_ggtt_unbind_vma(struct i915_address_space *vm,
struct i915_vma_resource *vma_res);
+dma_addr_t intel_ggtt_read_entry(struct i915_address_space *vm,
+ u64 offset, bool *is_present, bool *is_local);
+
int i915_ggtt_probe_hw(struct drm_i915_private *i915);
int i915_ggtt_init_hw(struct drm_i915_private *i915);
int i915_ggtt_enable_hw(struct drm_i915_private *i915);
@@ -584,8 +614,8 @@ int i915_ppgtt_init_hw(struct intel_gt *gt);
struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
unsigned long lmem_pt_obj_flags);
-void i915_ggtt_suspend_vm(struct i915_address_space *vm);
-bool i915_ggtt_resume_vm(struct i915_address_space *vm);
+void i915_ggtt_suspend_vm(struct i915_address_space *vm, bool evict_all);
+bool i915_ggtt_resume_vm(struct i915_address_space *vm, bool all_evicted);
void i915_ggtt_suspend(struct i915_ggtt *gtt);
void i915_ggtt_resume(struct i915_ggtt *ggtt);
@@ -639,7 +669,7 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
void ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 flags);
void ppgtt_unbind_vma(struct i915_address_space *vm,
struct i915_vma_resource *vma_res);
@@ -672,4 +702,6 @@ static inline struct sgt_dma {
return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) };
}
+bool i915_ggtt_require_binder(struct drm_i915_private *i915);
+
#endif
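
The MTL_GGTT_PTE_PAT0/PAT1 bits above are why the vm hooks now carry a raw pat_index instead of enum i915_cache_level: the index bits land in non-contiguous high PTE bits and cannot be derived from a cache level alone. A sketch of an encoder using them, assuming GEN8_PAGE_PRESENT from the existing gen8 PTE definitions (mtl_ggtt_pte_encode_sketch() is illustrative, not the driver's actual encoder):

static u64 mtl_ggtt_pte_encode_sketch(dma_addr_t addr,
				      unsigned int pat_index,
				      u32 flags)
{
	u64 pte = (addr & GEN12_GGTT_PTE_ADDR_MASK) | GEN8_PAGE_PRESENT;

	if (flags & PTE_LM)
		pte |= GEN12_GGTT_PTE_LM;

	/* the two PAT index bits sit at PTE bits 52 and 53 */
	if (pat_index & BIT(0))
		pte |= MTL_GGTT_PTE_PAT0;
	if (pat_index & BIT(1))
		pte |= MTL_GGTT_PTE_PAT1;

	return pte;
}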
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 81a96c52a92b..e8927ad49142 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3,6 +3,8 @@
* Copyright © 2014 Intel Corporation
*/
+#include <drm/drm_print.h>
+
#include "gem/i915_gem_lmem.h"
#include "gen8_engine_cs.h"
@@ -546,47 +548,6 @@ static const u8 gen12_rcs_offsets[] = {
END
};
-static const u8 xehp_rcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
-
- END
-};
-
static const u8 dg2_rcs_offsets[] = {
NOP(1),
LRI(15, POSTED),
@@ -695,8 +656,6 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
return mtl_rcs_offsets;
else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return dg2_rcs_offsets;
- else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
- return xehp_rcs_offsets;
else if (GRAPHICS_VER(engine->i915) >= 12)
return gen12_rcs_offsets;
else if (GRAPHICS_VER(engine->i915) >= 11)
@@ -719,7 +678,7 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return 0x70;
else if (GRAPHICS_VER(engine->i915) >= 12)
return 0x60;
@@ -733,7 +692,7 @@ static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
static int lrc_ring_bb_offset(const struct intel_engine_cs *engine)
{
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return 0x80;
else if (GRAPHICS_VER(engine->i915) >= 12)
return 0x70;
@@ -748,7 +707,7 @@ static int lrc_ring_bb_offset(const struct intel_engine_cs *engine)
static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return 0x84;
else if (GRAPHICS_VER(engine->i915) >= 12)
return 0x74;
@@ -794,8 +753,7 @@ static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
-
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
/*
* Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
* simply to match the RCS context image layout.
@@ -829,6 +787,18 @@ lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
}
static void
+lrc_setup_bb_per_ctx(u32 *regs,
+ const struct intel_engine_cs *engine,
+ u32 ctx_bb_ggtt_addr)
+{
+ GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
+ regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
+ ctx_bb_ggtt_addr |
+ PER_CTX_BB_FORCE |
+ PER_CTX_BB_VALID;
+}
+
+static void
lrc_setup_indirect_ctx(u32 *regs,
const struct intel_engine_cs *engine,
u32 ctx_bb_ggtt_addr,
@@ -845,6 +815,29 @@ lrc_setup_indirect_ctx(u32 *regs,
lrc_ring_indirect_offset_default(engine) << 6;
}
+static bool ctx_needs_runalone(const struct intel_context *ce)
+{
+ struct i915_gem_context *gem_ctx;
+ bool ctx_is_protected = false;
+
+ /*
+ * Wa_14019159160 - Case 2.
+ * On some platforms, protected contexts require setting
+ * the LRC run-alone bit or else the encryption/decryption will not happen.
+ * NOTE: Case 2 only applies to PXP use-case of said workaround.
+ */
+ if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 70) &&
+ (ce->engine->class == COMPUTE_CLASS || ce->engine->class == RENDER_CLASS)) {
+ rcu_read_lock();
+ gem_ctx = rcu_dereference(ce->gem_context);
+ if (gem_ctx)
+ ctx_is_protected = gem_ctx->uses_protected_content;
+ rcu_read_unlock();
+ }
+
+ return ctx_is_protected;
+}
+
static void init_common_regs(u32 * const regs,
const struct intel_context *ce,
const struct intel_engine_cs *engine,
@@ -860,6 +853,9 @@ static void init_common_regs(u32 * const regs,
if (GRAPHICS_VER(engine->i915) < 11)
ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
+ /* Wa_14019159160 - Case 2.*/
+ if (ctx_needs_runalone(ce))
+ ctl |= _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_RUNALONE_MODE);
regs[CTX_CONTEXT_CONTROL] = ctl;
regs[CTX_TIMESTAMP] = ce->stats.runtime.last;
@@ -997,7 +993,13 @@ static u32 context_wa_bb_offset(const struct intel_context *ce)
return PAGE_SIZE * ce->wa_bb_page;
}
-static u32 *context_indirect_bb(const struct intel_context *ce)
+/*
+ * per_ctx below determines which WABB section is used.
+ * When true, the function returns the location of the
+ * PER_CTX_BB. When false, the function returns the
+ * location of the INDIRECT_CTX.
+ */
+static u32 *context_wabb(const struct intel_context *ce, bool per_ctx)
{
void *ptr;
@@ -1006,6 +1008,7 @@ static u32 *context_indirect_bb(const struct intel_context *ce)
ptr = ce->lrc_reg_state;
ptr -= LRC_STATE_OFFSET; /* back to start of context image */
ptr += context_wa_bb_offset(ce);
+ ptr += per_ctx ? PAGE_SIZE : 0;
return ptr;
}
@@ -1018,9 +1021,8 @@ void lrc_init_state(struct intel_context *ce,
set_redzone(state, engine);
- if (engine->default_state) {
- shmem_read(engine->default_state, 0,
- state, engine->context_size);
+ if (ce->default_state) {
+ shmem_read(ce->default_state, 0, state, engine->context_size);
__set_bit(CONTEXT_VALID_BIT, &ce->flags);
inhibit = false;
}
@@ -1082,7 +1084,8 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
if (GRAPHICS_VER(engine->i915) >= 12) {
ce->wa_bb_page = context_size / PAGE_SIZE;
- context_size += PAGE_SIZE;
+ /* INDIRECT_CTX and PER_CTX_BB need separate pages. */
+ context_size += PAGE_SIZE * 2;
}
if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
@@ -1092,10 +1095,19 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
obj = i915_gem_object_create_lmem(engine->i915, context_size,
I915_BO_ALLOC_PM_VOLATILE);
- if (IS_ERR(obj))
+ if (IS_ERR(obj)) {
obj = i915_gem_object_create_shmem(engine->i915, context_size);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ /*
+ * Wa_22016122933: For Media version 13.0, all Media GT shared
+ * memory needs to be mapped as WC on CPU side and UC (PAT
+ * index 2) on GPU side.
+ */
+ if (intel_gt_needs_wa_22016122933(engine->gt))
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
+ }
vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
@@ -1122,6 +1134,9 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
GEM_BUG_ON(ce->state);
+ if (!intel_context_has_own_state(ce))
+ ce->default_state = engine->default_state;
+
vma = __lrc_alloc_state(ce, engine);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -1184,9 +1199,9 @@ lrc_pre_pin(struct intel_context *ce,
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
*vaddr = i915_gem_object_pin_map(ce->state->obj,
- i915_coherent_map_type(ce->engine->i915,
- ce->state->obj,
- false) |
+ intel_gt_coherent_map_type(ce->engine->gt,
+ ce->state->obj,
+ false) |
I915_MAP_OVERRIDE);
return PTR_ERR_OR_ZERO(*vaddr);
@@ -1308,29 +1323,6 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
}
/*
- * On DG2 during context restore of a preempted context in GPGPU mode,
- * RCS restore hang is detected. This is extremely timing dependent.
- * To address this below sw wabb is implemented for DG2 A steppings.
- */
-static u32 *
-dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs)
-{
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base));
- *cs++ = 0x21;
-
- *cs++ = MI_LOAD_REGISTER_REG;
- *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
- *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1);
-
- *cs++ = MI_LOAD_REGISTER_REG;
- *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
- *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2);
-
- return cs;
-}
-
-/*
* The bspec's tuning guide asks us to program a vertical watermark value of
* 0x3FF. However this register is not saved/restored properly by the
* hardware, so we're required to apply the desired value via INDIRECT_CTX
@@ -1348,29 +1340,35 @@ dg2_emit_draw_watermark_setting(u32 *cs)
}
static u32 *
+gen12_invalidate_state_cache(u32 *cs)
+{
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2);
+ *cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
+ return cs;
+}
+
+static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
cs = gen12_emit_timestamp_wa(ce, cs);
cs = gen12_emit_cmd_buf_wa(ce, cs);
cs = gen12_emit_restore_scratch(ce, cs);
- /* Wa_22011450934:dg2 */
- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0))
- cs = dg2_emit_rcs_hang_wabb(ce, cs);
-
/* Wa_16013000631:dg2 */
- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_G11(ce->engine->i915))
+ if (IS_DG2_G11(ce->engine->i915))
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
- /* hsdes: 1809175790 */
- if (!HAS_FLAT_CCS(ce->engine->i915))
- cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_GFX_CCS_AUX_NV);
+ cs = gen12_emit_aux_table_inv(ce->engine, cs);
+
+ /* Wa_18022495364 */
+ if (IS_GFX_GT_IP_RANGE(ce->engine->gt, IP_VER(12, 0), IP_VER(12, 10)))
+ cs = gen12_invalidate_state_cache(cs);
/* Wa_16014892111 */
- if (IS_DG2(ce->engine->i915))
+ if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
+ IS_DG2(ce->engine->i915))
cs = dg2_emit_draw_watermark_setting(cs);
return cs;
@@ -1383,32 +1381,94 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
cs = gen12_emit_restore_scratch(ce, cs);
/* Wa_16013000631:dg2 */
- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_G11(ce->engine->i915))
+ if (IS_DG2_G11(ce->engine->i915))
if (ce->engine->class == COMPUTE_CLASS)
cs = gen8_emit_pipe_control(cs,
PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
0);
- /* hsdes: 1809175790 */
- if (!HAS_FLAT_CCS(ce->engine->i915)) {
- if (ce->engine->class == VIDEO_DECODE_CLASS)
- cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_VD0_AUX_NV);
- else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
- cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_VE0_AUX_NV);
- }
+ return gen12_emit_aux_table_inv(ce->engine, cs);
+}
+
+static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs)
+{
+ struct intel_gt *gt = ce->engine->gt;
+ int mocs = gt->mocs.uc_index << 1;
+
+ /**
+ * Wa_16018031267 / Wa_16018063123 requires that SW forces the
+ * main copy engine arbitration into round robin mode. We
+ * additionally need to submit the following WABB blt command
+ * to produce 4 subblits with each subblit generating 0 byte
+ * write requests as WABB:
+ *
+ * XY_FASTCOLOR_BLT
+ * BG0 -> 5100000E
+ * BG1 -> 0000003F (Dest pitch)
+ * BG2 -> 00000000 (X1, Y1) = (0, 0)
+ * BG3 -> 00040001 (X2, Y2) = (1, 4)
+ * BG4 -> scratch
+ * BG5 -> scratch
+ * BG6-12 -> 00000000
+ * BG13 -> 20004004 (Surf. Width = 2, Surf. Height = 5)
+ * BG14 -> 00000010 (Qpitch = 4)
+ * BG15 -> 00000000
+ */
+ *cs++ = XY_FAST_COLOR_BLT_CMD | (16 - 2);
+ *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 0x3f;
+ *cs++ = 0;
+ *cs++ = 4 << 16 | 1;
+ *cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
+ *cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0x20004004;
+ *cs++ = 0x10;
+ *cs++ = 0;
+
+ return cs;
+}
+
+static u32 *
+xehp_emit_per_ctx_bb(const struct intel_context *ce, u32 *cs)
+{
+ /* Wa_16018031267, Wa_16018063123 */
+ if (NEEDS_FASTCOLOR_BLT_WABB(ce->engine))
+ cs = xehp_emit_fastcolor_blt_wabb(ce, cs);
return cs;
}
static void
+setup_per_ctx_bb(const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ u32 *(*emit)(const struct intel_context *, u32 *))
+{
+ /* Place PER_CTX_BB on next page after INDIRECT_CTX */
+ u32 * const start = context_wabb(ce, true);
+ u32 *cs;
+
+ cs = emit(ce, start);
+
+ /* PER_CTX_BB must manually terminate */
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
+ lrc_setup_bb_per_ctx(ce->lrc_reg_state, engine,
+ lrc_indirect_bb(ce) + PAGE_SIZE);
+}
+
+static void
setup_indirect_ctx_bb(const struct intel_context *ce,
const struct intel_engine_cs *engine,
u32 *(*emit)(const struct intel_context *, u32 *))
{
- u32 * const start = context_indirect_bb(ce);
+ u32 * const start = context_wabb(ce, false);
u32 *cs;
cs = emit(ce, start);
@@ -1507,6 +1567,7 @@ u32 lrc_update_regs(const struct intel_context *ce,
/* Mutually exclusive wrt to global indirect bb */
GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
setup_indirect_ctx_bb(ce, engine, fn);
+ setup_per_ctx_bb(ce, engine, xehp_emit_per_ctx_bb);
}
return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
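
With the context image now reserving two workaround pages, the INDIRECT_CTX batch lives at lrc_indirect_bb(ce) and the PER_CTX_BB batch one page later, which is exactly the layout setup_per_ctx_bb() programs above. A one-line helper capturing that layout (wabb_ggtt_addr() is hypothetical):

static u32 wabb_ggtt_addr(const struct intel_context *ce, bool per_ctx)
{
	/* page 0: INDIRECT_CTX batch; page 1: PER_CTX_BB batch */
	return lrc_indirect_bb(ce) + (per_ctx ? PAGE_SIZE : 0);
}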
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 3f638f198796..aff5aca591e6 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -35,9 +35,9 @@ static bool engine_supports_migration(struct intel_engine_cs *engine)
return true;
}
-static void xehpsdv_toggle_pdes(struct i915_address_space *vm,
- struct i915_page_table *pt,
- void *data)
+static void xehp_toggle_pdes(struct i915_address_space *vm,
+ struct i915_page_table *pt,
+ void *data)
{
struct insert_pte_data *d = data;
@@ -45,14 +45,16 @@ static void xehpsdv_toggle_pdes(struct i915_address_space *vm,
* Insert a dummy PTE into every PT that will map to LMEM to ensure
* we have a correctly setup PDE structure for later use.
*/
- vm->insert_page(vm, 0, d->offset, I915_CACHE_NONE, PTE_LM);
+ vm->insert_page(vm, 0, d->offset,
+ i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE),
+ PTE_LM);
GEM_BUG_ON(!pt->is_compact);
d->offset += SZ_2M;
}
-static void xehpsdv_insert_pte(struct i915_address_space *vm,
- struct i915_page_table *pt,
- void *data)
+static void xehp_insert_pte(struct i915_address_space *vm,
+ struct i915_page_table *pt,
+ void *data)
{
struct insert_pte_data *d = data;
@@ -63,7 +65,9 @@ static void xehpsdv_insert_pte(struct i915_address_space *vm,
* alignment is 64K underneath for the pt, and we are careful
* not to access the space in the void.
*/
- vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, PTE_LM);
+ vm->insert_page(vm, px_dma(pt), d->offset,
+ i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE),
+ PTE_LM);
d->offset += SZ_64K;
}
@@ -73,7 +77,8 @@ static void insert_pte(struct i915_address_space *vm,
{
struct insert_pte_data *d = data;
- vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE,
+ vm->insert_page(vm, px_dma(pt), d->offset,
+ i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE),
i915_gem_object_is_lmem(pt->base) ? PTE_LM : 0);
d->offset += PAGE_SIZE;
}
@@ -115,7 +120,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
* 512 entry layout using 4K GTT pages. The other two windows just map
* lmem pages and must use the new compact 32 entry layout using 64K GTT
* pages, which ensures we can address any lmem object that the user
- * throws at us. We then also use the xehpsdv_toggle_pdes as a way of
+ * throws at us. We then also use the xehp_toggle_pdes as a way of
* just toggling the PDE bit(GEN12_PDE_64K) for us, to enable the
* compact layout for each of these page-tables, that fall within the
* [CHUNK_SIZE, 3 * CHUNK_SIZE) range.
@@ -204,12 +209,12 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
/* Now allow the GPU to rewrite the PTE via its own ppGTT */
if (HAS_64K_PAGES(gt->i915)) {
vm->vm.foreach(&vm->vm, base, d.offset - base,
- xehpsdv_insert_pte, &d);
+ xehp_insert_pte, &d);
d.offset = base + CHUNK_SZ;
vm->vm.foreach(&vm->vm,
d.offset,
2 * CHUNK_SZ,
- xehpsdv_toggle_pdes, &d);
+ xehp_toggle_pdes, &d);
} else {
vm->vm.foreach(&vm->vm, base, d.offset - base,
insert_pte, &d);
@@ -299,7 +304,7 @@ struct intel_context *intel_migrate_create_context(struct intel_migrate *m)
struct intel_context *ce;
/*
- * We randomly distribute contexts across the engines upon constrction,
+ * We randomly distribute contexts across the engines upon construction,
* as they all share the same pinned vm, and so in order to allow
* multiple blits to run in parallel, we must construct each blit
* to use a different range of the vm for its GTT. This has to be
@@ -356,13 +361,13 @@ static int max_pte_pkt_size(struct i915_request *rq, int pkt)
static int emit_pte(struct i915_request *rq,
struct sgt_dma *it,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
bool is_lmem,
u64 offset,
int length)
{
- bool has_64K_pages = HAS_64K_PAGES(rq->engine->i915);
- const u64 encode = rq->context->vm->pte_encode(0, cache_level,
+ bool has_64K_pages = HAS_64K_PAGES(rq->i915);
+ const u64 encode = rq->context->vm->pte_encode(0, pat_index,
is_lmem ? PTE_LM : 0);
struct intel_ring *ring = rq->ring;
int pkt, dword_length;
@@ -370,7 +375,7 @@ static int emit_pte(struct i915_request *rq,
u32 page_size;
u32 *hdr, *cs;
- GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8);
+ GEM_BUG_ON(GRAPHICS_VER(rq->i915) < 8);
page_size = I915_GTT_PAGE_SIZE;
dword_length = 0x400;
@@ -526,7 +531,7 @@ static int emit_copy_ccs(struct i915_request *rq,
u32 dst_offset, u8 dst_access,
u32 src_offset, u8 src_access, int size)
{
- struct drm_i915_private *i915 = rq->engine->i915;
+ struct drm_i915_private *i915 = rq->i915;
int mocs = rq->engine->gt->mocs.uc_index << 1;
u32 num_ccs_blks;
u32 *cs;
@@ -576,7 +581,7 @@ static int emit_copy_ccs(struct i915_request *rq,
static int emit_copy(struct i915_request *rq,
u32 dst_offset, u32 src_offset, int size)
{
- const int ver = GRAPHICS_VER(rq->engine->i915);
+ const int ver = GRAPHICS_VER(rq->i915);
u32 instance = rq->engine->instance;
u32 *cs;
@@ -641,7 +646,7 @@ calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem,
* When CHUNK_SZ is passed all the pages upto CHUNK_SZ
* will be taken for the blt. in Flat-ccs supported
* platform Smem obj will have more pages than required
- * for main meory hence limit it to the required size
+ * for main memory hence limit it to the required size
* for main memory
*/
return min_t(u64, bytes_to_cpy, CHUNK_SZ);
@@ -673,17 +678,17 @@ int
intel_context_migrate_copy(struct intel_context *ce,
const struct i915_deps *deps,
struct scatterlist *src,
- enum i915_cache_level src_cache_level,
+ unsigned int src_pat_index,
bool src_is_lmem,
struct scatterlist *dst,
- enum i915_cache_level dst_cache_level,
+ unsigned int dst_pat_index,
bool dst_is_lmem,
struct i915_request **out)
{
struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs;
struct drm_i915_private *i915 = ce->engine->i915;
u64 ccs_bytes_to_cpy = 0, bytes_to_cpy;
- enum i915_cache_level ccs_cache_level;
+ unsigned int ccs_pat_index;
u32 src_offset, dst_offset;
u8 src_access, dst_access;
struct i915_request *rq;
@@ -707,12 +712,12 @@ intel_context_migrate_copy(struct intel_context *ce,
dst_sz = scatter_list_length(dst);
if (src_is_lmem) {
it_ccs = it_dst;
- ccs_cache_level = dst_cache_level;
+ ccs_pat_index = dst_pat_index;
ccs_is_src = false;
} else if (dst_is_lmem) {
bytes_to_cpy = dst_sz;
it_ccs = it_src;
- ccs_cache_level = src_cache_level;
+ ccs_pat_index = src_pat_index;
ccs_is_src = true;
}
@@ -773,7 +778,7 @@ intel_context_migrate_copy(struct intel_context *ce,
src_sz = calculate_chunk_sz(i915, src_is_lmem,
bytes_to_cpy, ccs_bytes_to_cpy);
- len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
+ len = emit_pte(rq, &it_src, src_pat_index, src_is_lmem,
src_offset, src_sz);
if (!len) {
err = -EINVAL;
@@ -784,7 +789,7 @@ intel_context_migrate_copy(struct intel_context *ce,
goto out_rq;
}
- err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem,
+ err = emit_pte(rq, &it_dst, dst_pat_index, dst_is_lmem,
dst_offset, len);
if (err < 0)
goto out_rq;
@@ -811,7 +816,7 @@ intel_context_migrate_copy(struct intel_context *ce,
goto out_rq;
ccs_sz = GET_CCS_BYTES(i915, len);
- err = emit_pte(rq, &it_ccs, ccs_cache_level, false,
+ err = emit_pte(rq, &it_ccs, ccs_pat_index, false,
ccs_is_src ? src_offset : dst_offset,
ccs_sz);
if (err < 0)
@@ -912,7 +917,7 @@ out_ce:
static int emit_clear(struct i915_request *rq, u32 offset, int size,
u32 value, bool is_lmem)
{
- struct drm_i915_private *i915 = rq->engine->i915;
+ struct drm_i915_private *i915 = rq->i915;
int mocs = rq->engine->gt->mocs.uc_index << 1;
const int ver = GRAPHICS_VER(i915);
int ring_sz;
@@ -920,7 +925,7 @@ static int emit_clear(struct i915_request *rq, u32 offset, int size,
GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
- if (HAS_FLAT_CCS(i915) && ver >= 12)
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
ring_sz = XY_FAST_COLOR_BLT_DW;
else if (ver >= 8)
ring_sz = 8;
@@ -931,7 +936,7 @@ static int emit_clear(struct i915_request *rq, u32 offset, int size,
if (IS_ERR(cs))
return PTR_ERR(cs);
- if (HAS_FLAT_CCS(i915) && ver >= 12) {
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) {
*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
(XY_FAST_COLOR_BLT_DW - 2);
*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
@@ -979,7 +984,7 @@ int
intel_context_migrate_clear(struct intel_context *ce,
const struct i915_deps *deps,
struct scatterlist *sg,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
bool is_lmem,
u32 value,
struct i915_request **out)
@@ -1027,7 +1032,7 @@ intel_context_migrate_clear(struct intel_context *ce,
if (err)
goto out_rq;
- len = emit_pte(rq, &it, cache_level, is_lmem, offset, CHUNK_SZ);
+ len = emit_pte(rq, &it, pat_index, is_lmem, offset, CHUNK_SZ);
if (len <= 0) {
err = len;
goto out_rq;
@@ -1074,10 +1079,10 @@ int intel_migrate_copy(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
const struct i915_deps *deps,
struct scatterlist *src,
- enum i915_cache_level src_cache_level,
+ unsigned int src_pat_index,
bool src_is_lmem,
struct scatterlist *dst,
- enum i915_cache_level dst_cache_level,
+ unsigned int dst_pat_index,
bool dst_is_lmem,
struct i915_request **out)
{
@@ -1098,8 +1103,8 @@ int intel_migrate_copy(struct intel_migrate *m,
goto out;
err = intel_context_migrate_copy(ce, deps,
- src, src_cache_level, src_is_lmem,
- dst, dst_cache_level, dst_is_lmem,
+ src, src_pat_index, src_is_lmem,
+ dst, dst_pat_index, dst_is_lmem,
out);
intel_context_unpin(ce);
@@ -1113,7 +1118,7 @@ intel_migrate_clear(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
const struct i915_deps *deps,
struct scatterlist *sg,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
bool is_lmem,
u32 value,
struct i915_request **out)
@@ -1134,7 +1139,7 @@ intel_migrate_clear(struct intel_migrate *m,
if (err)
goto out;
- err = intel_context_migrate_clear(ce, deps, sg, cache_level,
+ err = intel_context_migrate_clear(ce, deps, sg, pat_index,
is_lmem, value, out);
intel_context_unpin(ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.h b/drivers/gpu/drm/i915/gt/intel_migrate.h
index ccc677ec4aa3..11fc09a00c4b 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.h
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.h
@@ -16,7 +16,6 @@ struct i915_request;
struct i915_gem_ww_ctx;
struct intel_gt;
struct scatterlist;
-enum i915_cache_level;
int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt);
@@ -26,20 +25,20 @@ int intel_migrate_copy(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
const struct i915_deps *deps,
struct scatterlist *src,
- enum i915_cache_level src_cache_level,
+ unsigned int src_pat_index,
bool src_is_lmem,
struct scatterlist *dst,
- enum i915_cache_level dst_cache_level,
+ unsigned int dst_pat_index,
bool dst_is_lmem,
struct i915_request **out);
int intel_context_migrate_copy(struct intel_context *ce,
const struct i915_deps *deps,
struct scatterlist *src,
- enum i915_cache_level src_cache_level,
+ unsigned int src_pat_index,
bool src_is_lmem,
struct scatterlist *dst,
- enum i915_cache_level dst_cache_level,
+ unsigned int dst_pat_index,
bool dst_is_lmem,
struct i915_request **out);
@@ -48,7 +47,7 @@ intel_migrate_clear(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
const struct i915_deps *deps,
struct scatterlist *sg,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
bool is_lmem,
u32 value,
struct i915_request **out);
@@ -56,7 +55,7 @@ int
intel_context_migrate_clear(struct intel_context *ce,
const struct i915_deps *deps,
struct scatterlist *sg,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
bool is_lmem,
u32 value,
struct i915_request **out);
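
Callers that still think in terms of i915_cache_level can translate with i915_gem_get_pat_index(), as the intel_migrate.c hunks above do. A hedged caller sketch (clear_obj_sketch() is hypothetical and assumes the object's pages are already pinned):

static int clear_obj_sketch(struct intel_migrate *m,
			    struct i915_gem_ww_ctx *ww,
			    struct drm_i915_gem_object *obj,
			    struct i915_request **out)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	return intel_migrate_clear(m, ww, NULL /* deps */,
				   obj->mm.pages->sgl,
				   i915_gem_get_pat_index(i915, I915_CACHE_NONE),
				   i915_gem_object_is_lmem(obj),
				   0 /* clear value */, out);
}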
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 69b489e8dfed..e8d93a657ef6 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -3,6 +3,8 @@
* Copyright © 2015 Intel Corporation
*/
+#include <drm/drm_print.h>
+
#include "i915_drv.h"
#include "intel_engine.h"
@@ -40,6 +42,10 @@ struct drm_i915_mocs_table {
#define LE_COS(value) ((value) << 15)
#define LE_SSE(value) ((value) << 17)
+/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
+#define _L4_CACHEABILITY(value) ((value) << 2)
+#define IG_PAT(value) ((value) << 8)
+
/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
#define L3_ESC(value) ((value) << 0)
#define L3_SCC(value) ((value) << 1)
@@ -49,7 +55,7 @@ struct drm_i915_mocs_table {
/* Helper defines */
#define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */
-#define PVC_NUM_MOCS_ENTRIES 3
+#define MTL_NUM_MOCS_ENTRIES 16
/* (e)LLC caching options */
/*
@@ -73,6 +79,12 @@ struct drm_i915_mocs_table {
#define L3_2_RESERVED _L3_CACHEABILITY(2)
#define L3_3_WB _L3_CACHEABILITY(3)
+/* L4 caching options */
+#define L4_0_WB _L4_CACHEABILITY(0)
+#define L4_1_WT _L4_CACHEABILITY(1)
+#define L4_2_RESERVED _L4_CACHEABILITY(2)
+#define L4_3_UC _L4_CACHEABILITY(3)
+
#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
[__idx] = { \
.control_value = __control_value, \
@@ -304,7 +316,6 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = {
};
static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
-
/* UC */
MOCS_ENTRY(1, 0, L3_1_UC),
/* WB - L3 */
@@ -356,31 +367,6 @@ static const struct drm_i915_mocs_entry gen12_mocs_table[] = {
L3_3_WB),
};
-static const struct drm_i915_mocs_entry xehpsdv_mocs_table[] = {
- /* wa_1608975824 */
- MOCS_ENTRY(0, 0, L3_3_WB | L3_LKUP(1)),
-
- /* UC - Coherent; GO:L3 */
- MOCS_ENTRY(1, 0, L3_1_UC | L3_LKUP(1)),
- /* UC - Coherent; GO:Memory */
- MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
- /* UC - Non-Coherent; GO:Memory */
- MOCS_ENTRY(3, 0, L3_1_UC | L3_GLBGO(1)),
- /* UC - Non-Coherent; GO:L3 */
- MOCS_ENTRY(4, 0, L3_1_UC),
-
- /* WB */
- MOCS_ENTRY(5, 0, L3_3_WB | L3_LKUP(1)),
-
- /* HW Reserved - SW program but never use. */
- MOCS_ENTRY(48, 0, L3_3_WB | L3_LKUP(1)),
- MOCS_ENTRY(49, 0, L3_1_UC | L3_LKUP(1)),
- MOCS_ENTRY(60, 0, L3_1_UC),
- MOCS_ENTRY(61, 0, L3_1_UC),
- MOCS_ENTRY(62, 0, L3_1_UC),
- MOCS_ENTRY(63, 0, L3_1_UC),
-};
-
static const struct drm_i915_mocs_entry dg2_mocs_table[] = {
/* UC - Coherent; GO:L3 */
MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)),
@@ -393,27 +379,55 @@ static const struct drm_i915_mocs_entry dg2_mocs_table[] = {
MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
};
-static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = {
- /* Wa_14011441408: Set Go to Memory for MOCS#0 */
- MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
- /* UC - Coherent; GO:Memory */
- MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
- /* UC - Non-Coherent; GO:Memory */
- MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
-
- /* WB - LC */
- MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
-};
-
-static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
- /* Error */
- MOCS_ENTRY(0, 0, L3_3_WB),
-
- /* UC */
- MOCS_ENTRY(1, 0, L3_1_UC),
-
- /* WB */
- MOCS_ENTRY(2, 0, L3_3_WB),
+static const struct drm_i915_mocs_entry mtl_mocs_table[] = {
+ /* Error - Reserved for Non-Use */
+ MOCS_ENTRY(0,
+ IG_PAT(0),
+ L3_LKUP(1) | L3_3_WB),
+ /* Cached - L3 + L4 */
+ MOCS_ENTRY(1,
+ IG_PAT(1),
+ L3_LKUP(1) | L3_3_WB),
+ /* L4 - GO:L3 */
+ MOCS_ENTRY(2,
+ IG_PAT(1),
+ L3_LKUP(1) | L3_1_UC),
+ /* Uncached - GO:L3 */
+ MOCS_ENTRY(3,
+ IG_PAT(1) | L4_3_UC,
+ L3_LKUP(1) | L3_1_UC),
+ /* L4 - GO:Mem */
+ MOCS_ENTRY(4,
+ IG_PAT(1),
+ L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+ /* Uncached - GO:Mem */
+ MOCS_ENTRY(5,
+ IG_PAT(1) | L4_3_UC,
+ L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+ /* L4 - L3:NoLKUP; GO:L3 */
+ MOCS_ENTRY(6,
+ IG_PAT(1),
+ L3_1_UC),
+ /* Uncached - L3:NoLKUP; GO:L3 */
+ MOCS_ENTRY(7,
+ IG_PAT(1) | L4_3_UC,
+ L3_1_UC),
+ /* L4 - L3:NoLKUP; GO:Mem */
+ MOCS_ENTRY(8,
+ IG_PAT(1),
+ L3_GLBGO(1) | L3_1_UC),
+ /* Uncached - L3:NoLKUP; GO:Mem */
+ MOCS_ENTRY(9,
+ IG_PAT(1) | L4_3_UC,
+ L3_GLBGO(1) | L3_1_UC),
+ /* Display - L3; L4:WT */
+ MOCS_ENTRY(14,
+ IG_PAT(1) | L4_1_WT,
+ L3_LKUP(1) | L3_3_WB),
+ /* CCS - Non-Displayable */
+ MOCS_ENTRY(15,
+ IG_PAT(1),
+ L3_GLBGO(1) | L3_1_UC),
};
enum {
@@ -437,7 +451,7 @@ static bool has_mocs(const struct drm_i915_private *i915)
return !IS_DGFX(i915);
}
-static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
+static unsigned int get_mocs_settings(struct drm_i915_private *i915,
struct drm_i915_mocs_table *table)
{
unsigned int flags;
@@ -445,30 +459,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
memset(table, 0, sizeof(struct drm_i915_mocs_table));
table->unused_entries_index = I915_MOCS_PTE;
- if (IS_PONTEVECCHIO(i915)) {
- table->size = ARRAY_SIZE(pvc_mocs_table);
- table->table = pvc_mocs_table;
- table->n_entries = PVC_NUM_MOCS_ENTRIES;
- table->uc_index = 1;
- table->wb_index = 2;
- table->unused_entries_index = 2;
+ if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 74))) {
+ table->size = ARRAY_SIZE(mtl_mocs_table);
+ table->table = mtl_mocs_table;
+ table->n_entries = MTL_NUM_MOCS_ENTRIES;
+ table->uc_index = 9;
+ table->unused_entries_index = 1;
} else if (IS_DG2(i915)) {
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
- table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax);
- table->table = dg2_mocs_table_g10_ax;
- } else {
- table->size = ARRAY_SIZE(dg2_mocs_table);
- table->table = dg2_mocs_table;
- }
+ table->size = ARRAY_SIZE(dg2_mocs_table);
+ table->table = dg2_mocs_table;
table->uc_index = 1;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->unused_entries_index = 3;
- } else if (IS_XEHPSDV(i915)) {
- table->size = ARRAY_SIZE(xehpsdv_mocs_table);
- table->table = xehpsdv_mocs_table;
- table->uc_index = 2;
- table->n_entries = GEN9_NUM_MOCS_ENTRIES;
- table->unused_entries_index = 5;
} else if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
@@ -619,7 +621,7 @@ static void init_l3cc_table(struct intel_gt *gt,
intel_gt_mcr_lock(gt, &flags);
for_each_l3cc(l3cc, table, i)
- if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 55))
intel_gt_mcr_multicast_write_fw(gt, XEHP_LNCFCMOCS(i), l3cc);
else
intel_uncore_write_fw(gt->uncore, GEN9_LNCFCMOCS(i), l3cc);
@@ -674,7 +676,7 @@ void intel_mocs_init(struct intel_gt *gt)
__init_mocs_table(gt->uncore, &table, global_mocs_offset());
/*
- * Initialize the L3CC table as part of mocs initalization to make
+ * Initialize the L3CC table as part of mocs initialization to make
* sure the LNCFCMOCSx registers are programmed for the subsequent
* memory transactions including guc transactions
*/
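
For a concrete reading of the MTL table above: entry 3's control value composes as IG_PAT(1) | L4_3_UC = (1 << 8) | (3 << 2) = 0x10c. A standalone check of that arithmetic under the same macro definitions:

#include <assert.h>
#include <stdint.h>

#define _L4_CACHEABILITY(value) ((value) << 2)
#define IG_PAT(value)           ((value) << 8)
#define L4_3_UC                 _L4_CACHEABILITY(3)

int main(void)
{
	/* MTL MOCS entry 3: GPU accesses bypass L4, GO:L3 */
	uint32_t control = IG_PAT(1) | L4_3_UC;

	assert(control == 0x10c);
	return 0;
}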
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index 7ecfa672f738..d07a4f97b943 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -8,6 +8,7 @@
#include "gem/i915_gem_lmem.h"
#include "i915_trace.h"
+#include "intel_gt.h"
#include "intel_gtt.h"
#include "gen6_ppgtt.h"
#include "gen8_ppgtt.h"
@@ -181,7 +182,7 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
void ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
u32 flags)
{
u32 pte_flags;
@@ -199,7 +200,7 @@ void ppgtt_bind_vma(struct i915_address_space *vm,
if (vma_res->bi.lmem)
pte_flags |= PTE_LM;
- vm->insert_entries(vm, vma_res, cache_level, pte_flags);
+ vm->insert_entries(vm, vma_res, pat_index, pte_flags);
wmb();
}
@@ -210,8 +211,7 @@ void ppgtt_unbind_vma(struct i915_address_space *vm,
return;
vm->clear_range(vm, vma_res->start, vma_res->vma_size);
- if (vma_res->tlb)
- vma_invalidate_tlb(vm, vma_res->tlb);
+ vma_invalidate_tlb(vm, vma_res->tlb);
}
static unsigned long pd_count(u64 size, int shift)
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 5c91622dfca4..286d49ecc449 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -6,6 +6,9 @@
#include <linux/pm_runtime.h>
#include <linux/string_helpers.h>
+#include <drm/drm_print.h>
+
+#include "display/vlv_clock.h"
#include "gem/i915_gem_region.h"
#include "i915_drv.h"
#include "i915_reg.h"
@@ -53,11 +56,6 @@ static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
return rc6_to_gt(rc)->i915;
}
-static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
-{
- intel_uncore_write_fw(uncore, reg, val);
-}
-
static void gen11_rc6_enable(struct intel_rc6 *rc6)
{
struct intel_gt *gt = rc6_to_gt(rc6);
@@ -72,19 +70,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
*/
if (!intel_uc_uses_guc_rc(&gt->uc)) {
/* 2b: Program RC6 thresholds.*/
- set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
- set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
+ intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
+ intel_uncore_write_fw(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
- set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
- set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+ intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
for_each_engine(engine, rc6_to_gt(rc6), id)
- set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+ intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
- set(uncore, GUC_MAX_IDLE_COUNT, 0xA);
+ intel_uncore_write_fw(uncore, GUC_MAX_IDLE_COUNT, 0xA);
- set(uncore, GEN6_RC_SLEEP, 0);
+ intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);
- set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
+ intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
}
/*
@@ -105,8 +103,8 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
* Broadwell+, To be conservative, we want to factor in a context
* switch on top (due to ksoftirqd).
*/
- set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
- set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);
+ intel_uncore_write_fw(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
+ intel_uncore_write_fw(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);
/* 3a: Enable RC6
*
@@ -114,7 +112,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
* thus allowing GuC to control RC6 entry/exit fully instead.
* We will not set the HW ENABLE and EI bits
*/
- if (!intel_guc_rc_enable(&gt->uc.guc))
+ if (!intel_guc_rc_enable(gt_to_guc(gt)))
rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE;
else
rc6->ctl_enable =
@@ -122,26 +120,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
GEN6_RC_CTL_RC6_ENABLE |
GEN6_RC_CTL_EI_MODE(1);
- /* Wa_16011777198 - Render powergating must remain disabled */
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
- pg_enable =
- GEN9_MEDIA_PG_ENABLE |
- GEN11_MEDIA_SAMPLER_PG_ENABLE;
- else
- pg_enable =
- GEN9_RENDER_PG_ENABLE |
- GEN9_MEDIA_PG_ENABLE |
- GEN11_MEDIA_SAMPLER_PG_ENABLE;
+ pg_enable =
+ GEN9_RENDER_PG_ENABLE |
+ GEN9_MEDIA_PG_ENABLE |
+ GEN11_MEDIA_SAMPLER_PG_ENABLE;
- if (GRAPHICS_VER(gt->i915) >= 12) {
+ if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) {
for (i = 0; i < I915_MAX_VCS; i++)
if (HAS_ENGINE(gt, _VCS(i)))
pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) |
VDN_MFX_POWERGATE_ENABLE(i));
}
- set(uncore, GEN9_PG_ENABLE, pg_enable);
+ intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, pg_enable);
}
static void gen9_rc6_enable(struct intel_rc6 *rc6)
@@ -152,26 +143,26 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
/* 2b: Program RC6 thresholds.*/
if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 11) {
- set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
- set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
+ intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
+ intel_uncore_write_fw(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
} else if (IS_SKYLAKE(rc6_to_i915(rc6))) {
/*
* WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
* when CPG is enabled
*/
- set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
+ intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
} else {
- set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
+ intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
}
- set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
- set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+ intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
for_each_engine(engine, rc6_to_gt(rc6), id)
- set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+ intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
- set(uncore, GUC_MAX_IDLE_COUNT, 0xA);
+ intel_uncore_write_fw(uncore, GUC_MAX_IDLE_COUNT, 0xA);
- set(uncore, GEN6_RC_SLEEP, 0);
+ intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);
/*
* 2c: Program Coarse Power Gating Policies.
@@ -194,11 +185,11 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
* conservative, we have to factor in a context switch on top (due
* to ksoftirqd).
*/
- set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
- set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
+ intel_uncore_write_fw(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
+ intel_uncore_write_fw(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
/* 3a: Enable RC6 */
- set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
+ intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
rc6->ctl_enable =
GEN6_RC_CTL_HW_ENABLE |
@@ -210,8 +201,8 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
* - Render/Media PG need to be disabled with RC6.
*/
if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
- set(uncore, GEN9_PG_ENABLE,
- GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
+ intel_uncore_write_fw(uncore, GEN9_PG_ENABLE,
+ GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
}
static void gen8_rc6_enable(struct intel_rc6 *rc6)
@@ -221,13 +212,13 @@ static void gen8_rc6_enable(struct intel_rc6 *rc6)
enum intel_engine_id id;
/* 2b: Program RC6 thresholds.*/
- set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
- set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
- set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+ intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+ intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
for_each_engine(engine, rc6_to_gt(rc6), id)
- set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
- set(uncore, GEN6_RC_SLEEP, 0);
- set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
+ intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+ intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);
+ intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
/* 3: Enable RC6 */
rc6->ctl_enable =
@@ -245,20 +236,20 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6)
u32 rc6vids, rc6_mask;
int ret;
- set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
- set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
- set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
- set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
- set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
+ intel_uncore_write_fw(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
+ intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
+ intel_uncore_write_fw(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
+ intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
+ intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
for_each_engine(engine, rc6_to_gt(rc6), id)
- set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+ intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
- set(uncore, GEN6_RC_SLEEP, 0);
- set(uncore, GEN6_RC1e_THRESHOLD, 1000);
- set(uncore, GEN6_RC6_THRESHOLD, 50000);
- set(uncore, GEN6_RC6p_THRESHOLD, 150000);
- set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */
+ intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);
+ intel_uncore_write_fw(uncore, GEN6_RC1e_THRESHOLD, 1000);
+ intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 50000);
+ intel_uncore_write_fw(uncore, GEN6_RC6p_THRESHOLD, 150000);
+ intel_uncore_write_fw(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */
/* We don't use those on Haswell */
rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
@@ -353,7 +344,7 @@ static int vlv_rc6_init(struct intel_rc6 *rc6)
return PTR_ERR(pctx);
}
- GEM_BUG_ON(range_overflows_end_t(u64,
+ GEM_BUG_ON(range_end_overflows_t(u64,
i915->dsm.stolen.start,
pctx->stolen->start,
U32_MAX));
@@ -372,22 +363,22 @@ static void chv_rc6_enable(struct intel_rc6 *rc6)
enum intel_engine_id id;
/* 2a: Program RC6 thresholds.*/
- set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
- set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
- set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+ intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+ intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
for_each_engine(engine, rc6_to_gt(rc6), id)
- set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
- set(uncore, GEN6_RC_SLEEP, 0);
+ intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+ intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);
/* TO threshold set to 500 us (0x186 * 1.28 us) */
- set(uncore, GEN6_RC6_THRESHOLD, 0x186);
+ intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 0x186);
/* Allows RC6 residency counter to work */
- set(uncore, VLV_COUNTER_CONTROL,
- _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
- VLV_MEDIA_RC6_COUNT_EN |
- VLV_RENDER_RC6_COUNT_EN));
+ intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+ VLV_MEDIA_RC6_COUNT_EN |
+ VLV_RENDER_RC6_COUNT_EN));
/* 3: Enable RC6 */
rc6->ctl_enable = GEN7_RC_CTL_TO_MODE;
@@ -399,27 +390,42 @@ static void vlv_rc6_enable(struct intel_rc6 *rc6)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
- set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
- set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
+ intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
+ intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
+ intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
for_each_engine(engine, rc6_to_gt(rc6), id)
- set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+ intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
- set(uncore, GEN6_RC6_THRESHOLD, 0x557);
+ intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 0x557);
/* Allows RC6 residency counter to work */
- set(uncore, VLV_COUNTER_CONTROL,
- _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
- VLV_MEDIA_RC0_COUNT_EN |
- VLV_RENDER_RC0_COUNT_EN |
- VLV_MEDIA_RC6_COUNT_EN |
- VLV_RENDER_RC6_COUNT_EN));
+ intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+ VLV_MEDIA_RC0_COUNT_EN |
+ VLV_RENDER_RC0_COUNT_EN |
+ VLV_MEDIA_RC6_COUNT_EN |
+ VLV_RENDER_RC6_COUNT_EN));
rc6->ctl_enable =
GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
}
+bool intel_check_bios_c6_setup(struct intel_rc6 *rc6)
+{
+ if (!rc6->bios_state_captured) {
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm(uncore->rpm, wakeref)
+ rc6->bios_rc_state = intel_uncore_read(uncore, GEN6_RC_STATE);
+
+ rc6->bios_state_captured = true;
+ }
+
+ return rc6->bios_rc_state & RC_SW_TARGET_STATE_MASK;
+}
+
static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
{
struct intel_uncore *uncore = rc6_to_uncore(rc6);
@@ -486,6 +492,7 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
static bool rc6_supported(struct intel_rc6 *rc6)
{
struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_gt *gt = rc6_to_gt(rc6);
if (!HAS_RC6(i915))
return false;
@@ -502,6 +509,19 @@ static bool rc6_supported(struct intel_rc6 *rc6)
return false;
}
+ if (IS_METEORLAKE(gt->i915) &&
+ !intel_check_bios_c6_setup(rc6)) {
+ drm_notice(&i915->drm,
+ "C6 disabled by BIOS\n");
+ return false;
+ }
+
+ if (IS_MEDIA_GT_IP_STEP(gt, IP_VER(13, 0), STEP_A0, STEP_B0)) {
+ drm_notice(&i915->drm,
+ "Media RC6 disabled on A step\n");
+ return false;
+ }
+
return true;
}
@@ -541,31 +561,35 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6)
struct intel_gt *gt = rc6_to_gt(rc6);
/* Take control of RC6 back from GuC */
- intel_guc_rc_disable(&gt->uc.guc);
+ intel_guc_rc_disable(gt_to_guc(gt));
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
if (GRAPHICS_VER(i915) >= 9)
- set(uncore, GEN9_PG_ENABLE, 0);
- set(uncore, GEN6_RC_CONTROL, 0);
- set(uncore, GEN6_RC_STATE, 0);
+ intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, 0);
+ intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, 0);
+ intel_uncore_write_fw(uncore, GEN6_RC_STATE, 0);
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
}
static void rc6_res_reg_init(struct intel_rc6 *rc6)
{
- memset(rc6->res_reg, INVALID_MMIO_REG.reg, sizeof(rc6->res_reg));
+ i915_reg_t res_reg[INTEL_RC6_RES_MAX] = {
+ [0 ... INTEL_RC6_RES_MAX - 1] = INVALID_MMIO_REG,
+ };
switch (rc6_to_gt(rc6)->type) {
case GT_MEDIA:
- rc6->res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6;
+ res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6;
break;
default:
- rc6->res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED;
- rc6->res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6;
- rc6->res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p;
- rc6->res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp;
+ res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED;
+ res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6;
+ res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p;
+ res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp;
break;
}
+
+ memcpy(rc6->res_reg, res_reg, sizeof(res_reg));
}
void intel_rc6_init(struct intel_rc6 *rc6)
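rc6_res_reg_init() now fills a local table and publishes it with one memcpy() instead of memset()-ing the live array to the invalid register and refilling it in place; one reading of the change is that it narrows the window in which a concurrent residency reader could observe a partially (re)initialized table. The [0 ... N - 1] designated-range initializer is a GCC/Clang extension. A standalone sketch of the build-then-publish pattern:

#include <stdio.h>
#include <string.h>

#define RES_MAX 4
#define INVALID_REG 0

static int live_table[RES_MAX];

static void publish_table(int rc6_reg)
{
	/* GNU designated-range initializer: every slot starts invalid. */
	int tmp[RES_MAX] = { [0 ... RES_MAX - 1] = INVALID_REG };

	tmp[1] = rc6_reg;	/* fill in only what this GT variant has */

	/* Build the table completely on the stack, then publish it in one
	 * copy, avoiding the intermediate all-invalid state that a
	 * memset-then-fill sequence would expose. */
	memcpy(live_table, tmp, sizeof(tmp));
}

int main(void)
{
	publish_table(0x1234);
	printf("slot 1 = %#x\n", live_table[1]);
	return 0;
}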
@@ -654,7 +678,7 @@ void intel_rc6_unpark(struct intel_rc6 *rc6)
return;
/* Restore HW timers for automatic RC6 entry while busy */
- set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
+ intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
}
void intel_rc6_park(struct intel_rc6 *rc6)
@@ -674,7 +698,7 @@ void intel_rc6_park(struct intel_rc6 *rc6)
return;
/* Turn off the HW timers and go directly to rc6 */
- set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);
+ intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);
if (HAS_RC6pp(rc6_to_i915(rc6)))
target = 0x6; /* deepest rc6 */
@@ -682,7 +706,7 @@ void intel_rc6_park(struct intel_rc6 *rc6)
target = 0x5; /* deep rc6 */
else
target = 0x4; /* normal rc6 */
- set(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT);
+ intel_uncore_write_fw(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT);
}
void intel_rc6_disable(struct intel_rc6 *rc6)
@@ -699,9 +723,14 @@ void intel_rc6_disable(struct intel_rc6 *rc6)
void intel_rc6_fini(struct intel_rc6 *rc6)
{
struct drm_i915_gem_object *pctx;
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
intel_rc6_disable(rc6);
+ /* We want the BIOS C6 state preserved across loads for MTL */
+ if (IS_METEORLAKE(rc6_to_i915(rc6)) && rc6->bios_state_captured)
+ intel_uncore_write_fw(uncore, GEN6_RC_STATE, rc6->bios_rc_state);
+
pctx = fetch_and_zero(&rc6->pctx);
if (pctx)
i915_gem_object_put(pctx);
@@ -731,18 +760,18 @@ static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg)
* before we have set the default VLV_COUNTER_CONTROL value. So always
* set the high bit to be safe.
*/
- set(uncore, VLV_COUNTER_CONTROL,
- _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
+ intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
upper = intel_uncore_read_fw(uncore, reg);
do {
tmp = upper;
- set(uncore, VLV_COUNTER_CONTROL,
- _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
+ intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
lower = intel_uncore_read_fw(uncore, reg);
- set(uncore, VLV_COUNTER_CONTROL,
- _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
+ intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
upper = intel_uncore_read_fw(uncore, reg);
} while (upper != tmp && --loop);
@@ -776,7 +805,7 @@ u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, enum intel_rc6_res_type id)
/* On VLV and CHV, residency time is in CZ units rather than 1.28us */
if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
mul = 1000000;
- div = i915->czclk_freq;
+ div = vlv_clock_get_czclk(&i915->drm);
overflow_hw = BIT_ULL(40);
time_hw = vlv_residency_raw(uncore, reg);
} else {
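vlv_residency_raw() reads a counter wider than 32 bits through one register whose visible half is switched via VLV_COUNTER_CONTROL, so the upper half is sampled on both sides of the lower read and the pair is retried (bounded) if the upper half moved in between. A standalone sketch of that consistent split read, with plain variables standing in for the MMIO accesses:

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for a hardware counter exposed as two 32-bit halves. */
static volatile uint32_t hw_upper, hw_lower;

static uint64_t read_split_counter(void)
{
	uint32_t upper, lower, tmp;
	int loop = 2;

	upper = hw_upper;
	do {
		tmp = upper;
		lower = hw_lower;
		upper = hw_upper;
		/* If the upper half moved while the lower half was being
		 * sampled, the pair is torn: retry, bounded like i915. */
	} while (upper != tmp && --loop);

	return ((uint64_t)upper << 32) | lower;
}

int main(void)
{
	hw_upper = 0x12;
	hw_lower = 0x345678;
	printf("counter = %#llx\n",
	       (unsigned long long)read_split_counter());
	return 0;
}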
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h
index 456fa668a276..e137c2c397c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.h
@@ -27,4 +27,6 @@ u64 intel_rc6_residency_us(struct intel_rc6 *rc6, enum intel_rc6_res_type id);
void intel_rc6_print_residency(struct seq_file *m, const char *title,
enum intel_rc6_res_type id);
+bool intel_check_bios_c6_setup(struct intel_rc6 *rc6);
+
#endif /* INTEL_RC6_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
index fa23c4dce00b..cd4587098162 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
@@ -29,6 +29,7 @@ struct intel_rc6 {
u64 cur_residency[INTEL_RC6_RES_MAX];
u32 ctl_enable;
+ u32 bios_rc_state;
struct drm_i915_gem_object *pctx;
@@ -36,6 +37,7 @@ struct intel_rc6 {
bool enabled : 1;
bool manual : 1;
bool wakeref : 1;
+ bool bios_state_captured : 1;
};
#endif /* INTEL_RC6_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index f3ad93db0b21..a30060fd4429 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -3,6 +3,8 @@
* Copyright © 2019 Intel Corporation
*/
+#include <drm/drm_print.h>
+
#include "i915_drv.h"
#include "i915_pci.h"
#include "i915_reg.h"
@@ -18,16 +20,6 @@
#include "gt/intel_gt_regs.h"
#ifdef CONFIG_64BIT
-static void _release_bars(struct pci_dev *pdev)
-{
- int resno;
-
- for (resno = PCI_STD_RESOURCES; resno < PCI_STD_RESOURCE_END; resno++) {
- if (pci_resource_len(pdev, resno))
- pci_release_resource(pdev, resno);
- }
-}
-
static void
_resize_bar(struct drm_i915_private *i915, int resno, resource_size_t size)
{
@@ -35,9 +27,7 @@ _resize_bar(struct drm_i915_private *i915, int resno, resource_size_t size)
int bar_size = pci_rebar_bytes_to_size(size);
int ret;
- _release_bars(pdev);
-
- ret = pci_resize_resource(pdev, resno, bar_size);
+ ret = pci_resize_resource(pdev, resno, bar_size, 0);
if (ret) {
drm_info(&i915->drm, "Failed to resize BAR%d to %dM (%pe)\n",
resno, 1 << bar_size, ERR_PTR(ret));
@@ -54,22 +44,19 @@ static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t
struct resource *root_res;
resource_size_t rebar_size;
resource_size_t current_size;
+ intel_wakeref_t wakeref;
u32 pci_cmd;
int i;
current_size = roundup_pow_of_two(pci_resource_len(pdev, GEN12_LMEM_BAR));
if (i915->params.lmem_bar_size) {
- u32 bar_sizes;
-
- rebar_size = i915->params.lmem_bar_size *
- (resource_size_t)SZ_1M;
- bar_sizes = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR);
-
+ rebar_size = i915->params.lmem_bar_size * (resource_size_t)SZ_1M;
if (rebar_size == current_size)
return;
- if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) ||
+ if (!pci_rebar_size_supported(pdev, GEN12_LMEM_BAR,
+ pci_rebar_bytes_to_size(rebar_size)) ||
rebar_size >= roundup_pow_of_two(lmem_size)) {
rebar_size = lmem_size;
@@ -102,15 +89,25 @@ static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t
return;
}
- /* First disable PCI memory decoding references */
- pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
- pci_write_config_dword(pdev, PCI_COMMAND,
- pci_cmd & ~PCI_COMMAND_MEMORY);
+ /*
+ * Releasing forcewake during BAR resizing results in later forcewake
+ * ack timeouts, and the release can happen at any time - it is
+ * asynchronous. Grabbing all forcewakes prevents it.
+ */
+ with_intel_runtime_pm(i915->uncore.rpm, wakeref) {
+ intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
- _resize_bar(i915, GEN12_LMEM_BAR, rebar_size);
+ /* First disable PCI memory decoding references */
+ pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
+ pci_write_config_dword(pdev, PCI_COMMAND,
+ pci_cmd & ~PCI_COMMAND_MEMORY);
- pci_assign_unassigned_bus_resources(pdev->bus);
- pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
+ _resize_bar(i915, GEN12_LMEM_BAR, rebar_size);
+
+ pci_assign_unassigned_bus_resources(pdev->bus);
+ pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
+ intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+ }
}
#else
static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t lmem_size) {}
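The reworked i915_resize_lmem_bar() holds a runtime-PM wakeref and FORCEWAKE_ALL across the whole resize, per the new comment, so an asynchronous forcewake release cannot land mid-resize and cause later ack timeouts, and PCI memory decode stays disabled while the BAR moves. A skeleton of that ordering, with prints standing in for the real PCI and forcewake calls:

#include <stdio.h>

/* Stand-ins for the operations involved; illustration only. */
static void runtime_pm_get(void)     { puts("take runtime PM wakeref"); }
static void forcewake_get_all(void)  { puts("hold all forcewake domains"); }
static void memory_decode(int on)    { printf("PCI memory decode %s\n",
					      on ? "on" : "off"); }
static void resize_bar(void)         { puts("pci_resize_resource()"); }
static void reassign_resources(void) { puts("pci_assign_unassigned_bus_resources()"); }
static void forcewake_put_all(void)  { puts("release forcewake"); }
static void runtime_pm_put(void)     { puts("drop wakeref"); }

int main(void)
{
	runtime_pm_get();
	forcewake_get_all();
	memory_decode(0);	/* no decoding while the BAR moves */
	resize_bar();
	reassign_resources();
	memory_decode(1);	/* restore the saved PCI_COMMAND value */
	forcewake_put_all();
	runtime_pm_put();
	return 0;
}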
@@ -133,8 +130,8 @@ region_lmem_init(struct intel_memory_region *mem)
int ret;
if (!io_mapping_init_wc(&mem->iomap,
- mem->io_start,
- mem->io_size))
+ mem->io.start,
+ resource_size(&mem->io)))
return -EIO;
ret = intel_region_ttm_init(mem);
@@ -158,7 +155,7 @@ static const struct intel_memory_region_ops intel_region_lmem_ops = {
static bool get_legacy_lowmem_region(struct intel_uncore *uncore,
u64 *start, u32 *size)
{
- if (!IS_DG1_GRAPHICS_STEP(uncore->i915, STEP_A0, STEP_C0))
+ if (!IS_DG1(uncore->i915))
return false;
*start = 0;
@@ -209,7 +206,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
resource_size_t lmem_range;
u64 tile_stolen, flat_ccs_base;
- lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHP_TILE0_ADDR_RANGE) & 0xFFFF;
+ lmem_range = intel_gt_mcr_read_any(to_gt(i915), XEHP_TILE0_ADDR_RANGE) & 0xFFFF;
lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT;
lmem_size *= SZ_1G;
@@ -229,7 +226,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
lmem_size -= tile_stolen;
} else {
/* Stolen starts from GSMBASE without CCS */
- lmem_size = intel_uncore_read64(&i915->uncore, GEN12_GSMBASE);
+ lmem_size = intel_uncore_read64(&i915->uncore, GEN6_GSMBASE);
}
i915_resize_lmem_bar(i915, lmem_size);
@@ -262,14 +259,6 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
if (err)
goto err_region_put;
- drm_dbg(&i915->drm, "Local memory: %pR\n", &mem->region);
- drm_dbg(&i915->drm, "Local memory IO start: %pa\n",
- &mem->io_start);
- drm_info(&i915->drm, "Local memory IO size: %pa\n",
- &mem->io_size);
- drm_info(&i915->drm, "Local memory available: %pa\n",
- &lmem_size);
-
if (io_size < lmem_size)
drm_info(&i915->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' or similar, if available in the BIOS.\n",
(u64)io_size >> 20);
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 4b56ec3743cf..d53766c288f7 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -3,6 +3,8 @@
* Copyright © 2014 Intel Corporation
*/
+#include <drm/drm_print.h>
+
#include "gem/i915_gem_internal.h"
#include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 0bb9094fdacd..41b5036dc538 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -7,34 +7,32 @@
#include <linux/stop_machine.h>
#include <linux/string_helpers.h>
-#include "display/intel_display.h"
+#include "display/intel_display_reset.h"
#include "display/intel_overlay.h"
-
#include "gem/i915_gem_context.h"
-
#include "gt/intel_gt_regs.h"
+#include "gt/uc/intel_gsc_fw.h"
+#include "uc/intel_guc.h"
#include "i915_drv.h"
#include "i915_file_private.h"
#include "i915_gpu_error.h"
#include "i915_irq.h"
+#include "i915_reg.h"
+#include "i915_wait_util.h"
#include "intel_breadcrumbs.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_gt_print.h"
#include "intel_gt_requests.h"
#include "intel_mchbar_regs.h"
#include "intel_pci_config.h"
#include "intel_reset.h"
-#include "uc/intel_guc.h"
-
#define RESET_MAX_RETRIES 3
-/* XXX How to handle concurrent GGTT updates using tiling registers? */
-#define RESET_UNDER_STOP_MACHINE 0
-
static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
{
struct drm_i915_file_private *file_priv = ctx->file_priv;
@@ -161,16 +159,16 @@ static int i915_do_reset(struct intel_gt *gt,
struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev);
int err;
- /* Assert reset for at least 20 usec, and wait for acknowledgement. */
+ /* Assert reset for at least 50 usec, and wait for acknowledgement. */
pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
udelay(50);
- err = wait_for_atomic(i915_in_reset(pdev), 50);
+ err = _wait_for_atomic(i915_in_reset(pdev), 50000, 0);
/* Clear the reset request. */
pci_write_config_byte(pdev, I915_GDRST, 0);
udelay(50);
if (!err)
- err = wait_for_atomic(!i915_in_reset(pdev), 50);
+ err = _wait_for_atomic(!i915_in_reset(pdev), 50000, 0);
return err;
}
@@ -190,7 +188,7 @@ static int g33_do_reset(struct intel_gt *gt,
struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev);
pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
- return wait_for_atomic(g4x_reset_complete(pdev), 50);
+ return _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0);
}
static int g4x_do_reset(struct intel_gt *gt,
@@ -207,7 +205,7 @@ static int g4x_do_reset(struct intel_gt *gt,
pci_write_config_byte(pdev, I915_GDRST,
GRDOM_MEDIA | GRDOM_RESET_ENABLE);
- ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
+ ret = _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0);
if (ret) {
GT_TRACE(gt, "Wait for media reset failed\n");
goto out;
@@ -215,7 +213,7 @@ static int g4x_do_reset(struct intel_gt *gt,
pci_write_config_byte(pdev, I915_GDRST,
GRDOM_RENDER | GRDOM_RESET_ENABLE);
- ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
+ ret = _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0);
if (ret) {
GT_TRACE(gt, "Wait for render reset failed\n");
goto out;
@@ -268,10 +266,28 @@ out:
static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
{
struct intel_uncore *uncore = gt->uncore;
- int loops = 2;
+ int loops;
int err;
/*
+ * On some platforms, e.g. Jasperlake, we see that the engine register
+ * state is not cleared until shortly after GDRST reports completion,
+ * causing a failure as we try to immediately resume while the internal
+ * state is still in flux. If we immediately repeat the reset, the
+ * second reset appears to serialise with the first, and since it is a
+ * no-op, the registers should retain their reset value. However, there
+ * is still a concern that upon leaving the second reset, the internal
+ * engine state is still in flux and not ready for resuming.
+ *
+ * Starting on MTL, resetting some engines involves prep steps that must
+ * be applied on every write to GEN6_GDRST. As those are time consuming
+ * (tens of ms), we don't want to perform them twice, so, since the
+ * Jasperlake issue hasn't been observed on MTL, we avoid repeating the
+ * reset on newer platforms.
+ */
+ loops = GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70) ? 2 : 1;
+
+ /*
* GEN6_GDRST is not in the gt power well, no need to check
* for fifo space for the write or forcewake the chip for
* the read
@@ -279,20 +295,7 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
do {
intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
- /*
- * Wait for the device to ack the reset requests.
- *
- * On some platforms, e.g. Jasperlake, we see that the
- * engine register state is not cleared until shortly after
- * GDRST reports completion, causing a failure as we try
- * to immediately resume while the internal state is still
- * in flux. If we immediately repeat the reset, the second
- * reset appears to serialise with the first, and since
- * it is a no-op, the registers should retain their reset
- * value. However, there is still a concern that upon
- * leaving the second reset, the internal engine state
- * is still in flux and not ready for resuming.
- */
+ /* Wait for the device to ack the reset requests. */
err = __intel_wait_for_register_fw(uncore, GEN6_GDRST,
hw_domain_mask, 0,
2000, 0,
@@ -587,10 +590,10 @@ static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
ret = __intel_wait_for_register_fw(uncore, reg, mask, ack,
700, 0, NULL);
if (ret)
- drm_err(&engine->i915->drm,
- "%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
- engine->name, request,
- intel_uncore_read_fw(uncore, reg));
+ gt_err(engine->gt,
+ "%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
+ engine->name, request,
+ intel_uncore_read_fw(uncore, reg));
return ret;
}
@@ -690,7 +693,75 @@ static reset_func intel_get_gpu_reset(const struct intel_gt *gt)
return NULL;
}
-int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+static int __reset_guc(struct intel_gt *gt)
+{
+ u32 guc_domain =
+ GRAPHICS_VER(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
+
+ return gen6_hw_domain_reset(gt, guc_domain);
+}
+
+static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+{
+ if (MEDIA_VER_FULL(gt->i915) != IP_VER(13, 0) || !HAS_ENGINE(gt, GSC0))
+ return false;
+
+ if (!__HAS_ENGINE(engine_mask, GSC0))
+ return false;
+
+ return intel_gsc_uc_fw_init_done(&gt->uc.gsc);
+}
+
+static intel_engine_mask_t
+wa_14015076503_start(struct intel_gt *gt, intel_engine_mask_t engine_mask, bool first)
+{
+ if (!needs_wa_14015076503(gt, engine_mask))
+ return engine_mask;
+
+ /*
+ * wa_14015076503: if the GSC FW is loaded, we need to alert it that
+ * we're going to do a GSC engine reset and then wait for 200ms for the
+ * FW to get ready for it. However, if this is the first ALL_ENGINES
+ * reset attempt and the GSC is not busy, we can try to instead reset
+ * the GuC and all the other engines individually to avoid the 200ms
+ * wait.
+ * Skipping the GSC engine is safe because, unlike other engines,
+ * the GSCCS's only role is to forward commands to the GSC FW, so
+ * it has no HW outside of the CS itself and therefore no state
+ * that we don't explicitly re-init on resume or on context switch
+ * (LRC or power context). The HW for the GSC uC is managed by the
+ * GSC FW, so we don't need to care about that.
+ */
+ if (engine_mask == ALL_ENGINES && first && intel_engine_is_idle(gt->engine[GSC0])) {
+ __reset_guc(gt);
+ engine_mask = gt->info.engine_mask & ~BIT(GSC0);
+ } else {
+ intel_uncore_rmw(gt->uncore,
+ HECI_H_GS1(MTL_GSC_HECI2_BASE),
+ 0, HECI_H_GS1_ER_PREP);
+
+ /* make sure the reset bit is clear when writing the CSR reg */
+ intel_uncore_rmw(gt->uncore,
+ HECI_H_CSR(MTL_GSC_HECI2_BASE),
+ HECI_H_CSR_RST, HECI_H_CSR_IG);
+ msleep(200);
+ }
+
+ return engine_mask;
+}
+
+static void
+wa_14015076503_end(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+{
+ if (!needs_wa_14015076503(gt, engine_mask))
+ return;
+
+ intel_uncore_rmw(gt->uncore,
+ HECI_H_GS1(MTL_GSC_HECI2_BASE),
+ HECI_H_GS1_ER_PREP, 0);
+}
+
+static int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
reset_func reset;
@@ -707,10 +778,14 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
*/
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
- GT_TRACE(gt, "engine_mask=%x\n", engine_mask);
- preempt_disable();
- ret = reset(gt, engine_mask, retry);
- preempt_enable();
+ intel_engine_mask_t reset_mask;
+
+ reset_mask = wa_14015076503_start(gt, engine_mask, !retry);
+
+ GT_TRACE(gt, "engine_mask=%x\n", reset_mask);
+ ret = reset(gt, reset_mask, retry);
+
+ wa_14015076503_end(gt, reset_mask);
}
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
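wa_14015076503_start()/_end() now bracket every reset attempt, and only the first ALL_ENGINES attempt with an idle GSC may take the shortcut of resetting the GuC separately and dropping GSC0 from the mask; a retry, or a busy GSC, must do the full HECI notification followed by the 200ms wait. A toy model of the mask decision (the engine bit position and mask values are invented for illustration):

#include <stdio.h>

#define BIT(n)		(1u << (n))
#define GSC0_BIT	7	/* illustrative engine bit, not the real one */
#define ALL_ENGINES	0xffu

static unsigned int wa_start(unsigned int mask, int first, int gsc_idle)
{
	if (!(mask & BIT(GSC0_BIT)))
		return mask;	/* GSC not targeted: nothing to do */

	if (mask == ALL_ENGINES && first && gsc_idle)
		/* Shortcut: reset the GuC on its own and skip the GSC
		 * engine, avoiding the 200ms firmware handshake. */
		return mask & ~BIT(GSC0_BIT);

	/* Otherwise: notify the GSC FW (HECI_H_GS1_ER_PREP) and give it
	 * 200ms to quiesce before the engine reset proceeds. */
	return mask;
}

int main(void)
{
	printf("first + idle: %#x\n", wa_start(ALL_ENGINES, 1, 1));
	printf("retry:        %#x\n", wa_start(ALL_ENGINES, 0, 1));
	return 0;
}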
@@ -735,14 +810,12 @@ bool intel_has_reset_engine(const struct intel_gt *gt)
int intel_reset_guc(struct intel_gt *gt)
{
- u32 guc_domain =
- GRAPHICS_VER(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
int ret;
GEM_BUG_ON(!HAS_GT_UC(gt->i915));
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
- ret = gen6_hw_domain_reset(gt, guc_domain);
+ ret = __reset_guc(gt);
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
return ret;
@@ -803,8 +876,17 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
intel_engine_mask_t awake = 0;
enum intel_engine_id id;
- /* For GuC mode, ensure submission is disabled before stopping ring */
- intel_uc_reset_prepare(&gt->uc);
+ /*
+ * For GuC mode with submission enabled, ensure submission
+ * is disabled before stopping the rings.
+ *
+ * For GuC mode with submission disabled, ensure that GuC is not
+ * sanitized here; that is done after the engine reset instead, as
+ * reset_prepare() is followed by an engine reset which in this mode
+ * requires GuC to process any CSB FIFO entries generated by the
+ * resets.
+ if (intel_uc_uses_guc_submission(&gt->uc))
+ intel_uc_reset_prepare(&gt->uc);
for_each_engine(engine, gt, id) {
if (intel_engine_pm_get_if_awake(engine))
@@ -901,8 +983,8 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
awake = reset_prepare(gt);
/* Even if the GPU reset fails, it should still stop the engines */
- if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- __intel_gt_reset(gt, ALL_ENGINES);
+ if (!intel_gt_gpu_reset_clobbers_display(gt))
+ intel_gt_reset_all_engines(gt);
for_each_engine(engine, gt, id)
engine->submit_request = nop_submit_request;
@@ -928,6 +1010,15 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
GT_TRACE(gt, "end\n");
}
+static void set_wedged_work(struct work_struct *w)
+{
+ struct intel_gt *gt = container_of(w, struct intel_gt, wedge);
+ intel_wakeref_t wf;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wf)
+ __intel_gt_set_wedged(gt);
+}
+
void intel_gt_set_wedged(struct intel_gt *gt)
{
intel_wakeref_t wakeref;
@@ -939,7 +1030,8 @@ void intel_gt_set_wedged(struct intel_gt *gt)
mutex_lock(&gt->reset.mutex);
if (GEM_SHOW_DEBUG()) {
- struct drm_printer p = drm_debug_printer(__func__);
+ struct drm_printer p = drm_dbg_printer(&gt->i915->drm,
+ DRM_UT_DRIVER, NULL);
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -1003,7 +1095,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
dma_fence_put(fence);
- /* Restart iteration after droping lock */
+ /* Restart iteration after dropping lock */
spin_lock(&timelines->lock);
tl = list_entry(&timelines->active_list, typeof(*tl), link);
}
@@ -1011,8 +1103,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
/* We must reset pending GPU events before restoring our submission */
ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
- if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
+ if (!intel_gt_gpu_reset_clobbers_display(gt))
+ ok = intel_gt_reset_all_engines(gt) == 0;
if (!ok) {
/*
* Warn CI about the unrecoverable wedged condition.
@@ -1056,10 +1148,10 @@ static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
{
int err, i;
- err = __intel_gt_reset(gt, ALL_ENGINES);
+ err = intel_gt_reset_all_engines(gt);
for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
msleep(10 * (i + 1));
- err = __intel_gt_reset(gt, ALL_ENGINES);
+ err = intel_gt_reset_all_engines(gt);
}
if (err)
return err;
@@ -1082,6 +1174,13 @@ static int resume(struct intel_gt *gt)
return 0;
}
+bool intel_gt_gpu_reset_clobbers_display(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+
+ return INTEL_INFO(i915)->gpu_reset_clobbers_display;
+}
+
/**
* intel_gt_reset - reset chip after a hang
* @gt: #intel_gt to reset
@@ -1103,6 +1202,7 @@ void intel_gt_reset(struct intel_gt *gt,
intel_engine_mask_t stalled_mask,
const char *reason)
{
+ struct intel_display *display = gt->i915->display;
intel_engine_mask_t awake;
int ret;
@@ -1124,33 +1224,35 @@ void intel_gt_reset(struct intel_gt *gt,
goto unlock;
if (reason)
- drm_notice(&gt->i915->drm,
- "Resetting chip for %s\n", reason);
+ gt_notice(gt, "Resetting chip for %s\n", reason);
atomic_inc(&gt->i915->gpu_error.reset_count);
awake = reset_prepare(gt);
if (!intel_has_gpu_reset(gt)) {
if (gt->i915->params.reset)
- drm_err(&gt->i915->drm, "GPU reset not supported\n");
+ gt_err(gt, "GPU reset not supported\n");
else
- drm_dbg(&gt->i915->drm, "GPU reset disabled\n");
+ gt_dbg(gt, "GPU reset disabled\n");
goto error;
}
- if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- intel_runtime_pm_disable_interrupts(gt->i915);
+ if (intel_gt_gpu_reset_clobbers_display(gt))
+ intel_irq_suspend(gt->i915);
if (do_reset(gt, stalled_mask)) {
- drm_err(&gt->i915->drm, "Failed to reset chip\n");
+ gt_err(gt, "Failed to reset chip\n");
goto taint;
}
- if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
- intel_runtime_pm_enable_interrupts(gt->i915);
+ if (intel_gt_gpu_reset_clobbers_display(gt))
+ intel_irq_resume(gt->i915);
- intel_overlay_reset(gt->i915);
+ intel_overlay_reset(display);
+ /* sanitize uC after engine reset */
+ if (!intel_uc_uses_guc_submission(&gt->uc))
+ intel_uc_reset_prepare(&gt->uc);
/*
* Next we need to restore the context, but we don't use those
* yet either...
@@ -1161,9 +1263,7 @@ void intel_gt_reset(struct intel_gt *gt,
*/
ret = intel_gt_init_hw(gt);
if (ret) {
- drm_err(&gt->i915->drm,
- "Failed to initialise HW following reset (%d)\n",
- ret);
+ gt_err(gt, "Failed to initialise HW following reset (%d)\n", ret);
goto taint;
}
@@ -1196,7 +1296,30 @@ error:
goto finish;
}
-static int intel_gt_reset_engine(struct intel_engine_cs *engine)
+/**
+ * intel_gt_reset_all_engines() - Reset all engines in the given gt.
+ * @gt: the GT to reset all engines for.
+ *
+ * This function resets all engines within the given gt.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int intel_gt_reset_all_engines(struct intel_gt *gt)
+{
+ return __intel_gt_reset(gt, ALL_ENGINES);
+}
+
+/**
+ * intel_gt_reset_engine() - Reset a specific engine within a gt.
+ * @engine: engine to be reset.
+ *
+ * This function resets the specified engine within a gt.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int intel_gt_reset_engine(struct intel_engine_cs *engine)
{
return __intel_gt_reset(engine->gt, engine->mask);
}
@@ -1220,7 +1343,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
if (msg)
drm_notice(&engine->i915->drm,
"Resetting %s for %s\n", engine->name, msg);
- atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
+ i915_increase_reset_engine_count(&engine->i915->gpu_error, engine);
ret = intel_gt_reset_engine(engine);
if (ret) {
@@ -1274,6 +1397,11 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
return err;
}
+static void display_reset_modeset_stuck(void *gt)
+{
+ intel_gt_set_wedged(gt);
+}
+
static void intel_gt_reset_global(struct intel_gt *gt,
u32 engine_mask,
const char *reason)
@@ -1291,15 +1419,34 @@ static void intel_gt_reset_global(struct intel_gt *gt,
/* Use a watchdog to ensure that our reset completes */
intel_wedge_on_timeout(&w, gt, 60 * HZ) {
- intel_display_prepare_reset(gt->i915);
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_display *display = i915->display;
+ bool need_display_reset;
+ bool reset_display;
+
+ need_display_reset = intel_gt_gpu_reset_clobbers_display(gt) &&
+ intel_has_gpu_reset(gt);
+
+ reset_display = intel_display_reset_test(display) ||
+ need_display_reset;
+
+ if (reset_display)
+ reset_display = intel_display_reset_prepare(display,
+ display_reset_modeset_stuck,
+ gt);
intel_gt_reset(gt, engine_mask, reason);
- intel_display_finish_reset(gt->i915);
+ if (reset_display)
+ intel_display_reset_finish(display, !need_display_reset);
}
if (!test_bit(I915_WEDGED, &gt->reset.flags))
kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
+ else
+ drm_dev_wedged_event(&gt->i915->drm,
+ DRM_WEDGE_RECOVERY_REBIND | DRM_WEDGE_RECOVERY_BUS_RESET,
+ NULL);
}
/**
@@ -1360,7 +1507,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
local_bh_disable();
for_each_engine_masked(engine, gt, engine_mask, tmp) {
- BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
+ BUILD_BUG_ON(I915_RESET_BACKOFF >= I915_RESET_ENGINE);
if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
&gt->reset.flags))
continue;
@@ -1505,6 +1652,7 @@ void intel_gt_init_reset(struct intel_gt *gt)
init_waitqueue_head(&gt->reset.queue);
mutex_init(&gt->reset.mutex);
init_srcu_struct(&gt->reset.backoff_srcu);
+ INIT_WORK(&gt->wedge, set_wedged_work);
/*
* While undesirable to wait inside the shrinker, complain anyway.
@@ -1530,10 +1678,8 @@ static void intel_wedge_me(struct work_struct *work)
{
struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);
- drm_err(&w->gt->i915->drm,
- "%s timed out, cancelling all in-flight rendering.\n",
- w->name);
- intel_gt_set_wedged(w->gt);
+ gt_err(w->gt, "%s timed out, cancelling all in-flight rendering.\n", w->name);
+ set_wedged_work(&w->gt->wedge);
}
void __intel_init_wedge(struct intel_wedge_me *w,
@@ -1545,7 +1691,7 @@ void __intel_init_wedge(struct intel_wedge_me *w,
w->name = name;
INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me);
- schedule_delayed_work(&w->work, timeout);
+ queue_delayed_work(gt->i915->unordered_wq, &w->work, timeout);
}
void __intel_fini_wedge(struct intel_wedge_me *w)
@@ -1555,6 +1701,24 @@ void __intel_fini_wedge(struct intel_wedge_me *w)
w->gt = NULL;
}
+/*
+ * Wa_22011802037 requires that we (or the GuC) ensure that no command
+ * streamers are executing MI_FORCE_WAKE while an engine reset is initiated.
+ */
+bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt)
+{
+ if (GRAPHICS_VER(gt->i915) < 11)
+ return false;
+
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0))
+ return true;
+
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+ return false;
+
+ return true;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_reset.c"
#include "selftest_hangcheck.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index 25c975b6e8fc..724ea6d64f33 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -28,6 +28,8 @@ void intel_gt_handle_error(struct intel_gt *gt,
const char *fmt, ...);
#define I915_ERROR_CAPTURE BIT(0)
+bool intel_gt_gpu_reset_clobbers_display(struct intel_gt *gt);
+
void intel_gt_reset(struct intel_gt *gt,
intel_engine_mask_t stalled_mask,
const char *reason);
@@ -54,7 +56,8 @@ int intel_gt_terminally_wedged(struct intel_gt *gt);
void intel_gt_set_wedged_on_init(struct intel_gt *gt);
void intel_gt_set_wedged_on_fini(struct intel_gt *gt);
-int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask);
+int intel_gt_reset_engine(struct intel_engine_cs *engine);
+int intel_gt_reset_all_engines(struct intel_gt *gt);
int intel_reset_guc(struct intel_gt *gt);
@@ -78,4 +81,6 @@ void __intel_fini_wedge(struct intel_wedge_me *w);
bool intel_has_gpu_reset(const struct intel_gt *gt);
bool intel_has_reset_engine(const struct intel_gt *gt);
+bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt);
+
#endif /* I915_RESET_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h
index 9312b29f5a97..ee4eb574a219 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h
@@ -20,7 +20,7 @@ struct intel_reset {
* FENCE registers).
*
* #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
- * acquire the struct_mutex to reset an engine, we need an explicit
+ * acquire a global lock to reset an engine, we need an explicit
* flag to prevent two concurrent reset attempts in the same engine.
* As the number of engines continues to grow, allocate the flags from
* the most significant bits.
@@ -41,8 +41,7 @@ struct intel_reset {
*/
unsigned long flags;
#define I915_RESET_BACKOFF 0
-#define I915_RESET_MODESET 1
-#define I915_RESET_ENGINE 2
+#define I915_RESET_ENGINE 1
#define I915_WEDGED_ON_INIT (BITS_PER_LONG - 3)
#define I915_WEDGED_ON_FINI (BITS_PER_LONG - 2)
#define I915_WEDGED (BITS_PER_LONG - 1)
@@ -51,7 +50,7 @@ struct intel_reset {
/**
* Waitqueue to signal when the reset has completed. Used by clients
- * that wait for dev_priv->mm.wedged to settle.
+ * that wait for i915->mm.wedged to settle.
*/
wait_queue_head_t queue;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 15ec64d881c4..b74d9205c0f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -13,6 +13,7 @@
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_ring.h"
+#include "intel_gt.h"
#include "intel_timeline.h"
unsigned int intel_ring_update_space(struct intel_ring *ring)
@@ -53,10 +54,10 @@ int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww)
if (unlikely(ret))
goto err_unpin;
- if (i915_vma_is_map_and_fenceable(vma)) {
+ if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) {
addr = (void __force *)i915_vma_pin_iomap(vma);
} else {
- int type = i915_coherent_map_type(vma->vm->i915, vma->obj, false);
+ int type = intel_gt_coherent_map_type(vma->vm->gt, vma->obj, false);
addr = i915_gem_object_pin_map(vma->obj, type);
}
@@ -98,7 +99,7 @@ void intel_ring_unpin(struct intel_ring *ring)
return;
i915_vma_unset_ggtt_write(vma);
- if (i915_vma_is_map_and_fenceable(vma))
+ if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915))
i915_vma_unpin_iomap(vma);
else
i915_gem_object_unpin_map(vma->obj);
@@ -116,7 +117,7 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE |
I915_BO_ALLOC_PM_VOLATILE);
- if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt))
+ if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt) && !HAS_LLC(i915))
obj = i915_gem_object_create_stolen(i915, size);
if (IS_ERR(obj))
obj = i915_gem_object_create_internal(i915, size);
@@ -307,30 +308,6 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
return cs;
}
-/* Align the ring tail to a cacheline boundary */
-int intel_ring_cacheline_align(struct i915_request *rq)
-{
- int num_dwords;
- void *cs;
-
- num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
- if (num_dwords == 0)
- return 0;
-
- num_dwords = CACHELINE_DWORDS - num_dwords;
- GEM_BUG_ON(num_dwords & 1);
-
- cs = intel_ring_begin(rq, num_dwords);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
- intel_ring_advance(rq, cs + num_dwords);
-
- GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
- return 0;
-}
-
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_ring.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
index 1b32dadfb8c3..64b322e25f36 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -16,7 +16,6 @@ struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
-int intel_ring_cacheline_align(struct i915_request *rq);
unsigned int intel_ring_update_space(struct intel_ring *ring);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 827adb0cfaea..8314a4b0505e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -15,17 +15,19 @@
#include "i915_irq.h"
#include "i915_mitigations.h"
#include "i915_reg.h"
+#include "i915_wait_util.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
+#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "shmem_utils.h"
-#include "intel_engine_heartbeat.h"
-#include "intel_engine_pm.h"
/* Rough estimate of the typical request size, performing a flush,
* set-context and then emitting the batch.
@@ -192,6 +194,7 @@ static bool stop_ring(struct intel_engine_cs *engine)
static int xcs_resume(struct intel_engine_cs *engine)
{
struct intel_ring *ring = engine->legacy.ring;
+ ktime_t kt;
ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
ring->head, ring->tail);
@@ -229,10 +232,33 @@ static int xcs_resume(struct intel_engine_cs *engine)
set_pp_dir(engine);
- /* First wake the ring up to an empty/idle ring */
- ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
+ /*
+ * First wake the ring up to an empty/idle ring.
+ * Allow up to 50ms for the RING_HEAD write to take effect on all
+ * platforms; experiments with different values determined that
+ * 50ms works best.
+ */
+ for ((kt) = ktime_get() + (50 * NSEC_PER_MSEC);
+ ktime_before(ktime_get(), (kt)); cpu_relax()) {
+ /*
+ * A reset can fail if the engine resumes from an incorrect
+ * RING_HEAD, in which case the GPU may be fed invalid
+ * instructions and hang unrecoverably. So if the first write
+ * doesn't stick, try again.
+ */
+ ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
+ if (ENGINE_READ_FW(engine, RING_HEAD) == ring->head)
+ break;
+ }
+
ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
- ENGINE_POSTING_READ(engine, RING_TAIL);
+ if (ENGINE_READ_FW(engine, RING_HEAD) != ENGINE_READ_FW(engine, RING_TAIL)) {
+ ENGINE_TRACE(engine, "failed to reset empty ring: [%x, %x]: %x\n",
+ ENGINE_READ_FW(engine, RING_HEAD),
+ ENGINE_READ_FW(engine, RING_TAIL),
+ ring->head);
+ goto err;
+ }
ENGINE_WRITE_FW(engine, RING_CTL,
RING_CTL_SIZE(ring->size) | RING_VALID);
@@ -241,12 +267,16 @@ static int xcs_resume(struct intel_engine_cs *engine)
if (__intel_wait_for_register_fw(engine->uncore,
RING_CTL(engine->mmio_base),
RING_VALID, RING_VALID,
- 5000, 0, NULL))
+ 5000, 0, NULL)) {
+ ENGINE_TRACE(engine, "failed to restart\n");
goto err;
+ }
- if (GRAPHICS_VER(engine->i915) > 2)
+ if (GRAPHICS_VER(engine->i915) > 2) {
ENGINE_WRITE_FW(engine,
RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
+ ENGINE_POSTING_READ(engine, RING_MI_MODE);
+ }
/* Now awake, let it get started */
if (ring->tail != ring->head) {
@@ -259,16 +289,16 @@ static int xcs_resume(struct intel_engine_cs *engine)
return 0;
err:
- drm_err(&engine->i915->drm,
- "%s initialization failed; "
- "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
- engine->name,
- ENGINE_READ(engine, RING_CTL),
- ENGINE_READ(engine, RING_CTL) & RING_VALID,
- ENGINE_READ(engine, RING_HEAD), ring->head,
- ENGINE_READ(engine, RING_TAIL), ring->tail,
- ENGINE_READ(engine, RING_START),
- i915_ggtt_offset(ring->vma));
+ gt_err(engine->gt, "%s initialization failed\n", engine->name);
+ ENGINE_TRACE(engine,
+ "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
+ ENGINE_READ(engine, RING_CTL),
+ ENGINE_READ(engine, RING_CTL) & RING_VALID,
+ ENGINE_READ(engine, RING_HEAD), ring->head,
+ ENGINE_READ(engine, RING_TAIL), ring->tail,
+ ENGINE_READ(engine, RING_START),
+ i915_ggtt_offset(ring->vma));
+ GEM_TRACE_DUMP();
return -EIO;
}
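xcs_resume() above turns the single RING_HEAD write into a bounded write-and-verify loop: keep rewriting until the readback matches or the 50ms budget expires, then separately check HEAD against TAIL before declaring the ring idle. A standalone sketch of the write-verify-with-deadline pattern (a plain variable stands in for the register, so the first write always sticks here):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint32_t reg_head;	/* stand-in for the RING_HEAD register */

static int64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
}

static int write_head_verified(uint32_t head, long budget_ms)
{
	int64_t deadline = now_ns() + budget_ms * 1000000;

	do {
		reg_head = head;	/* real HW may drop or delay this */
		if (reg_head == head)
			return 0;	/* readback matches: write stuck */
	} while (now_ns() < deadline);

	return -1;			/* give up, as xcs_resume() does */
}

int main(void)
{
	printf("head write %s\n",
	       write_head_verified(0, 50) ? "failed" : "ok");
	return 0;
}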
@@ -336,7 +366,13 @@ static void reset_prepare(struct intel_engine_cs *engine)
ENGINE_READ_FW(engine, RING_HEAD),
ENGINE_READ_FW(engine, RING_TAIL),
ENGINE_READ_FW(engine, RING_START));
- if (!stop_ring(engine)) {
+ /*
+ * Sometimes the engine head fails to reach zero even after it is
+ * written. Poll with wait_for_atomic() for up to 20ms so the engine
+ * resumes from the correct RING_HEAD; experiments with different
+ * values determined that 20ms works best.
+ */
+ if (wait_for_atomic(stop_ring(engine), 20)) {
drm_err(&engine->i915->drm,
"failed to set %s head to zero "
"ctl %08x head %08x tail %08x start %08x\n",
@@ -474,8 +510,7 @@ static int ring_context_init_default_state(struct intel_context *ce,
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
- shmem_read(ce->engine->default_state, 0,
- vaddr, ce->engine->context_size);
+ shmem_read(ce->default_state, 0, vaddr, ce->engine->context_size);
i915_gem_object_flush_map(obj);
__i915_gem_object_release_map(obj);
@@ -491,7 +526,7 @@ static int ring_context_pre_pin(struct intel_context *ce,
struct i915_address_space *vm;
int err = 0;
- if (ce->engine->default_state &&
+ if (ce->default_state &&
!test_bit(CONTEXT_VALID_BIT, &ce->flags)) {
err = ring_context_init_default_state(ce, ww);
if (err)
@@ -570,10 +605,12 @@ static int ring_context_alloc(struct intel_context *ce)
{
struct intel_engine_cs *engine = ce->engine;
+ if (!intel_context_has_own_state(ce))
+ ce->default_state = engine->default_state;
+
/* One ringbuffer to rule them all */
GEM_BUG_ON(!engine->legacy.ring);
ce->ring = engine->legacy.ring;
- ce->timeline = intel_timeline_get(engine->legacy.timeline);
GEM_BUG_ON(ce->state);
if (engine->context_size) {
@@ -586,6 +623,8 @@ static int ring_context_alloc(struct intel_context *ce)
ce->state = vma;
}
+ ce->timeline = intel_timeline_get(engine->legacy.timeline);
+
return 0;
}
@@ -805,7 +844,7 @@ static int mi_set_context(struct i915_request *rq,
static int remap_l3_slice(struct i915_request *rq, int slice)
{
#define L3LOG_DW (GEN7_L3LOG_SIZE / sizeof(u32))
- u32 *cs, *remap_info = rq->engine->i915->l3_parity.remap_info[slice];
+ u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
int i;
if (!remap_info)
@@ -1052,9 +1091,9 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
static void ring_release(struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = engine->i915;
+ struct drm_i915_private *i915 = engine->i915;
- drm_WARN_ON(&dev_priv->drm, GRAPHICS_VER(dev_priv) > 2 &&
+ drm_WARN_ON(&i915->drm, GRAPHICS_VER(i915) > 2 &&
(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
intel_engine_cleanup_common(engine);
@@ -1088,9 +1127,6 @@ static void setup_irq(struct intel_engine_cs *engine)
} else if (GRAPHICS_VER(i915) >= 5) {
engine->irq_enable = gen5_irq_enable;
engine->irq_disable = gen5_irq_disable;
- } else if (GRAPHICS_VER(i915) >= 3) {
- engine->irq_enable = gen3_irq_enable;
- engine->irq_disable = gen3_irq_disable;
} else {
engine->irq_enable = gen2_irq_enable;
engine->irq_disable = gen2_irq_disable;
@@ -1144,7 +1180,7 @@ static void setup_common(struct intel_engine_cs *engine)
* equivalent to our next initial breadcrumb so we can elide
* engine->emit_init_breadcrumb().
*/
- engine->emit_fini_breadcrumb = gen3_emit_breadcrumb;
+ engine->emit_fini_breadcrumb = gen2_emit_breadcrumb;
if (GRAPHICS_VER(i915) == 5)
engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
@@ -1157,7 +1193,7 @@ static void setup_common(struct intel_engine_cs *engine)
else if (IS_I830(i915) || IS_I845G(i915))
engine->emit_bb_start = i830_emit_bb_start;
else
- engine->emit_bb_start = gen3_emit_bb_start;
+ engine->emit_bb_start = gen2_emit_bb_start;
}
static void setup_rcs(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index f5d7b5126433..b01c837ab646 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -5,21 +5,28 @@
#include <linux/string_helpers.h>
-#include <drm/i915_drm.h>
+#include <drm/intel/i915_drm.h>
+
+#include "display/intel_display_rps.h"
+#include "display/vlv_clock.h"
+#include "soc/intel_dram.h"
-#include "display/intel_display.h"
#include "i915_drv.h"
#include "i915_irq.h"
+#include "i915_reg.h"
+#include "i915_wait_util.h"
#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
+#include "intel_gt_pm.h"
#include "intel_gt_pm_irq.h"
+#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_mchbar_regs.h"
#include "intel_pcode.h"
#include "intel_rps.h"
-#include "vlv_sideband.h"
+#include "vlv_iosf_sb.h"
#include "../../../platform/x86/intel_ips.h"
#define BUSY_MAX_EI 20u /* ms */
@@ -48,7 +55,7 @@ static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
- return &gt->uc.guc.slpc;
+ return &gt_to_guc(gt)->slpc;
}
static bool rps_uses_slpc(struct intel_rps *rps)
@@ -70,14 +77,15 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
static void rps_timer(struct timer_list *t)
{
- struct intel_rps *rps = from_timer(rps, t, timer);
+ struct intel_rps *rps = timer_container_of(rps, t, timer);
+ struct intel_gt *gt = rps_to_gt(rps);
struct intel_engine_cs *engine;
ktime_t dt, last, timestamp;
enum intel_engine_id id;
s64 max_busy[3] = {};
timestamp = 0;
- for_each_engine(engine, rps_to_gt(rps), id) {
+ for_each_engine(engine, gt, id) {
s64 busy;
int i;
@@ -121,7 +129,7 @@ static void rps_timer(struct timer_list *t)
busy += div_u64(max_busy[i], 1 << i);
}
- GT_TRACE(rps_to_gt(rps),
+ GT_TRACE(gt,
"busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
busy, (int)div64_u64(100 * busy, dt),
max_busy[0], max_busy[1], max_busy[2],
@@ -131,12 +139,12 @@ static void rps_timer(struct timer_list *t)
rps->cur_freq < rps->max_freq_softlimit) {
rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
rps->pm_interval = 1;
- schedule_work(&rps->work);
+ queue_work(gt->i915->unordered_wq, &rps->work);
} else if (100 * busy < rps->power.down_threshold * dt &&
rps->cur_freq > rps->min_freq_softlimit) {
rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
rps->pm_interval = 1;
- schedule_work(&rps->work);
+ queue_work(gt->i915->unordered_wq, &rps->work);
} else {
rps->last_adj = 0;
}
@@ -156,7 +164,7 @@ static void rps_start_timer(struct intel_rps *rps)
static void rps_stop_timer(struct intel_rps *rps)
{
- del_timer_sync(&rps->timer);
+ timer_delete_sync(&rps->timer);
rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
cancel_work_sync(&rps->work);
}
@@ -260,10 +268,10 @@ static const struct cparams {
u16 c;
} cparams[] = {
{ 1, 1333, 301, 28664 },
- { 1, 1066, 294, 24460 },
+ { 1, 1067, 294, 24460 },
{ 1, 800, 294, 25192 },
{ 0, 1333, 276, 27605 },
- { 0, 1066, 276, 27605 },
+ { 0, 1067, 276, 27605 },
{ 0, 800, 231, 23784 },
};
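[Editor's note] The 1066 -> 1067 changes above follow from the unit
conversion in gen5_rps_init() below: intel_mem_freq() appears to report
kHz (the fsb_freq comparisons against 3200000/4800000 suggest as much),
and a nominal "1066" DDR clock is ~1066.667 MHz, so rounding to MHz
yields 1067. A minimal sketch of that arithmetic:

#include <linux/math.h>	/* DIV_ROUND_CLOSEST() */

static unsigned int mem_freq_mhz_example(void)
{
	/* hypothetical reading: DDR "1066" is really ~1066.667 MHz */
	unsigned int mem_freq = 1066667;	/* kHz */

	return DIV_ROUND_CLOSEST(mem_freq, 1000);	/* == 1067 */
}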
@@ -271,19 +279,24 @@ static void gen5_rps_init(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
+ unsigned int fsb_freq, mem_freq;
u8 fmax, fmin, fstart;
u32 rgvmodectl;
int c_m, i;
- if (i915->fsb_freq <= 3200)
+ fsb_freq = intel_fsb_freq(i915);
+ mem_freq = intel_mem_freq(i915);
+
+ if (fsb_freq <= 3200000)
c_m = 0;
- else if (i915->fsb_freq <= 4800)
+ else if (fsb_freq <= 4800000)
c_m = 1;
else
c_m = 2;
for (i = 0; i < ARRAY_SIZE(cparams); i++) {
- if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
+ if (cparams[i].i == c_m &&
+ cparams[i].t == DIV_ROUND_CLOSEST(mem_freq, 1000)) {
rps->ips.m = cparams[i].m;
rps->ips.c = cparams[i].c;
break;
@@ -544,6 +557,7 @@ static unsigned int init_emon(struct intel_uncore *uncore)
static bool gen5_rps_enable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_display *display = i915->display;
struct intel_uncore *uncore = rps_to_uncore(rps);
u8 fstart, vstart;
u32 rgvmodectl;
@@ -601,9 +615,7 @@ static bool gen5_rps_enable(struct intel_rps *rps)
rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
rps->ips.last_time2 = ktime_get_raw_ns();
- spin_lock(&i915->irq_lock);
- ilk_enable_display_irq(i915, DE_PCU_EVENT);
- spin_unlock(&i915->irq_lock);
+ ilk_display_rps_enable(display);
spin_unlock_irq(&mchdev_lock);
@@ -615,14 +627,13 @@ static bool gen5_rps_enable(struct intel_rps *rps)
static void gen5_rps_disable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_display *display = i915->display;
struct intel_uncore *uncore = rps_to_uncore(rps);
u16 rgvswctl;
spin_lock_irq(&mchdev_lock);
- spin_lock(&i915->irq_lock);
- ilk_disable_display_irq(i915, DE_PCU_EVENT);
- spin_unlock(&i915->irq_lock);
+ ilk_display_rps_disable(display);
rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
@@ -669,7 +680,6 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
{
struct intel_gt *gt = rps_to_gt(rps);
struct intel_uncore *uncore = gt->uncore;
- u32 threshold_up = 0, threshold_down = 0; /* in % */
u32 ei_up = 0, ei_down = 0;
lockdep_assert_held(&rps->power.mutex);
@@ -677,9 +687,6 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
if (new_power == rps->power.mode)
return;
- threshold_up = 95;
- threshold_down = 85;
-
/* Note the units here are not exactly 1us, but 1280ns. */
switch (new_power) {
case LOW_POWER:
@@ -706,17 +713,22 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
GT_TRACE(gt,
"changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
- new_power, threshold_up, ei_up, threshold_down, ei_down);
+ new_power,
+ rps->power.up_threshold, ei_up,
+ rps->power.down_threshold, ei_down);
set(uncore, GEN6_RP_UP_EI,
intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
set(uncore, GEN6_RP_UP_THRESHOLD,
- intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10));
+ intel_gt_ns_to_pm_interval(gt,
+ ei_up * rps->power.up_threshold * 10));
set(uncore, GEN6_RP_DOWN_EI,
intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
set(uncore, GEN6_RP_DOWN_THRESHOLD,
- intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10));
+ intel_gt_ns_to_pm_interval(gt,
+ ei_down *
+ rps->power.down_threshold * 10));
set(uncore, GEN6_RP_CONTROL,
(GRAPHICS_VER(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
@@ -728,8 +740,6 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
skip_hw_write:
rps->power.mode = new_power;
- rps->power.up_threshold = threshold_up;
- rps->power.down_threshold = threshold_down;
}
static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
@@ -817,9 +827,9 @@ static int vlv_rps_set(struct intel_rps *rps, u8 val)
struct drm_i915_private *i915 = rps_to_i915(rps);
int err;
- vlv_punit_get(i915);
- err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
- vlv_punit_put(i915);
+ vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
+ err = vlv_iosf_sb_write(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_FREQ_REQ, val);
+ vlv_iosf_sb_put(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n",
val, intel_gpu_freq(rps, val));
@@ -971,7 +981,7 @@ static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
}
mutex_unlock(&rps->lock);
if (boost)
- schedule_work(&rps->work);
+ queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
return 0;
}
@@ -996,6 +1006,10 @@ void intel_rps_dec_waiters(struct intel_rps *rps)
if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);
+	/* Don't decrement num_waiters for requests where the increment was skipped */
+ if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING)
+ return;
+
intel_guc_slpc_dec_waiters(slpc);
} else {
atomic_dec(&rps->num_waiters);
@@ -1009,6 +1023,10 @@ void intel_rps_boost(struct i915_request *rq)
if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
return;
+ /* Waitboost is not needed for contexts marked with a Freq hint */
+ if (test_bit(CONTEXT_LOW_LATENCY, &rq->context->flags))
+ return;
+
/* Serializes with i915_request_retire() */
if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
@@ -1016,14 +1034,23 @@ void intel_rps_boost(struct i915_request *rq)
if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);
- if (slpc->min_freq_softlimit >= slpc->boost_freq)
+ /* Waitboost should not be done with power saving profile */
+ if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING)
return;
/* Return if old value is non zero */
if (!atomic_fetch_inc(&slpc->num_waiters)) {
+ /*
+ * Skip queuing boost work if frequency is already boosted,
+ * but still increment num_waiters.
+ */
+ if (slpc->min_freq_softlimit >= slpc->boost_freq)
+ return;
+
GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
rq->fence.context, rq->fence.seqno);
- schedule_work(&slpc->boost_work);
+ queue_work(rps_to_gt(rps)->i915->unordered_wq,
+ &slpc->boost_work);
}
return;
@@ -1039,7 +1066,7 @@ void intel_rps_boost(struct i915_request *rq)
rq->fence.context, rq->fence.seqno);
if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
- schedule_work(&rps->work);
+ queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
}
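[Editor's note] The "Return if old value is non zero" test above leans
on atomic_fetch_inc() returning the value *before* the increment, so
only the first waiter queues the boost work while later waiters merely
bump the count (and intel_rps_dec_waiters() mirrors the power-saving
skip). A minimal sketch of that semantic:

static bool example_first_waiter(atomic_t *num_waiters)
{
	/* atomic_fetch_inc() returns the pre-increment value */
	return atomic_fetch_inc(num_waiters) == 0;
}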
@@ -1081,11 +1108,7 @@ static u32 intel_rps_read_state_cap(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
- if (IS_PONTEVECCHIO(i915))
- return intel_uncore_read(uncore, PVC_RP_STATE_CAP);
- else if (IS_XEHPSDV(i915))
- return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP);
- else if (IS_GEN9_LP(i915))
+ if (IS_GEN9_LP(i915))
return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
else
return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
@@ -1156,7 +1179,7 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- if (IS_METEORLAKE(i915))
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
return mtl_get_freq_caps(rps, caps);
else
return __gen6_rps_get_freq_caps(rps, caps);
@@ -1264,7 +1287,7 @@ static int chv_rps_max_freq(struct intel_rps *rps)
struct intel_gt *gt = rps_to_gt(rps);
u32 val;
- val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, FB_GFX_FMAX_AT_VMAX_FUSE);
switch (gt->info.sseu.eu_total) {
case 8:
@@ -1291,7 +1314,7 @@ static int chv_rps_rpe_freq(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 val;
- val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_GPU_DUTYCYCLE_REG);
val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;
return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
@@ -1302,7 +1325,7 @@ static int chv_rps_guar_freq(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 val;
- val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, FB_GFX_FMAX_AT_VMAX_FUSE);
return val & FB_GFX_FREQ_FUSE_MASK;
}
@@ -1312,7 +1335,7 @@ static u32 chv_rps_min_freq(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 val;
- val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, FB_GFX_FMIN_AT_VMIN_FUSE);
val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;
return val & FB_GFX_FREQ_FUSE_MASK;
@@ -1346,14 +1369,14 @@ static bool chv_rps_enable(struct intel_rps *rps)
GEN6_PM_RP_DOWN_TIMEOUT);
/* Setting Fixed Bias */
- vlv_punit_get(i915);
+ vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
- vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
+ vlv_iosf_sb_write(&i915->drm, VLV_IOSF_SB_PUNIT, VLV_TURBO_SOC_OVERRIDE, val);
- val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_FREQ_STS);
- vlv_punit_put(i915);
+ vlv_iosf_sb_put(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
/* RPS code assumes GPLL is used */
drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
@@ -1371,7 +1394,7 @@ static int vlv_rps_guar_freq(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 val, rp1;
- val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_NC, IOSF_NC_FB_GFX_FREQ_FUSE);
rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
@@ -1384,7 +1407,7 @@ static int vlv_rps_max_freq(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 val, rp0;
- val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_NC, IOSF_NC_FB_GFX_FREQ_FUSE);
rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
/* Clamp to max */
@@ -1398,9 +1421,9 @@ static int vlv_rps_rpe_freq(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 val, rpe;
- val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_NC, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
- val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_NC, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
return rpe;
@@ -1411,7 +1434,7 @@ static int vlv_rps_min_freq(struct intel_rps *rps)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 val;
- val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_LFM) & 0xff;
/*
* According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
* for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
@@ -1447,15 +1470,15 @@ static bool vlv_rps_enable(struct intel_rps *rps)
/* WaGsvRC0ResidencyMethod:vlv */
rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
- vlv_punit_get(i915);
+ vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
/* Setting Fixed Bias */
val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
- vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
+ vlv_iosf_sb_write(&i915->drm, VLV_IOSF_SB_PUNIT, VLV_TURBO_SOC_OVERRIDE, val);
- val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+ val = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_FREQ_STS);
- vlv_punit_put(i915);
+ vlv_iosf_sb_put(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
/* RPS code assumes GPLL is used */
drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
@@ -1555,10 +1578,12 @@ void intel_rps_enable(struct intel_rps *rps)
return;
GT_TRACE(rps_to_gt(rps),
- "min:%x, max:%x, freq:[%d, %d]\n",
+ "min:%x, max:%x, freq:[%d, %d], thresholds:[%u, %u]\n",
rps->min_freq, rps->max_freq,
intel_gpu_freq(rps, rps->min_freq),
- intel_gpu_freq(rps, rps->max_freq));
+ intel_gpu_freq(rps, rps->max_freq),
+ rps->power.up_threshold,
+ rps->power.down_threshold);
GEM_BUG_ON(rps->max_freq < rps->min_freq);
GEM_BUG_ON(rps->idle_freq > rps->max_freq);
@@ -1665,10 +1690,7 @@ static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- rps->gpll_ref_freq =
- vlv_get_cck_clock(i915, "GPLL ref",
- CCK_GPLL_CLOCK_CONTROL,
- i915->czclk_freq);
+ rps->gpll_ref_freq = vlv_clock_get_gpll(&i915->drm);
drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n",
rps->gpll_ref_freq);
@@ -1677,30 +1699,14 @@ static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
static void vlv_rps_init(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- u32 val;
- vlv_iosf_sb_get(i915,
+ vlv_init_gpll_ref_freq(rps);
+
+ vlv_iosf_sb_get(&i915->drm,
BIT(VLV_IOSF_SB_PUNIT) |
BIT(VLV_IOSF_SB_NC) |
BIT(VLV_IOSF_SB_CCK));
- vlv_init_gpll_ref_freq(rps);
-
- val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
- switch ((val >> 6) & 3) {
- case 0:
- case 1:
- i915->mem_freq = 800;
- break;
- case 2:
- i915->mem_freq = 1066;
- break;
- case 3:
- i915->mem_freq = 1333;
- break;
- }
- drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
-
rps->max_freq = vlv_rps_max_freq(rps);
rps->rp0_freq = rps->max_freq;
drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
@@ -1718,7 +1724,7 @@ static void vlv_rps_init(struct intel_rps *rps)
drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
- vlv_iosf_sb_put(i915,
+ vlv_iosf_sb_put(&i915->drm,
BIT(VLV_IOSF_SB_PUNIT) |
BIT(VLV_IOSF_SB_NC) |
BIT(VLV_IOSF_SB_CCK));
@@ -1727,27 +1733,14 @@ static void vlv_rps_init(struct intel_rps *rps)
static void chv_rps_init(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- u32 val;
- vlv_iosf_sb_get(i915,
+ vlv_init_gpll_ref_freq(rps);
+
+ vlv_iosf_sb_get(&i915->drm,
BIT(VLV_IOSF_SB_PUNIT) |
BIT(VLV_IOSF_SB_NC) |
BIT(VLV_IOSF_SB_CCK));
- vlv_init_gpll_ref_freq(rps);
-
- val = vlv_cck_read(i915, CCK_FUSE_REG);
-
- switch ((val >> 2) & 0x7) {
- case 3:
- i915->mem_freq = 2000;
- break;
- default:
- i915->mem_freq = 1600;
- break;
- }
- drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
-
rps->max_freq = chv_rps_max_freq(rps);
rps->rp0_freq = rps->max_freq;
drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
@@ -1765,7 +1758,7 @@ static void chv_rps_init(struct intel_rps *rps)
drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
- vlv_iosf_sb_put(i915,
+ vlv_iosf_sb_put(&i915->drm,
BIT(VLV_IOSF_SB_PUNIT) |
BIT(VLV_IOSF_SB_NC) |
BIT(VLV_IOSF_SB_CCK));
@@ -1784,6 +1777,7 @@ static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
const struct intel_rps_ei *prev = &rps->ei;
struct intel_rps_ei now;
@@ -1800,7 +1794,7 @@ static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
time = ktime_us_delta(now.ktime, prev->ktime);
- time *= rps_to_i915(rps)->czclk_freq;
+ time *= vlv_clock_get_czclk(&i915->drm);
/* Workload can be split between render + media,
* e.g. SwapBuffers being blitted in X after being rendered in
@@ -1927,7 +1921,7 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
gen6_gt_pm_mask_irq(gt, events);
rps->pm_iir |= events;
- schedule_work(&rps->work);
+ queue_work(gt->i915->unordered_wq, &rps->work);
}
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
@@ -1944,7 +1938,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
gen6_gt_pm_mask_irq(gt, events);
rps->pm_iir |= events;
- schedule_work(&rps->work);
+ queue_work(gt->i915->unordered_wq, &rps->work);
spin_unlock(gt->irq_lock);
}
@@ -2040,6 +2034,12 @@ void intel_rps_init(struct intel_rps *rps)
}
}
+ /* Set default thresholds in % */
+ rps->power.up_threshold = 95;
+ rps_to_gt(rps)->defaults.rps_up_threshold = rps->power.up_threshold;
+ rps->power.down_threshold = 85;
+ rps_to_gt(rps)->defaults.rps_down_threshold = rps->power.down_threshold;
+
/* Finally allow us to boost to max by default */
rps->boost_freq = rps->max_freq;
rps->idle_freq = rps->min_freq;
@@ -2075,16 +2075,6 @@ void intel_rps_sanitize(struct intel_rps *rps)
rps_disable_interrupts(rps);
}
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps)
-{
- struct drm_i915_private *i915 = rps_to_i915(rps);
- i915_reg_t rpstat;
-
- rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
-
- return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat);
-}
-
u32 intel_rps_read_rpstat(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
@@ -2095,7 +2085,7 @@ u32 intel_rps_read_rpstat(struct intel_rps *rps)
return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
}
-u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
+static u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 cagf;
@@ -2118,10 +2108,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
return cagf;
}
-static u32 read_cagf(struct intel_rps *rps)
+static u32 __read_cagf(struct intel_rps *rps, bool take_fw)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
+ i915_reg_t r = INVALID_MMIO_REG;
u32 freq;
/*
@@ -2129,22 +2120,30 @@ static u32 read_cagf(struct intel_rps *rps)
* registers will return 0 freq when GT is in RC6
*/
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
- freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+ r = MTL_MIRROR_TARGET_WP1;
} else if (GRAPHICS_VER(i915) >= 12) {
- freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
+ r = GEN12_RPSTAT1;
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
- vlv_punit_get(i915);
- freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
- vlv_punit_put(i915);
+ vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
+ freq = vlv_iosf_sb_read(&i915->drm, VLV_IOSF_SB_PUNIT, PUNIT_REG_GPU_FREQ_STS);
+ vlv_iosf_sb_put(&i915->drm, BIT(VLV_IOSF_SB_PUNIT));
} else if (GRAPHICS_VER(i915) >= 6) {
- freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
+ r = GEN6_RPSTAT1;
} else {
- freq = intel_uncore_read(uncore, MEMSTAT_ILK);
+ r = MEMSTAT_ILK;
}
+ if (i915_mmio_reg_valid(r))
+ freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r);
+
return intel_rps_get_cagf(rps, freq);
}
+static u32 read_cagf(struct intel_rps *rps)
+{
+ return __read_cagf(rps, true);
+}
+
u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
{
struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
@@ -2157,7 +2156,12 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
return freq;
}
-u32 intel_rps_read_punit_req(struct intel_rps *rps)
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps)
+{
+ return intel_gpu_freq(rps, __read_cagf(rps, false));
+}
+
+static u32 intel_rps_read_punit_req(struct intel_rps *rps)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
@@ -2590,6 +2594,58 @@ int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val)
return set_min_freq(rps, val);
}
+u8 intel_rps_get_up_threshold(struct intel_rps *rps)
+{
+ return rps->power.up_threshold;
+}
+
+static int rps_set_threshold(struct intel_rps *rps, u8 *threshold, u8 val)
+{
+ int ret;
+
+ if (val > 100)
+ return -EINVAL;
+
+ ret = mutex_lock_interruptible(&rps->lock);
+ if (ret)
+ return ret;
+
+ if (*threshold == val)
+ goto out_unlock;
+
+ *threshold = val;
+
+ /* Force reset. */
+ rps->last_freq = -1;
+ mutex_lock(&rps->power.mutex);
+ rps->power.mode = -1;
+ mutex_unlock(&rps->power.mutex);
+
+ intel_rps_set(rps, clamp(rps->cur_freq,
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit));
+
+out_unlock:
+ mutex_unlock(&rps->lock);
+
+ return ret;
+}
+
+int intel_rps_set_up_threshold(struct intel_rps *rps, u8 threshold)
+{
+ return rps_set_threshold(rps, &rps->power.up_threshold, threshold);
+}
+
+u8 intel_rps_get_down_threshold(struct intel_rps *rps)
+{
+ return rps->power.down_threshold;
+}
+
+int intel_rps_set_down_threshold(struct intel_rps *rps, u8 threshold)
+{
+ return rps_set_threshold(rps, &rps->power.down_threshold, threshold);
+}
+
static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
@@ -2671,7 +2727,7 @@ bool rps_read_mask_mmio(struct intel_rps *rps,
static struct drm_i915_private __rcu *ips_mchdev;
-/**
+/*
* Tells the intel_ips driver that the i915 driver is now loaded, if
* IPS got loaded first.
*
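[Editor's note] A sketch of the threshold programming in rps_set_power()
above, now driven by rps->power.up_threshold/down_threshold rather than
local constants: ei_up is in microseconds and the threshold in percent,
so ei_up * threshold * 10 is the threshold window in nanoseconds. The
interval value below is hypothetical; 95% is the default set in
intel_rps_init():

static u64 up_threshold_ns_example(void)
{
	const u64 ei_up = 16000;	/* us, hypothetical interval */
	const u64 up_threshold = 95;	/* %, default from intel_rps_init() */

	/* ei_up * 1000 ns * threshold / 100 == ei_up * threshold * 10 */
	return ei_up * up_threshold * 10;	/* 15200000 ns = 15.2 ms */
}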
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
index c622962c6bef..92fb01f5a452 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -37,8 +37,12 @@ void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive);
int intel_gpu_freq(struct intel_rps *rps, int val);
int intel_freq_opcode(struct intel_rps *rps, int val);
-u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1);
+u8 intel_rps_get_up_threshold(struct intel_rps *rps);
+int intel_rps_set_up_threshold(struct intel_rps *rps, u8 threshold);
+u8 intel_rps_get_down_threshold(struct intel_rps *rps);
+int intel_rps_set_down_threshold(struct intel_rps *rps, u8 threshold);
u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps);
u32 intel_rps_get_requested_frequency(struct intel_rps *rps);
u32 intel_rps_get_min_frequency(struct intel_rps *rps);
u32 intel_rps_get_min_raw_freq(struct intel_rps *rps);
@@ -49,10 +53,8 @@ int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val);
u32 intel_rps_get_rp0_frequency(struct intel_rps *rps);
u32 intel_rps_get_rp1_frequency(struct intel_rps *rps);
u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
-u32 intel_rps_read_punit_req(struct intel_rps *rps);
u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
u32 intel_rps_read_rpstat(struct intel_rps *rps);
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps);
void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps);
void intel_rps_raise_unslice(struct intel_rps *rps);
void intel_rps_lower_unslice(struct intel_rps *rps);
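[Editor's note] A minimal usage sketch for the threshold accessors
declared above; the store helper is hypothetical, but the error handling
follows rps_set_threshold(), which rejects values above 100:

static int example_store_up_threshold(struct intel_rps *rps, const char *buf)
{
	u8 val;
	int ret;

	ret = kstrtou8(buf, 10, &val);
	if (ret)
		return ret;

	return intel_rps_set_up_threshold(rps, val);	/* -EINVAL if val > 100 */
}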
diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h
index 9173ec75f2b8..ece445109305 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h
@@ -40,7 +40,7 @@ enum {
/**
* struct intel_rps_freq_caps - rps freq capabilities
* @rp0_freq: non-overclocked max frequency
- * @rp1_freq: "less than" RP0 power/freqency
+ * @rp1_freq: "less than" RP0 power/frequency
* @min_freq: aka RPn, minimum frequency
*
* Freq caps exposed by HW, values are in "hw units" and intel_gpu_freq()
@@ -57,7 +57,7 @@ struct intel_rps {
/*
* work, interrupts_enabled and pm_iir are protected by
- * dev_priv->irq_lock
+ * gt->irq_lock
*/
struct timer_list timer;
struct work_struct work;
@@ -90,7 +90,7 @@ struct intel_rps {
u8 boost_freq; /* Frequency to request when wait boosting */
u8 idle_freq; /* Frequency to request when we are idle */
u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */
- u8 rp1_freq; /* "less than" RP0 power/freqency */
+ u8 rp1_freq; /* "less than" RP0 power/frequency */
u8 rp0_freq; /* Non-overclocked max frequency. */
u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */
diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c b/drivers/gpu/drm/i915/gt/intel_sa_media.c
index e8f3d18c12b8..fb260d1ec360 100644
--- a/drivers/gpu/drm/i915/gt/intel_sa_media.c
+++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c
@@ -4,6 +4,7 @@
*/
#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
#include "i915_drv.h"
#include "gt/intel_gt.h"
@@ -27,9 +28,9 @@ int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr,
/*
* Standalone media shares the general MMIO space with the primary
- * GT. We'll re-use the primary GT's mapping.
+ * GT. We'll reuse the primary GT's mapping.
*/
- uncore->regs = i915->uncore.regs;
+ uncore->regs = intel_uncore_regs(&i915->uncore);
if (drm_WARN_ON(&i915->drm, uncore->regs == NULL))
return -EIO;
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 6c6198a257ac..656a499b2706 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -5,7 +5,10 @@
#include <linux/string_helpers.h>
+#include <drm/drm_print.h>
+
#include "i915_drv.h"
+#include "i915_perf_types.h"
#include "intel_engine_regs.h"
#include "intel_gt_regs.h"
#include "intel_sseu.h"
@@ -213,13 +216,8 @@ static void xehp_sseu_info_init(struct intel_gt *gt)
int num_compute_regs, num_geometry_regs;
int eu;
- if (IS_PONTEVECCHIO(gt->i915)) {
- num_geometry_regs = 0;
- num_compute_regs = 2;
- } else {
- num_geometry_regs = 1;
- num_compute_regs = 1;
- }
+ num_geometry_regs = 1;
+ num_compute_regs = 1;
/*
* The concept of slice has been removed in Xe_HP. To be compatible
@@ -240,7 +238,8 @@ static void xehp_sseu_info_init(struct intel_gt *gt)
GEN12_GT_COMPUTE_DSS_ENABLE,
XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);
- eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
+ eu_en_fuse = REG_FIELD_GET(XEHP_EU_ENA_MASK,
+ intel_uncore_read(uncore, XEHP_EU_ENABLE));
if (HAS_ONE_EU_PER_FUSE_BIT(gt->i915))
eu_en = eu_en_fuse;
@@ -273,15 +272,15 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
* Although gen12 architecture supported multiple slices, TGL, RKL,
* DG1, and ADL only had a single slice.
*/
- s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
- GEN11_GT_S_ENA_MASK;
+ s_en = REG_FIELD_GET(GEN11_GT_S_ENA_MASK,
+ intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE));
drm_WARN_ON(&gt->i915->drm, s_en != 0x1);
g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
/* one bit per pair of EUs */
- eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
- GEN11_EU_DIS_MASK);
+ eu_en_fuse = ~REG_FIELD_GET(GEN11_EU_DIS_MASK,
+ intel_uncore_read(uncore, GEN11_EU_DISABLE));
for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
if (eu_en_fuse & BIT(eu))
@@ -301,7 +300,7 @@ static void gen11_sseu_info_init(struct intel_gt *gt)
u8 eu_en;
u8 s_en;
- if (IS_JSL_EHL(gt->i915))
+ if (IS_JASPERLAKE(gt->i915) || IS_ELKHARTLAKE(gt->i915))
intel_sseu_set_info(sseu, 1, 4, 8);
else
intel_sseu_set_info(sseu, 1, 8, 8);
@@ -310,14 +309,14 @@ static void gen11_sseu_info_init(struct intel_gt *gt)
* Although gen11 architecture supported multiple slices, ICL and
* EHL/JSL only had a single slice in practice.
*/
- s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
- GEN11_GT_S_ENA_MASK;
+ s_en = REG_FIELD_GET(GEN11_GT_S_ENA_MASK,
+ intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE));
drm_WARN_ON(&gt->i915->drm, s_en != 0x1);
ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);
- eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
- GEN11_EU_DIS_MASK);
+ eu_en = ~REG_FIELD_GET(GEN11_EU_DIS_MASK,
+ intel_uncore_read(uncore, GEN11_EU_DISABLE));
gen11_compute_sseu_info(sseu, ss_en, eu_en);
@@ -339,10 +338,8 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
if (!(fuse & CHV_FGT_DISABLE_SS0)) {
u8 disabled_mask =
- ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
- CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
- (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
- CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
+ REG_FIELD_GET(CHV_FGT_EU_DIS_SS0_R0_MASK, fuse) |
+ REG_FIELD_GET(CHV_FGT_EU_DIS_SS0_R1_MASK, fuse) << hweight32(CHV_FGT_EU_DIS_SS0_R0_MASK);
sseu->subslice_mask.hsw[0] |= BIT(0);
sseu_set_eus(sseu, 0, 0, ~disabled_mask & 0xFF);
@@ -350,10 +347,8 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
if (!(fuse & CHV_FGT_DISABLE_SS1)) {
u8 disabled_mask =
- ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
- CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
- (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
- CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
+ REG_FIELD_GET(CHV_FGT_EU_DIS_SS1_R0_MASK, fuse) |
+ REG_FIELD_GET(CHV_FGT_EU_DIS_SS1_R1_MASK, fuse) << hweight32(CHV_FGT_EU_DIS_SS1_R0_MASK);
sseu->subslice_mask.hsw[0] |= BIT(1);
sseu_set_eus(sseu, 0, 1, ~disabled_mask & 0xFF);
@@ -389,7 +384,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
int s, ss;
fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
- sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
+ sseu->slice_mask = REG_FIELD_GET(GEN8_F2_S_ENA_MASK, fuse2);
/* BXT has a single slice and at most 3 subslices. */
intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
@@ -400,8 +395,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
* to each of the enabled slices.
*/
subslice_mask = (1 << sseu->max_subslices) - 1;
- subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
- GEN9_F2_SS_DIS_SHIFT);
+ subslice_mask &= ~REG_FIELD_GET(GEN9_F2_SS_DIS_MASK, fuse2);
/*
* Iterate through enabled slices and subslices to
@@ -494,7 +488,7 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
u32 eu_disable0, eu_disable1, eu_disable2;
fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
- sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
+ sseu->slice_mask = REG_FIELD_GET(GEN8_F2_S_ENA_MASK, fuse2);
intel_sseu_set_info(sseu, 3, 3, 8);
/*
@@ -502,18 +496,18 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
* to each of the enabled slices.
*/
subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
- subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
- GEN8_F2_SS_DIS_SHIFT);
+ subslice_mask &= ~REG_FIELD_GET(GEN8_F2_SS_DIS_MASK, fuse2);
eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
- eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
- eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
- ((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
- (32 - GEN8_EU_DIS0_S1_SHIFT));
- eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
- ((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
- (32 - GEN8_EU_DIS1_S2_SHIFT));
+ eu_disable[0] =
+ REG_FIELD_GET(GEN8_EU_DIS0_S0_MASK, eu_disable0);
+ eu_disable[1] =
+ REG_FIELD_GET(GEN8_EU_DIS0_S1_MASK, eu_disable0) |
+ REG_FIELD_GET(GEN8_EU_DIS1_S1_MASK, eu_disable1) << hweight32(GEN8_EU_DIS0_S1_MASK);
+ eu_disable[2] =
+ REG_FIELD_GET(GEN8_EU_DIS1_S2_MASK, eu_disable1) |
+ REG_FIELD_GET(GEN8_EU_DIS2_S2_MASK, eu_disable2) << hweight32(GEN8_EU_DIS1_S2_MASK);
/*
* Iterate through enabled slices and subslices to
@@ -641,7 +635,7 @@ void intel_sseu_info_init(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
xehp_sseu_info_init(gt);
else if (GRAPHICS_VER(i915) >= 12)
gen12_sseu_info_init(gt);
@@ -677,7 +671,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
* If i915/perf is active, we want a stable powergating configuration
* on the system. Use the configuration pinned by i915/perf.
*/
- if (gt->perf.exclusive_stream)
+ if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)
req_sseu = &gt->perf.sseu;
slices = hweight8(req_sseu->slice_mask);
@@ -691,7 +685,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
* According to documentation software must consider the configuration
* as 2x4x8 and hardware will translate this to 1x8x8.
*
- * Furthemore, even though SScount is three bits, maximum documented
+ * Furthermore, even though SScount is three bits, maximum documented
* value for it is four. From this some rules/restrictions follow:
*
* 1.
@@ -848,13 +842,12 @@ void intel_sseu_print_topology(struct drm_i915_private *i915,
const struct sseu_dev_info *sseu,
struct drm_printer *p)
{
- if (sseu->max_slices == 0) {
+ if (sseu->max_slices == 0)
drm_printf(p, "Unavailable\n");
- } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
sseu_print_xehp_topology(sseu, p);
- } else {
+ else
sseu_print_hsw_topology(sseu, p);
- }
}
void intel_sseu_print_ss_info(const char *type,
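[Editor's note] A sketch of the REG_FIELD_GET()/hweight32() idiom the
hunks above convert to: each partial field is extracted and
right-justified, and the low field's bit count supplies the shift for
the high part, eliminating the hand-maintained *_SHIFT constants. The
mask names here are invented:

#include <linux/bitops.h>	/* hweight32(), GENMASK() */

#define EX_EU_DIS_LO_MASK	GENMASK(11, 8)	/* hypothetical low half  */
#define EX_EU_DIS_HI_MASK	GENMASK(19, 16)	/* hypothetical high half */

static u8 merge_eu_disable_example(u32 lo_reg, u32 hi_reg)
{
	return REG_FIELD_GET(EX_EU_DIS_LO_MASK, lo_reg) |
	       REG_FIELD_GET(EX_EU_DIS_HI_MASK, hi_reg) <<
	       hweight32(EX_EU_DIS_LO_MASK);
}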
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index aa87d3832d60..d7e8c374f153 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -27,7 +27,7 @@ struct drm_printer;
* is only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the
* I915_MAX_SS_FUSE_BITS value below).
*/
-#define GEN_MAX_SS_PER_HSW_SLICE 6
+#define GEN_MAX_SS_PER_HSW_SLICE 8
/*
* Maximum number of 32-bit registers used by hardware to express the
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
index c2ee5e1826b5..1dc8205bc64d 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
@@ -7,6 +7,8 @@
#include <linux/bitmap.h>
#include <linux/string_helpers.h>
+#include <drm/drm_print.h>
+
#include "i915_drv.h"
#include "intel_gt_debugfs.h"
#include "intel_gt_regs.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index b9640212d659..843f72829a24 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -4,6 +4,7 @@
*/
#include <drm/drm_cache.h>
+#include <drm/drm_print.h>
#include "gem/i915_gem_internal.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index 57308c4d664a..85b43f9b9d95 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -9,6 +9,7 @@
#include <linux/lockdep.h>
#include "i915_active.h"
+#include "i915_list_util.h"
#include "i915_syncmap.h"
#include "intel_timeline_types.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
new file mode 100644
index 000000000000..2487768bc230
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_perf_oa_regs.h"
+#include "intel_engine_pm.h"
+#include "intel_gt.h"
+#include "intel_gt_mcr.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_print.h"
+#include "intel_gt_regs.h"
+#include "intel_tlb.h"
+#include "uc/intel_guc.h"
+
+/*
+ * HW architecture suggests a typical invalidation time of 40us,
+ * with pessimistic cases up to 100us and a recommendation to
+ * cap at 1ms. We go a bit higher just in case.
+ */
+#define TLB_INVAL_TIMEOUT_US 100
+#define TLB_INVAL_TIMEOUT_MS 4
+
+/*
+ * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
+ * but are now considered MCR registers. Since they exist within a GAM range,
+ * the primary instance of the register rolls up the status from each unit.
+ */
+static int wait_for_invalidate(struct intel_engine_cs *engine)
+{
+ if (engine->tlb_inv.mcr)
+ return intel_gt_mcr_wait_for_reg(engine->gt,
+ engine->tlb_inv.reg.mcr_reg,
+ engine->tlb_inv.done,
+ 0,
+ TLB_INVAL_TIMEOUT_US,
+ TLB_INVAL_TIMEOUT_MS);
+ else
+ return __intel_wait_for_register_fw(engine->gt->uncore,
+ engine->tlb_inv.reg.reg,
+ engine->tlb_inv.done,
+ 0,
+ TLB_INVAL_TIMEOUT_US,
+ TLB_INVAL_TIMEOUT_MS,
+ NULL);
+}
+
+static void mmio_invalidate_full(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_engine_cs *engine;
+ intel_engine_mask_t awake, tmp;
+ enum intel_engine_id id;
+ unsigned long flags;
+
+ if (GRAPHICS_VER(i915) < 8)
+ return;
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ intel_gt_mcr_lock(gt, &flags);
+ spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
+
+ awake = 0;
+ for_each_engine(engine, gt, id) {
+ if (!intel_engine_pm_is_awake(engine))
+ continue;
+
+ if (engine->tlb_inv.mcr)
+ intel_gt_mcr_multicast_write_fw(gt,
+ engine->tlb_inv.reg.mcr_reg,
+ engine->tlb_inv.request);
+ else
+ intel_uncore_write_fw(uncore,
+ engine->tlb_inv.reg.reg,
+ engine->tlb_inv.request);
+
+ awake |= engine->mask;
+ }
+
+ GT_TRACE(gt, "invalidated engines %08x\n", awake);
+
+ /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
+ if (awake &&
+ (IS_TIGERLAKE(i915) ||
+ IS_DG1(i915) ||
+ IS_ROCKETLAKE(i915) ||
+ IS_ALDERLAKE_S(i915) ||
+ IS_ALDERLAKE_P(i915)))
+ intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
+
+ spin_unlock(&uncore->lock);
+ intel_gt_mcr_unlock(gt, flags);
+
+ for_each_engine_masked(engine, gt, awake, tmp) {
+ if (wait_for_invalidate(engine))
+ gt_err_ratelimited(gt,
+ "%s TLB invalidation did not complete in %ums!\n",
+ engine->name, TLB_INVAL_TIMEOUT_MS);
+ }
+
+ /*
+ * Use delayed put since a) we mostly expect a flurry of TLB
+ * invalidations so it is good to avoid paying the forcewake cost and
+ * b) it works around a bug in Icelake which cannot cope with too rapid
+ * transitions.
+ */
+ intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
+}
+
+static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
+{
+ u32 cur = intel_gt_tlb_seqno(gt);
+
+ /* Only skip if a *full* TLB invalidate barrier has passed */
+ return (s32)(cur - ALIGN(seqno, 2)) > 0;
+}
+
+void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
+{
+ intel_wakeref_t wakeref;
+
+ if (is_mock_gt(gt))
+ return;
+
+ if (intel_gt_is_wedged(gt))
+ return;
+
+ if (tlb_seqno_passed(gt, seqno))
+ return;
+
+ with_intel_gt_pm_if_awake(gt, wakeref) {
+ struct intel_guc *guc = gt_to_guc(gt);
+
+ mutex_lock(&gt->tlb.invalidate_lock);
+ if (tlb_seqno_passed(gt, seqno))
+ goto unlock;
+
+ if (HAS_GUC_TLB_INVALIDATION(gt->i915)) {
+ /*
+ * Only perform GuC TLB invalidation if GuC is ready.
+ * The only time GuC could not be ready is on GT reset,
+			 * which would clobber all the TLBs anyway, making
+ * any TLB invalidation path here unnecessary.
+ */
+ if (intel_guc_is_ready(guc))
+ intel_guc_invalidate_tlb_engines(guc);
+ } else {
+ mmio_invalidate_full(gt);
+ }
+
+ write_seqcount_invalidate(&gt->tlb.seqno);
+unlock:
+ mutex_unlock(&gt->tlb.invalidate_lock);
+ }
+}
+
+void intel_gt_init_tlb(struct intel_gt *gt)
+{
+ mutex_init(&gt->tlb.invalidate_lock);
+ seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
+}
+
+void intel_gt_fini_tlb(struct intel_gt *gt)
+{
+ mutex_destroy(&gt->tlb.invalidate_lock);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_tlb.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.h b/drivers/gpu/drm/i915/gt/intel_tlb.h
new file mode 100644
index 000000000000..ec7612216248
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef INTEL_TLB_H
+#define INTEL_TLB_H
+
+#include <linux/seqlock.h>
+#include <linux/types.h>
+
+#include "intel_gt_types.h"
+
+void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno);
+
+void intel_gt_init_tlb(struct intel_gt *gt);
+void intel_gt_fini_tlb(struct intel_gt *gt);
+
+static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
+{
+ return raw_read_seqcount(&gt->tlb.seqno);
+}
+
+static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
+{
+ return intel_gt_tlb_seqno(gt) | 1;
+}
+
+#endif /* INTEL_TLB_H */
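[Editor's note] A minimal sketch (not part of the patch) of how an
unbind path is expected to use the helpers above: the "| 1" makes the
stashed seqno odd, and tlb_seqno_passed() rounds it up with
ALIGN(seqno, 2), so only a full invalidation completing *after* the
stash can satisfy it:

static void example_unbind_flow(struct intel_gt *gt)
{
	u32 seqno;

	/* Odd: only a later, complete invalidation may satisfy us. */
	seqno = intel_gt_next_invalidate_tlb_full(gt);

	/* ... zap the PTEs and release the backing pages here ... */

	/* No-op if another thread already invalidated past seqno. */
	intel_gt_invalidate_tlb_full(gt, seqno);
}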
diff --git a/drivers/gpu/drm/i915/gt/intel_wopcm.c b/drivers/gpu/drm/i915/gt/intel_wopcm.c
index 7ebbcc191c2d..1b26ff6488b3 100644
--- a/drivers/gpu/drm/i915/gt/intel_wopcm.c
+++ b/drivers/gpu/drm/i915/gt/intel_wopcm.c
@@ -3,6 +3,8 @@
* Copyright © 2017-2019 Intel Corporation
*/
+#include <drm/drm_print.h>
+
#include "intel_wopcm.h"
#include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_wopcm.h b/drivers/gpu/drm/i915/gt/intel_wopcm.h
index 17d6aa86008a..d2038b6de5e7 100644
--- a/drivers/gpu/drm/i915/gt/intel_wopcm.h
+++ b/drivers/gpu/drm/i915/gt/intel_wopcm.h
@@ -1,6 +1,5 @@
+/* SPDX-License-Identifier: MIT */
/*
- * SPDX-License-Identifier: MIT
- *
* Copyright © 2017-2018 Intel Corporation
*/
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index c1b52a741698..ece88c612e27 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -5,16 +5,21 @@
#include "i915_drv.h"
#include "i915_reg.h"
+#include "i915_mmio_range.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
#include "intel_gt_mcr.h"
+#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
+#include "display/intel_fbc_regs.h"
+
/**
* DOC: Hardware workarounds
*
@@ -50,7 +55,8 @@
* registers belonging to BCS, VCS or VECS should be implemented in
* xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
 * engine's MMIO range but that are part of the common RCS/CCS reset domain
- * should be implemented in general_render_compute_wa_init().
+ * should be implemented in general_render_compute_wa_init(). Settings
+ * related to CCS load balancing should be added in ccs_engine_wa_mode().
*
* - GT workarounds: the list of these WAs is applied whenever these registers
* revert to their default values: on GPU reset, suspend/resume [1]_, etc.
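[Editor's note] As a concrete illustration of the placement rules in the
DOC comment above, a context workaround for a render-only register would
be registered along these lines; the function, register and bit names
are made up:

static void example_ctx_workarounds_init(struct intel_engine_cs *engine,
					 struct i915_wa_list *wal)
{
	/* Wa_XXXXXXXX:<platform> -- hypothetical */
	wa_masked_en(wal, EXAMPLE_CHICKEN_REG, EXAMPLE_CHICKEN_BIT);
}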
@@ -106,9 +112,8 @@ static void wa_init_finish(struct i915_wa_list *wal)
{
/* Trim unused entries. */
if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
- struct i915_wa *list = kmemdup(wal->list,
- wal->count * sizeof(*list),
- GFP_KERNEL);
+ struct i915_wa *list = kmemdup_array(wal->list, wal->count,
+ sizeof(*list), GFP_KERNEL);
if (list) {
kfree(wal->list);
@@ -119,8 +124,24 @@ static void wa_init_finish(struct i915_wa_list *wal)
if (!wal->count)
return;
- drm_dbg(&wal->gt->i915->drm, "Initialized %u %s workarounds on %s\n",
- wal->wa_count, wal->name, wal->engine_name);
+ gt_dbg(wal->gt, "Initialized %u %s workarounds on %s\n",
+ wal->wa_count, wal->name, wal->engine_name);
+}
+
+static enum forcewake_domains
+wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
+{
+ enum forcewake_domains fw = 0;
+ struct i915_wa *wa;
+ unsigned int i;
+
+ for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
+ fw |= intel_uncore_forcewake_for_reg(uncore,
+ wa->reg,
+ FW_REG_READ |
+ FW_REG_WRITE);
+
+ return fw;
}
static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
@@ -136,7 +157,7 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
struct i915_wa *list;
- list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
+ list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*list),
GFP_KERNEL);
if (!list) {
drm_err(&i915->drm, "No space for workaround init!\n");
@@ -225,13 +246,13 @@ static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
static void
wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{
- wa_add(wal, reg, clear, set, clear, false);
+ wa_add(wal, reg, clear, set, clear | set, false);
}
static void
wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
{
- wa_mcr_add(wal, reg, clear, set, clear, false);
+ wa_mcr_add(wal, reg, clear, set, clear | set, false);
}
static void
@@ -241,12 +262,6 @@ wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
}
static void
-wa_mcr_write(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
-{
- wa_mcr_write_clr_set(wal, reg, ~0, set);
-}
-
-static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
wa_write_clr_set(wal, reg, set, set);
@@ -323,12 +338,26 @@ static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
+
+ /* WaDisable_RenderCache_OperationalFlush:snb */
+ wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
}
static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
+ /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
+ wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
+
+ /*
+ * BSpec says this must be set, even though
+ * WaDisable4x2SubspanOptimization:ivb,hsw
+	 * isn't listed as applying to VLV.
+ */
+ wa_masked_en(wal,
+ CACHE_MODE_1,
+ PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
}
static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -404,7 +433,7 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
/* WaForceContextSaveRestoreNonCoherent:bdw */
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
/* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
- (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
+ (INTEL_INFO(i915)->gt == 3 ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}
static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -584,7 +613,7 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
gen9_ctx_workarounds_init(engine, wal);
/* WaToEnableHwFixForPushConstHWBug:kbl */
- if (IS_KBL_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
+ if (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
@@ -620,11 +649,10 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
+ struct drm_i915_private *i915 = engine->i915;
+
/* Wa_1406697149 (WaDisableBankHangMode:icl) */
- wa_write(wal,
- GEN8_L3CNTLREG,
- intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
- GEN8_ERRDETBCTRL);
+ wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL);
/* WaForceEnableNonCoherent:icl
* This is not the same workaround as in early Gen9 platforms, where
@@ -653,11 +681,20 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
/* Wa_1604278689:icl,ehl */
wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
- 0, /* write-only register; skip validation */
+ 0,
0xFFFFFFFF);
/* Wa_1406306137:icl,ehl */
wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
+
+ if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) {
+ /*
+ * Disable Repacking for Compression (masked R/W access)
+ * before rendering compressed surfaces for display.
+ */
+ wa_masked_en(wal, CACHE_MODE_0_GEN7,
+ DISABLE_REPACKING_FOR_COMPRESSION);
+ }
}
/*
@@ -670,38 +707,8 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
wa_mcr_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
- wa_mcr_add(wal,
- XEHP_FF_MODE2,
- FF_MODE2_TDS_TIMER_MASK,
- FF_MODE2_TDS_TIMER_128,
- 0, false);
-}
-
-/*
- * These settings aren't actually workarounds, but general tuning settings that
- * need to be programmed on several platforms.
- */
-static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
- struct i915_wa_list *wal)
-{
- /*
- * Although some platforms refer to it as Wa_1604555607, we need to
- * program it even on those that don't explicitly list that
- * workaround.
- *
- * Note that the programming of this register is further modified
- * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
- * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
- * value when read. The default value for this register is zero for all
- * fields and there are no bit masks. So instead of doing a RMW we
- * should just write TDS timer value. For the same reason read
- * verification is ignored.
- */
- wa_add(wal,
- GEN12_FF_MODE2,
- FF_MODE2_TDS_TIMER_MASK,
- FF_MODE2_TDS_TIMER_128,
- 0, false);
+ wa_mcr_write_clr_set(wal, XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
+ FF_MODE2_TDS_TIMER_128);
}
static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -709,19 +716,18 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
{
struct drm_i915_private *i915 = engine->i915;
- gen12_ctx_gt_tuning_init(engine, wal);
-
/*
- * Wa_1409142259:tgl,dg1,adl-p
+ * Wa_1409142259:tgl,dg1,adl-p,adl-n
* Wa_1409347922:tgl,dg1,adl-p
* Wa_1409252684:tgl,dg1,adl-p
* Wa_1409217633:tgl,dg1,adl-p
* Wa_1409207793:tgl,dg1,adl-p
- * Wa_1409178076:tgl,dg1,adl-p
- * Wa_1408979724:tgl,dg1,adl-p
- * Wa_14010443199:tgl,rkl,dg1,adl-p
- * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p
- * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p
+ * Wa_1409178076:tgl,dg1,adl-p,adl-n
+ * Wa_1408979724:tgl,dg1,adl-p,adl-n
+ * Wa_14010443199:tgl,rkl,dg1,adl-p,adl-n
+ * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p,adl-n
+ * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p,adl-n
+ * Wa_22010465259:tgl,rkl,dg1,adl-s,adl-p,adl-n
*/
wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
@@ -732,20 +738,42 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
/*
- * Wa_16011163337
+ * Wa_16011163337 - GS_TIMER
*
- * Like in gen12_ctx_gt_tuning_init(), read verification is ignored due
- * to Wa_1608008084.
+ * TDS_TIMER: Although some platforms refer to it as Wa_1604555607, we
+ * need to program it even on those that don't explicitly list that
+ * workaround.
+ *
+ * Note that the programming of GEN12_FF_MODE2 is further modified
+ * according to the FF_MODE2 guidance given by Wa_1608008084.
+ * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
+ * value when read from the CPU.
+ *
+ * The default value for this register is zero for all fields.
+ * So instead of doing a RMW we should just write the desired values
+ * for TDS and GS timers. Note that since the readback can't be trusted,
+ * the clear mask is just set to ~0 to make sure other bits are not
+ * inadvertently set. For the same reason read verification is ignored.
*/
wa_add(wal,
GEN12_FF_MODE2,
- FF_MODE2_GS_TIMER_MASK,
- FF_MODE2_GS_TIMER_224,
+ ~0,
+ FF_MODE2_TDS_TIMER_128 | FF_MODE2_GS_TIMER_224,
0, false);
- if (!IS_DG1(i915))
+ if (!IS_DG1(i915)) {
/* Wa_1806527549 */
wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE);
+
+ /* Wa_1606376872 */
+ wa_masked_en(wal, COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC);
+ }
+
+ /*
+ * This bit must be set to enable performance optimization for fast
+ * clears.
+ */
+ wa_mcr_write_or(wal, GEN8_WM_CHICKEN2, WAIT_ON_DEPTH_STALL_DONE_DISABLE);
}
static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -767,54 +795,49 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
{
dg2_ctx_gt_tuning_init(engine, wal);
- /* Wa_16011186671:dg2_g11 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
- wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
- wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
- }
-
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
- /* Wa_14010469329:dg2_g10 */
- wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
- XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
-
- /*
- * Wa_22010465075:dg2_g10
- * Wa_22010613112:dg2_g10
- * Wa_14010698770:dg2_g10
- */
- wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
- GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
- }
-
/* Wa_16013271637:dg2 */
wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
/* Wa_14014947963:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
- wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
+ wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
/* Wa_18018764978:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) ||
- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
- wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
-
- /* Wa_15010599737:dg2 */
- wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN);
+ wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
/* Wa_18019271663:dg2 */
wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
+
+ /* Wa_14019877138:dg2 */
+ wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
}
-static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine,
+static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
+ dg2_ctx_gt_tuning_init(engine, wal);
+
+ /*
+ * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in
+ * gen12_emit_indirect_ctx_rcs() rather than here on some early
+ * steppings.
+ */
+ if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)))
+ wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false);
+}
+
+static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ struct intel_gt *gt = engine->gt;
+
+ xelpg_ctx_gt_tuning_init(engine, wal);
+
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
/* Wa_14014947963 */
wa_masked_field_set(wal, VF_PREEMPTION,
PREEMPTION_VERTEX_COUNT, 0x4000);
@@ -832,6 +855,9 @@ static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine,
/* Wa_18019271663 */
wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
+
+ /* Wa_14019877138 */
+ wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
}
static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
@@ -920,14 +946,10 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
if (engine->class != RENDER_CLASS)
goto done;
- if (IS_METEORLAKE(i915))
- mtl_ctx_workarounds_init(engine, wal);
- else if (IS_PONTEVECCHIO(i915))
- ; /* noop; none at this time */
+ if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
+ xelpg_ctx_workarounds_init(engine, wal);
else if (IS_DG2(i915))
dg2_ctx_workarounds_init(engine, wal);
- else if (IS_XEHPSDV(i915))
- ; /* noop; none at this time */
else if (IS_DG1(i915))
dg1_ctx_workarounds_init(engine, wal);
else if (GRAPHICS_VER(i915) == 12)
@@ -969,6 +991,9 @@ void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
+ struct intel_uncore *uncore = rq->engine->uncore;
+ enum forcewake_domains fw;
+ unsigned long flags;
struct i915_wa *wa;
unsigned int i;
u32 *cs;
@@ -981,17 +1006,54 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
if (ret)
return ret;
- cs = intel_ring_begin(rq, (wal->count * 2 + 2));
+ if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
+ IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS)
+ cs = intel_ring_begin(rq, (wal->count * 2 + 6));
+ else
+ cs = intel_ring_begin(rq, (wal->count * 2 + 2));
+
if (IS_ERR(cs))
return PTR_ERR(cs);
+ fw = wal_get_fw_for_rmw(uncore, wal);
+
+ intel_gt_mcr_lock(wal->gt, &flags);
+ spin_lock(&uncore->lock);
+ intel_uncore_forcewake_get__locked(uncore, fw);
+
*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
+ u32 val;
+
+ /* Skip reading the register if it's not really needed */
+ if (wa->masked_reg || (wa->clr | wa->set) == U32_MAX) {
+ val = wa->set;
+ } else {
+ val = wa->is_mcr ?
+ intel_gt_mcr_read_any_fw(wal->gt, wa->mcr_reg) :
+ intel_uncore_read_fw(uncore, wa->reg);
+ val &= ~wa->clr;
+ val |= wa->set;
+ }
+
*cs++ = i915_mmio_reg_offset(wa->reg);
- *cs++ = wa->set;
+ *cs++ = val;
}
*cs++ = MI_NOOP;
+ /* Wa_14019789679 */
+ if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
+ IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS) {
+ *cs++ = CMD_3DSTATE_MESH_CONTROL;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = MI_NOOP;
+ }
+
+ intel_uncore_forcewake_put__locked(uncore, fw);
+ spin_unlock(&uncore->lock);
+ intel_gt_mcr_unlock(wal->gt, flags);
+
intel_ring_advance(rq, cs);
ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
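The ring-space math above follows directly from the command layout: MI_LOAD_REGISTER_IMM(n) is one header dword plus n (offset, value) pairs, an MI_NOOP pads the stream, and the Wa_14019789679 trailer adds four more dwords on the affected render engines. A sizing sketch (the helper name and shape are illustrative, not from the driver):

static u32 ctx_wa_ring_dwords(unsigned int count, bool mesh_trailer)
{
	/* LRI header + count (offset, value) pairs + MI_NOOP pad */
	u32 dwords = 1 + 2 * count + 1;

	/* CMD_3DSTATE_MESH_CONTROL + two zero dwords + MI_NOOP */
	if (mesh_trailer)
		dwords += 4;

	return dwords;	/* matches wal->count * 2 + 2 (or + 6) above */
}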
@@ -1155,7 +1217,7 @@ skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
/* WaInPlaceDecompressionHang:skl */
- if (IS_SKL_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
+ if (IS_SKYLAKE(gt->i915) && IS_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
wa_write_or(wal,
GEN9_GAMT_ECO_REG_RW_IA,
GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
@@ -1167,7 +1229,7 @@ kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
gen9_gt_workarounds_init(gt, wal);
/* WaDisableDynamicCreditSharing:kbl */
- if (IS_KBL_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
+ if (IS_KABYLAKE(gt->i915) && IS_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
wa_write_or(wal,
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
@@ -1219,7 +1281,8 @@ static void __set_mcr_steering(struct i915_wa_list *wal,
static void debug_dump_steering(struct intel_gt *gt)
{
- struct drm_printer p = drm_debug_printer("MCR Steering:");
+ struct drm_printer p = drm_dbg_printer(&gt->i915->drm, DRM_UT_DRIVER,
+ "MCR Steering:");
if (drm_debug_enabled(DRM_UT_DRIVER))
intel_gt_mcr_report_steering(&p, gt, false);
@@ -1288,7 +1351,7 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
* We'll do our default/implicit steering based on GSLICE (in the
* sliceid field) and DSS (in the subsliceid field). If we can
* find overlap between the valid MSLICE and/or LNCF values with
- * a suitable GSLICE, then we can just re-use the default value and
+ * a suitable GSLICE, then we can just reuse the default value and
* skip any explicit steering at runtime.
*
* We only need to look for overlap between GSLICE/MSLICE/LNCF to find
@@ -1327,9 +1390,6 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
gt->steering_table[MSLICE] = NULL;
}
- if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0))
- gt->steering_table[GAM] = NULL;
-
slice = __ffs(slice_mask);
subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) %
GEN_DSS_PER_GSLICE;
@@ -1357,20 +1417,6 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
}
static void
-pvc_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
-{
- unsigned int dss;
-
- /*
- * Setup implicit steering for COMPUTE and DSS ranges to the first
- * non-fused-off DSS. All other types of MCR registers will be
- * explicitly steered.
- */
- dss = intel_sseu_find_first_xehp_dss(&gt->info.sseu, 0, 0);
- __add_mcr_wa(gt, wal, dss / GEN_DSS_PER_CSLICE, dss % GEN_DSS_PER_CSLICE);
-}
-
-static void
icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = gt->i915;
@@ -1405,6 +1451,13 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_L3_COH_PIPE);
+ /*
+ * Wa_1408615072:icl,ehl (vsunit)
+ * Wa_1407596294:icl,ehl (hsunit)
+ */
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+ VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
+
/* Wa_1407352427:icl,ehl */
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
PSDUNIT_CLKGATE_DIS);
@@ -1416,7 +1469,8 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
/* Wa_1607087056:icl,ehl,jsl */
if (IS_ICELAKE(i915) ||
- IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
+ ((IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) &&
+ IS_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)))
wa_write_or(wal,
GEN11_SLICE_UNIT_LEVEL_CLKGATE,
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
@@ -1460,148 +1514,42 @@ gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
/* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
-}
-
-static void
-tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
-{
- struct drm_i915_private *i915 = gt->i915;
-
- gen12_gt_workarounds_init(gt, wal);
-
- /* Wa_1409420604:tgl */
- if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
- wa_mcr_write_or(wal,
- SUBSLICE_UNIT_LEVEL_CLKGATE2,
- CPSSUNIT_CLKGATE_DIS);
-
- /* Wa_1607087056:tgl, also known as BUG:1409180338 */
- if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
- wa_write_or(wal,
- GEN11_SLICE_UNIT_LEVEL_CLKGATE,
- L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
- /* Wa_1408615072:tgl[a0] */
- if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
- VSUNIT_CLKGATE_DIS_TGL);
+ /*
+ * Wa_14015795083
+ *
+ * Firmware on some gen12 platforms locks the MISCCPCTL register,
+ * preventing i915 from modifying it for this workaround. Skip the
+ * readback verification for this workaround on debug builds; if the
+ * workaround doesn't stick due to firmware behavior, it's not an error
+ * that we want CI to flag.
+ */
+ wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE,
+ 0, 0, false);
}
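The bare wa_add() above deserves a gloss, since most entries in this file go through wrappers. An annotated reading of the arguments — the parameter meanings are inferred from usage elsewhere in this file, so treat them as assumptions:

/*
 *   wa_add(wal, GEN7_MISCCPCTL,
 *          GEN12_DOP_CLOCK_GATE_RENDER_ENABLE,   bits to clear
 *          0,                                    bits to set
 *          0,                                    read mask: nothing verified
 *          false);                               not a masked register
 *
 * The zero read mask is what makes the firmware-locked register
 * tolerable: wa_verify() masks its comparison with wa->read, so a
 * value of 0 can never flag a mismatch.
 */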
static void
dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = gt->i915;
-
gen12_gt_workarounds_init(gt, wal);
- /* Wa_1607087056:dg1 */
- if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
- wa_write_or(wal,
- GEN11_SLICE_UNIT_LEVEL_CLKGATE,
- L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
-
/* Wa_1409420604:dg1 */
- if (IS_DG1(i915))
- wa_mcr_write_or(wal,
- SUBSLICE_UNIT_LEVEL_CLKGATE2,
- CPSSUNIT_CLKGATE_DIS);
+ wa_mcr_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2,
+ CPSSUNIT_CLKGATE_DIS);
/* Wa_1408615072:dg1 */
/* Empirical testing shows this register is unaffected by engine reset. */
- if (IS_DG1(i915))
- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
- VSUNIT_CLKGATE_DIS_TGL);
-}
-
-static void
-xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
-{
- struct drm_i915_private *i915 = gt->i915;
-
- xehp_init_mcr(gt, wal);
-
- /* Wa_1409757795:xehpsdv */
- wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);
-
- /* Wa_16011155590:xehpsdv */
- if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
- TSGUNIT_CLKGATE_DIS);
-
- /* Wa_14011780169:xehpsdv */
- if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) {
- wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
- GAMTLBVDBOX7_CLKGATE_DIS |
- GAMTLBVDBOX6_CLKGATE_DIS |
- GAMTLBVDBOX5_CLKGATE_DIS |
- GAMTLBVDBOX4_CLKGATE_DIS |
- GAMTLBVDBOX3_CLKGATE_DIS |
- GAMTLBVDBOX2_CLKGATE_DIS |
- GAMTLBVDBOX1_CLKGATE_DIS |
- GAMTLBVDBOX0_CLKGATE_DIS |
- GAMTLBKCR_CLKGATE_DIS |
- GAMTLBGUC_CLKGATE_DIS |
- GAMTLBBLT_CLKGATE_DIS);
- wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
- GAMTLBGFXA1_CLKGATE_DIS |
- GAMTLBCOMPA0_CLKGATE_DIS |
- GAMTLBCOMPA1_CLKGATE_DIS |
- GAMTLBCOMPB0_CLKGATE_DIS |
- GAMTLBCOMPB1_CLKGATE_DIS |
- GAMTLBCOMPC0_CLKGATE_DIS |
- GAMTLBCOMPC1_CLKGATE_DIS |
- GAMTLBCOMPD0_CLKGATE_DIS |
- GAMTLBCOMPD1_CLKGATE_DIS |
- GAMTLBMERT_CLKGATE_DIS |
- GAMTLBVEBOX3_CLKGATE_DIS |
- GAMTLBVEBOX2_CLKGATE_DIS |
- GAMTLBVEBOX1_CLKGATE_DIS |
- GAMTLBVEBOX0_CLKGATE_DIS);
- }
-
- /* Wa_16012725990:xehpsdv */
- if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);
-
- /* Wa_14011060649:xehpsdv */
- wa_14011060649(gt, wal);
-
- /* Wa_14012362059:xehpsdv */
- wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
-
- /* Wa_14014368820:xehpsdv */
- wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
- INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL);
}
static void
dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- struct intel_engine_cs *engine;
- int id;
-
xehp_init_mcr(gt, wal);
/* Wa_14011060649:dg2 */
wa_14011060649(gt, wal);
- /*
- * Although there are per-engine instances of these registers,
- * they technically exist outside the engine itself and are not
- * impacted by engine resets. Furthermore, they're part of the
- * GuC blacklist so trying to treat them as engine workarounds
- * will result in GuC initialization failure and a wedged GPU.
- */
- for_each_engine(engine, gt, id) {
- if (engine->class != VIDEO_DECODE_CLASS)
- continue;
-
- /* Wa_16010515920:dg2_g10 */
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
- wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base),
- ALNUNIT_CLKGATE_DIS);
- }
-
if (IS_DG2_G10(gt->i915)) {
/* Wa_22010523718:dg2 */
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
@@ -1612,77 +1560,15 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
DSS_ROUTER_CLKGATE_DIS);
}
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
- /* Wa_14012362059:dg2 */
- wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
- }
-
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
- /* Wa_14010948348:dg2_g10 */
- wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
-
- /* Wa_14011037102:dg2_g10 */
- wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
-
- /* Wa_14011371254:dg2_g10 */
- wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
-
- /* Wa_14011431319:dg2_g10 */
- wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
- GAMTLBVDBOX7_CLKGATE_DIS |
- GAMTLBVDBOX6_CLKGATE_DIS |
- GAMTLBVDBOX5_CLKGATE_DIS |
- GAMTLBVDBOX4_CLKGATE_DIS |
- GAMTLBVDBOX3_CLKGATE_DIS |
- GAMTLBVDBOX2_CLKGATE_DIS |
- GAMTLBVDBOX1_CLKGATE_DIS |
- GAMTLBVDBOX0_CLKGATE_DIS |
- GAMTLBKCR_CLKGATE_DIS |
- GAMTLBGUC_CLKGATE_DIS |
- GAMTLBBLT_CLKGATE_DIS);
- wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
- GAMTLBGFXA1_CLKGATE_DIS |
- GAMTLBCOMPA0_CLKGATE_DIS |
- GAMTLBCOMPA1_CLKGATE_DIS |
- GAMTLBCOMPB0_CLKGATE_DIS |
- GAMTLBCOMPB1_CLKGATE_DIS |
- GAMTLBCOMPC0_CLKGATE_DIS |
- GAMTLBCOMPC1_CLKGATE_DIS |
- GAMTLBCOMPD0_CLKGATE_DIS |
- GAMTLBCOMPD1_CLKGATE_DIS |
- GAMTLBMERT_CLKGATE_DIS |
- GAMTLBVEBOX3_CLKGATE_DIS |
- GAMTLBVEBOX2_CLKGATE_DIS |
- GAMTLBVEBOX1_CLKGATE_DIS |
- GAMTLBVEBOX0_CLKGATE_DIS);
-
- /* Wa_14010569222:dg2_g10 */
- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
- GAMEDIA_CLKGATE_DIS);
-
- /* Wa_14011028019:dg2_g10 */
- wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
-
- /* Wa_14010680813:dg2_g10 */
- wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL,
- CONTROL_BLOCK_CLKGATE_DIS |
- EGRESS_BLOCK_CLKGATE_DIS |
- TAG_BLOCK_CLKGATE_DIS);
- }
-
/* Wa_14014830051:dg2 */
wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
/*
- * The following are not actually "workarounds" but rather
- * recommended tuning settings documented in the bspec's
- * performance guide section.
+ * Wa_14015795083
+ * Skip verification for possibly locked register.
*/
- wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
-
- /* Wa_14015795083 */
- wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+ wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE,
+ 0, 0, false);
/* Wa_18018781329 */
wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
@@ -1693,34 +1579,28 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
/* Wa_1509235366:dg2 */
wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
+
+ /* Wa_14010648519:dg2 */
+ wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
}
static void
-pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- pvc_init_mcr(gt, wal);
-
- /* Wa_14015795083 */
- wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
-
- /* Wa_18018781329 */
+ /* Wa_14018575942 / Wa_18018781329 */
wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
- wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
- wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
-}
-static void
-xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
-{
- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) {
+ /* Wa_22016670082 */
+ wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
+
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
/* Wa_14014830051 */
wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
- /* Wa_18018781329 */
- wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
- wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+ /* Wa_14015795083 */
+ wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
}
/*
@@ -1731,49 +1611,91 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
}
static void
+wa_16021867713(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+ struct intel_engine_cs *engine;
+ int id;
+
+ for_each_engine(engine, gt, id)
+ if (engine->class == VIDEO_DECODE_CLASS)
+ wa_write_or(wal, VDBOX_CGCTL3F1C(engine->mmio_base),
+ MFXPIPE_CLKGATE_DIS);
+}
+
+static void
xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0)) {
- /*
- * Wa_18018781329
- *
- * Note that although these registers are MCR on the primary
- * GT, the media GT's versions are regular singleton registers.
- */
- wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);
- wa_write_or(wal, XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
- wa_write_or(wal, XELPMP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
- }
+ wa_16021867713(gt, wal);
+
+ /*
+ * Wa_14018778641
+ * Wa_18018781329
+ *
+ * Note that although these registers are MCR on the primary
+ * GT, the media GT's versions are regular singleton registers.
+ */
+ wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);
+
+ /*
+ * Wa_14018575942
+ *
+ * Issue is seen on media KPI test running on VDBOX engine
+ * especially VP9 encoding WLs
+ */
+ wa_write_or(wal, XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+
+ /* Wa_22016670082 */
+ wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
debug_dump_steering(gt);
}
+/*
+ * The bspec performance guide has recommended MMIO tuning settings. These
+ * aren't truly "workarounds" but we want to program them through the
+ * workaround infrastructure to make sure they're (re)applied at the proper
+ * times.
+ *
+ * The programming in this function is for settings that persist through
+ * engine resets and also are not part of any engine's register state context.
+ * I.e., settings that only need to be re-applied in the event of a full GT
+ * reset.
+ */
+static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
+ wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+ wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
+ }
+
+ if (IS_DG2(gt->i915)) {
+ wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+ wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
+ }
+}
+
static void
gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = gt->i915;
+ gt_tuning_settings(gt, wal);
+
if (gt->type == GT_MEDIA) {
- if (MEDIA_VER(i915) >= 13)
+ if (MEDIA_VER_FULL(i915) == IP_VER(13, 0))
xelpmp_gt_workarounds_init(gt, wal);
else
- MISSING_CASE(MEDIA_VER(i915));
+ MISSING_CASE(MEDIA_VER_FULL(i915));
return;
}
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
xelpg_gt_workarounds_init(gt, wal);
- else if (IS_PONTEVECCHIO(i915))
- pvc_gt_workarounds_init(gt, wal);
else if (IS_DG2(i915))
dg2_gt_workarounds_init(gt, wal);
- else if (IS_XEHPSDV(i915))
- xehpsdv_gt_workarounds_init(gt, wal);
else if (IS_DG1(i915))
dg1_gt_workarounds_init(gt, wal);
- else if (IS_TIGERLAKE(i915))
- tgl_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) == 12)
gen12_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) == 11)
@@ -1817,31 +1739,15 @@ void intel_gt_init_workarounds(struct intel_gt *gt)
wa_init_finish(wal);
}
-static enum forcewake_domains
-wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
-{
- enum forcewake_domains fw = 0;
- struct i915_wa *wa;
- unsigned int i;
-
- for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
- fw |= intel_uncore_forcewake_for_reg(uncore,
- wa->reg,
- FW_REG_READ |
- FW_REG_WRITE);
-
- return fw;
-}
-
static bool
wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur,
const char *name, const char *from)
{
if ((cur ^ wa->set) & wa->read) {
- drm_err(&gt->i915->drm,
- "%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
- name, from, i915_mmio_reg_offset(wa->reg),
- cur, cur & wa->read, wa->set & wa->read);
+ gt_err(gt,
+ "%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
+ name, from, i915_mmio_reg_offset(wa->reg),
+ cur, cur & wa->read, wa->set & wa->read);
return false;
}
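A worked example of the predicate may help. Suppose a workaround set bit 4 and only bit 4 is meant to be checked; the values below are made up for illustration:

u32 set  = 0x10;	/* expected state of the checked bits (wa->set) */
u32 read = 0x10;	/* bits that participate in the check (wa->read) */

u32 ok   = 0xf3f0;	/* bit 4 still set:  (ok ^ set) & read == 0x00 */
u32 lost = 0xf3e0;	/* bit 4 cleared:  (lost ^ set) & read == 0x10 */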
@@ -2180,88 +2086,44 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
/* Wa_1806527549:tgl */
whitelist_reg(w, HIZ_CHICKEN);
+
+ /* Required by recommended tuning setting (not a workaround) */
+ whitelist_reg(w, GEN11_COMMON_SLICE_CHICKEN3);
+
break;
default:
break;
}
}
-static void dg1_whitelist_build(struct intel_engine_cs *engine)
-{
- struct i915_wa_list *w = &engine->whitelist;
-
- tgl_whitelist_build(engine);
-
- /* GEN:BUG:1409280441:dg1 */
- if (IS_DG1_GRAPHICS_STEP(engine->i915, STEP_A0, STEP_B0) &&
- (engine->class == RENDER_CLASS ||
- engine->class == COPY_ENGINE_CLASS))
- whitelist_reg_ext(w, RING_ID(engine->mmio_base),
- RING_FORCE_TO_NONPRIV_ACCESS_RD);
-}
-
-static void xehpsdv_whitelist_build(struct intel_engine_cs *engine)
-{
- allow_read_ctx_timestamp(engine);
-}
-
static void dg2_whitelist_build(struct intel_engine_cs *engine)
{
struct i915_wa_list *w = &engine->whitelist;
- allow_read_ctx_timestamp(engine);
-
switch (engine->class) {
case RENDER_CLASS:
- /*
- * Wa_1507100340:dg2_g10
- *
- * This covers 4 registers which are next to one another :
- * - PS_INVOCATION_COUNT
- * - PS_INVOCATION_COUNT_UDW
- * - PS_DEPTH_COUNT
- * - PS_DEPTH_COUNT_UDW
- */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
- whitelist_reg_ext(w, PS_INVOCATION_COUNT,
- RING_FORCE_TO_NONPRIV_ACCESS_RD |
- RING_FORCE_TO_NONPRIV_RANGE_4);
-
- break;
- case COMPUTE_CLASS:
- /* Wa_16011157294:dg2_g10 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
- whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
+ /* Required by recommended tuning setting (not a workaround) */
+ whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
+ whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
break;
default:
break;
}
}
-static void blacklist_trtt(struct intel_engine_cs *engine)
+static void xelpg_whitelist_build(struct intel_engine_cs *engine)
{
struct i915_wa_list *w = &engine->whitelist;
- /*
- * Prevent read/write access to [0x4400, 0x4600) which covers
- * the TRTT range across all engines. Note that normally userspace
- * cannot access the other engines' trtt control, but for simplicity
- * we cover the entire range on each engine.
- */
- whitelist_reg_ext(w, _MMIO(0x4400),
- RING_FORCE_TO_NONPRIV_DENY |
- RING_FORCE_TO_NONPRIV_RANGE_64);
- whitelist_reg_ext(w, _MMIO(0x4500),
- RING_FORCE_TO_NONPRIV_DENY |
- RING_FORCE_TO_NONPRIV_RANGE_64);
-}
-
-static void pvc_whitelist_build(struct intel_engine_cs *engine)
-{
- allow_read_ctx_timestamp(engine);
-
- /* Wa_16014440446:pvc */
- blacklist_trtt(engine);
+ switch (engine->class) {
+ case RENDER_CLASS:
+ /* Required by recommended tuning setting (not a workaround) */
+ whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
+ whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
+ break;
+ default:
+ break;
+ }
}
void intel_engine_init_whitelist(struct intel_engine_cs *engine)
@@ -2271,16 +2133,12 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
wa_init_start(w, engine->gt, "whitelist", engine->name);
- if (IS_METEORLAKE(i915))
- ; /* noop; none at this time */
- else if (IS_PONTEVECCHIO(i915))
- pvc_whitelist_build(engine);
+ if (engine->gt->type == GT_MEDIA)
+ ; /* none yet */
+ else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
+ xelpg_whitelist_build(engine);
else if (IS_DG2(i915))
dg2_whitelist_build(engine);
- else if (IS_XEHPSDV(i915))
- xehpsdv_whitelist_build(engine);
- else if (IS_DG1(i915))
- dg1_whitelist_build(engine);
else if (GRAPHICS_VER(i915) == 12)
tgl_whitelist_build(engine);
else if (GRAPHICS_VER(i915) == 11)
@@ -2372,62 +2230,35 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
}
}
-static bool needs_wa_1308578152(struct intel_engine_cs *engine)
-{
- return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >=
- GEN_DSS_PER_GSLICE;
-}
-
static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
/* Wa_22014600077 */
wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
ENABLE_EU_COUNT_FOR_TDL_FLUSH);
}
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
+ IS_DG2(i915)) {
/* Wa_1509727124 */
wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
SC_DISABLE_POWER_OPTIMIZATION_EBB);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915) ||
- IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_DG2(i915)) {
/* Wa_22012856258 */
wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
GEN12_DISABLE_READ_SUPPRESSION);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
- /* Wa_14013392000:dg2_g11 */
- wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
- }
-
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
- /* Wa_14012419201:dg2 */
- wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4,
- GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
- }
-
- /* Wa_1308578152:dg2_g10 when first gslice is fused off */
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) &&
- needs_wa_1308578152(engine)) {
- wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
- GEN12_REPLAY_MODE_GRANULARITY);
- }
-
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
+ if (IS_DG2(i915)) {
/*
* Wa_22010960976:dg2
* Wa_14013347512:dg2
@@ -2436,64 +2267,25 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
- /*
- * Wa_1608949956:dg2_g10
- * Wa_14010198302:dg2_g10
- */
- wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
- MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
- }
-
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
- /* Wa_22010430635:dg2 */
- wa_mcr_masked_en(wal,
- GEN9_ROW_CHICKEN4,
- GEN12_DISABLE_GRF_CLEAR);
-
- /* Wa_14010648519:dg2 */
- wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
- }
-
- /* Wa_14013202645:dg2 */
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
- wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
-
- /* Wa_22012532006:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
- wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
- DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
-
- if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G10(i915)) {
- /* Wa_22014600077:dg2 */
- wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
- _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
- 0 /* Wa_14012342262 write-only reg, so skip verification */,
- true);
- }
-
- if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
- IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
- /*
- * Wa_1607138336:tgl[a0],dg1[a0]
- * Wa_1607063988:tgl[a0],dg1[a0]
- */
- wa_write_or(wal,
- GEN9_CTX_PREEMPT_REG,
- GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) ||
+ IS_DG2(i915)) {
+ /* Wa_14015150844 */
+ wa_mcr_add(wal, XEHP_HDC_CHICKEN0, 0,
+ _MASKED_BIT_ENABLE(DIS_ATOMIC_CHAINING_TYPED_WRITES),
+ 0, true);
}
- if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
+ if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
+ IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
/*
- * Wa_1606679103:tgl
- * (see also Wa_1606682166:icl)
+ * Wa_1606700617:tgl,dg1,adl-p
+ * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
+ * Wa_14010826681:tgl,dg1,rkl,adl-p
+ * Wa_18019627453:dg2
*/
- wa_write_or(wal,
- GEN7_SARCHKMD,
- GEN7_DISABLE_SAMPLER_PREFETCH);
+ wa_masked_en(wal,
+ GEN9_CS_DEBUG_MODE1,
+ FF_DOP_CLOCK_GATE_DISABLE);
}
if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
@@ -2509,46 +2301,30 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
*/
wa_write_or(wal, GEN7_FF_THREAD_MODE,
GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
- }
- if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) ||
- IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
- /*
- * Wa_1606700617:tgl,dg1,adl-p
- * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
- * Wa_14010826681:tgl,dg1,rkl,adl-p
- * Wa_18019627453:dg2
- */
- wa_masked_en(wal,
- GEN9_CS_DEBUG_MODE1,
- FF_DOP_CLOCK_GATE_DISABLE);
+ /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
+ wa_mcr_masked_en(wal,
+ GEN10_SAMPLER_MODE,
+ ENABLE_SMALLPL);
}
if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
- IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
- /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
+ /* Wa_1409804808 */
wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
GEN12_PUSH_CONST_DEREF_HOLD_DIS);
- /*
- * Wa_1409085225:tgl
- * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p
- */
+ /* Wa_14010229206 */
wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
}
- if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
- IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) {
+ if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) {
/*
- * Wa_1607030317:tgl
- * Wa_1607186500:tgl
- * Wa_1607297627:tgl,rkl,dg1[a0],adlp
+ * Wa_1607297627
*
* On TGL and RKL there are multiple entries for this WA in the
* BSpec; some indicate this is an A0-only WA, others indicate
* it applies to all steppings so we trust the "all steppings."
- * For DG1 this only applies to A0.
*/
wa_masked_en(wal,
RING_PSMI_CTL(RENDER_RING_BASE),
@@ -2556,14 +2332,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN8_RC_SEMA_IDLE_MSG_DISABLE);
}
- if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
- IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
- /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
- wa_mcr_masked_en(wal,
- GEN10_SAMPLER_MODE,
- ENABLE_SMALLPL);
- }
-
if (GRAPHICS_VER(i915) == 11) {
/* This is not a Wa. Enable for better image quality */
wa_masked_en(wal,
@@ -2615,13 +2383,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN11_ENABLE_32_PLANE_MODE);
/*
- * Wa_1408615072:icl,ehl (vsunit)
- * Wa_1407596294:icl,ehl (hsunit)
- */
- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
- VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
-
- /*
* Wa_1408767742:icl[a2..forever],ehl[all]
* Wa_1605460711:icl[a0..c0]
*/
@@ -2809,7 +2570,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN7_FF_DS_SCHED_HW);
/* WaDisablePSDDualDispatchEnable:ivb */
- if (IS_IVB_GT1(i915))
+ if (INTEL_INFO(i915)->gt == 1)
wa_masked_en(wal,
GEN7_HALF_SLICE_CHICKEN1,
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
@@ -2821,18 +2582,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
RING_MODE_GEN7(RENDER_RING_BASE),
GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
- /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
- wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
-
- /*
- * BSpec says this must be set, even though
- * WaDisable4x2SubspanOptimization:ivb,hsw
- * WaDisable4x2SubspanOptimization isn't listed for VLV.
- */
- wa_masked_en(wal,
- CACHE_MODE_1,
- PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
-
/*
* BSpec recommends 8x4 when MSAA is used,
* however in practice 16x4 seems fastest.
@@ -2899,9 +2648,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN6_WIZ_HASHING_MASK,
GEN6_WIZ_HASHING_16x4);
- /* WaDisable_RenderCache_OperationalFlush:snb */
- wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
-
/*
* From the Sandybridge PRM, volume 1 part 3, page 24:
* "If this bit is set, STCunit will have LRA as replacement
@@ -2943,20 +2689,22 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
struct drm_i915_private *i915 = engine->i915;
/* WaKBLVECSSemaphoreWaitPoll:kbl */
- if (IS_KBL_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
+ if (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
wa_write(wal,
RING_SEMA_WAIT_POLL(engine->mmio_base),
1);
}
+ /* Wa_16018031267, Wa_16018063123 */
+ if (NEEDS_FASTCOLOR_BLT_WABB(engine))
+ wa_masked_field_set(wal, ECOSKPD(engine->mmio_base),
+ XEHP_BLITTER_SCHEDULING_MODE_MASK,
+ XEHP_BLITTER_ROUND_ROBIN_MODE);
}
static void
ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
- if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
- /* Wa_14014999345:pvc */
- wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
- }
+	/* currently no CCS engine-specific workarounds */
}
/*
@@ -2972,19 +2720,13 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
* function invoked by __intel_engine_init_ctx_wa().
*/
static void
-add_render_compute_tuning_settings(struct drm_i915_private *i915,
+add_render_compute_tuning_settings(struct intel_gt *gt,
struct i915_wa_list *wal)
{
- if (IS_PONTEVECCHIO(i915)) {
- wa_mcr_write(wal, XEHPC_L3SCRUB,
- SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
- wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
- }
+ struct drm_i915_private *i915 = gt->i915;
- if (IS_DG2(i915)) {
- wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
- }
/*
* This tuning setting proves beneficial only on ATS-M designs; the
@@ -2995,10 +2737,34 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
THREAD_EX_ARB_MODE_RR_AFTER_DEP);
- if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
+ if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 55))
wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
}
+static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+ struct intel_gt *gt = engine->gt;
+ u32 mode;
+
+ if (!IS_DG2(gt->i915))
+ return;
+
+ /*
+ * Wa_14019159160: This workaround, along with others, leads to
+ * significant challenges in utilizing load balancing among the
+ * CCS slices. Consequently, an architectural decision has been
+ * made to completely disable automatic CCS load balancing.
+ */
+ wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
+
+ /*
+	 * After disabling automatic load balancing, we need to assign
+	 * all slices to a single CCS; we will call it CCS mode 1.
+ */
+ mode = intel_gt_apply_ccs_mode(gt);
+ wa_masked_en(wal, XEHP_CCS_MODE, mode);
+}
+
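The wa_masked_en() calls above target "masked" registers, where the top 16 bits of a write select which of the bottom 16 bits actually change. A minimal sketch of the usual encoding — mirroring i915's _MASKED_BIT_ENABLE/_MASKED_BIT_DISABLE, reproduced here as an assumption:

/* Writing (bit << 16) | bit sets the bit; writing just (bit << 16)
 * clears it. Untouched bits keep their value, so no read-modify-write
 * cycle is needed.
 */
#define MASKED_BIT_ENABLE(a)	(((a) << 16) | (a))
#define MASKED_BIT_DISABLE(a)	((a) << 16)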
/*
* The workarounds in this function apply to shared registers in
* the general render reset domain that aren't tied to a
@@ -3012,107 +2778,98 @@ static void
general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;
- add_render_compute_tuning_settings(i915, wal);
+ add_render_compute_tuning_settings(gt, wal);
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
- /* Wa_22013037850 */
- wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
- DISABLE_128B_EVICTION_COMMAND_UDW);
+ if (GRAPHICS_VER(i915) >= 11) {
+ /* This is not a Wa (although referred to as
+	 * WaSetInidrectStateOverride in places); it allows
+ * applications that reference sampler states through
+ * the BindlessSamplerStateBaseAddress to have their
+ * border color relative to DynamicStateBaseAddress
+ * rather than BindlessSamplerStateBaseAddress.
+ *
+ * Otherwise SAMPLER_STATE border colors have to be
+ * copied in multiple heaps (DynamicStateBaseAddress &
+ * BindlessSamplerStateBaseAddress)
+ *
+ * BSpec: 46052
+ */
+ wa_mcr_masked_en(wal,
+ GEN10_SAMPLER_MODE,
+ GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE);
}
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
- IS_PONTEVECCHIO(i915) ||
- IS_DG2(i915)) {
- /* Wa_22014226127 */
- wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
- }
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) ||
+ IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74))) {
+ /* Wa_14017856879 */
+ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH);
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
- IS_DG2(i915)) {
- /* Wa_18017747507 */
- wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
+ /* Wa_14020495402 */
+ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, XELPG_DISABLE_TDL_SVHS_GATING);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_G11(i915)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
/*
- * Wa_22012826095:dg2
- * Wa_22013059131:dg2
+ * Wa_14017066071
+ * Wa_14017654203
*/
- wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
- MAXREQS_PER_BANK,
- REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
+ wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
+ MTL_DISABLE_SAMPLER_SC_OOO);
- /* Wa_22013059131:dg2 */
- wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
- FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
- }
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
+ /* Wa_22015279794 */
+ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
+ DISABLE_PREFETCH_INTO_IC);
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
- /*
- * Wa_14010918519:dg2_g10
- *
-	 * LSC_CHICKEN_BIT_0 always reads back as 0 in this stepping,
- * so ignoring verification.
- */
- wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
- FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
- 0, false);
- }
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
+ IS_DG2(i915)) {
+ /* Wa_22013037850 */
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
+ DISABLE_128B_EVICTION_COMMAND_UDW);
- if (IS_PONTEVECCHIO(i915)) {
- /* Wa_16016694945 */
- wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
+ /* Wa_18017747507 */
+ wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
}
- if (IS_XEHPSDV(i915)) {
- /* Wa_1409954639 */
- wa_mcr_masked_en(wal,
- GEN8_ROW_CHICKEN,
- SYSTOLIC_DOP_CLOCK_GATING_DIS);
-
- /* Wa_1607196519 */
- wa_mcr_masked_en(wal,
- GEN9_ROW_CHICKEN4,
- GEN12_DISABLE_GRF_CLEAR);
-
- /* Wa_14010670810:xehpsdv */
- wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
-
- /* Wa_14010449647:xehpsdv */
- wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
- GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
-
- /* Wa_18011725039:xehpsdv */
- if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
- wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
- wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
- }
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
+ IS_DG2(i915)) {
+ /* Wa_22014226127 */
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
}
- if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
+ if (IS_DG2(i915)) {
/* Wa_14015227452:dg2,pvc */
wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
- /* Wa_16015675438:dg2,pvc */
- wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
- }
-
- if (IS_DG2(i915)) {
/*
* Wa_16011620976:dg2_g11
* Wa_22015475538:dg2
*/
wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+
+ /* Wa_18028616096 */
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915))
+ if (IS_DG2_G11(i915)) {
+ /*
+ * Wa_22012826095:dg2
+ * Wa_22013059131:dg2
+ */
+ wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
+ MAXREQS_PER_BANK,
+ REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
+
+ /* Wa_22013059131:dg2 */
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
+ FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
+
/*
* Wa_22012654132
*
@@ -3124,6 +2881,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
_MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
0 /* write-only, so skip validation */,
true);
+ }
}
static void
@@ -3139,8 +2897,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
* to a single RCS/CCS engine's workaround list since
* they're reset as part of the general render domain reset.
*/
- if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
+ if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
general_render_compute_wa_init(engine, wal);
+ ccs_engine_wa_mode(engine, wal);
+ }
if (engine->class == COMPUTE_CLASS)
ccs_engine_wa_init(engine, wal);
@@ -3164,7 +2924,7 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
wa_list_apply(&engine->wa_list);
}
-static const struct i915_range mcr_ranges_gen8[] = {
+static const struct i915_mmio_range mcr_ranges_gen8[] = {
{ .start = 0x5500, .end = 0x55ff },
{ .start = 0x7000, .end = 0x7fff },
{ .start = 0x9400, .end = 0x97ff },
@@ -3173,7 +2933,7 @@ static const struct i915_range mcr_ranges_gen8[] = {
{},
};
-static const struct i915_range mcr_ranges_gen12[] = {
+static const struct i915_mmio_range mcr_ranges_gen12[] = {
{ .start = 0x8150, .end = 0x815f },
{ .start = 0x9520, .end = 0x955f },
{ .start = 0xb100, .end = 0xb3ff },
@@ -3182,7 +2942,7 @@ static const struct i915_range mcr_ranges_gen12[] = {
{},
};
-static const struct i915_range mcr_ranges_xehp[] = {
+static const struct i915_mmio_range mcr_ranges_xehp[] = {
{ .start = 0x4000, .end = 0x4aff },
{ .start = 0x5200, .end = 0x52ff },
{ .start = 0x5400, .end = 0x7fff },
@@ -3201,10 +2961,10 @@ static const struct i915_range mcr_ranges_xehp[] = {
static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
- const struct i915_range *mcr_ranges;
+ const struct i915_mmio_range *mcr_ranges;
int i;
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
mcr_ranges = mcr_ranges_xehp;
else if (GRAPHICS_VER(i915) >= 12)
mcr_ranges = mcr_ranges_gen12;
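The rest of mcr_range() falls outside this hunk, but the tables above are sentinel-terminated (the trailing {} entry), which suggests a lookup of roughly the following shape; this is a sketch, not the verbatim function body:

static bool offset_in_ranges(const struct i915_mmio_range *ranges,
			     u32 offset)
{
	int i;

	for (i = 0; ranges[i].start; i++)
		if (offset >= ranges[i].start && offset <= ranges[i].end)
			return true;

	return false;
}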
@@ -3231,7 +2991,7 @@ wa_list_srm(struct i915_request *rq,
const struct i915_wa_list *wal,
struct i915_vma *vma)
{
- struct drm_i915_private *i915 = rq->engine->i915;
+ struct drm_i915_private *i915 = rq->i915;
unsigned int i, count = 0;
const struct i915_wa *wa;
u32 srm, *cs;
@@ -3330,7 +3090,7 @@ retry:
err = 0;
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
+ if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
continue;
if (!wa_verify(wal->gt, wa, results[i], wal->name, from))
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index c0637bf799a3..79741f043f03 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -108,7 +108,7 @@ static void advance(struct i915_request *request)
static void hw_delay_complete(struct timer_list *t)
{
- struct mock_engine *engine = from_timer(engine, t, hw_delay);
+ struct mock_engine *engine = timer_container_of(engine, t, hw_delay);
struct i915_request *request;
unsigned long flags;
@@ -297,7 +297,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine)
struct i915_request *rq;
unsigned long flags;
- del_timer_sync(&mock->hw_delay);
+ timer_delete_sync(&mock->hw_delay);
spin_lock_irqsave(&engine->sched_engine->lock, flags);
@@ -432,7 +432,7 @@ void mock_engine_flush(struct intel_engine_cs *engine)
container_of(engine, typeof(*mock), base);
struct i915_request *request, *rn;
- del_timer_sync(&mock->hw_delay);
+ timer_delete_sync(&mock->hw_delay);
spin_lock_irq(&mock->hw_lock);
list_for_each_entry_safe(request, rn, &mock->hw_queue, mock.link)
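These mock-engine hunks track two kernel-wide timer API renames. Assuming the conventional one-to-one mapping, the old and new spellings line up as follows:

/*
 *   from_timer(var, t, field)   ->   timer_container_of(var, t, field)
 *   del_timer_sync(&timer)      ->   timer_delete_sync(&timer)
 *   del_timer(&timer)           ->   timer_delete(&timer)
 *
 * Behavior is unchanged; only the names differ.
 */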
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 76fbae358072..ab76703f6e8b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -3,6 +3,8 @@
* Copyright © 2019 Intel Corporation
*/
+#include <drm/drm_print.h>
+
#include "i915_selftest.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
@@ -88,8 +90,9 @@ static int __live_context_size(struct intel_engine_cs *engine)
goto err;
vaddr = i915_gem_object_pin_map_unlocked(ce->state->obj,
- i915_coherent_map_type(engine->i915,
- ce->state->obj, false));
+ intel_gt_coherent_map_type(engine->gt,
+ ce->state->obj,
+ false));
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
intel_context_unpin(ce);
@@ -284,7 +287,8 @@ out_engine:
intel_engine_pm_flush(engine);
if (intel_engine_pm_is_awake(engine)) {
- struct drm_printer p = drm_debug_printer(__func__);
+ struct drm_printer p = drm_dbg_printer(&engine->i915->drm,
+ DRM_UT_DRIVER, NULL);
intel_engine_dump(engine, &p,
"%s is still awake:%d after idle-barriers\n",
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 542ce6d2de19..5ffa5e30f419 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -21,20 +21,22 @@ static int cmp_u32(const void *A, const void *B)
return *a - *b;
}
-static void perf_begin(struct intel_gt *gt)
+static intel_wakeref_t perf_begin(struct intel_gt *gt)
{
- intel_gt_pm_get(gt);
+ intel_wakeref_t wakeref = intel_gt_pm_get(gt);
/* Boost gpufreq to max [waitboost] and keep it fixed */
atomic_inc(&gt->rps.num_waiters);
- schedule_work(&gt->rps.work);
+ queue_work(gt->i915->unordered_wq, &gt->rps.work);
flush_work(&gt->rps.work);
+
+ return wakeref;
}
-static int perf_end(struct intel_gt *gt)
+static int perf_end(struct intel_gt *gt, intel_wakeref_t wakeref)
{
atomic_dec(&gt->rps.num_waiters);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
return igt_flush_test(gt->i915);
}
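With perf_begin() now returning an intel_wakeref_t, the cookie threads from acquisition to release so that intel_gt_pm_put() can be matched against its originating get. Callers follow the pattern below, mirroring the call sites later in this file:

intel_wakeref_t wakeref;
int err;

wakeref = perf_begin(gt);	/* takes a tracked GT PM reference */
/* ... run the measurement loop ... */
err = perf_end(gt, wakeref);	/* releases that same reference */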
@@ -62,7 +64,7 @@ static int write_timestamp(struct i915_request *rq, int slot)
return PTR_ERR(cs);
cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
- if (GRAPHICS_VER(rq->engine->i915) >= 8)
+ if (GRAPHICS_VER(rq->i915) >= 8)
cmd++;
*cs++ = cmd;
*cs++ = i915_mmio_reg_offset(timestamp_reg(rq->engine));
@@ -133,12 +135,13 @@ static int perf_mi_bb_start(void *arg)
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err = 0;
if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;
- perf_begin(gt);
+ wakeref = perf_begin(gt);
for_each_engine(engine, gt, id) {
struct intel_context *ce = engine->kernel_context;
struct i915_vma *batch;
@@ -207,7 +210,7 @@ out:
pr_info("%s: MI_BB_START cycles: %u\n",
engine->name, trifilter(cycles));
}
- if (perf_end(gt))
+ if (perf_end(gt, wakeref))
err = -EIO;
return err;
@@ -260,12 +263,13 @@ static int perf_mi_noop(void *arg)
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err = 0;
if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;
- perf_begin(gt);
+ wakeref = perf_begin(gt);
for_each_engine(engine, gt, id) {
struct intel_context *ce = engine->kernel_context;
struct i915_vma *base, *nop;
@@ -364,7 +368,7 @@ out:
pr_info("%s: 16K MI_NOOP cycles: %u\n",
engine->name, trifilter(cycles));
}
- if (perf_end(gt))
+ if (perf_end(gt, wakeref))
err = -EIO;
return err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 273d440a53e3..9e4f0e417b3b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -84,7 +84,7 @@ static struct pulse *pulse_create(void)
static void pulse_unlock_wait(struct pulse *p)
{
- i915_active_unlock_wait(&p->active);
+ wait_var_event_timeout(&p->active, i915_active_is_idle(&p->active), HZ);
}
static int __live_idle_pulse(struct intel_engine_cs *engine,
@@ -122,9 +122,9 @@ static int __live_idle_pulse(struct intel_engine_cs *engine,
GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
if (engine_sync_barrier(engine)) {
- struct drm_printer m = drm_err_printer("pulse");
+ struct drm_printer m = drm_err_printer(&engine->i915->drm, "pulse");
- pr_err("%s: no heartbeat pulse?\n", engine->name);
+ drm_printf(&m, "%s: no heartbeat pulse?\n", engine->name);
intel_engine_dump(engine, &m, "%s", engine->name);
err = -ETIME;
@@ -136,10 +136,10 @@ static int __live_idle_pulse(struct intel_engine_cs *engine,
pulse_unlock_wait(p); /* synchronize with the retirement callback */
if (!i915_active_is_idle(&p->active)) {
- struct drm_printer m = drm_err_printer("pulse");
+ struct drm_printer m = drm_err_printer(&engine->i915->drm, "pulse");
- pr_err("%s: heartbeat pulse did not flush idle tasks\n",
- engine->name);
+ drm_printf(&m, "%s: heartbeat pulse did not flush idle tasks\n",
+ engine->name);
i915_active_print(&p->active, &m);
err = -EINVAL;
@@ -193,115 +193,6 @@ static int live_idle_pulse(void *arg)
return err;
}
-static int cmp_u32(const void *_a, const void *_b)
-{
- const u32 *a = _a, *b = _b;
-
- return *a - *b;
-}
-
-static int __live_heartbeat_fast(struct intel_engine_cs *engine)
-{
- const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6));
- struct intel_context *ce;
- struct i915_request *rq;
- ktime_t t0, t1;
- u32 times[5];
- int err;
- int i;
-
- ce = intel_context_create(engine);
- if (IS_ERR(ce))
- return PTR_ERR(ce);
-
- intel_engine_pm_get(engine);
-
- err = intel_engine_set_heartbeat(engine, 1);
- if (err)
- goto err_pm;
-
- for (i = 0; i < ARRAY_SIZE(times); i++) {
- do {
- /* Manufacture a tick */
- intel_engine_park_heartbeat(engine);
- GEM_BUG_ON(engine->heartbeat.systole);
- engine->serial++; /* pretend we are not idle! */
- intel_engine_unpark_heartbeat(engine);
-
- flush_delayed_work(&engine->heartbeat.work);
- if (!delayed_work_pending(&engine->heartbeat.work)) {
- pr_err("%s: heartbeat %d did not start\n",
- engine->name, i);
- err = -EINVAL;
- goto err_pm;
- }
-
- rcu_read_lock();
- rq = READ_ONCE(engine->heartbeat.systole);
- if (rq)
- rq = i915_request_get_rcu(rq);
- rcu_read_unlock();
- } while (!rq);
-
- t0 = ktime_get();
- while (rq == READ_ONCE(engine->heartbeat.systole))
- yield(); /* work is on the local cpu! */
- t1 = ktime_get();
-
- i915_request_put(rq);
- times[i] = ktime_us_delta(t1, t0);
- }
-
- sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
-
- pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
- engine->name,
- times[ARRAY_SIZE(times) / 2],
- times[0],
- times[ARRAY_SIZE(times) - 1]);
-
- /*
- * Ideally, the upper bound on min work delay would be something like
- * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we
- * are, even with system_wq_highpri, at the mercy of the CPU scheduler
-	 * and may be stuck behind some slow work for many milliseconds. Such
- * as our very own display workers.
- */
- if (times[ARRAY_SIZE(times) / 2] > error_threshold) {
- pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
- engine->name,
- times[ARRAY_SIZE(times) / 2],
- error_threshold);
- err = -EINVAL;
- }
-
- reset_heartbeat(engine);
-err_pm:
- intel_engine_pm_put(engine);
- intel_context_put(ce);
- return err;
-}
-
-static int live_heartbeat_fast(void *arg)
-{
- struct intel_gt *gt = arg;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err = 0;
-
- /* Check that the heartbeat ticks at the desired rate. */
- if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
- return 0;
-
- for_each_engine(engine, gt, id) {
- err = __live_heartbeat_fast(engine);
- if (err)
- break;
- }
-
- return err;
-}
-
static int __live_heartbeat_off(struct intel_engine_cs *engine)
{
int err;
@@ -372,7 +263,6 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
static const struct i915_subtest tests[] = {
SUBTEST(live_idle_flush),
SUBTEST(live_idle_pulse),
- SUBTEST(live_heartbeat_fast),
SUBTEST(live_heartbeat_off),
};
int saved_hangcheck;
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
index 87c94314cf67..10e556a7eac4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
@@ -5,6 +5,7 @@
#include <linux/sort.h>
+#include "gt/intel_gt_print.h"
#include "i915_selftest.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
@@ -402,7 +403,7 @@ static int live_engine_pm(void *arg)
/* gt wakeref is async (deferred to workqueue) */
if (intel_gt_pm_wait_for_idle(gt)) {
- pr_err("GT failed to idle\n");
+ gt_err(gt, "GT failed to idle\n");
return -EINVAL;
}
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 736b89a8ecf5..a06b397b6d42 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -5,12 +5,15 @@
#include <linux/prime_numbers.h>
+#include <drm/drm_print.h>
+
#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"
#include "gt/selftest_engine_heartbeat.h"
+#include "i915_jiffies.h"
#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
@@ -53,7 +56,7 @@ static int wait_for_submit(struct intel_engine_cs *engine,
if (i915_request_completed(rq)) /* that was quick! */
return 0;
- /* Wait until the HW has acknowleged the submission (or err) */
+ /* Wait until the HW has acknowledged the submission (or err) */
intel_engine_flush_submission(engine);
if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
return 0;
@@ -93,7 +96,7 @@ static int wait_for_reset(struct intel_engine_cs *engine,
return -EINVAL;
}
- /* Give the request a jiffie to complete after flushing the worker */
+ /* Give the request a jiffy to complete after flushing the worker */
if (i915_request_wait(rq, 0,
max(0l, (long)(timeout - jiffies)) + 1) < 0) {
pr_err("%s: hanging request %llx:%lld did not complete\n",
@@ -1198,7 +1201,7 @@ static int live_timeslice_rewind(void *arg)
ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
/* Wait for the timeslice to kick in */
- del_timer(&engine->execlists.timer);
+ timer_delete(&engine->execlists.timer);
tasklet_hi_schedule(&engine->sched_engine->tasklet);
intel_engine_flush_submission(engine);
}
@@ -1530,8 +1533,8 @@ static int live_busywait_preempt(void *arg)
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
enum intel_engine_id id;
- int err = -ENOMEM;
u32 *map;
+ int err;
/*
* Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
@@ -1539,13 +1542,17 @@ static int live_busywait_preempt(void *arg)
*/
ctx_hi = kernel_context(gt->i915, NULL);
- if (!ctx_hi)
- return -ENOMEM;
+ if (IS_ERR(ctx_hi))
+ return PTR_ERR(ctx_hi);
+
ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
ctx_lo = kernel_context(gt->i915, NULL);
- if (!ctx_lo)
+ if (IS_ERR(ctx_lo)) {
+ err = PTR_ERR(ctx_lo);
goto err_ctx_hi;
+ }
+
ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
@@ -2353,7 +2360,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg)
/* force preempt reset [failure] */
while (!engine->execlists.pending[0])
intel_engine_flush_submission(engine);
- del_timer_sync(&engine->execlists.preempt);
+ timer_delete_sync(&engine->execlists.preempt);
intel_engine_flush_submission(engine);
cancel_reset_timeout(engine);
@@ -3422,7 +3429,7 @@ static int live_preempt_timeout(void *arg)
cpu_relax();
saved_timeout = engine->props.preempt_timeout_ms;
- engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
+ engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */
i915_request_get(rq);
i915_request_add(rq);
@@ -3570,7 +3577,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
arg[id].batch = NULL;
arg[id].count = 0;
- worker[id] = kthread_create_worker(0, "igt/smoke:%d", id);
+ worker[id] = kthread_run_worker(0, "igt/smoke:%d", id);
if (IS_ERR(worker[id])) {
err = PTR_ERR(worker[id]);
break;
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index b46425aeb2f0..33351deeea4f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -63,8 +63,8 @@ static void measure_clocks(struct intel_engine_cs *engine,
udelay(1000);
- dt[i] = ktime_sub(ktime_get(), dt[i]);
cycles[i] += read_timestamp(engine);
+ dt[i] = ktime_sub(ktime_get(), dt[i]);
local_irq_enable();
}
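The reorder above tightens the measurement window: previously the ktime delta was closed before the final timestamp read, so one read_timestamp() cost landed inside dt at the start of the window but not at the end. Schematically, my paraphrase of the before/after:

/*
 * before:  dt[i]     = ktime_sub(ktime_get(), dt[i]);
 *          cycles[i] += read_timestamp(engine);
 *
 * after:   cycles[i] += read_timestamp(engine);
 *          dt[i]     = ktime_sub(ktime_get(), dt[i]);
 *
 * If the window opens with "read ktime, then read timestamp", it must
 * close with "read timestamp, then read ktime" for the per-read costs
 * to cancel.
 */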
@@ -81,6 +81,7 @@ static int live_gt_clocks(void *arg)
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err = 0;
if (!gt->clock_frequency) { /* unknown */
@@ -91,7 +92,7 @@ static int live_gt_clocks(void *arg)
if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
for_each_engine(engine, gt, id) {
@@ -128,7 +129,7 @@ static int live_gt_clocks(void *arg)
}
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 8b0d84f2aad2..4f252f704975 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -73,7 +73,7 @@ static int hang_init(struct hang *h, struct intel_gt *gt)
h->seqno = memset(vaddr, 0xff, PAGE_SIZE);
vaddr = i915_gem_object_pin_map_unlocked(h->obj,
- i915_coherent_map_type(gt->i915, h->obj, false));
+ intel_gt_coherent_map_type(gt, h->obj, false));
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
goto err_unpin_hws;
@@ -119,7 +119,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
return ERR_CAST(obj);
}
- vaddr = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(gt->i915, obj, false));
+ vaddr = i915_gem_object_pin_map_unlocked(obj, intel_gt_coherent_map_type(gt, obj, false));
if (IS_ERR(vaddr)) {
i915_gem_object_put(obj);
i915_vm_put(vm);
@@ -319,7 +319,7 @@ static int igt_hang_sanitycheck(void *arg)
i915_request_add(rq);
timeout = 0;
- intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */)
+ intel_wedge_on_timeout(&w, gt, HZ / 5 /* 200ms */)
timeout = i915_request_wait(rq, 0,
MAX_SCHEDULE_TIMEOUT);
if (intel_gt_is_wedged(gt))
@@ -548,7 +548,7 @@ static int igt_reset_fail_engine(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- /* Check that we can recover from engine-reset failues */
+ /* Check that we can recover from engine-reset failures */
if (!intel_has_reset_engine(gt))
return 0;
@@ -904,7 +904,7 @@ static void active_engine(struct kthread_work *work)
arg->result = PTR_ERR(ce[count]);
pr_err("[%s] Create context #%ld failed: %d!\n",
engine->name, count, arg->result);
- while (--count)
+ while (count--)
intel_context_put(ce[count]);
return;
}
@@ -1025,7 +1025,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
threads[tmp].engine = other;
threads[tmp].flags = flags;
- worker = kthread_create_worker(0, "igt/%s",
+ worker = kthread_run_worker(0, "igt/%s",
other->name);
if (IS_ERR(worker)) {
err = PTR_ERR(worker);
diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c
index cfd736d88939..779fadcec7c4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_llc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_llc.c
@@ -3,7 +3,6 @@
* Copyright © 2019 Intel Corporation
*/
-#include "intel_pm.h" /* intel_gpu_freq() */
#include "selftest_llc.h"
#include "intel_rps.h"
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index a78a3d2c2e16..23f04f6f8fba 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -7,6 +7,7 @@
#include "gem/i915_gem_internal.h"
+#include "i915_drv.h"
#include "i915_selftest.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
@@ -63,7 +64,7 @@ static int wait_for_submit(struct intel_engine_cs *engine,
if (i915_request_completed(rq)) /* that was quick! */
return 0;
- /* Wait until the HW has acknowleged the submission (or err) */
+ /* Wait until the HW has acknowledged the submission (or err) */
intel_engine_flush_submission(engine);
if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
return 0;
@@ -859,6 +860,14 @@ static int live_lrc_timestamp(void *arg)
};
/*
+ * This test was designed to isolate a hardware bug.
+ * The bug was found and fixed in later generations, but the
+ * test now pollutes our CI on earlier ones.
+ */
+ if (GRAPHICS_VER(gt->i915) == 12)
+ return 0;
+
+ /*
* We want to verify that the timestamp is saved and restored across
* context switches and is monotonic.
*
@@ -1292,9 +1301,9 @@ static int compare_isolation(struct intel_engine_cs *engine,
}
lrc = i915_gem_object_pin_map_unlocked(ce->state->obj,
- i915_coherent_map_type(engine->i915,
- ce->state->obj,
- false));
+ intel_gt_coherent_map_type(engine->gt,
+ ce->state->obj,
+ false));
if (IS_ERR(lrc)) {
err = PTR_ERR(lrc);
goto err_B1;
@@ -1555,7 +1564,7 @@ static int live_lrc_isolation(void *arg)
return err;
}
-static int indirect_ctx_submit_req(struct intel_context *ce)
+static int wabb_ctx_submit_req(struct intel_context *ce)
{
struct i915_request *rq;
int err = 0;
@@ -1579,7 +1588,8 @@ static int indirect_ctx_submit_req(struct intel_context *ce)
#define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
static u32 *
-emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+emit_wabb_ctx_canary(const struct intel_context *ce,
+ u32 *cs, bool per_ctx)
{
*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
MI_SRM_LRM_GLOBAL_GTT |
@@ -1587,26 +1597,43 @@ emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
*cs++ = i915_mmio_reg_offset(RING_START(0));
*cs++ = i915_ggtt_offset(ce->state) +
context_wa_bb_offset(ce) +
- CTX_BB_CANARY_OFFSET;
+ CTX_BB_CANARY_OFFSET +
+ (per_ctx ? PAGE_SIZE : 0);
*cs++ = 0;
return cs;
}
+static u32 *
+emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+ return emit_wabb_ctx_canary(ce, cs, false);
+}
+
+static u32 *
+emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+ return emit_wabb_ctx_canary(ce, cs, true);
+}
+
static void
-indirect_ctx_bb_setup(struct intel_context *ce)
+wabb_ctx_setup(struct intel_context *ce, bool per_ctx)
{
- u32 *cs = context_indirect_bb(ce);
+ u32 *cs = context_wabb(ce, per_ctx);
cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
- setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
+ if (per_ctx)
+ setup_per_ctx_bb(ce, ce->engine, emit_per_ctx_bb_canary);
+ else
+ setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
}
-static bool check_ring_start(struct intel_context *ce)
+static bool check_ring_start(struct intel_context *ce, bool per_ctx)
{
const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
- LRC_STATE_OFFSET + context_wa_bb_offset(ce);
+ LRC_STATE_OFFSET + context_wa_bb_offset(ce) +
+ (per_ctx ? PAGE_SIZE : 0);
if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
return true;
@@ -1618,21 +1645,21 @@ static bool check_ring_start(struct intel_context *ce)
return false;
}
-static int indirect_ctx_bb_check(struct intel_context *ce)
+static int wabb_ctx_check(struct intel_context *ce, bool per_ctx)
{
int err;
- err = indirect_ctx_submit_req(ce);
+ err = wabb_ctx_submit_req(ce);
if (err)
return err;
- if (!check_ring_start(ce))
+ if (!check_ring_start(ce, per_ctx))
return -EINVAL;
return 0;
}
-static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
+static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx)
{
struct intel_context *a, *b;
int err;
@@ -1667,14 +1694,14 @@ static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
* As ring start is restored prior to starting the indirect ctx bb, and
* as it will be different for each context, it fits this purpose.
*/
- indirect_ctx_bb_setup(a);
- indirect_ctx_bb_setup(b);
+ wabb_ctx_setup(a, per_ctx);
+ wabb_ctx_setup(b, per_ctx);
- err = indirect_ctx_bb_check(a);
+ err = wabb_ctx_check(a, per_ctx);
if (err)
goto unpin_b;
- err = indirect_ctx_bb_check(b);
+ err = wabb_ctx_check(b, per_ctx);
unpin_b:
intel_context_unpin(b);
@@ -1688,7 +1715,7 @@ put_a:
return err;
}
-static int live_lrc_indirect_ctx_bb(void *arg)
+static int lrc_wabb_ctx(void *arg, bool per_ctx)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
@@ -1697,7 +1724,7 @@ static int live_lrc_indirect_ctx_bb(void *arg)
for_each_engine(engine, gt, id) {
intel_engine_pm_get(engine);
- err = __live_lrc_indirect_ctx_bb(engine);
+ err = __lrc_wabb_ctx(engine, per_ctx);
intel_engine_pm_put(engine);
if (igt_flush_test(gt->i915))
@@ -1710,6 +1737,16 @@ static int live_lrc_indirect_ctx_bb(void *arg)
return err;
}
+static int live_lrc_indirect_ctx_bb(void *arg)
+{
+ return lrc_wabb_ctx(arg, false);
+}
+
+static int live_lrc_per_ctx_bb(void *arg)
+{
+ return lrc_wabb_ctx(arg, true);
+}
+
static void garbage_reset(struct intel_engine_cs *engine,
struct i915_request *rq)
{
@@ -1947,6 +1984,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_lrc_garbage),
SUBTEST(live_pphwsp_runtime),
SUBTEST(live_lrc_indirect_ctx_bb),
+ SUBTEST(live_lrc_per_ctx_bb),
};
if (!HAS_LOGICAL_RING_CONTEXTS(i915))
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index e677f2da093d..fdf0e9858607 100644
--- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
+++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
@@ -137,7 +137,7 @@ err_free_src:
static int intel_context_copy_ccs(struct intel_context *ce,
const struct i915_deps *deps,
struct scatterlist *sg,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
bool write_to_ccs,
struct i915_request **out)
{
@@ -185,7 +185,7 @@ static int intel_context_copy_ccs(struct intel_context *ce,
if (err)
goto out_rq;
- len = emit_pte(rq, &it, cache_level, true, offset, CHUNK_SZ);
+ len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ);
if (len <= 0) {
err = len;
goto out_rq;
@@ -223,7 +223,7 @@ intel_migrate_ccs_copy(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
const struct i915_deps *deps,
struct scatterlist *sg,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
bool write_to_ccs,
struct i915_request **out)
{
@@ -243,7 +243,7 @@ intel_migrate_ccs_copy(struct intel_migrate *m,
if (err)
goto out;
- err = intel_context_copy_ccs(ce, deps, sg, cache_level,
+ err = intel_context_copy_ccs(ce, deps, sg, pat_index,
write_to_ccs, out);
intel_context_unpin(ce);
@@ -262,7 +262,7 @@ static int clear(struct intel_migrate *migrate,
{
struct drm_i915_private *i915 = migrate->context->engine->i915;
struct drm_i915_gem_object *obj;
- struct i915_request *rq;
+ struct i915_request *rq = NULL;
struct i915_gem_ww_ctx ww;
u32 *vaddr, val = 0;
bool ccs_cap = false;
@@ -300,7 +300,7 @@ static int clear(struct intel_migrate *migrate,
/* Write the obj data into ccs surface */
err = intel_migrate_ccs_copy(migrate, &ww, NULL,
obj->mm.pages->sgl,
- obj->cache_level,
+ obj->pat_index,
true, &rq);
if (rq && !err) {
if (i915_request_wait(rq, 0, HZ) < 0) {
@@ -336,7 +336,7 @@ static int clear(struct intel_migrate *migrate,
if (vaddr[x] != val) {
pr_err("%ps failed, (%u != %u), offset: %zu\n",
- fn, vaddr[x], val, x * sizeof(u32));
+ fn, vaddr[x], val, x * sizeof(u32));
igt_hexdump(vaddr + i * 1024, 4096);
err = -EINVAL;
}
@@ -351,7 +351,7 @@ static int clear(struct intel_migrate *migrate,
err = intel_migrate_ccs_copy(migrate, &ww, NULL,
obj->mm.pages->sgl,
- obj->cache_level,
+ obj->pat_index,
false, &rq);
if (rq && !err) {
if (i915_request_wait(rq, 0, HZ) < 0) {
@@ -414,9 +414,9 @@ static int __migrate_copy(struct intel_migrate *migrate,
struct i915_request **out)
{
return intel_migrate_copy(migrate, ww, NULL,
- src->mm.pages->sgl, src->cache_level,
+ src->mm.pages->sgl, src->pat_index,
i915_gem_object_is_lmem(src),
- dst->mm.pages->sgl, dst->cache_level,
+ dst->mm.pages->sgl, dst->pat_index,
i915_gem_object_is_lmem(dst),
out);
}
@@ -428,9 +428,9 @@ static int __global_copy(struct intel_migrate *migrate,
struct i915_request **out)
{
return intel_context_migrate_copy(migrate->context, NULL,
- src->mm.pages->sgl, src->cache_level,
+ src->mm.pages->sgl, src->pat_index,
i915_gem_object_is_lmem(src),
- dst->mm.pages->sgl, dst->cache_level,
+ dst->mm.pages->sgl, dst->pat_index,
i915_gem_object_is_lmem(dst),
out);
}
@@ -455,7 +455,7 @@ static int __migrate_clear(struct intel_migrate *migrate,
{
return intel_migrate_clear(migrate, ww, NULL,
obj->mm.pages->sgl,
- obj->cache_level,
+ obj->pat_index,
i915_gem_object_is_lmem(obj),
value, out);
}
@@ -468,7 +468,7 @@ static int __global_clear(struct intel_migrate *migrate,
{
return intel_context_migrate_clear(migrate->context, NULL,
obj->mm.pages->sgl,
- obj->cache_level,
+ obj->pat_index,
i915_gem_object_is_lmem(obj),
value, out);
}
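The cache_level to pat_index conversion threaded through this file assumes the caller translates the abstract cache level once and hands the raw PAT index to the migration helpers. A minimal sketch built from the calls already in this diff:

	/* translate the abstract cache level to the platform PAT index */
	unsigned int pat_index =
		i915_gem_get_pat_index(gt->i915, I915_CACHE_NONE);

	err = intel_context_migrate_clear(migrate->context, NULL,
					  obj->mm.pages->sgl, pat_index,
					  i915_gem_object_is_lmem(obj),
					  value, out);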
@@ -537,7 +537,7 @@ struct spinner_timer {
static void spinner_kill(struct timer_list *timer)
{
- struct spinner_timer *st = from_timer(st, timer, timer);
+ struct spinner_timer *st = timer_container_of(st, timer, timer);
igt_spinner_end(&st->spin);
pr_info("%s\n", __func__);
@@ -648,7 +648,7 @@ static int live_emit_pte_full_ring(void *arg)
*/
pr_info("%s emite_pte ring space=%u\n", __func__, rq->ring->space);
it = sg_sgt(obj->mm.pages->sgl);
- len = emit_pte(rq, &it, obj->cache_level, false, 0, CHUNK_SZ);
+ len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ);
if (!len) {
err = -EINVAL;
goto out_rq;
@@ -660,8 +660,8 @@ static int live_emit_pte_full_ring(void *arg)
out_rq:
i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */
- del_timer_sync(&st.timer);
- destroy_timer_on_stack(&st.timer);
+ timer_delete_sync(&st.timer);
+ timer_destroy_on_stack(&st.timer);
out_unpin:
intel_context_unpin(ce);
out_put:
@@ -710,7 +710,14 @@ static int threaded_migrate(struct intel_migrate *migrate,
thread[i].tsk = tsk;
}
- msleep(10); /* start all threads before we kthread_stop() */
+ /*
+ * Start all threads before we kthread_stop().
+ * In CHV / BXT+VTD environments, where VMA pinning is committed
+ * asynchronously, an empirically determined 100ms delay is needed
+ * to avoid stopping threads that may still be waiting for
+ * intel_ggtt_bind_vma to complete and would fail with -ERESTARTSYS
+ * when interrupted.
+ */
+ msleep((intel_vm_no_concurrent_access_wa(migrate->context->vm->i915) ? 100 : 10) * n_cpus);
for (i = 0; i < n_cpus; ++i) {
struct task_struct *tsk = thread[i].tsk;
@@ -719,11 +726,9 @@ static int threaded_migrate(struct intel_migrate *migrate,
if (IS_ERR_OR_NULL(tsk))
continue;
- status = kthread_stop(tsk);
+ status = kthread_stop_put(tsk);
if (status && !err)
err = status;
-
- put_task_struct(tsk);
}
kfree(thread);
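kthread_stop_put(), used above, folds the removed put_task_struct() into the stop call; a sketch of the equivalence it assumes:

	/* kthread_stop_put(tsk) behaves as: */
	status = kthread_stop(tsk);
	put_task_struct(tsk);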
@@ -844,7 +849,7 @@ static int wrap_ktime_compare(const void *A, const void *B)
static int __perf_clear_blt(struct intel_context *ce,
struct scatterlist *sg,
- enum i915_cache_level cache_level,
+ unsigned int pat_index,
bool is_lmem,
size_t sz)
{
@@ -858,7 +863,7 @@ static int __perf_clear_blt(struct intel_context *ce,
t0 = ktime_get();
- err = intel_context_migrate_clear(ce, NULL, sg, cache_level,
+ err = intel_context_migrate_clear(ce, NULL, sg, pat_index,
is_lmem, 0, &rq);
if (rq) {
if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
@@ -904,7 +909,8 @@ static int perf_clear_blt(void *arg)
err = __perf_clear_blt(gt->migrate.context,
dst->mm.pages->sgl,
- I915_CACHE_NONE,
+ i915_gem_get_pat_index(gt->i915,
+ I915_CACHE_NONE),
i915_gem_object_is_lmem(dst),
sizes[i]);
@@ -919,10 +925,10 @@ static int perf_clear_blt(void *arg)
static int __perf_copy_blt(struct intel_context *ce,
struct scatterlist *src,
- enum i915_cache_level src_cache_level,
+ unsigned int src_pat_index,
bool src_is_lmem,
struct scatterlist *dst,
- enum i915_cache_level dst_cache_level,
+ unsigned int dst_pat_index,
bool dst_is_lmem,
size_t sz)
{
@@ -937,9 +943,9 @@ static int __perf_copy_blt(struct intel_context *ce,
t0 = ktime_get();
err = intel_context_migrate_copy(ce, NULL,
- src, src_cache_level,
+ src, src_pat_index,
src_is_lmem,
- dst, dst_cache_level,
+ dst, dst_pat_index,
dst_is_lmem,
&rq);
if (rq) {
@@ -994,10 +1000,12 @@ static int perf_copy_blt(void *arg)
err = __perf_copy_blt(gt->migrate.context,
src->mm.pages->sgl,
- I915_CACHE_NONE,
+ i915_gem_get_pat_index(gt->i915,
+ I915_CACHE_NONE),
i915_gem_object_is_lmem(src),
dst->mm.pages->sgl,
- I915_CACHE_NONE,
+ i915_gem_get_pat_index(gt->i915,
+ I915_CACHE_NONE),
i915_gem_object_is_lmem(dst),
sz);
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index ca009a6a13bd..d73e438fb85f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -131,13 +131,14 @@ static int read_mocs_table(struct i915_request *rq,
const struct drm_i915_mocs_table *table,
u32 *offset)
{
+ struct intel_gt *gt = rq->engine->gt;
u32 addr;
if (!table)
return 0;
- if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915))
- addr = global_mocs_offset();
+ if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915))
+ addr = global_mocs_offset() + gt->uncore->gsi_offset;
else
addr = mocs_offset(rq->engine);
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
index 2ceeadecc639..41716ed454b7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -8,6 +8,7 @@
#include "intel_gpu_commands.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"
+#include "intel_rps.h"
#include "selftest_rc6.h"
#include "selftests/i915_random.h"
@@ -32,12 +33,22 @@ int live_rc6_manual(void *arg)
{
struct intel_gt *gt = arg;
struct intel_rc6 *rc6 = &gt->rc6;
- u64 rc0_power, rc6_power;
+ struct intel_rps *rps = &gt->rps;
intel_wakeref_t wakeref;
+ u64 rc0_sample_energy[2];
+ u64 rc6_sample_energy[2];
+ u64 sleep_time = 1000;
+ u32 rc0_freq = 0;
+ u32 rc6_freq = 0;
+ u64 rc0_power;
+ u64 rc6_power;
bool has_power;
+ u64 threshold;
ktime_t dt;
u64 res[2];
int err = 0;
+ u64 diff;
+
/*
* Our claim is that we can "encourage" the GPU to enter rc6 at will.
@@ -56,28 +67,35 @@ int live_rc6_manual(void *arg)
/* Force RC6 off for starters */
__intel_rc6_disable(rc6);
- msleep(1); /* wakeup is not immediate, takes about 100us on icl */
+ /* wakeup is not immediate, takes about 100us on icl */
+ usleep_range(1000, 2000);
res[0] = rc6_residency(rc6);
dt = ktime_get();
- rc0_power = librapl_energy_uJ();
- msleep(250);
- rc0_power = librapl_energy_uJ() - rc0_power;
+ rc0_sample_energy[0] = librapl_energy_uJ();
+ msleep(sleep_time);
+ rc0_sample_energy[1] = librapl_energy_uJ() - rc0_sample_energy[0];
dt = ktime_sub(ktime_get(), dt);
res[1] = rc6_residency(rc6);
+ rc0_freq = intel_rps_read_actual_frequency_fw(rps);
if ((res[1] - res[0]) >> 10) {
- pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n",
+ pr_err("RC6 residency increased by %lldus while disabled for 1000ms!\n",
(res[1] - res[0]) >> 10);
err = -EINVAL;
goto out_unlock;
}
if (has_power) {
- rc0_power = div64_u64(NSEC_PER_SEC * rc0_power,
+ rc0_power = div64_u64(NSEC_PER_SEC * rc0_sample_energy[1],
ktime_to_ns(dt));
+
if (!rc0_power) {
- pr_err("No power measured while in RC0\n");
+ if (rc0_freq)
+ pr_debug("No power measured while in RC0! GPU Freq: %uMHz in RC0\n",
+ rc0_freq);
+ else
+ pr_err("No power and freq measured while in RC0\n");
err = -EINVAL;
goto out_unlock;
}
@@ -89,9 +107,10 @@ int live_rc6_manual(void *arg)
res[0] = rc6_residency(rc6);
intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL);
dt = ktime_get();
- rc6_power = librapl_energy_uJ();
- msleep(100);
- rc6_power = librapl_energy_uJ() - rc6_power;
+ rc6_sample_energy[0] = librapl_energy_uJ();
+ msleep(sleep_time);
+ rc6_freq = intel_rps_read_actual_frequency_fw(rps);
+ rc6_sample_energy[1] = librapl_energy_uJ() - rc6_sample_energy[0];
dt = ktime_sub(ktime_get(), dt);
res[1] = rc6_residency(rc6);
if (res[1] == res[0]) {
@@ -103,12 +122,24 @@ int live_rc6_manual(void *arg)
}
if (has_power) {
- rc6_power = div64_u64(NSEC_PER_SEC * rc6_power,
+ rc6_power = div64_u64(NSEC_PER_SEC * rc6_sample_energy[1],
ktime_to_ns(dt));
- pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n",
+ pr_info("GPU consumed %lluuW in RC0 and %lluuW in RC6\n",
rc0_power, rc6_power);
+
if (2 * rc6_power > rc0_power) {
- pr_err("GPU leaked energy while in RC6!\n");
+ pr_err("GPU leaked energy while in RC6!\n"
+ "GPU Freq: %uMHz in RC6 and %uMHz in RC0\n"
+ "RC0 energy before & after sleep respectively: %lluuJ %lluuJ\n"
+ "RC6 energy before & after sleep respectively: %lluuJ %lluuJ\n",
+ rc6_freq, rc0_freq, rc0_sample_energy[0], rc0_sample_energy[1],
+ rc6_sample_energy[0], rc6_sample_energy[1]);
+
+ diff = res[1] - res[0];
+ threshold = (9 * NSEC_PER_MSEC * sleep_time) / 10;
+ if (diff < threshold)
+ pr_err("Did not enter RC6 properly, RC6 start residency=%lluns, RC6 end residency=%lluns\n",
+ res[0], res[1]);
err = -EINVAL;
goto out_unlock;
}
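The new threshold encodes a requirement of at least 90% of the sleep interval actually spent in RC6, with the residency delta measured in nanoseconds. A worked instance of the arithmetic, using the values declared above:

	u64 sleep_time = 1000;	/* ms */
	u64 threshold = (9 * NSEC_PER_MSEC * sleep_time) / 10;
	/* = 9 * 1000000 * 1000 / 10 = 900000000 ns, i.e. 900 ms */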
@@ -140,7 +171,7 @@ static const u32 *__live_rc6_ctx(struct intel_context *ce)
}
cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
- if (GRAPHICS_VER(rq->engine->i915) >= 8)
+ if (GRAPHICS_VER(rq->i915) >= 8)
cmd++;
*cs++ = cmd;
@@ -211,7 +242,7 @@ int live_rc6_ctx_wa(void *arg)
i915_reset_engine_count(error, engine);
const u32 *res;
- /* Use a sacrifical context */
+ /* Use a sacrificial context */
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index a9e0a91bc0e0..2cfc23c58e90 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -86,7 +86,9 @@ __igt_reset_stolen(struct intel_gt *gt,
ggtt->vm.insert_page(&ggtt->vm, dma,
ggtt->error_capture.start,
- I915_CACHE_NONE, 0);
+ i915_gem_get_pat_index(gt->i915,
+ I915_CACHE_NONE),
+ 0);
mb();
s = io_mapping_map_wc(&ggtt->iomap,
@@ -127,7 +129,9 @@ __igt_reset_stolen(struct intel_gt *gt,
ggtt->vm.insert_page(&ggtt->vm, dma,
ggtt->error_capture.start,
- I915_CACHE_NONE, 0);
+ i915_gem_get_pat_index(gt->i915,
+ I915_CACHE_NONE),
+ 0);
mb();
s = io_mapping_map_wc(&ggtt->iomap,
@@ -257,11 +261,12 @@ static int igt_atomic_reset(void *arg)
{
struct intel_gt *gt = arg;
const typeof(*igt_atomic_phases) *p;
+ intel_wakeref_t wakeref;
int err = 0;
/* Check that the resets are usable from atomic context */
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
igt_global_reset_lock(gt);
/* Flush any requests before we get started and check basics */
@@ -276,7 +281,7 @@ static int igt_atomic_reset(void *arg)
awake = reset_prepare(gt);
p->critical_section_begin();
- err = __intel_gt_reset(gt, ALL_ENGINES);
+ err = intel_gt_reset_all_engines(gt);
p->critical_section_end();
reset_finish(gt, awake);
@@ -292,7 +297,7 @@ static int igt_atomic_reset(void *arg)
unlock:
igt_global_reset_unlock(gt);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
return err;
}
@@ -303,6 +308,7 @@ static int igt_atomic_engine_reset(void *arg)
const typeof(*igt_atomic_phases) *p;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err = 0;
/* Check that the resets are usable from atomic context */
@@ -313,7 +319,7 @@ static int igt_atomic_engine_reset(void *arg)
if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
igt_global_reset_lock(gt);
/* Flush any requests before we get started and check basics */
@@ -361,7 +367,7 @@ static int igt_atomic_engine_reset(void *arg)
out_unlock:
igt_global_reset_unlock(gt);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 6755bbc4ebda..73bc91c6ea07 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -8,6 +8,7 @@
#include "gem/i915_gem_internal.h"
+#include "i915_reg.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
@@ -21,7 +22,7 @@
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"
-/* Try to isolate the impact of cstates from determing frequency response */
+/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
static void dummy_rps_work(struct work_struct *wrk)
@@ -223,6 +224,7 @@ int live_rps_clock_interval(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct igt_spinner spin;
+ intel_wakeref_t wakeref;
int err = 0;
if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
@@ -235,7 +237,7 @@ int live_rps_clock_interval(void *arg)
saved_work = rps->work.func;
rps->work.func = dummy_rps_work;
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
intel_rps_disable(&gt->rps);
intel_gt_check_clock_frequency(gt);
@@ -299,13 +301,13 @@ int live_rps_clock_interval(void *arg)
for (i = 0; i < 5; i++) {
preempt_disable();
- dt_[i] = ktime_get();
cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
+ dt_[i] = ktime_get();
udelay(1000);
- dt_[i] = ktime_sub(ktime_get(), dt_[i]);
cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
+ dt_[i] = ktime_sub(ktime_get(), dt_[i]);
preempt_enable();
}
@@ -354,7 +356,7 @@ int live_rps_clock_interval(void *arg)
}
intel_rps_enable(&gt->rps);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
igt_spinner_fini(&spin);
@@ -375,6 +377,7 @@ int live_rps_control(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct igt_spinner spin;
+ intel_wakeref_t wakeref;
int err = 0;
/*
@@ -397,7 +400,7 @@ int live_rps_control(void *arg)
saved_work = rps->work.func;
rps->work.func = dummy_rps_work;
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
for_each_engine(engine, gt, id) {
struct i915_request *rq;
ktime_t min_dt, max_dt;
@@ -474,12 +477,13 @@ int live_rps_control(void *arg)
limit, intel_gpu_freq(rps, limit),
min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
- if (limit == rps->min_freq) {
- pr_err("%s: GPU throttled to minimum!\n",
- engine->name);
+ if (limit != rps->max_freq) {
+ u32 throttle = intel_uncore_read(gt->uncore,
+ intel_gt_perf_limit_reasons_reg(gt));
+
+ pr_warn("%s: GPU throttled with reasons 0x%08x\n",
+ engine->name, throttle & GT0_PERF_LIMIT_REASONS_MASK);
show_pstate_limits(rps);
- err = -ENODEV;
- break;
}
if (igt_flush_test(gt->i915)) {
@@ -487,7 +491,7 @@ int live_rps_control(void *arg)
break;
}
}
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
igt_spinner_fini(&spin);
@@ -537,8 +541,8 @@ static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
u64 dc, dt;
- dt = ktime_get();
dc = READ_ONCE(*cntr);
+ dt = ktime_get();
usleep_range(1000 * duration_ms, 2000 * duration_ms);
dc = READ_ONCE(*cntr) - dc;
dt = ktime_get() - dt;
@@ -566,8 +570,8 @@ static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
{
u64 dc, dt;
- dt = ktime_get();
dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
+ dt = ktime_get();
usleep_range(1000 * duration_ms, 2000 * duration_ms);
dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
dt = ktime_get() - dt;
@@ -1022,6 +1026,7 @@ int live_rps_interrupt(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct igt_spinner spin;
+ intel_wakeref_t wakeref;
u32 pm_events;
int err = 0;
@@ -1032,9 +1037,9 @@ int live_rps_interrupt(void *arg)
if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
return 0;
- intel_gt_pm_get(gt);
- pm_events = rps->pm_events;
- intel_gt_pm_put(gt);
+ pm_events = 0;
+ with_intel_gt_pm(gt, wakeref)
+ pm_events = rps->pm_events;
if (!pm_events) {
pr_err("No RPS PM events registered, but RPS is enabled?\n");
return -ENODEV;
@@ -1094,8 +1099,8 @@ static u64 __measure_power(int duration_ms)
{
u64 dE, dt;
- dt = ktime_get();
dE = librapl_energy_uJ();
+ dt = ktime_get();
usleep_range(1000 * duration_ms, 2000 * duration_ms);
dE = librapl_energy_uJ() - dE;
dt = ktime_get() - dt;
@@ -1111,7 +1116,7 @@ static u64 measure_power(struct intel_rps *rps, int *freq)
for (i = 0; i < 5; i++)
x[i] = __measure_power(5);
- *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;
+ *freq = (*freq + read_cagf(rps)) / 2;
/* A simple triangle filter for better result stability */
sort(x, 5, sizeof(*x), cmp_u64, NULL);
@@ -1121,6 +1126,7 @@ static u64 measure_power(struct intel_rps *rps, int *freq)
static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
*freq = rps_set_check(rps, *freq);
+ msleep(100);
return measure_power(rps, freq);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index bd44ce73a504..e61bb0bad12c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -53,7 +53,7 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq)
static int slpc_set_freq(struct intel_gt *gt, u32 freq)
{
int err;
- struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+ struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
err = slpc_set_max_freq(slpc, freq);
if (err) {
@@ -70,6 +70,46 @@ static int slpc_set_freq(struct intel_gt *gt, u32 freq)
return err;
}
+static int slpc_restore_freq(struct intel_guc_slpc *slpc, u32 min, u32 max)
+{
+ int err;
+
+ err = slpc_set_max_freq(slpc, max);
+ if (err) {
+ pr_err("Unable to restore max freq");
+ return err;
+ }
+
+ err = slpc_set_min_freq(slpc, min);
+ if (err) {
+ pr_err("Unable to restore min freq");
+ return err;
+ }
+
+ err = intel_guc_slpc_set_ignore_eff_freq(slpc, false);
+ if (err) {
+ pr_err("Unable to restore efficient freq");
+ return err;
+ }
+
+ return 0;
+}
+
+static u64 slpc_measure_power(struct intel_rps *rps, int *freq)
+{
+ u64 x[5];
+ int i;
+
+ for (i = 0; i < 5; i++)
+ x[i] = __measure_power(5);
+
+ *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;
+
+ /* A simple triangle filter for better result stability */
+ sort(x, 5, sizeof(*x), cmp_u64, NULL);
+ return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
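The five-sample triangle filter above sorts the measurements and then averages only the middle three with weights 1:2:1, so the minimum and maximum samples never contribute. A worked example:

	/* sorted samples {10, 11, 12, 13, 50}:
	 * (11 + 2 * 12 + 13) / 4 = 12, so the outlier 50 is discarded */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);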
+
static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power)
{
int err = 0;
@@ -78,7 +118,7 @@ static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power)
if (err)
return err;
*freq = intel_rps_read_actual_frequency(&gt->rps);
- *power = measure_power(&gt->rps, freq);
+ *power = slpc_measure_power(&gt->rps, freq);
return err;
}
@@ -157,7 +197,7 @@ static int vary_min_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
static int slpc_power(struct intel_gt *gt, struct intel_engine_cs *engine)
{
- struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+ struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
struct {
u64 power;
int freq;
@@ -237,10 +277,11 @@ static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
static int run_test(struct intel_gt *gt, int test_type)
{
- struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+ struct intel_guc_slpc *slpc = &gt_to_guc(gt)->slpc;
struct intel_rps *rps = &gt->rps;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
struct igt_spinner spin;
u32 slpc_min_freq, slpc_max_freq;
int err = 0;
@@ -268,8 +309,7 @@ static int run_test(struct intel_gt *gt, int test_type)
/*
* Set min frequency to RPn so that we can test the whole
- * range of RPn-RP0. This also turns off efficient freq
- * usage and makes results more predictable.
+ * range of RPn-RP0.
*/
err = slpc_set_min_freq(slpc, slpc->min_freq);
if (err) {
@@ -277,8 +317,17 @@ static int run_test(struct intel_gt *gt, int test_type)
return err;
}
+ /*
+ * Turn off efficient frequency so RPn/RP0 ranges are obeyed.
+ */
+ err = intel_guc_slpc_set_ignore_eff_freq(slpc, true);
+ if (err) {
+ pr_err("Unable to turn off efficient freq!");
+ return err;
+ }
+
intel_gt_pm_wait_for_idle(gt);
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
for_each_engine(engine, gt, id) {
struct i915_request *rq;
u32 max_act_freq;
@@ -358,14 +407,13 @@ static int run_test(struct intel_gt *gt, int test_type)
break;
}
- /* Restore min/max frequencies */
- slpc_set_max_freq(slpc, slpc_max_freq);
- slpc_set_min_freq(slpc, slpc_min_freq);
+ /* Restore min/max/efficient frequencies */
+ err = slpc_restore_freq(slpc, slpc_min_freq, slpc_max_freq);
if (igt_flush_test(gt->i915))
err = -EIO;
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
igt_spinner_fini(&spin);
intel_gt_pm_wait_for_idle(gt);
@@ -456,7 +504,7 @@ static int live_slpc_tile_interaction(void *arg)
return -ENOMEM;
for_each_gt(gt, i915, i) {
- threads[i].worker = kthread_create_worker(0, "igt/slpc_parallel:%d", gt->info.id);
+ threads[i].worker = kthread_run_worker(0, "igt/slpc_parallel:%d", gt->info.id);
if (IS_ERR(threads[i].worker)) {
ret = PTR_ERR(threads[i].worker);
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 522d0190509c..fa36cf920bde 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -459,12 +459,12 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
if (IS_ERR(cs))
return PTR_ERR(cs);
- if (GRAPHICS_VER(rq->engine->i915) >= 8) {
+ if (GRAPHICS_VER(rq->i915) >= 8) {
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = addr;
*cs++ = 0;
*cs++ = value;
- } else if (GRAPHICS_VER(rq->engine->i915) >= 4) {
+ } else if (GRAPHICS_VER(rq->i915) >= 4) {
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = 0;
*cs++ = addr;
@@ -825,7 +825,8 @@ static bool cmp_gte(u32 a, u32 b)
return a >= b;
}
-static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
+static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt,
+ struct intel_timeline *tl)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
@@ -834,7 +835,10 @@ static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
if (IS_ERR(obj))
return PTR_ERR(obj);
- w->map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
+ /* keep the same cache settings as timeline */
+ i915_gem_object_set_pat_index(obj, tl->hwsp_ggtt->obj->pat_index);
+ w->map = i915_gem_object_pin_map_unlocked(obj,
+ page_unmask_bits(tl->hwsp_ggtt->obj->mm.mapping));
if (IS_ERR(w->map)) {
i915_gem_object_put(obj);
return PTR_ERR(w->map);
@@ -1004,8 +1008,10 @@ static int live_hwsp_read(void *arg)
if (!tl->has_initial_breadcrumb)
goto out_free;
+ selftest_tl_pin(tl);
+
for (i = 0; i < ARRAY_SIZE(watcher); i++) {
- err = setup_watcher(&watcher[i], gt);
+ err = setup_watcher(&watcher[i], gt, tl);
if (err)
goto out;
}
@@ -1160,6 +1166,8 @@ out:
for (i = 0; i < ARRAY_SIZE(watcher); i++)
cleanup_watcher(&watcher[i]);
+ intel_timeline_unpin(tl);
+
if (igt_flush_test(gt->i915))
err = -EIO;
diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c
new file mode 100644
index 000000000000..a5184f09d1de
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
+#include "gem/i915_gem_region.h"
+
+#include "gen8_engine_cs.h"
+#include "i915_gem_ww.h"
+#include "i915_selftest.h"
+#include "i915_wait_util.h"
+#include "intel_context.h"
+#include "intel_engine_regs.h"
+#include "intel_gpu_commands.h"
+#include "intel_gt.h"
+#include "intel_ring.h"
+
+#include "selftests/igt_flush_test.h"
+#include "selftests/i915_random.h"
+
+static void vma_set_qw(struct i915_vma *vma, u64 addr, u64 val)
+{
+ GEM_BUG_ON(addr < i915_vma_offset(vma));
+ GEM_BUG_ON(addr >= i915_vma_offset(vma) + i915_vma_size(vma) + sizeof(val));
+ memset64(page_mask_bits(vma->obj->mm.mapping) +
+ (addr - i915_vma_offset(vma)), val, 1);
+}
+
+static int
+pte_tlbinv(struct intel_context *ce,
+ struct i915_vma *va,
+ struct i915_vma *vb,
+ u64 align,
+ void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length),
+ u64 length,
+ struct rnd_state *prng)
+{
+ const unsigned int pat_index =
+ i915_gem_get_pat_index(ce->vm->i915, I915_CACHE_NONE);
+ struct drm_i915_gem_object *batch;
+ struct drm_mm_node vb_node;
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ u64 addr;
+ int err;
+ u32 *cs;
+
+ batch = i915_gem_object_create_internal(ce->vm->i915, 4096);
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
+
+ vma = i915_vma_instance(batch, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto out;
+
+ /* Pin va at random but aligned offset after vma */
+ addr = round_up(vma->node.start + vma->node.size, align);
+ /* MI_CONDITIONAL_BATCH_BUFFER_END limits address to 48b */
+ addr = igt_random_offset(prng, addr, min(ce->vm->total, BIT_ULL(48)),
+ va->size, align);
+ err = i915_vma_pin(va, 0, 0, addr | PIN_OFFSET_FIXED | PIN_USER);
+ if (err) {
+ pr_err("Cannot pin at %llx+%llx\n", addr, va->size);
+ goto out;
+ }
+ GEM_BUG_ON(i915_vma_offset(va) != addr);
+ if (vb != va) {
+ vb_node = vb->node;
+ vb->node = va->node; /* overwrites the _same_ PTE */
+ }
+
+ /*
+ * Now choose random dword at the 1st pinned page.
+ *
+ * SZ_64K pages on dg1 require that the whole PT be marked as
+ * containing 64KiB entries. So we make sure that the vma covers the
+ * whole PT, despite being randomly aligned to 64KiB, and restrict
+ * our sampling to the 2MiB PT within which we know we will be
+ * using 64KiB pages.
+ */
+ if (align == SZ_64K)
+ addr = round_up(addr, SZ_2M);
+ addr = igt_random_offset(prng, addr, addr + align, 8, 8);
+
+ if (va != vb)
+ pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n",
+ ce->engine->name, va->obj->mm.region->name ?: "smem",
+ addr, align, va->resource->page_sizes_gtt,
+ va->page_sizes.phys, va->page_sizes.sg,
+ addr & -length, length);
+
+ cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
+ *cs++ = MI_NOOP; /* for later termination */
+ /*
+ * Sample the target to see if we spot the updated backing store.
+ * Gen8 VCS compares the immediate value with the bitwise-AND of two
+ * consecutive DWORDs pointed to by addr; other gens/engines compare
+ * the value with the single DWORD pointed to by addr. Moreover, we
+ * want to exercise DWORD-sized invalidations. The values below were
+ * chosen to fulfil all these requirements.
+ */
+ *cs++ = MI_CONDITIONAL_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
+ *cs++ = 0; /* break if *addr == 0 */
+ *cs++ = lower_32_bits(addr);
+ *cs++ = upper_32_bits(addr);
+ vma_set_qw(va, addr, -1);
+ vma_set_qw(vb, addr, 0);
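	/*
	 * A note on the two canary writes above: va backs the page the
	 * batch samples, so seeding it with ~0 keeps the COND_BBE compare
	 * non-zero and the batch spinning; vb is the replacement backing
	 * store seeded with 0, so once its PTE becomes visible the compare
	 * reads zero and the batch terminates.
	 */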
+
+ /* Keep sampling until we get bored */
+ *cs++ = MI_BATCH_BUFFER_START | BIT(8) | 1;
+ *cs++ = lower_32_bits(i915_vma_offset(vma));
+ *cs++ = upper_32_bits(i915_vma_offset(vma));
+
+ i915_gem_object_flush_map(batch);
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_va;
+ }
+
+ err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
+ if (err) {
+ i915_request_add(rq);
+ goto out_va;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ /*
+ * Short sleep to sanitycheck the batch is spinning before we begin.
+ * FIXME: Why is GSC so slow?
+ */
+ if (ce->engine->class == OTHER_CLASS)
+ msleep(200);
+ else
+ usleep_range(10000, 20000);
+
+ if (va == vb) {
+ if (!i915_request_completed(rq)) {
+ pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",
+ ce->engine->name, va->obj->mm.region->name ?: "smem",
+ addr, align, va->resource->page_sizes_gtt,
+ va->page_sizes.phys, va->page_sizes.sg);
+ err = -EIO;
+ }
+ } else if (!i915_request_completed(rq)) {
+ struct i915_vma_resource vb_res = {
+ .bi.pages = vb->obj->mm.pages,
+ .bi.page_sizes = vb->obj->mm.page_sizes,
+ .start = i915_vma_offset(vb),
+ .vma_size = i915_vma_size(vb)
+ };
+ unsigned int pte_flags = 0;
+
+ /* Flip the PTE between A and B */
+ if (i915_gem_object_is_lmem(vb->obj))
+ pte_flags |= PTE_LM;
+ ce->vm->insert_entries(ce->vm, &vb_res, pat_index, pte_flags);
+
+ /* Flush the PTE update to concurrent HW */
+ tlbinv(ce->vm, addr & -length, length);
+
+ if (wait_for(i915_request_completed(rq), HZ / 2)) {
+ pr_err("%s: Request did not complete; the COND_BBE did not read the updated PTE\n",
+ ce->engine->name);
+ err = -EINVAL;
+ }
+ } else {
+ pr_err("Spinner ended unexpectedly\n");
+ err = -EIO;
+ }
+ i915_request_put(rq);
+
+ cs = page_mask_bits(batch->mm.mapping);
+ *cs = MI_BATCH_BUFFER_END;
+ wmb();
+
+out_va:
+ if (vb != va)
+ vb->node = vb_node;
+ i915_vma_unpin(va);
+ if (i915_vma_unbind_unlocked(va))
+ err = -EIO;
+out:
+ i915_gem_object_put(batch);
+ return err;
+}
+
+static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
+{
+ struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
+ resource_size_t size = SZ_1G;
+
+ /*
+ * Allocating the largest possible page size allows us to test all
+ * page types. To succeed with both allocations, especially in the case
+ * of Small BAR, try to allocate no more than a quarter of mappable memory.
+ */
+ if (mr && size > resource_size(&mr->io) / 4)
+ size = resource_size(&mr->io) / 4;
+
+ return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
+}
+
+static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
+{
+ /*
+ * SZ_64K pages require covering the whole 2M PT (gen8 to tgl/dg1).
+ * While that does not require the whole 2M block to be contiguous,
+ * it is easier to make it so, since we need that for SZ_2M pages.
+ * Since we randomly offset the start of the vma, we need a 4M object
+ * so that there is a 2M range within it that is suitable for SZ_64K PTEs.
+ */
+ return i915_gem_object_create_internal(gt->i915, SZ_4M);
+}
+
+static int
+mem_tlbinv(struct intel_gt *gt,
+ struct drm_i915_gem_object *(*create_fn)(struct intel_gt *),
+ void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length))
+{
+ unsigned int ppgtt_size = RUNTIME_INFO(gt->i915)->ppgtt_size;
+ struct intel_engine_cs *engine;
+ struct drm_i915_gem_object *A, *B;
+ struct i915_ppgtt *ppgtt;
+ struct i915_vma *va, *vb;
+ enum intel_engine_id id;
+ I915_RND_STATE(prng);
+ void *vaddr;
+ int err;
+
+ /*
+ * Check that the TLB invalidate is able to revoke an active
+ * page. We load a page into a spinning COND_BBE loop and then
+ * remap that page to a new physical address. The old address, and
+ * so the loop keeps spinning, is retained in the TLB cache until
+ * we issue an invalidate.
+ */
+
+ A = create_fn(gt);
+ if (IS_ERR(A))
+ return PTR_ERR(A);
+
+ vaddr = i915_gem_object_pin_map_unlocked(A, I915_MAP_WC);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto out_a;
+ }
+
+ B = create_fn(gt);
+ if (IS_ERR(B)) {
+ err = PTR_ERR(B);
+ goto out_a;
+ }
+
+ vaddr = i915_gem_object_pin_map_unlocked(B, I915_MAP_WC);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto out_b;
+ }
+
+ GEM_BUG_ON(A->base.size != B->base.size);
+ if ((A->mm.page_sizes.phys | B->mm.page_sizes.phys) & (A->base.size - 1))
+ pr_warn("Failed to allocate contiguous pages for size %zx\n",
+ A->base.size);
+
+ ppgtt = i915_ppgtt_create(gt, 0);
+ if (IS_ERR(ppgtt)) {
+ err = PTR_ERR(ppgtt);
+ goto out_b;
+ }
+
+ va = i915_vma_instance(A, &ppgtt->vm, NULL);
+ if (IS_ERR(va)) {
+ err = PTR_ERR(va);
+ goto out_vm;
+ }
+
+ vb = i915_vma_instance(B, &ppgtt->vm, NULL);
+ if (IS_ERR(vb)) {
+ err = PTR_ERR(vb);
+ goto out_vm;
+ }
+
+ err = 0;
+ for_each_engine(engine, gt, id) {
+ struct i915_gem_ww_ctx ww;
+ struct intel_context *ce;
+ int bit;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
+ i915_vm_put(ce->vm);
+ ce->vm = i915_vm_get(&ppgtt->vm);
+
+ for_i915_gem_ww(&ww, err, true)
+ err = intel_context_pin_ww(ce, &ww);
+ if (err)
+ goto err_put;
+
+ for_each_set_bit(bit,
+ (unsigned long *)&RUNTIME_INFO(gt->i915)->page_sizes,
+ BITS_PER_TYPE(RUNTIME_INFO(gt->i915)->page_sizes)) {
+ unsigned int len;
+
+ if (BIT_ULL(bit) < i915_vm_obj_min_alignment(va->vm, va->obj))
+ continue;
+
+ /* sanitycheck the semaphore wake up */
+ err = pte_tlbinv(ce, va, va,
+ BIT_ULL(bit),
+ NULL, SZ_4K,
+ &prng);
+ if (err)
+ goto err_unpin;
+
+ for (len = 2; len <= ppgtt_size; len = min(2 * len, ppgtt_size)) {
+ err = pte_tlbinv(ce, va, vb,
+ BIT_ULL(bit),
+ tlbinv,
+ BIT_ULL(len),
+ &prng);
+ if (err)
+ goto err_unpin;
+ if (len == ppgtt_size)
+ break;
+ }
+ }
+err_unpin:
+ intel_context_unpin(ce);
+err_put:
+ intel_context_put(ce);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+out_vm:
+ i915_vm_put(&ppgtt->vm);
+out_b:
+ i915_gem_object_put(B);
+out_a:
+ i915_gem_object_put(A);
+ return err;
+}
+
+static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length)
+{
+ intel_gt_invalidate_tlb_full(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
+}
+
+static int invalidate_full(void *arg)
+{
+ struct intel_gt *gt = arg;
+ int err;
+
+ if (GRAPHICS_VER(gt->i915) < 8)
+ return 0; /* TLB invalidate not implemented */
+
+ err = mem_tlbinv(gt, create_smem, tlbinv_full);
+ if (err == 0)
+ err = mem_tlbinv(gt, create_lmem, tlbinv_full);
+ if (err == -ENODEV || err == -ENXIO)
+ err = 0;
+
+ return err;
+}
+
+int intel_tlb_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(invalidate_full),
+ };
+ struct intel_gt *gt;
+ unsigned int i;
+
+ for_each_gt(gt, i915, i) {
+ int err;
+
+ if (intel_gt_is_wedged(gt))
+ continue;
+
+ err = intel_gt_live_subtests(tests, gt);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/shaders/README b/drivers/gpu/drm/i915/gt/shaders/README
index e7e96d7073c7..22f8dabed434 100644
--- a/drivers/gpu/drm/i915/gt/shaders/README
+++ b/drivers/gpu/drm/i915/gt/shaders/README
@@ -10,7 +10,7 @@ i915/gt/shaders/clear_kernel directory.
The generated .c files should never be modified directly. Instead, any modification
needs to be done on their respective ASM files and the build instructions below
-needes to be followed.
+needs to be followed.
Building
========
@@ -24,7 +24,7 @@ on building.
Please make sure your Mesa tool is compiled with "-Dtools=intel" and
"-Ddri-drivers=i965", and run this script from IGT source root directory"
-The instructions bellow assume:
+The instructions below assume:
* IGT gpu tools source code is located on your home directory (~) as ~/igt
* Mesa source code is located on your home directory (~) as ~/mesa
and built under the ~/mesa/build directory
@@ -43,4 +43,4 @@ igt $ ./scripts/generate_clear_kernel.sh -g ivb \
~/igt/lib/i915/shaders/clear_kernel/hsw.asm
~ $ cd ~/igt
igt $ ./scripts/generate_clear_kernel.sh -g hsw \
- -m ~/mesa/build/src/intel/tools/i965_asm
\ No newline at end of file
+ -m ~/mesa/build/src/intel/tools/i965_asm
diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm
index 5fdf384bb621..6c0c89daf96c 100644
--- a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm
+++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm
@@ -24,7 +24,7 @@ mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
* DW 1.4 - Rsvd (intended for context ID)
* DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount
* DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count)
- * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count)
+ * DW 1.7 - Rsvd MBZ (intended for Total Thread Count)
*
* Binding Table
*
diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm
index 97c7ac9e3854..27c28e63d6cc 100644
--- a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm
+++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm
@@ -24,7 +24,7 @@ mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
* DW 1.4 - Rsvd (intended for context ID)
* DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount
* DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count)
- * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count)
+ * DW 1.7 - Rsvd MBZ (intended for Total Thread Count)
*
* Binding Table
*
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
index 449c9ed44382..365c4b8b04f4 100644
--- a/drivers/gpu/drm/i915/gt/shmem_utils.c
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -7,6 +7,7 @@
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/shmem_fs.h>
+#include <linux/vmalloc.h>
#include "i915_drv.h"
#include "gem/i915_gem_object.h"
@@ -33,18 +34,17 @@ struct file *shmem_create_from_data(const char *name, void *data, size_t len)
struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
enum i915_map_type map_type;
struct file *file;
void *ptr;
if (i915_gem_object_is_shmem(obj)) {
file = obj->base.filp;
- atomic_long_inc(&file->f_count);
+ get_file(file);
return file;
}
- map_type = i915_coherent_map_type(i915, obj, true);
+ map_type = i915_gem_object_is_lmem(obj) ? I915_MAP_WC : I915_MAP_WB;
ptr = i915_gem_object_pin_map_unlocked(obj, map_type);
if (IS_ERR(ptr))
return ERR_CAST(ptr);
@@ -108,7 +108,7 @@ static int __shmem_rw(struct file *file, loff_t off,
if (IS_ERR(page))
return PTR_ERR(page);
- vaddr = kmap(page);
+ vaddr = kmap_local_page(page);
if (write) {
memcpy(vaddr + offset_in_page(off), ptr, this);
set_page_dirty(page);
@@ -116,7 +116,7 @@ static int __shmem_rw(struct file *file, loff_t off,
memcpy(ptr, vaddr + offset_in_page(off), this);
}
mark_page_accessed(page);
- kunmap(page);
+ kunmap_local(vaddr);
put_page(page);
len -= this;
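The kmap() to kmap_local_page() conversion above switches to CPU-local, stack-nested mappings; note that kunmap_local() takes the mapped address rather than the page. A minimal sketch of the read side, mirroring the code above:

	void *vaddr;

	vaddr = kmap_local_page(page);		/* CPU-local mapping */
	memcpy(ptr, vaddr + offset_in_page(off), this);
	kunmap_local(vaddr);			/* pass the vaddr, not the page */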
@@ -143,11 +143,11 @@ int shmem_read_to_iosys_map(struct file *file, loff_t off,
if (IS_ERR(page))
return PTR_ERR(page);
- vaddr = kmap(page);
+ vaddr = kmap_local_page(page);
iosys_map_memcpy_to(map, map_off, vaddr + offset_in_page(off),
this);
mark_page_accessed(page);
- kunmap(page);
+ kunmap_local(vaddr);
put_page(page);
len -= this;
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
index f2d9858d827c..4a81bc396b21 100644
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -7,6 +7,7 @@
#include <linux/sysfs.h>
#include "i915_drv.h"
+#include "i915_timer_util.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "sysfs_engines.h"
@@ -24,37 +25,37 @@ static struct intel_engine_cs *kobj_to_engine(struct kobject *kobj)
static ssize_t
name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
- return sprintf(buf, "%s\n", kobj_to_engine(kobj)->name);
+ return sysfs_emit(buf, "%s\n", kobj_to_engine(kobj)->name);
}
-static struct kobj_attribute name_attr =
+static const struct kobj_attribute name_attr =
__ATTR(name, 0444, name_show, NULL);
static ssize_t
class_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_class);
+ return sysfs_emit(buf, "%d\n", kobj_to_engine(kobj)->uabi_class);
}
-static struct kobj_attribute class_attr =
+static const struct kobj_attribute class_attr =
__ATTR(class, 0444, class_show, NULL);
static ssize_t
inst_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_instance);
+ return sysfs_emit(buf, "%d\n", kobj_to_engine(kobj)->uabi_instance);
}
-static struct kobj_attribute inst_attr =
+static const struct kobj_attribute inst_attr =
__ATTR(instance, 0444, inst_show, NULL);
static ssize_t
mmio_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
- return sprintf(buf, "0x%x\n", kobj_to_engine(kobj)->mmio_base);
+ return sysfs_emit(buf, "0x%x\n", kobj_to_engine(kobj)->mmio_base);
}
-static struct kobj_attribute mmio_attr =
+static const struct kobj_attribute mmio_attr =
__ATTR(mmio_base, 0444, mmio_show, NULL);
static const char * const vcs_caps[] = {
@@ -107,11 +108,9 @@ __caps_show(struct intel_engine_cs *engine,
for_each_set_bit(n, &caps, show_unknown ? BITS_PER_LONG : count) {
if (n >= count || !repr[n]) {
if (GEM_WARN_ON(show_unknown))
- len += snprintf(buf + len, PAGE_SIZE - len,
- "[%x] ", n);
+ len += sysfs_emit_at(buf, len, "[%x] ", n);
} else {
- len += snprintf(buf + len, PAGE_SIZE - len,
- "%s ", repr[n]);
+ len += sysfs_emit_at(buf, len, "%s ", repr[n]);
}
if (GEM_WARN_ON(len >= PAGE_SIZE))
break;
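The sprintf()/snprintf() to sysfs_emit()/sysfs_emit_at() conversions in this file move the PAGE_SIZE bounds handling into the sysfs helpers, which also warn when the buffer is not the page-aligned sysfs buffer. A minimal show-callback sketch (example_show is hypothetical):

	static ssize_t example_show(struct kobject *kobj,
				    struct kobj_attribute *attr, char *buf)
	{
		/* output is bounded to the PAGE_SIZE sysfs buffer */
		return sysfs_emit(buf, "%d\n", 42);
	}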
@@ -127,7 +126,7 @@ caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
return __caps_show(engine, engine->uabi_capabilities, buf, true);
}
-static struct kobj_attribute caps_attr =
+static const struct kobj_attribute caps_attr =
__ATTR(capabilities, 0444, caps_show, NULL);
static ssize_t
@@ -136,7 +135,7 @@ all_caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
return __caps_show(kobj_to_engine(kobj), -1, buf, false);
}
-static struct kobj_attribute all_caps_attr =
+static const struct kobj_attribute all_caps_attr =
__ATTR(known_capabilities, 0444, all_caps_show, NULL);
static ssize_t
@@ -182,10 +181,10 @@ max_spin_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- return sprintf(buf, "%lu\n", engine->props.max_busywait_duration_ns);
+ return sysfs_emit(buf, "%lu\n", engine->props.max_busywait_duration_ns);
}
-static struct kobj_attribute max_spin_attr =
+static const struct kobj_attribute max_spin_attr =
__ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store);
static ssize_t
@@ -193,10 +192,10 @@ max_spin_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- return sprintf(buf, "%lu\n", engine->defaults.max_busywait_duration_ns);
+ return sysfs_emit(buf, "%lu\n", engine->defaults.max_busywait_duration_ns);
}
-static struct kobj_attribute max_spin_def =
+static const struct kobj_attribute max_spin_def =
__ATTR(max_busywait_duration_ns, 0444, max_spin_default, NULL);
static ssize_t
@@ -236,10 +235,10 @@ timeslice_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- return sprintf(buf, "%lu\n", engine->props.timeslice_duration_ms);
+ return sysfs_emit(buf, "%lu\n", engine->props.timeslice_duration_ms);
}
-static struct kobj_attribute timeslice_duration_attr =
+static const struct kobj_attribute timeslice_duration_attr =
__ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store);
static ssize_t
@@ -247,10 +246,10 @@ timeslice_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- return sprintf(buf, "%lu\n", engine->defaults.timeslice_duration_ms);
+ return sysfs_emit(buf, "%lu\n", engine->defaults.timeslice_duration_ms);
}
-static struct kobj_attribute timeslice_duration_def =
+static const struct kobj_attribute timeslice_duration_def =
__ATTR(timeslice_duration_ms, 0444, timeslice_default, NULL);
static ssize_t
@@ -287,10 +286,10 @@ stop_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- return sprintf(buf, "%lu\n", engine->props.stop_timeout_ms);
+ return sysfs_emit(buf, "%lu\n", engine->props.stop_timeout_ms);
}
-static struct kobj_attribute stop_timeout_attr =
+static const struct kobj_attribute stop_timeout_attr =
__ATTR(stop_timeout_ms, 0644, stop_show, stop_store);
static ssize_t
@@ -298,10 +297,10 @@ stop_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- return sprintf(buf, "%lu\n", engine->defaults.stop_timeout_ms);
+ return sysfs_emit(buf, "%lu\n", engine->defaults.stop_timeout_ms);
}
-static struct kobj_attribute stop_timeout_def =
+static const struct kobj_attribute stop_timeout_def =
__ATTR(stop_timeout_ms, 0444, stop_default, NULL);
static ssize_t
@@ -343,10 +342,10 @@ preempt_timeout_show(struct kobject *kobj, struct kobj_attribute *attr,
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- return sprintf(buf, "%lu\n", engine->props.preempt_timeout_ms);
+ return sysfs_emit(buf, "%lu\n", engine->props.preempt_timeout_ms);
}
-static struct kobj_attribute preempt_timeout_attr =
+static const struct kobj_attribute preempt_timeout_attr =
__ATTR(preempt_timeout_ms, 0644, preempt_timeout_show, preempt_timeout_store);
static ssize_t
@@ -355,10 +354,10 @@ preempt_timeout_default(struct kobject *kobj, struct kobj_attribute *attr,
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- return sprintf(buf, "%lu\n", engine->defaults.preempt_timeout_ms);
+ return sysfs_emit(buf, "%lu\n", engine->defaults.preempt_timeout_ms);
}
-static struct kobj_attribute preempt_timeout_def =
+static const struct kobj_attribute preempt_timeout_def =
__ATTR(preempt_timeout_ms, 0444, preempt_timeout_default, NULL);
static ssize_t
@@ -399,10 +398,10 @@ heartbeat_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- return sprintf(buf, "%lu\n", engine->props.heartbeat_interval_ms);
+ return sysfs_emit(buf, "%lu\n", engine->props.heartbeat_interval_ms);
}
-static struct kobj_attribute heartbeat_interval_attr =
+static const struct kobj_attribute heartbeat_interval_attr =
__ATTR(heartbeat_interval_ms, 0644, heartbeat_show, heartbeat_store);
static ssize_t
@@ -410,10 +409,10 @@ heartbeat_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- return sprintf(buf, "%lu\n", engine->defaults.heartbeat_interval_ms);
+ return sysfs_emit(buf, "%lu\n", engine->defaults.heartbeat_interval_ms);
}
-static struct kobj_attribute heartbeat_interval_def =
+static const struct kobj_attribute heartbeat_interval_def =
__ATTR(heartbeat_interval_ms, 0444, heartbeat_default, NULL);
static void kobj_engine_release(struct kobject *kobj)
@@ -421,7 +420,7 @@ static void kobj_engine_release(struct kobject *kobj)
kfree(kobj);
}
-static struct kobj_type kobj_engine_type = {
+static const struct kobj_type kobj_engine_type = {
.release = kobj_engine_release,
.sysfs_ops = &kobj_sysfs_ops
};
@@ -449,7 +448,7 @@ kobj_engine(struct kobject *dir, struct intel_engine_cs *engine)
static void add_defaults(struct kobj_engine *parent)
{
- static const struct attribute *files[] = {
+ static const struct attribute * const files[] = {
&max_spin_def.attr,
&stop_timeout_def.attr,
#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
@@ -485,7 +484,7 @@ static void add_defaults(struct kobj_engine *parent)
void intel_engines_add_sysfs(struct drm_i915_private *i915)
{
- static const struct attribute *files[] = {
+ static const struct attribute * const files[] = {
&name_attr.attr,
&class_attr.attr,
&inst_attr.attr,
@@ -532,9 +531,8 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
err_object:
kobject_put(kobj);
err_engine:
- dev_err(kdev, "Failed to add sysfs engine '%s'\n",
- engine->name);
- break;
+ dev_warn(kdev, "Failed to add sysfs engine '%s'\n",
+ engine->name);
}
}
}
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index f359bef046e0..33f253410d0c 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -138,6 +138,8 @@ enum intel_guc_action {
INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
INTEL_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
+ INTEL_GUC_ACTION_TLB_INVALIDATION = 0x7000,
+ INTEL_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001,
INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002,
INTEL_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
@@ -181,4 +183,35 @@ enum intel_guc_state_capture_event_status {
#define INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK 0x000000FF
+#define INTEL_GUC_TLB_INVAL_TYPE_MASK REG_GENMASK(7, 0)
+#define INTEL_GUC_TLB_INVAL_MODE_MASK REG_GENMASK(11, 8)
+#define INTEL_GUC_TLB_INVAL_FLUSH_CACHE REG_BIT(31)
+
+enum intel_guc_tlb_invalidation_type {
+ INTEL_GUC_TLB_INVAL_ENGINES = 0x0,
+ INTEL_GUC_TLB_INVAL_GUC = 0x3,
+};
+
+/*
+ * 0: Heavy mode of invalidation:
+ * The pipeline of the engine(s) targeted by the invalidation is blocked, and
+ * all in-flight transactions are guaranteed to be Globally Observed before
+ * the TLB invalidation completes.
+ * 1: Lite mode of invalidation:
+ * The TLBs of the targeted engine(s) are invalidated immediately.
+ * In-flight transactions are NOT guaranteed to be Globally Observed before
+ * the TLB invalidation completes.
+ * Lite mode may only be used when software can guarantee that the address
+ * translations remain invariant for the in-flight transactions across the
+ * TLB invalidation, i.e. when the invalidation is only meant to clear out
+ * stale cached translations that are no longer in use. Lite mode is much
+ * faster than heavy mode, as it does not wait for the in-flight
+ * transactions to be Globally Observed.
+ */
+enum intel_guc_tlb_inval_mode {
+ INTEL_GUC_TLB_INVAL_MODE_HEAVY = 0x0,
+ INTEL_GUC_TLB_INVAL_MODE_LITE = 0x1,
+};
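For illustration, a minimal sketch of how a host-to-GuC TLB invalidation request could be packed from the masks above; guc_send_action() and the seqno bookkeeping are placeholders for this example, not symbols introduced by this patch:

static int guc_send_engines_tlb_inval(struct intel_guc *guc, u32 seqno)
{
	/* dw0: action, dw1: seqno echoed back in the G2H done notification,
	 * dw2: invalidation descriptor built from the ABI masks above
	 */
	u32 action[] = {
		INTEL_GUC_ACTION_TLB_INVALIDATION,
		seqno,
		REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_TYPE_MASK,
			       INTEL_GUC_TLB_INVAL_ENGINES) |
		REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_MODE_MASK,
			       INTEL_GUC_TLB_INVAL_MODE_HEAVY) |
		INTEL_GUC_TLB_INVAL_FLUSH_CACHE,
	};

	return guc_send_action(guc, action, ARRAY_SIZE(action)); /* placeholder */
}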
+
#endif /* _ABI_GUC_ACTIONS_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
index 811add10c30d..6de87ae5669e 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
@@ -207,6 +207,32 @@ struct slpc_shared_data {
u8 reserved_mode_definition[4096];
} __packed;
+struct slpc_context_frequency_request {
+ u32 frequency_request:16;
+ u32 reserved:12;
+ u32 is_compute:1;
+ u32 ignore_busyness:1;
+ u32 is_minimum:1;
+ u32 is_predefined:1;
+} __packed;
+
+#define SLPC_CTX_FREQ_REQ_IS_COMPUTE REG_BIT(28)
+
+struct slpc_optimized_strategies {
+ u32 compute:1;
+ u32 async_flip:1;
+ u32 media:1;
+ u32 vsync_flip:1;
+ u32 reserved:28;
+} __packed;
+
+#define SLPC_OPTIMIZED_STRATEGY_COMPUTE REG_BIT(0)
+
+enum slpc_power_profiles {
+ SLPC_POWER_PROFILES_BASE = 0x0,
+ SLPC_POWER_PROFILES_POWER_SAVING = 0x1
+};
+
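As a cross-check on the layout above, note that is_compute lands at bit 28 of the packed dword (16 bits of frequency_request plus 12 reserved bits), which is exactly what SLPC_CTX_FREQ_REQ_IS_COMPUTE encodes. A hypothetical sketch of building the dword directly:

static u32 slpc_ctx_freq_req(u16 freq, bool is_compute)
{
	u32 req = freq;		/* frequency_request, bits 15:0 */

	if (is_compute)
		req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;	/* bit 28 */

	return req;
}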
/**
* DOC: SLPC H2G MESSAGE FORMAT
*
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
index 28b8387f97b7..f7d70db16d76 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
@@ -167,25 +167,4 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
* - **flags**, holds various bits to control message handling
*/
-/*
- * Definition of the command transport message header (DW0)
- *
- * bit[4..0] message len (in dwords)
- * bit[7..5] reserved
- * bit[8] response (G2H only)
- * bit[8] write fence to desc (H2G only)
- * bit[9] write status to H2G buff (H2G only)
- * bit[10] send status back via G2H (H2G only)
- * bit[15..11] reserved
- * bit[31..16] action code
- */
-#define GUC_CT_MSG_LEN_SHIFT 0
-#define GUC_CT_MSG_LEN_MASK 0x1F
-#define GUC_CT_MSG_IS_RESPONSE (1 << 8)
-#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8)
-#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9)
-#define GUC_CT_MSG_SEND_STATUS (1 << 10)
-#define GUC_CT_MSG_ACTION_SHIFT 16
-#define GUC_CT_MSG_ACTION_MASK 0xFFFF
-
#endif /* _ABI_GUC_COMMUNICATION_CTB_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
index 8085fb181274..00d6402333f8 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
@@ -21,6 +21,9 @@ enum intel_guc_load_status {
INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH = 0x02,
INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH = 0x03,
INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE = 0x04,
+ INTEL_GUC_LOAD_STATUS_HWCONFIG_START = 0x05,
+ INTEL_GUC_LOAD_STATUS_HWCONFIG_DONE = 0x06,
+ INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR = 0x07,
INTEL_GUC_LOAD_STATUS_GDT_DONE = 0x10,
INTEL_GUC_LOAD_STATUS_IDT_DONE = 0x20,
INTEL_GUC_LOAD_STATUS_LAPIC_DONE = 0x30,
@@ -33,9 +36,25 @@ enum intel_guc_load_status {
INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START,
INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73,
INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74,
+ INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR = 0x75,
INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END,
INTEL_GUC_LOAD_STATUS_READY = 0xF0,
};
+enum intel_bootrom_load_status {
+ INTEL_BOOTROM_STATUS_NO_KEY_FOUND = 0x13,
+ INTEL_BOOTROM_STATUS_AES_PROD_KEY_FOUND = 0x1A,
+ INTEL_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE = 0x2B,
+ INTEL_BOOTROM_STATUS_RSA_FAILED = 0x50,
+ INTEL_BOOTROM_STATUS_PAVPC_FAILED = 0x73,
+ INTEL_BOOTROM_STATUS_WOPCM_FAILED = 0x74,
+ INTEL_BOOTROM_STATUS_LOADLOC_FAILED = 0x75,
+ INTEL_BOOTROM_STATUS_JUMP_PASSED = 0x76,
+ INTEL_BOOTROM_STATUS_JUMP_FAILED = 0x77,
+ INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED = 0x79,
+ INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT = 0x7A,
+ INTEL_BOOTROM_STATUS_EXCEPTION = 0x7E,
+};
+
#endif /* _ABI_GUC_ERRORS_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
index 58012edd4eb0..0c709e6c15be 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
@@ -29,9 +29,9 @@
*/
#define GUC_KLV_LEN_MIN 1u
-#define GUC_KLV_0_KEY (0xffff << 16)
-#define GUC_KLV_0_LEN (0xffff << 0)
-#define GUC_KLV_n_VALUE (0xffffffff << 0)
+#define GUC_KLV_0_KEY (0xffffu << 16)
+#define GUC_KLV_0_LEN (0xffffu << 0)
+#define GUC_KLV_n_VALUE (0xffffffffu << 0)
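To make the KLV layout concrete, here is a minimal sketch of packing an entry with a single value dword; FIELD_PREP() comes from <linux/bitfield.h>, and the helper itself is illustrative, not part of this patch:

/* KLV[0] = key (31:16) | len in dwords (15:0); KLV[1..n] = value dwords */
static u32 guc_klv_pack_u32(u32 *klv, u16 key, u32 value)
{
	klv[0] = FIELD_PREP(GUC_KLV_0_KEY, key) |
		 FIELD_PREP(GUC_KLV_0_LEN, 1);
	klv[1] = FIELD_PREP(GUC_KLV_n_VALUE, value);

	return 2;	/* total dwords written */
}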
/**
* DOC: GuC Self Config KLVs
@@ -101,4 +101,13 @@ enum {
GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5,
};
+/*
+ * Workaround keys:
+ */
+enum {
+ GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE = 0x9001,
+ GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002,
+ GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE = 0x9006,
+};
+
#endif /* _ABI_GUC_KLVS_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h
index 7d5ba4d97d70..98eb4f46572b 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h
@@ -24,6 +24,7 @@
* | | 30:28 | **TYPE** - message type |
* | | | - _`GUC_HXG_TYPE_REQUEST` = 0 |
* | | | - _`GUC_HXG_TYPE_EVENT` = 1 |
+ * | | | - _`GUC_HXG_TYPE_FAST_REQUEST` = 2 |
* | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 |
* | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 |
* | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 |
@@ -46,6 +47,7 @@
#define GUC_HXG_MSG_0_TYPE (0x7 << 28)
#define GUC_HXG_TYPE_REQUEST 0u
#define GUC_HXG_TYPE_EVENT 1u
+#define GUC_HXG_TYPE_FAST_REQUEST 2u
#define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u
#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u
#define GUC_HXG_TYPE_RESPONSE_FAILURE 6u
@@ -90,6 +92,34 @@
#define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD
/**
+ * DOC: HXG Fast Request
+ *
+ * The `HXG Fast Request`_ message should be used to initiate asynchronous
+ * activity for which confirmation or return data is not expected.
+ *
+ * If confirmation is required then `HXG Request`_ shall be used instead.
+ *
+ * The recipient of this message may only use the `HXG Failure`_ message if
+ * it was unable to accept this request (e.g. due to invalid data).
+ *
+ * The format of the `HXG Fast Request`_ message is the same as that of the
+ * `HXG Request`_ message, except for @TYPE.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN - see `HXG Message`_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = `GUC_HXG_TYPE_FAST_REQUEST`_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 - see `HXG Request`_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION - see `HXG Request`_ |
+ * +---+-------+--------------------------------------------------------------+
+ * |...| | DATAn - see `HXG Request`_ |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
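For illustration, DW0 of a fast request could be packed from the GUC_HXG_* masks in this file as follows (a sketch; remember that DATA0 is only 12 bits wide):

static u32 hxg_fast_request_dw0(u16 action, u16 data0)
{
	return FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
	       FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) |
	       FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, data0) |
	       FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, action);
}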
+/**
* DOC: HXG Event
*
* The `HXG Event`_ message should be used to initiate asynchronous activity
diff --git a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h
index 9d589c28f40f..803c0379d97d 100644
--- a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h
@@ -12,7 +12,7 @@
struct intel_guc;
struct file;
-/**
+/*
* struct __guc_capture_bufstate
*
* Book-keeping structure used to track read and write pointers
@@ -26,7 +26,7 @@ struct __guc_capture_bufstate {
u32 wr;
};
-/**
+/*
* struct __guc_capture_parsed_output - extracted error capture node
*
* A single unit of extracted error-capture output data grouped together
@@ -58,7 +58,7 @@ struct __guc_capture_parsed_output {
#define GCAP_PARSED_REGLIST_INDEX_ENGINST BIT(GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE)
};
-/**
+/*
* struct guc_debug_capture_list_header / struct guc_debug_capture_list
*
* As part of ADS registration, these header structures (followed by
@@ -76,12 +76,12 @@ struct guc_debug_capture_list {
struct guc_mmio_reg regs[];
} __packed;
-/**
+/*
* struct __guc_mmio_reg_descr / struct __guc_mmio_reg_descr_group
*
* intel_guc_capture module uses these structures to maintain static
* tables (per unique platform) that consists of lists of registers
- * (offsets, names, flags,...) that are used at the ADS regisration
+ * (offsets, names, flags,...) that are used at the ADS registration
* time as well as during runtime processing and reporting of error-
* capture states generated by GuC just prior to engine reset events.
*/
@@ -101,7 +101,7 @@ struct __guc_mmio_reg_descr_group {
struct __guc_mmio_reg_descr *extlist; /* only used for steered registers */
};
-/**
+/*
* struct guc_state_capture_header_t / struct guc_state_capture_t /
* guc_state_capture_group_header_t / guc_state_capture_group_t
*
@@ -148,7 +148,7 @@ struct guc_state_capture_group_t {
struct guc_state_capture_t capture_entries[];
} __packed;
-/**
+/*
* struct __guc_capture_ads_cache
*
* A structure to cache register lists that were populated and registered
@@ -187,6 +187,10 @@ struct intel_guc_state_capture {
struct __guc_capture_ads_cache ads_cache[GUC_CAPTURE_LIST_INDEX_MAX]
[GUC_CAPTURE_LIST_TYPE_MAX]
[GUC_MAX_ENGINE_CLASSES];
+
+ /**
+ * @ads_null_cache: ADS null cache.
+ */
void *ads_null_cache;
/**
@@ -196,12 +200,16 @@ struct intel_guc_state_capture {
* dynamically allocate new nodes when receiving the G2H notification
* because the event handlers for all G2H event-processing is called
* by the ct processing worker queue and when that queue is being
- * processed, there is no absoluate guarantee that we are not in the
+ * processed, there is no absolute guarantee that we are not in the
* midst of a GT reset operation (which doesn't allow allocations).
*/
struct list_head cachelist;
#define PREALLOC_NODES_MAX_COUNT (3 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS)
#define PREALLOC_NODES_DEFAULT_NUMREGS 64
+
+ /**
+ * @max_mmio_per_node: Max MMIO per node.
+ */
int max_mmio_per_node;
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h
new file mode 100644
index 000000000000..6d009a905269
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _INTEL_GSC_BINARY_HEADERS_H_
+#define _INTEL_GSC_BINARY_HEADERS_H_
+
+#include <linux/types.h>
+
+struct intel_gsc_version {
+ u16 major;
+ u16 minor;
+ u16 hotfix;
+ u16 build;
+} __packed;
+
+struct intel_gsc_partition {
+ u32 offset;
+ u32 size;
+} __packed;
+
+struct intel_gsc_layout_pointers {
+ u8 rom_bypass_vector[16];
+
+ /* size of pointers layout not including ROM bypass vector */
+ u16 size;
+
+ /*
+ * bit 0: backup copy of layout pointers exists
+ * bits 1-15: reserved
+ */
+ u8 flags;
+
+ u8 reserved;
+
+ u32 crc32;
+
+ struct intel_gsc_partition datap;
+ struct intel_gsc_partition boot1;
+ struct intel_gsc_partition boot2;
+ struct intel_gsc_partition boot3;
+ struct intel_gsc_partition boot4;
+ struct intel_gsc_partition boot5;
+ struct intel_gsc_partition temp_pages;
+} __packed;
+
+/* Boot partition structures */
+struct intel_gsc_bpdt_header {
+ u32 signature;
+#define INTEL_GSC_BPDT_HEADER_SIGNATURE 0x000055AA
+
+ u16 descriptor_count; /* num of entries after the header */
+
+ u8 version;
+ u8 configuration;
+
+ u32 crc32;
+
+ u32 build_version;
+ struct intel_gsc_version tool_version;
+} __packed;
+
+struct intel_gsc_bpdt_entry {
+ /*
+ * Bits 0-15: BPDT entry type
+ * Bits 16-17: reserved
+ * Bit 18: code sub-partition
+ * Bits 19-31: reserved
+ */
+ u32 type;
+#define INTEL_GSC_BPDT_ENTRY_TYPE_MASK GENMASK(15, 0)
+#define INTEL_GSC_BPDT_ENTRY_TYPE_GSC_RBE 0x1
+
+ u32 sub_partition_offset; /* from the base of the BPDT header */
+ u32 sub_partition_size;
+} __packed;
+
+/* Code partition directory (CPD) structures */
+struct intel_gsc_cpd_header_v2 {
+ u32 header_marker;
+#define INTEL_GSC_CPD_HEADER_MARKER 0x44504324
+
+ u32 num_of_entries;
+ u8 header_version;
+ u8 entry_version;
+ u8 header_length; /* in bytes */
+ u8 flags;
+ u32 partition_name;
+ u32 crc32;
+} __packed;
+
+struct intel_gsc_cpd_entry {
+ u8 name[12];
+
+ /*
+ * Bits 0-24: offset from the beginning of the code partition
+ * Bit 25: huffman compressed
+ * Bits 26-31: reserved
+ */
+ u32 offset;
+#define INTEL_GSC_CPD_ENTRY_OFFSET_MASK GENMASK(24, 0)
+#define INTEL_GSC_CPD_ENTRY_HUFFMAN_COMP BIT(25)
+
+ /*
+ * Module/Item length, in bytes. For Huffman-compressed modules, this
+ * refers to the uncompressed size. For software-compressed modules,
+ * this refers to the compressed size.
+ */
+ u32 length;
+
+ u8 reserved[4];
+} __packed;
+
+struct intel_gsc_manifest_header {
+ u32 header_type; /* 0x4 for manifest type */
+ u32 header_length; /* in dwords */
+ u32 header_version;
+ u32 flags;
+ u32 vendor;
+ u32 date;
+ u32 size; /* In dwords, size of entire manifest (header + extensions) */
+ u32 header_id;
+ u32 internal_data;
+ struct intel_gsc_version fw_version;
+ u32 security_version;
+ struct intel_gsc_version meu_kit_version;
+ u32 meu_manifest_version;
+ u8 general_data[4];
+ u8 reserved3[56];
+ u32 modulus_size; /* in dwords */
+ u32 exponent_size; /* in dwords */
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
index e73d4440c5e8..d550eb6edfb8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
@@ -3,31 +3,261 @@
* Copyright © 2022 Intel Corporation
*/
+#include "gem/i915_gem_lmem.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gt_print.h"
#include "gt/intel_ring.h"
+#include "intel_gsc_binary_headers.h"
#include "intel_gsc_fw.h"
-
-#define GSC_FW_STATUS_REG _MMIO(0x116C40)
-#define GSC_FW_CURRENT_STATE REG_GENMASK(3, 0)
-#define GSC_FW_CURRENT_STATE_RESET 0
-#define GSC_FW_INIT_COMPLETE_BIT REG_BIT(9)
+#include "intel_gsc_uc_heci_cmd_submit.h"
+#include "i915_reg.h"
static bool gsc_is_in_reset(struct intel_uncore *uncore)
{
- u32 fw_status = intel_uncore_read(uncore, GSC_FW_STATUS_REG);
+ u32 fw_status = intel_uncore_read(uncore, HECI_FWSTS(MTL_GSC_HECI1_BASE, 1));
+
+ return REG_FIELD_GET(HECI1_FWSTS1_CURRENT_STATE, fw_status) ==
+ HECI1_FWSTS1_CURRENT_STATE_RESET;
+}
+
+static u32 gsc_uc_get_fw_status(struct intel_uncore *uncore, bool needs_wakeref)
+{
+ intel_wakeref_t wakeref;
+ u32 fw_status = 0;
+
+ if (needs_wakeref)
+ wakeref = intel_runtime_pm_get(uncore->rpm);
+
+ fw_status = intel_uncore_read(uncore, HECI_FWSTS(MTL_GSC_HECI1_BASE, 1));
+
+ if (needs_wakeref)
+ intel_runtime_pm_put(uncore->rpm, wakeref);
+ return fw_status;
+}
+
+bool intel_gsc_uc_fw_proxy_init_done(struct intel_gsc_uc *gsc, bool needs_wakeref)
+{
+ return REG_FIELD_GET(HECI1_FWSTS1_CURRENT_STATE,
+ gsc_uc_get_fw_status(gsc_uc_to_gt(gsc)->uncore,
+ needs_wakeref)) ==
+ HECI1_FWSTS1_PROXY_STATE_NORMAL;
+}
+
+int intel_gsc_uc_fw_proxy_get_status(struct intel_gsc_uc *gsc)
+{
+ if (!IS_ENABLED(CONFIG_INTEL_MEI_GSC_PROXY))
+ return -ENODEV;
+ if (!intel_uc_fw_is_loadable(&gsc->fw))
+ return -ENODEV;
+ if (__intel_uc_fw_status(&gsc->fw) == INTEL_UC_FIRMWARE_LOAD_FAIL)
+ return -ENOLINK;
+ if (!intel_gsc_uc_fw_proxy_init_done(gsc, true))
+ return -EAGAIN;
- return REG_FIELD_GET(GSC_FW_CURRENT_STATE, fw_status) ==
- GSC_FW_CURRENT_STATE_RESET;
+ return 0;
}
bool intel_gsc_uc_fw_init_done(struct intel_gsc_uc *gsc)
{
- struct intel_uncore *uncore = gsc_uc_to_gt(gsc)->uncore;
- u32 fw_status = intel_uncore_read(uncore, GSC_FW_STATUS_REG);
+ return gsc_uc_get_fw_status(gsc_uc_to_gt(gsc)->uncore, false) &
+ HECI1_FWSTS1_INIT_COMPLETE;
+}
+
+static inline u32 cpd_entry_offset(const struct intel_gsc_cpd_entry *entry)
+{
+ return entry->offset & INTEL_GSC_CPD_ENTRY_OFFSET_MASK;
+}
+
+int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, size_t size)
+{
+ struct intel_gsc_uc *gsc = container_of(gsc_fw, struct intel_gsc_uc, fw);
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ const struct intel_gsc_layout_pointers *layout = data;
+ const struct intel_gsc_bpdt_header *bpdt_header = NULL;
+ const struct intel_gsc_bpdt_entry *bpdt_entry = NULL;
+ const struct intel_gsc_cpd_header_v2 *cpd_header = NULL;
+ const struct intel_gsc_cpd_entry *cpd_entry = NULL;
+ const struct intel_gsc_manifest_header *manifest;
+ struct intel_uc_fw_ver min_ver = { 0 };
+ size_t min_size = sizeof(*layout);
+ int i;
+
+ if (size < min_size) {
+ gt_err(gt, "GSC FW too small! %zu < %zu\n", size, min_size);
+ return -ENODATA;
+ }
+
+ /*
+ * The GSC binary starts with the pointer layout, which contains the
+ * locations of the various partitions of the binary. The one we're
+ * interested in for the version is the boot1 partition, where we can
+ * find a BPDT header followed by entries, one of which points to the
+ * RBE sub-section of the partition. From here, we can parse the CPD
+ * header and the following entries to find the manifest location
+ * (entry identified by the "RBEP.man" name), from which we can finally
+ * extract the version.
+ *
+ * --------------------------------------------------
+ * [ intel_gsc_layout_pointers ]
+ * [ ... ]
+ * [ boot1.offset >---------------------------]------o
+ * [ ... ] |
+ * -------------------------------------------------- |
+ * |
+ * -------------------------------------------------- |
+ * [ intel_gsc_bpdt_header ]<-----o
+ * --------------------------------------------------
+ * [ intel_gsc_bpdt_entry[] ]
+ * [ entry1 ]
+ * [ ... ]
+ * [ entryX ]
+ * [ type == GSC_RBE ]
+ * [ offset >-----------------------------]------o
+ * [ ... ] |
+ * -------------------------------------------------- |
+ * |
+ * -------------------------------------------------- |
+ * [ intel_gsc_cpd_header_v2 ]<-----o
+ * --------------------------------------------------
+ * [ intel_gsc_cpd_entry[] ]
+ * [ entry1 ]
+ * [ ... ]
+ * [ entryX ]
+ * [ "RBEP.man" ]
+ * [ ... ]
+ * [ offset >----------------------------]------o
+ * [ ... ] |
+ * -------------------------------------------------- |
+ * |
+ * -------------------------------------------------- |
+ * [ intel_gsc_manifest_header ]<-----o
+ * [ ... ]
+ * [ intel_gsc_version fw_version ]
+ * [ ... ]
+ * --------------------------------------------------
+ */
+
+ min_size = layout->boot1.offset + layout->boot1.size;
+ if (size < min_size) {
+ gt_err(gt, "GSC FW too small for boot section! %zu < %zu\n",
+ size, min_size);
+ return -ENODATA;
+ }
+
+ min_size = sizeof(*bpdt_header);
+ if (layout->boot1.size < min_size) {
+ gt_err(gt, "GSC FW boot section too small for BPDT header: %u < %zu\n",
+ layout->boot1.size, min_size);
+ return -ENODATA;
+ }
+
+ bpdt_header = data + layout->boot1.offset;
+ if (bpdt_header->signature != INTEL_GSC_BPDT_HEADER_SIGNATURE) {
+ gt_err(gt, "invalid signature for BPDT header: 0x%08x!\n",
+ bpdt_header->signature);
+ return -EINVAL;
+ }
+
+ min_size += sizeof(*bpdt_entry) * bpdt_header->descriptor_count;
+ if (layout->boot1.size < min_size) {
+ gt_err(gt, "GSC FW boot section too small for BPDT entries: %u < %zu\n",
+ layout->boot1.size, min_size);
+ return -ENODATA;
+ }
+
+ bpdt_entry = (void *)bpdt_header + sizeof(*bpdt_header);
+ for (i = 0; i < bpdt_header->descriptor_count; i++, bpdt_entry++) {
+ if ((bpdt_entry->type & INTEL_GSC_BPDT_ENTRY_TYPE_MASK) !=
+ INTEL_GSC_BPDT_ENTRY_TYPE_GSC_RBE)
+ continue;
+
+ cpd_header = (void *)bpdt_header + bpdt_entry->sub_partition_offset;
+ min_size = bpdt_entry->sub_partition_offset + sizeof(*cpd_header);
+ break;
+ }
+
+ if (!cpd_header) {
+ gt_err(gt, "couldn't find CPD header in GSC binary!\n");
+ return -ENODATA;
+ }
+
+ if (layout->boot1.size < min_size) {
+ gt_err(gt, "GSC FW boot section too small for CPD header: %u < %zu\n",
+ layout->boot1.size, min_size);
+ return -ENODATA;
+ }
- return fw_status & GSC_FW_INIT_COMPLETE_BIT;
+ if (cpd_header->header_marker != INTEL_GSC_CPD_HEADER_MARKER) {
+ gt_err(gt, "invalid marker for CPD header in GSC bin: 0x%08x!\n",
+ cpd_header->header_marker);
+ return -EINVAL;
+ }
+
+ min_size += sizeof(*cpd_entry) * cpd_header->num_of_entries;
+ if (layout->boot1.size < min_size) {
+ gt_err(gt, "GSC FW boot section too small for CPD entries: %u < %zu\n",
+ layout->boot1.size, min_size);
+ return -ENODATA;
+ }
+
+ cpd_entry = (void *)cpd_header + cpd_header->header_length;
+ for (i = 0; i < cpd_header->num_of_entries; i++, cpd_entry++) {
+ if (strcmp(cpd_entry->name, "RBEP.man") == 0) {
+ manifest = (void *)cpd_header + cpd_entry_offset(cpd_entry);
+ intel_uc_fw_version_from_gsc_manifest(&gsc->release,
+ manifest);
+ gsc->security_version = manifest->security_version;
+ break;
+ }
+ }
+
+ /*
+ * ARL SKUs require newer firmwares, but the blob is actually common
+ * across all MTL and ARL SKUs, so we need to do an explicit version check
+ * here rather than using a separate table entry. If a too old version
+ * is found, then just don't use GSC rather than aborting the driver load.
+ * Note that the major number in the GSC FW version is used to indicate
+ * the platform, so we expect it to always be 102 for MTL/ARL binaries.
+ */
+ if (IS_ARROWLAKE_S(gt->i915))
+ min_ver = (struct intel_uc_fw_ver){ 102, 0, 10, 1878 };
+ else if (IS_ARROWLAKE_H(gt->i915) || IS_ARROWLAKE_U(gt->i915))
+ min_ver = (struct intel_uc_fw_ver){ 102, 1, 15, 1926 };
+
+ if (IS_METEORLAKE(gt->i915) && gsc->release.major != 102) {
+ gt_info(gt, "Invalid GSC firmware for MTL/ARL, got %d.%d.%d.%d but need 102.x.x.x",
+ gsc->release.major, gsc->release.minor,
+ gsc->release.patch, gsc->release.build);
+ return -EINVAL;
+ }
+
+ if (min_ver.major) {
+ bool too_old = false;
+
+ if (gsc->release.minor < min_ver.minor) {
+ too_old = true;
+ } else if (gsc->release.minor == min_ver.minor) {
+ if (gsc->release.patch < min_ver.patch) {
+ too_old = true;
+ } else if (gsc->release.patch == min_ver.patch) {
+ if (gsc->release.build < min_ver.build)
+ too_old = true;
+ }
+ }
+
+ if (too_old) {
+ gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least %d.%d.%d.%d",
+ gsc->release.major, gsc->release.minor,
+ gsc->release.patch, gsc->release.build,
+ min_ver.major, min_ver.minor,
+ min_ver.patch, min_ver.build);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
}
static int emit_gsc_fw_load(struct i915_request *rq, struct intel_gsc_uc *gsc)
@@ -88,9 +318,8 @@ out_rq:
i915_request_put(rq);
if (err)
- drm_err(&gsc_uc_to_gt(gsc)->i915->drm,
- "Request submission for GSC load failed (%d)\n",
- err);
+ gt_err(gsc_uc_to_gt(gsc), "Request submission for GSC load failed %pe\n",
+ ERR_PTR(err));
return err;
}
@@ -98,35 +327,25 @@ out_rq:
static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc)
{
struct intel_gt *gt = gsc_uc_to_gt(gsc);
- struct drm_i915_private *i915 = gt->i915;
- struct drm_i915_gem_object *obj;
- void *src, *dst;
+ void *src;
if (!gsc->local)
return -ENODEV;
- obj = gsc->local->obj;
-
- if (obj->base.size < gsc->fw.size)
+ if (gsc->local->size < gsc->fw.size)
return -ENOSPC;
- dst = i915_gem_object_pin_map_unlocked(obj,
- i915_coherent_map_type(i915, obj, true));
- if (IS_ERR(dst))
- return PTR_ERR(dst);
-
src = i915_gem_object_pin_map_unlocked(gsc->fw.obj,
- i915_coherent_map_type(i915, gsc->fw.obj, true));
- if (IS_ERR(src)) {
- i915_gem_object_unpin_map(obj);
+ intel_gt_coherent_map_type(gt, gsc->fw.obj, true));
+ if (IS_ERR(src))
return PTR_ERR(src);
- }
- memset(dst, 0, obj->base.size);
- memcpy(dst, src, gsc->fw.size);
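+ /* gsc->local_vaddr is an __iomem mapping of stolen memory, hence the _toio/_io accessors */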
+ memcpy_toio(gsc->local_vaddr, src, gsc->fw.size);
+ memset_io(gsc->local_vaddr + gsc->fw.size, 0, gsc->local->size - gsc->fw.size);
+
+ intel_guc_write_barrier(gt_to_guc(gt));
i915_gem_object_unpin_map(gsc->fw.obj);
- i915_gem_object_unpin_map(obj);
return 0;
}
@@ -134,12 +353,94 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc)
static int gsc_fw_wait(struct intel_gt *gt)
{
return intel_wait_for_register(gt->uncore,
- GSC_FW_STATUS_REG,
- GSC_FW_INIT_COMPLETE_BIT,
- GSC_FW_INIT_COMPLETE_BIT,
+ HECI_FWSTS(MTL_GSC_HECI1_BASE, 1),
+ HECI1_FWSTS1_INIT_COMPLETE,
+ HECI1_FWSTS1_INIT_COMPLETE,
500);
}
+struct intel_gsc_mkhi_header {
+ u8 group_id;
+#define MKHI_GROUP_ID_GFX_SRV 0x30
+
+ u8 command;
+#define MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION (0x42)
+
+ u8 reserved;
+ u8 result;
+} __packed;
+
+struct mtl_gsc_ver_msg_in {
+ struct intel_gsc_mtl_header header;
+ struct intel_gsc_mkhi_header mkhi;
+} __packed;
+
+struct mtl_gsc_ver_msg_out {
+ struct intel_gsc_mtl_header header;
+ struct intel_gsc_mkhi_header mkhi;
+ u16 proj_major;
+ u16 compat_major;
+ u16 compat_minor;
+ u16 reserved[5];
+} __packed;
+
+#define GSC_VER_PKT_SZ SZ_4K
+
+static int gsc_fw_query_compatibility_version(struct intel_gsc_uc *gsc)
+{
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ struct mtl_gsc_ver_msg_in *msg_in;
+ struct mtl_gsc_ver_msg_out *msg_out;
+ struct i915_vma *vma;
+ u64 offset;
+ void *vaddr;
+ int err;
+
+ err = intel_guc_allocate_and_map_vma(gt_to_guc(gt), GSC_VER_PKT_SZ * 2,
+ &vma, &vaddr);
+ if (err) {
+ gt_err(gt, "failed to allocate vma for GSC version query\n");
+ return err;
+ }
+
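+ /* a single allocation backs both packets: request at offset 0, reply at GSC_VER_PKT_SZ */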
+ offset = i915_ggtt_offset(vma);
+ msg_in = vaddr;
+ msg_out = vaddr + GSC_VER_PKT_SZ;
+
+ intel_gsc_uc_heci_cmd_emit_mtl_header(&msg_in->header,
+ HECI_MEADDRESS_MKHI,
+ sizeof(*msg_in), 0);
+ msg_in->mkhi.group_id = MKHI_GROUP_ID_GFX_SRV;
+ msg_in->mkhi.command = MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION;
+
+ err = intel_gsc_uc_heci_cmd_submit_packet(&gt->uc.gsc,
+ offset,
+ sizeof(*msg_in),
+ offset + GSC_VER_PKT_SZ,
+ GSC_VER_PKT_SZ);
+ if (err) {
+ gt_err(gt,
+ "failed to submit GSC request for compatibility version: %d\n",
+ err);
+ goto out_vma;
+ }
+
+ if (msg_out->header.message_size != sizeof(*msg_out)) {
+ gt_err(gt, "invalid GSC reply length %u [expected %zu], s=0x%x, f=0x%x, r=0x%x\n",
+ msg_out->header.message_size, sizeof(*msg_out),
+ msg_out->header.status, msg_out->header.flags, msg_out->mkhi.result);
+ err = -EPROTO;
+ goto out_vma;
+ }
+
+ gsc->fw.file_selected.ver.major = msg_out->compat_major;
+ gsc->fw.file_selected.ver.minor = msg_out->compat_minor;
+
+out_vma:
+ i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP);
+ return err;
+}
+
int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc)
{
struct intel_gt *gt = gsc_uc_to_gt(gsc);
@@ -197,11 +498,24 @@ int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc)
if (err)
goto fail;
+ err = gsc_fw_query_compatibility_version(gsc);
+ if (err)
+ goto fail;
+
+ /* we only support compatibility version 1.0 at the moment */
+ err = intel_uc_check_file_version(gsc_fw, NULL);
+ if (err)
+ goto fail;
+
/* FW is not fully operational until we enable SW proxy */
intel_uc_fw_change_status(gsc_fw, INTEL_UC_FIRMWARE_TRANSFERRED);
- drm_info(&gt->i915->drm, "Loaded GSC firmware %s\n",
- gsc_fw->file_selected.path);
+ gt_info(gt, "Loaded GSC firmware %s (cv%u.%u, r%u.%u.%u.%u, svn %u)\n",
+ gsc_fw->file_selected.path,
+ gsc_fw->file_selected.ver.major, gsc_fw->file_selected.ver.minor,
+ gsc->release.major, gsc->release.minor,
+ gsc->release.patch, gsc->release.build,
+ gsc->security_version);
return 0;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h
index 4b5dbb44afb4..bc9dd0de8aaf 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h
@@ -9,7 +9,13 @@
#include <linux/types.h>
struct intel_gsc_uc;
+struct intel_uc_fw;
+struct intel_uncore;
+int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, size_t size);
int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc);
bool intel_gsc_uc_fw_init_done(struct intel_gsc_uc *gsc);
+bool intel_gsc_uc_fw_proxy_init_done(struct intel_gsc_uc *gsc, bool needs_wakeref);
+int intel_gsc_uc_fw_proxy_get_status(struct intel_gsc_uc *gsc);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
new file mode 100644
index 000000000000..e7444ebc373e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/component.h>
+
+#include <drm/intel/i915_component.h>
+#include <drm/intel/i915_gsc_proxy_mei_interface.h>
+
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_print.h"
+
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "i915_wait_util.h"
+#include "intel_gsc_proxy.h"
+#include "intel_gsc_uc.h"
+#include "intel_gsc_uc_heci_cmd_submit.h"
+
+/*
+ * GSC proxy:
+ * The GSC uC needs to communicate with the CSME to perform certain operations.
+ * Since the GSC can't perform this communication directly on platforms where it
+ * is integrated in GT, i915 needs to transfer the messages from GSC to CSME
+ * and back. i915 must manually start the proxy flow after the GSC is loaded to
+ * signal to GSC that we're ready to handle its messages and allow it to query
+ * its init data from CSME; GSC will then trigger an HECI2 interrupt if it needs
+ * to send messages to CSME again.
+ * The proxy flow is as follows:
+ * 1 - i915 submits a request to GSC asking for the message to send to CSME
+ * 2 - GSC replies with the proxy header + payload for CSME
+ * 3 - i915 sends the reply from GSC as-is to CSME via the mei proxy component
+ * 4 - CSME replies with the proxy header + payload for GSC
+ * 5 - i915 submits a request to GSC with the reply from CSME
+ * 6 - GSC replies either with a new header + payload (same as step 2, so we
+ * restart from there) or with an end message.
+ */
+
+/*
+ * The component should load quite quickly in most cases, but it could take
+ * a while. Use a very large timeout just to cover the worst-case scenario.
+ */
+#define GSC_PROXY_INIT_TIMEOUT_MS 20000
+
+/* the protocol supports up to 32K in each direction */
+#define GSC_PROXY_BUFFER_SIZE SZ_32K
+#define GSC_PROXY_CHANNEL_SIZE (GSC_PROXY_BUFFER_SIZE * 2)
+#define GSC_PROXY_MAX_MSG_SIZE (GSC_PROXY_BUFFER_SIZE - sizeof(struct intel_gsc_mtl_header))
+
+/* FW-defined proxy header */
+struct intel_gsc_proxy_header {
+ /*
+ * hdr:
+ * Bits 0-7: type of the proxy message (see enum intel_gsc_proxy_type)
+ * Bits 8-15: rsvd
+ * Bits 16-31: length in bytes of the payload following the proxy header
+ */
+ u32 hdr;
+#define GSC_PROXY_TYPE GENMASK(7, 0)
+#define GSC_PROXY_PAYLOAD_LENGTH GENMASK(31, 16)
+
+ u32 source; /* Source of the Proxy message */
+ u32 destination; /* Destination of the Proxy message */
+#define GSC_PROXY_ADDRESSING_KMD 0x10000
+#define GSC_PROXY_ADDRESSING_GSC 0x20000
+#define GSC_PROXY_ADDRESSING_CSME 0x30000
+
+ u32 status; /* Command status */
+} __packed;
+
+/* FW-defined proxy types */
+enum intel_gsc_proxy_type {
+ GSC_PROXY_MSG_TYPE_PROXY_INVALID = 0,
+ GSC_PROXY_MSG_TYPE_PROXY_QUERY = 1,
+ GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD = 2,
+ GSC_PROXY_MSG_TYPE_PROXY_END = 3,
+ GSC_PROXY_MSG_TYPE_PROXY_NOTIFICATION = 4,
+};
+
+struct gsc_proxy_msg {
+ struct intel_gsc_mtl_header header;
+ struct intel_gsc_proxy_header proxy_header;
+} __packed;
+
+static int proxy_send_to_csme(struct intel_gsc_uc *gsc)
+{
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ struct i915_gsc_proxy_component *comp = gsc->proxy.component;
+ struct intel_gsc_mtl_header *hdr;
+ void *in = gsc->proxy.to_csme;
+ void *out = gsc->proxy.to_gsc;
+ u32 in_size;
+ int ret;
+
+ /* the CSME message excludes the MTL header, so skip past it */
+ hdr = in;
+ in += sizeof(struct intel_gsc_mtl_header);
+ out += sizeof(struct intel_gsc_mtl_header);
+
+ in_size = hdr->message_size - sizeof(struct intel_gsc_mtl_header);
+
+ /* the message must contain at least the proxy header */
+ if (in_size < sizeof(struct intel_gsc_proxy_header) ||
+ in_size > GSC_PROXY_MAX_MSG_SIZE) {
+ gt_err(gt, "Invalid CSME message size: %u\n", in_size);
+ return -EINVAL;
+ }
+
+ ret = comp->ops->send(comp->mei_dev, in, in_size);
+ if (ret < 0) {
+ gt_err(gt, "Failed to send CSME message\n");
+ return ret;
+ }
+
+ ret = comp->ops->recv(comp->mei_dev, out, GSC_PROXY_MAX_MSG_SIZE);
+ if (ret < 0) {
+ gt_err(gt, "Failed to receive CSME message\n");
+ return ret;
+ }
+
+ return ret;
+}
+
+static int proxy_send_to_gsc(struct intel_gsc_uc *gsc)
+{
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ u32 *marker = gsc->proxy.to_csme; /* first dw of the reply header */
+ u64 addr_in = i915_ggtt_offset(gsc->proxy.vma);
+ u64 addr_out = addr_in + GSC_PROXY_BUFFER_SIZE;
+ u32 size = ((struct gsc_proxy_msg *)gsc->proxy.to_gsc)->header.message_size;
+ int err;
+
+ /* the message must contain at least the gsc and proxy headers */
+ if (size < sizeof(struct gsc_proxy_msg) || size > GSC_PROXY_BUFFER_SIZE) {
+ gt_err(gt, "Invalid GSC proxy message size: %u\n", size);
+ return -EINVAL;
+ }
+
+ /* clear the message marker */
+ *marker = 0;
+
+ /* make sure the marker write is flushed */
+ wmb();
+
+ /* send the request */
+ err = intel_gsc_uc_heci_cmd_submit_packet(gsc, addr_in, size,
+ addr_out, GSC_PROXY_BUFFER_SIZE);
+
+ if (!err) {
+ /* wait for the reply to show up */
+ err = wait_for(*marker != 0, 300);
+ if (err)
+ gt_err(gt, "Failed to get a proxy reply from gsc\n");
+ }
+
+ return err;
+}
+
+static int validate_proxy_header(struct intel_gsc_proxy_header *header,
+ u32 source, u32 dest)
+{
+ u32 type = FIELD_GET(GSC_PROXY_TYPE, header->hdr);
+ u32 length = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, header->hdr);
+ int ret = 0;
+
+ if (header->destination != dest || header->source != source) {
+ ret = -ENOEXEC;
+ goto fail;
+ }
+
+ switch (type) {
+ case GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD:
+ if (length > 0)
+ break;
+ fallthrough;
+ case GSC_PROXY_MSG_TYPE_PROXY_INVALID:
+ ret = -EIO;
+ goto fail;
+ default:
+ break;
+ }
+
+fail:
+ return ret;
+}
+
+static int proxy_query(struct intel_gsc_uc *gsc)
+{
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ struct gsc_proxy_msg *to_gsc = gsc->proxy.to_gsc;
+ struct gsc_proxy_msg *to_csme = gsc->proxy.to_csme;
+ int ret;
+
+ intel_gsc_uc_heci_cmd_emit_mtl_header(&to_gsc->header,
+ HECI_MEADDRESS_PROXY,
+ sizeof(struct gsc_proxy_msg),
+ 0);
+
+ to_gsc->proxy_header.hdr =
+ FIELD_PREP(GSC_PROXY_TYPE, GSC_PROXY_MSG_TYPE_PROXY_QUERY) |
+ FIELD_PREP(GSC_PROXY_PAYLOAD_LENGTH, 0);
+
+ to_gsc->proxy_header.source = GSC_PROXY_ADDRESSING_KMD;
+ to_gsc->proxy_header.destination = GSC_PROXY_ADDRESSING_GSC;
+ to_gsc->proxy_header.status = 0;
+
+ while (1) {
+ /* clear the GSC response header space */
+ memset(gsc->proxy.to_csme, 0, sizeof(struct gsc_proxy_msg));
+
+ /* send proxy message to GSC */
+ ret = proxy_send_to_gsc(gsc);
+ if (ret) {
+ gt_err(gt, "failed to send proxy message to GSC! %d\n", ret);
+ goto proxy_error;
+ }
+
+ /* stop if this was the last message */
+ if (FIELD_GET(GSC_PROXY_TYPE, to_csme->proxy_header.hdr) ==
+ GSC_PROXY_MSG_TYPE_PROXY_END)
+ break;
+
+ /* make sure the GSC-to-CSME proxy header is sane */
+ ret = validate_proxy_header(&to_csme->proxy_header,
+ GSC_PROXY_ADDRESSING_GSC,
+ GSC_PROXY_ADDRESSING_CSME);
+ if (ret) {
+ gt_err(gt, "invalid GSC to CSME proxy header! %d\n", ret);
+ goto proxy_error;
+ }
+
+ /* send the GSC message to the CSME */
+ ret = proxy_send_to_csme(gsc);
+ if (ret < 0) {
+ gt_err(gt, "failed to send proxy message to CSME! %d\n", ret);
+ goto proxy_error;
+ }
+
+ /* update the GSC message size with the returned value from CSME */
+ to_gsc->header.message_size = ret + sizeof(struct intel_gsc_mtl_header);
+
+ /* make sure the CSME-to-GSC proxy header is sane */
+ ret = validate_proxy_header(&to_gsc->proxy_header,
+ GSC_PROXY_ADDRESSING_CSME,
+ GSC_PROXY_ADDRESSING_GSC);
+ if (ret) {
+ gt_err(gt, "invalid CSME to GSC proxy header! %d\n", ret);
+ goto proxy_error;
+ }
+ }
+
+proxy_error:
+ return ret < 0 ? ret : 0;
+}
+
+int intel_gsc_proxy_request_handler(struct intel_gsc_uc *gsc)
+{
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ int err;
+
+ if (!gsc->proxy.component_added)
+ return -ENODEV;
+
+ assert_rpm_wakelock_held(gt->uncore->rpm);
+
+ /* when GSC is loaded, we can queue this before the component is bound */
+ err = wait_for(gsc->proxy.component, GSC_PROXY_INIT_TIMEOUT_MS);
+ if (err) {
+ gt_err(gt, "GSC proxy component didn't bind within the expected timeout\n");
+ return -EIO;
+ }
+
+ mutex_lock(&gsc->proxy.mutex);
+ if (!gsc->proxy.component) {
+ gt_err(gt, "GSC proxy worker called without the component being bound!\n");
+ err = -EIO;
+ } else {
+ /*
+ * write the status bit to clear it and allow new proxy
+ * interrupts to be generated while we handle the current
+ * request, but be sure not to write the reset bit
+ */
+ intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE),
+ HECI_H_CSR_RST, HECI_H_CSR_IS);
+ err = proxy_query(gsc);
+ }
+ mutex_unlock(&gsc->proxy.mutex);
+ return err;
+}
+
+void intel_gsc_proxy_irq_handler(struct intel_gsc_uc *gsc, u32 iir)
+{
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+
+ if (unlikely(!iir))
+ return;
+
+ lockdep_assert_held(gt->irq_lock);
+
+ if (!gsc->proxy.component) {
+ gt_err(gt, "GSC proxy irq received without the component being bound!\n");
+ return;
+ }
+
+ gsc->gsc_work_actions |= GSC_ACTION_SW_PROXY;
+ queue_work(gsc->wq, &gsc->work);
+}
+
+static int i915_gsc_proxy_component_bind(struct device *i915_kdev,
+ struct device *mei_kdev, void *data)
+{
+ struct drm_i915_private *i915 = kdev_to_i915(i915_kdev);
+ struct intel_gt *gt = i915->media_gt;
+ struct intel_gsc_uc *gsc = &gt->uc.gsc;
+ intel_wakeref_t wakeref;
+
+ /* enable HECI2 IRQs */
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE),
+ HECI_H_CSR_RST, HECI_H_CSR_IE);
+
+ mutex_lock(&gsc->proxy.mutex);
+ gsc->proxy.component = data;
+ gsc->proxy.component->mei_dev = mei_kdev;
+ mutex_unlock(&gsc->proxy.mutex);
+ gt_dbg(gt, "GSC proxy mei component bound\n");
+
+ return 0;
+}
+
+static void i915_gsc_proxy_component_unbind(struct device *i915_kdev,
+ struct device *mei_kdev, void *data)
+{
+ struct drm_i915_private *i915 = kdev_to_i915(i915_kdev);
+ struct intel_gt *gt = i915->media_gt;
+ struct intel_gsc_uc *gsc = &gt->uc.gsc;
+ intel_wakeref_t wakeref;
+
+ mutex_lock(&gsc->proxy.mutex);
+ gsc->proxy.component = NULL;
+ mutex_unlock(&gsc->proxy.mutex);
+
+ /* disable HECI2 IRQs */
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE),
+ HECI_H_CSR_IE | HECI_H_CSR_RST, 0);
+ gt_dbg(gt, "GSC proxy mei component unbound\n");
+}
+
+static const struct component_ops i915_gsc_proxy_component_ops = {
+ .bind = i915_gsc_proxy_component_bind,
+ .unbind = i915_gsc_proxy_component_unbind,
+};
+
+static int proxy_channel_alloc(struct intel_gsc_uc *gsc)
+{
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ struct i915_vma *vma;
+ void *vaddr;
+ int err;
+
+ err = intel_guc_allocate_and_map_vma(gt_to_guc(gt),
+ GSC_PROXY_CHANNEL_SIZE,
+ &vma, &vaddr);
+ if (err)
+ return err;
+
+ gsc->proxy.vma = vma;
+ gsc->proxy.to_gsc = vaddr;
+ gsc->proxy.to_csme = vaddr + GSC_PROXY_BUFFER_SIZE;
+
+ return 0;
+}
+
+static void proxy_channel_free(struct intel_gsc_uc *gsc)
+{
+ if (!gsc->proxy.vma)
+ return;
+
+ gsc->proxy.to_gsc = NULL;
+ gsc->proxy.to_csme = NULL;
+ i915_vma_unpin_and_release(&gsc->proxy.vma, I915_VMA_RELEASE_MAP);
+}
+
+void intel_gsc_proxy_fini(struct intel_gsc_uc *gsc)
+{
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ struct drm_i915_private *i915 = gt->i915;
+
+ if (fetch_and_zero(&gsc->proxy.component_added))
+ component_del(i915->drm.dev, &i915_gsc_proxy_component_ops);
+
+ proxy_channel_free(gsc);
+}
+
+int intel_gsc_proxy_init(struct intel_gsc_uc *gsc)
+{
+ int err;
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ struct drm_i915_private *i915 = gt->i915;
+
+ mutex_init(&gsc->proxy.mutex);
+
+ if (!IS_ENABLED(CONFIG_INTEL_MEI_GSC_PROXY)) {
+ gt_info(gt, "can't init GSC proxy due to missing mei component\n");
+ return -ENODEV;
+ }
+
+ err = proxy_channel_alloc(gsc);
+ if (err)
+ return err;
+
+ err = component_add_typed(i915->drm.dev, &i915_gsc_proxy_component_ops,
+ I915_COMPONENT_GSC_PROXY);
+ if (err < 0) {
+ gt_err(gt, "Failed to add GSC_PROXY component (%d)\n", err);
+ goto out_free;
+ }
+
+ gsc->proxy.component_added = true;
+
+ return 0;
+
+out_free:
+ proxy_channel_free(gsc);
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h
new file mode 100644
index 000000000000..fc5aef10bfb4
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _INTEL_GSC_PROXY_H_
+#define _INTEL_GSC_PROXY_H_
+
+#include <linux/types.h>
+
+struct intel_gsc_uc;
+
+int intel_gsc_proxy_init(struct intel_gsc_uc *gsc);
+void intel_gsc_proxy_fini(struct intel_gsc_uc *gsc);
+int intel_gsc_proxy_request_handler(struct intel_gsc_uc *gsc);
+void intel_gsc_proxy_irq_handler(struct intel_gsc_uc *gsc, u32 iir);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
index fd21dbd2663b..3d3191deb0ab 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
@@ -6,18 +6,93 @@
#include <linux/types.h>
#include "gt/intel_gt.h"
-#include "intel_gsc_uc.h"
+#include "gt/intel_gt_print.h"
#include "intel_gsc_fw.h"
+#include "intel_gsc_proxy.h"
+#include "intel_gsc_uc.h"
#include "i915_drv.h"
+#include "i915_reg.h"
static void gsc_work(struct work_struct *work)
{
struct intel_gsc_uc *gsc = container_of(work, typeof(*gsc), work);
struct intel_gt *gt = gsc_uc_to_gt(gsc);
intel_wakeref_t wakeref;
+ u32 actions;
+ int ret;
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+ spin_lock_irq(gt->irq_lock);
+ actions = gsc->gsc_work_actions;
+ gsc->gsc_work_actions = 0;
+ spin_unlock_irq(gt->irq_lock);
+
+ if (actions & GSC_ACTION_FW_LOAD) {
+ ret = intel_gsc_uc_fw_upload(gsc);
+ if (!ret)
+ /* setup proxy on a new load */
+ actions |= GSC_ACTION_SW_PROXY;
+ else if (ret != -EEXIST)
+ goto out_put;
+
+ /*
+ * The HuC auth can be done either before or after the proxy init;
+ * if done after, a proxy request will be issued and must be
+ * serviced before the authentication can complete.
+ * Since this worker also handles proxy requests, we can't
+ * perform an action that requires the proxy from within it and
+ * then stall waiting for it, because we'd be blocking the
+ * service path. Therefore, it is easier for us to load HuC
+ * first and do proxy later. The GSC will ack the HuC auth and
+ * then send the HuC proxy request as part of the proxy init
+ * flow.
+ * Note that we can only do the GSC auth if the GuC auth was
+ * successful.
+ */
+ if (intel_uc_uses_huc(&gt->uc) &&
+ intel_huc_is_authenticated(&gt->uc.huc, INTEL_HUC_AUTH_BY_GUC))
+ intel_huc_auth(&gt->uc.huc, INTEL_HUC_AUTH_BY_GSC);
+ }
+
+ if (actions & GSC_ACTION_SW_PROXY) {
+ if (!intel_gsc_uc_fw_init_done(gsc)) {
+ gt_err(gt, "Proxy request received with GSC not loaded!\n");
+ goto out_put;
+ }
+
+ ret = intel_gsc_proxy_request_handler(gsc);
+ if (ret) {
+ if (actions & GSC_ACTION_FW_LOAD) {
+ /*
+ * A proxy failure right after firmware load means the proxy-init
+ * step has failed so mark GSC as not usable after this
+ */
+ gt_err(gt, "GSC proxy handler failed to init\n");
+ intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
+ }
+ goto out_put;
+ }
+
+ /* mark the GSC FW init as done the first time we run this */
+ if (actions & GSC_ACTION_FW_LOAD) {
+ /*
+ * If there is a proxy establishment error, the GSC might still
+ * complete the request handling cleanly, so we need to check the
+ * status register to see if the proxy init was actually successful
+ */
+ if (intel_gsc_uc_fw_proxy_init_done(gsc, false)) {
+ gt_dbg(gt, "GSC Proxy initialized\n");
+ intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_RUNNING);
+ } else {
+ gt_err(gt, "GSC status reports proxy init not complete\n");
+ intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
+ }
+ }
+ }
- with_intel_runtime_pm(gt->uncore->rpm, wakeref)
- intel_gsc_uc_fw_upload(gsc);
+out_put:
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
}
static bool gsc_engine_supported(struct intel_gt *gt)
@@ -33,7 +108,7 @@ static bool gsc_engine_supported(struct intel_gt *gt)
GEM_BUG_ON(!gt_is_root(gt) && !gt->info.engine_mask);
if (gt_is_root(gt))
- mask = RUNTIME_INFO(gt->i915)->platform_engine_mask;
+ mask = INTEL_INFO(gt->i915)->platform_engine_mask;
else
mask = gt->info.engine_mask;
@@ -42,63 +117,136 @@ static bool gsc_engine_supported(struct intel_gt *gt)
void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc)
{
- intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC);
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+
+ /*
+ * GSC FW needs to be copied to a dedicated memory allocations for
+ * loading (see gsc->local), so we don't need to GGTT map the FW image
+ * itself into GGTT.
+ */
+ intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC, false);
INIT_WORK(&gsc->work, gsc_work);
/* we can arrive here from i915_driver_early_probe for the primary
 * GT before it is fully set up, hence check the device info's
 * engine mask
*/
- if (!gsc_engine_supported(gsc_uc_to_gt(gsc))) {
+ if (!gsc_engine_supported(gt)) {
intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_NOT_SUPPORTED);
return;
}
+
+ gsc->wq = alloc_ordered_workqueue("i915_gsc", 0);
+ if (!gsc->wq) {
+ gt_err(gt, "failed to allocate WQ for GSC, disabling FW\n");
+ intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_NOT_SUPPORTED);
+ }
+}
+
+static int gsc_allocate_and_map_vma(struct intel_gsc_uc *gsc, u32 size)
+{
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ void __iomem *vaddr;
+ int ret = 0;
+
+ /*
+ * The GSC FW doesn't immediately suspend after becoming idle, so there
+ * is a chance that it could still be awake after we successfully
+ * return from the pci suspend function, even if there are no pending
+ * operations.
+ * The FW might therefore try to access memory for its suspend operation
+ * after the kernel has completed the HW suspend flow; this can cause
+ * issues if the FW is mapped in normal RAM, as some of the
+ * involved HW units might've already lost power.
+ * The driver must therefore avoid this situation and the recommended
+ * way to do so is to use stolen memory for the GSC memory allocation,
+ * because stolen memory takes a different path in HW and it is
+ * guaranteed to always work as long as the GPU itself is awake (which
+ * it must be if the GSC is awake).
+ */
+ obj = i915_gem_object_create_stolen(gt->i915, size);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto err;
+ }
+
+ vaddr = i915_vma_pin_iomap(vma);
+ i915_vma_unpin(vma);
+ if (IS_ERR(vaddr)) {
+ ret = PTR_ERR(vaddr);
+ goto err;
+ }
+
+ i915_vma_make_unshrinkable(vma);
+
+ gsc->local = vma;
+ gsc->local_vaddr = vaddr;
+
+ return 0;
+
+err:
+ i915_gem_object_put(obj);
+ return ret;
+}
+
+static void gsc_unmap_and_free_vma(struct intel_gsc_uc *gsc)
+{
+ struct i915_vma *vma = fetch_and_zero(&gsc->local);
+
+ if (!vma)
+ return;
+
+ gsc->local_vaddr = NULL;
+ i915_vma_unpin_iomap(vma);
+ i915_gem_object_put(vma->obj);
}
int intel_gsc_uc_init(struct intel_gsc_uc *gsc)
{
static struct lock_class_key gsc_lock;
struct intel_gt *gt = gsc_uc_to_gt(gsc);
- struct drm_i915_private *i915 = gt->i915;
struct intel_engine_cs *engine = gt->engine[GSC0];
struct intel_context *ce;
- struct i915_vma *vma;
int err;
err = intel_uc_fw_init(&gsc->fw);
if (err)
goto out;
- vma = intel_guc_allocate_vma(&gt->uc.guc, SZ_8M);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
+ err = gsc_allocate_and_map_vma(gsc, SZ_4M);
+ if (err)
goto out_fw;
- }
-
- gsc->local = vma;
ce = intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_4K,
I915_GEM_HWS_GSC_ADDR,
&gsc_lock, "gsc_context");
if (IS_ERR(ce)) {
- drm_err(&gt->i915->drm,
- "failed to create GSC CS ctx for FW communication\n");
+ gt_err(gt, "failed to create GSC CS ctx for FW communication\n");
err = PTR_ERR(ce);
goto out_vma;
}
gsc->ce = ce;
+ /* if we fail to init proxy we still want to load GSC for PM */
+ intel_gsc_proxy_init(gsc);
+
intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_LOADABLE);
return 0;
out_vma:
- i915_vma_unpin_and_release(&gsc->local, 0);
+ gsc_unmap_and_free_vma(gsc);
out_fw:
intel_uc_fw_fini(&gsc->fw);
out:
- i915_probe_error(i915, "failed with %d\n", err);
+ gt_probe_error(gt, "GSC init failed %pe\n", ERR_PTR(err));
return err;
}
@@ -108,16 +256,22 @@ void intel_gsc_uc_fini(struct intel_gsc_uc *gsc)
return;
flush_work(&gsc->work);
+ if (gsc->wq) {
+ destroy_workqueue(gsc->wq);
+ gsc->wq = NULL;
+ }
+
+ intel_gsc_proxy_fini(gsc);
if (gsc->ce)
intel_engine_destroy_pinned_context(fetch_and_zero(&gsc->ce));
- i915_vma_unpin_and_release(&gsc->local, 0);
+ gsc_unmap_and_free_vma(gsc);
intel_uc_fw_fini(&gsc->fw);
}
-void intel_gsc_uc_suspend(struct intel_gsc_uc *gsc)
+void intel_gsc_uc_flush_work(struct intel_gsc_uc *gsc)
{
if (!intel_uc_fw_is_loadable(&gsc->fw))
return;
@@ -125,13 +279,80 @@ void intel_gsc_uc_suspend(struct intel_gsc_uc *gsc)
flush_work(&gsc->work);
}
-void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc)
+void intel_gsc_uc_resume(struct intel_gsc_uc *gsc)
{
if (!intel_uc_fw_is_loadable(&gsc->fw))
return;
+ /*
+ * we only want to start the GSC worker from here in the actual resume
+ * flow and not during driver load. This is because GSC load is slow and
+ * therefore we want to make sure that the default state init completes
+ * first to not slow down the init thread. A separate call to
+ * intel_gsc_uc_load_start will ensure that the GSC is loaded during
+ * driver load.
+ */
+ if (!gsc_uc_to_gt(gsc)->engine[GSC0]->default_state)
+ return;
+
+ intel_gsc_uc_load_start(gsc);
+}
+
+void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc)
+{
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+
+ if (!intel_uc_fw_is_loadable(&gsc->fw) || intel_uc_fw_is_in_error(&gsc->fw))
+ return;
+
if (intel_gsc_uc_fw_init_done(gsc))
return;
- queue_work(system_unbound_wq, &gsc->work);
+ spin_lock_irq(gt->irq_lock);
+ gsc->gsc_work_actions |= GSC_ACTION_FW_LOAD;
+ spin_unlock_irq(gt->irq_lock);
+
+ queue_work(gsc->wq, &gsc->work);
+}
+
+void intel_gsc_uc_load_status(struct intel_gsc_uc *gsc, struct drm_printer *p)
+{
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ struct intel_uncore *uncore = gt->uncore;
+ intel_wakeref_t wakeref;
+
+ if (!intel_gsc_uc_is_supported(gsc)) {
+ drm_printf(p, "GSC not supported\n");
+ return;
+ }
+
+ if (!intel_gsc_uc_is_wanted(gsc)) {
+ drm_printf(p, "GSC disabled\n");
+ return;
+ }
+
+ drm_printf(p, "GSC firmware: %s\n", gsc->fw.file_selected.path);
+ if (gsc->fw.file_selected.path != gsc->fw.file_wanted.path)
+ drm_printf(p, "GSC firmware wanted: %s\n", gsc->fw.file_wanted.path);
+ drm_printf(p, "\tstatus: %s\n", intel_uc_fw_status_repr(gsc->fw.status));
+
+ drm_printf(p, "Release: %u.%u.%u.%u\n",
+ gsc->release.major, gsc->release.minor,
+ gsc->release.patch, gsc->release.build);
+
+ drm_printf(p, "Compatibility Version: %u.%u [min expected %u.%u]\n",
+ gsc->fw.file_selected.ver.major, gsc->fw.file_selected.ver.minor,
+ gsc->fw.file_wanted.ver.major, gsc->fw.file_wanted.ver.minor);
+
+ drm_printf(p, "SVN: %u\n", gsc->security_version);
+
+ with_intel_runtime_pm(uncore->rpm, wakeref) {
+ u32 i;
+
+ for (i = 1; i <= 6; i++) {
+ u32 status = intel_uncore_read(uncore,
+ HECI_FWSTS(MTL_GSC_HECI1_BASE, i));
+ drm_printf(p, "HECI1 FWSTST%u = 0x%08x\n", i, status);
+ }
+ }
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h
index 03fd0a8e8db1..c8082cf200fc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h
@@ -8,25 +8,63 @@
#include "intel_uc_fw.h"
+struct drm_printer;
struct i915_vma;
struct intel_context;
+struct i915_gsc_proxy_component;
struct intel_gsc_uc {
/* Generic uC firmware management */
struct intel_uc_fw fw;
/* GSC-specific additions */
+
+ /*
+ * The GSC has 3 version numbers:
+ * - Release version (incremented with each build)
+ * - Security version (incremented on security fix)
+ * - Compatibility version (incremented on interface change)
+ *
+ * The one that matters for using the binary is the last one, so that's
+ * the one we save inside the intel_uc_fw structure. The other two
+ * versions are only used for debug/info purposes, so we save them here.
+ *
+ * Note that the release and security versions are available in the
+ * binary header, while the compatibility version must be queried after
+ * loading the binary.
+ */
+ struct intel_uc_fw_ver release;
+ u32 security_version;
+
struct i915_vma *local; /* private memory for GSC usage */
+ void __iomem *local_vaddr; /* pointer to access the private memory */
struct intel_context *ce; /* for submission to GSC FW via GSC engine */
- struct work_struct work; /* for delayed load */
+ /* for delayed load and proxy handling */
+ struct workqueue_struct *wq;
+ struct work_struct work;
+ u32 gsc_work_actions; /* protected by gt->irq_lock */
+#define GSC_ACTION_FW_LOAD BIT(0)
+#define GSC_ACTION_SW_PROXY BIT(1)
+
+ struct {
+ struct i915_gsc_proxy_component *component;
+ bool component_added;
+ struct i915_vma *vma;
+ void *to_gsc;
+ void *to_csme;
+ struct mutex mutex; /* protects the tee channel binding */
+ } proxy;
};
void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc);
int intel_gsc_uc_init(struct intel_gsc_uc *gsc);
void intel_gsc_uc_fini(struct intel_gsc_uc *gsc);
void intel_gsc_uc_suspend(struct intel_gsc_uc *gsc);
+void intel_gsc_uc_resume(struct intel_gsc_uc *gsc);
+void intel_gsc_uc_flush_work(struct intel_gsc_uc *gsc);
void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc);
+void intel_gsc_uc_load_status(struct intel_gsc_uc *gsc, struct drm_printer *p);
static inline bool intel_gsc_uc_is_supported(struct intel_gsc_uc *gsc)
{
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c
new file mode 100644
index 000000000000..5baacd822a1c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_debugfs.h"
+#include "gt/intel_gt_print.h"
+#include "intel_gsc_uc.h"
+#include "intel_gsc_uc_debugfs.h"
+#include "i915_drv.h"
+
+static int gsc_info_show(struct seq_file *m, void *data)
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+ struct intel_gsc_uc *gsc = m->private;
+
+ if (!intel_gsc_uc_is_supported(gsc))
+ return -ENODEV;
+
+ intel_gsc_uc_load_status(gsc, &p);
+
+ return 0;
+}
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(gsc_info);
+
+void intel_gsc_uc_debugfs_register(struct intel_gsc_uc *gsc_uc, struct dentry *root)
+{
+ static const struct intel_gt_debugfs_file files[] = {
+ { "gsc_info", &gsc_info_fops, NULL },
+ };
+
+ if (!intel_gsc_uc_is_supported(gsc_uc))
+ return;
+
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gsc_uc);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h
new file mode 100644
index 000000000000..3415ad39aabb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GSC_UC_H
+#define DEBUGFS_GSC_UC_H
+
+struct intel_gsc_uc;
+struct dentry;
+
+void intel_gsc_uc_debugfs_register(struct intel_gsc_uc *gsc, struct dentry *root);
+
+#endif /* DEBUGFS_GSC_UC_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c
new file mode 100644
index 000000000000..dabb870dcdb1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+
+#include "gt/intel_context.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_ring.h"
+
+#include "i915_wait_util.h"
+#include "intel_gsc_uc_heci_cmd_submit.h"
+
+struct gsc_heci_pkt {
+ u64 addr_in;
+ u32 size_in;
+ u64 addr_out;
+ u32 size_out;
+};
+
+static int emit_gsc_heci_pkt(struct i915_request *rq, struct gsc_heci_pkt *pkt)
+{
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 8);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = GSC_HECI_CMD_PKT;
+ *cs++ = lower_32_bits(pkt->addr_in);
+ *cs++ = upper_32_bits(pkt->addr_in);
+ *cs++ = pkt->size_in;
+ *cs++ = lower_32_bits(pkt->addr_out);
+ *cs++ = upper_32_bits(pkt->addr_out);
+ *cs++ = pkt->size_out;
+ *cs++ = 0;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+int intel_gsc_uc_heci_cmd_submit_packet(struct intel_gsc_uc *gsc, u64 addr_in,
+ u32 size_in, u64 addr_out,
+ u32 size_out)
+{
+ struct intel_context *ce = gsc->ce;
+ struct i915_request *rq;
+ struct gsc_heci_pkt pkt = {
+ .addr_in = addr_in,
+ .size_in = size_in,
+ .addr_out = addr_out,
+ .size_out = size_out
+ };
+ int err;
+
+ if (!ce)
+ return -ENODEV;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ if (ce->engine->emit_init_breadcrumb) {
+ err = ce->engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto out_rq;
+ }
+
+ err = emit_gsc_heci_pkt(rq, &pkt);
+
+ if (err)
+ goto out_rq;
+
+ err = ce->engine->emit_flush(rq, 0);
+
+out_rq:
+ i915_request_get(rq);
+
+ if (unlikely(err))
+ i915_request_set_error_once(rq, err);
+
+ i915_request_add(rq);
+
+ if (!err) {
+ /*
+ * Start timeout for i915_request_wait only after considering one possible
+ * pending GSC-HECI submission cycle on the other (non-privileged) path.
+ */
+ if (wait_for(i915_request_started(rq), GSC_HECI_REPLY_LATENCY_MS))
+ drm_dbg(&gsc_uc_to_gt(gsc)->i915->drm,
+ "Delay in gsc-heci-priv submission to gsccs-hw");
+ if (i915_request_wait(rq, 0, msecs_to_jiffies(GSC_HECI_REPLY_LATENCY_MS)) < 0)
+ err = -ETIME;
+ }
+
+ i915_request_put(rq);
+
+ if (err)
+ drm_err(&gsc_uc_to_gt(gsc)->i915->drm,
+ "Request submission for GSC heci cmd failed (%d)\n",
+ err);
+
+ return err;
+}
+
+void intel_gsc_uc_heci_cmd_emit_mtl_header(struct intel_gsc_mtl_header *header,
+ u8 heci_client_id, u32 message_size,
+ u64 host_session_id)
+{
+ host_session_id &= ~HOST_SESSION_MASK;
+ if (host_session_id && heci_client_id == HECI_MEADDRESS_PXP)
+ host_session_id |= HOST_SESSION_PXP_SINGLE;
+
+ header->validity_marker = GSC_HECI_VALIDITY_MARKER;
+ header->heci_client_id = heci_client_id;
+ header->host_session_handle = host_session_id;
+ header->header_version = MTL_GSC_HEADER_VERSION;
+ header->message_size = message_size;
+}
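+/*
+ * A typical caller places this header at the start of the message buffer,
+ * in front of the payload, along these lines (a sketch; that message_size
+ * covers header plus payload is an assumption here):
+ *
+ *	intel_gsc_uc_heci_cmd_emit_mtl_header(buf, HECI_MEADDRESS_PROXY,
+ *					      sizeof(*buf) + payload_size, 0);
+ */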
+
+static void
+emit_gsc_heci_pkt_nonpriv(u32 *cmd, struct intel_gsc_heci_non_priv_pkt *pkt)
+{
+ *cmd++ = GSC_HECI_CMD_PKT;
+ *cmd++ = lower_32_bits(pkt->addr_in);
+ *cmd++ = upper_32_bits(pkt->addr_in);
+ *cmd++ = pkt->size_in;
+ *cmd++ = lower_32_bits(pkt->addr_out);
+ *cmd++ = upper_32_bits(pkt->addr_out);
+ *cmd++ = pkt->size_out;
+ *cmd++ = 0;
+ *cmd++ = MI_BATCH_BUFFER_END;
+}
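+/*
+ * Note: unlike emit_gsc_heci_pkt() above, which emits into the request's
+ * ring, this variant fills a caller-provided batch buffer mapping, hence
+ * the explicit MI_BATCH_BUFFER_END terminator.
+ */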
+
+int
+intel_gsc_uc_heci_cmd_submit_nonpriv(struct intel_gsc_uc *gsc,
+ struct intel_context *ce,
+ struct intel_gsc_heci_non_priv_pkt *pkt,
+ u32 *cmd, int timeout_ms)
+{
+ struct intel_engine_cs *engine;
+ struct i915_gem_ww_ctx ww;
+ struct i915_request *rq;
+ int err, trials = 0;
+
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(pkt->bb_vma->obj, &ww);
+ if (err)
+ goto out_ww;
+ err = i915_gem_object_lock(pkt->heci_pkt_vma->obj, &ww);
+ if (err)
+ goto out_ww;
+ err = intel_context_pin_ww(ce, &ww);
+ if (err)
+ goto out_ww;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_unpin_ce;
+ }
+
+ emit_gsc_heci_pkt_nonpriv(cmd, pkt);
+
+ err = i915_vma_move_to_active(pkt->bb_vma, rq, 0);
+ if (err)
+ goto out_rq;
+ err = i915_vma_move_to_active(pkt->heci_pkt_vma, rq, EXEC_OBJECT_WRITE);
+ if (err)
+ goto out_rq;
+
+ engine = rq->context->engine;
+ if (engine->emit_init_breadcrumb) {
+ err = engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto out_rq;
+ }
+
+ err = engine->emit_bb_start(rq, i915_vma_offset(pkt->bb_vma), PAGE_SIZE, 0);
+ if (err)
+ goto out_rq;
+
+ err = ce->engine->emit_flush(rq, 0);
+ if (err)
+ drm_err(&gsc_uc_to_gt(gsc)->i915->drm,
+ "Failed emit-flush for gsc-heci-non-priv-pkterr=%d\n", err);
+
+out_rq:
+ i915_request_get(rq);
+
+ if (unlikely(err))
+ i915_request_set_error_once(rq, err);
+
+ i915_request_add(rq);
+
+ if (!err) {
+ /*
+ * Start timeout for i915_request_wait only after considering one possible
+ * pending GSC-HECI submission cycle on the other (privileged) path.
+ */
+ if (wait_for(i915_request_started(rq), GSC_HECI_REPLY_LATENCY_MS))
+ drm_dbg(&gsc_uc_to_gt(gsc)->i915->drm,
+ "Delay in gsc-heci-non-priv submission to gsccs-hw");
+ if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
+ msecs_to_jiffies(timeout_ms)) < 0)
+ err = -ETIME;
+ }
+
+ i915_request_put(rq);
+
+out_unpin_ce:
+ intel_context_unpin(ce);
+out_ww:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err) {
+ if (++trials < 10)
+ goto retry;
+ else
+ err = -EAGAIN;
+ }
+ }
+ i915_gem_ww_ctx_fini(&ww);
+
+ return err;
+}
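The open-coded lock/backoff loop above bounds the number of -EDEADLK retries, a departure from the more common open-ended pattern. For comparison, the canonical form looks roughly like this (a sketch using the for_i915_gem_ww() helper):

	struct i915_gem_ww_ctx ww;
	int err;

	for_i915_gem_ww(&ww, err, false) {
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;	/* -EDEADLK backs off and retries */
		/* ... pin the context, build and submit the request ... */
	}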
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h
new file mode 100644
index 000000000000..c4308291c003
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _INTEL_GSC_UC_HECI_CMD_SUBMIT_H_
+#define _INTEL_GSC_UC_HECI_CMD_SUBMIT_H_
+
+#include <linux/types.h>
+
+struct i915_vma;
+struct intel_context;
+struct intel_gsc_uc;
+
+#define GSC_HECI_REPLY_LATENCY_MS 500
+/*
+ * Max FW response time is 500ms, but this should be counted from the time the
+ * command has hit the GSC-CS hardware, not the preceding handoff to GuC CTB.
+ */
+
+struct intel_gsc_mtl_header {
+ u32 validity_marker;
+#define GSC_HECI_VALIDITY_MARKER 0xA578875A
+
+ u8 heci_client_id;
+#define HECI_MEADDRESS_MKHI 7
+#define HECI_MEADDRESS_PROXY 10
+#define HECI_MEADDRESS_PXP 17
+#define HECI_MEADDRESS_HDCP 18
+
+ u8 reserved1;
+
+ u16 header_version;
+#define MTL_GSC_HEADER_VERSION 1
+
+ /*
+	 * The FW allows the host to choose the host_session handle as it
+	 * sees fit. For traceability, bits 60-63 are reserved to identify
+	 * the caller-target subsystem:
+ * 0000 - HDCP
+ * 0001 - PXP Single Session
+ */
+ u64 host_session_handle;
+#define HOST_SESSION_MASK REG_GENMASK64(63, 60)
+#define HOST_SESSION_PXP_SINGLE BIT_ULL(60)
+ u64 gsc_message_handle;
+
+ u32 message_size; /* lower 20 bits only, upper 12 are reserved */
+
+ /*
+ * Flags mask:
+ * Bit 0: Pending
+	 * Bit 1: Session Cleanup
+	 * Bits 2-15: Flags
+	 * Bits 16-31: Extension Size
+	 * According to the internal spec, flags are either input or output;
+	 * we distinguish them using the OUTFLAG or INFLAG prefix.
+ */
+ u32 flags;
+#define GSC_OUTFLAG_MSG_PENDING BIT(0)
+#define GSC_INFLAG_MSG_CLEANUP BIT(1)
+
+ u32 status;
+} __packed;
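+/*
+ * A consumer polling for the GSC's reply would typically re-read this
+ * header from the output buffer and treat a still-pending message as
+ * "try again later", e.g. (sketch):
+ *
+ *	if (reply->flags & GSC_OUTFLAG_MSG_PENDING)
+ *		return -EAGAIN;
+ */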
+
+int intel_gsc_uc_heci_cmd_submit_packet(struct intel_gsc_uc *gsc,
+ u64 addr_in, u32 size_in,
+ u64 addr_out, u32 size_out);
+void intel_gsc_uc_heci_cmd_emit_mtl_header(struct intel_gsc_mtl_header *header,
+ u8 heci_client_id, u32 message_size,
+ u64 host_session_id);
+
+struct intel_gsc_heci_non_priv_pkt {
+ u64 addr_in;
+ u32 size_in;
+ u64 addr_out;
+ u32 size_out;
+ struct i915_vma *heci_pkt_vma;
+ struct i915_vma *bb_vma;
+};
+
+int
+intel_gsc_uc_heci_cmd_submit_nonpriv(struct intel_gsc_uc *gsc,
+ struct intel_context *ce,
+ struct intel_gsc_heci_non_priv_pkt *pkt,
+ u32 *cs, int timeout_ms);
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index d76508fa3af7..52ec4421a211 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -8,14 +8,17 @@
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm_irq.h"
#include "gt/intel_gt_regs.h"
+
+#include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
+#include "i915_wait_util.h"
#include "intel_guc.h"
#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_print.h"
#include "intel_guc_slpc.h"
#include "intel_guc_submission.h"
-#include "i915_drv.h"
-#include "i915_irq.h"
/**
* DOC: GuC
@@ -158,18 +161,35 @@ static void gen11_disable_guc_interrupts(struct intel_guc *guc)
gen11_reset_guc_interrupts(guc);
}
+static void guc_dead_worker_func(struct work_struct *w)
+{
+ struct intel_guc *guc = container_of(w, struct intel_guc, dead_guc_worker);
+ struct intel_gt *gt = guc_to_gt(guc);
+ unsigned long last = guc->last_dead_guc_jiffies;
+ unsigned long delta = jiffies_to_msecs(jiffies - last);
+
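+	/*
+	 * If the previous dead-GuC event was less than 500ms ago, assume
+	 * the system is fundamentally broken and stop reloading the
+	 * firmware; otherwise capture state and attempt a full GT reset.
+	 */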
+ if (delta < 500) {
+ intel_gt_set_wedged(gt);
+ } else {
+ intel_gt_handle_error(gt, ALL_ENGINES, I915_ERROR_CAPTURE, "dead GuC");
+ guc->last_dead_guc_jiffies = jiffies;
+ }
+}
+
void intel_guc_init_early(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct drm_i915_private *i915 = gt->i915;
- intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC);
+ intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC, true);
intel_guc_ct_init_early(&guc->ct);
intel_guc_log_init_early(&guc->log);
intel_guc_submission_init_early(guc);
intel_guc_slpc_init_early(&guc->slpc);
intel_guc_rc_init_early(guc);
+ INIT_WORK(&guc->dead_guc_worker, guc_dead_worker_func);
+
mutex_init(&guc->send_mutex);
spin_lock_init(&guc->irq_lock);
if (GRAPHICS_VER(i915) >= 11) {
@@ -221,8 +241,16 @@ static u32 guc_ctl_debug_flags(struct intel_guc *guc)
static u32 guc_ctl_feature_flags(struct intel_guc *guc)
{
+ struct intel_gt *gt = guc_to_gt(guc);
u32 flags = 0;
+ /*
+ * Enable PXP GuC autoteardown flow.
+ * NB: MTL does things differently.
+ */
+ if (HAS_PXP(gt->i915) && !IS_METEORLAKE(gt->i915))
+ flags |= GUC_CTL_ENABLE_GUC_PXP_CTL;
+
if (!intel_guc_submission_is_used(guc))
flags |= GUC_CTL_DISABLE_SCHEDULER;
@@ -268,57 +296,58 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
/* Wa_22012773006:gen11,gen12 < XeHP */
if (GRAPHICS_VER(gt->i915) >= 11 &&
- GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50))
+ GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 55))
flags |= GUC_WA_POLLCS;
- /* Wa_16011759253:dg2_g10:a0 */
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
- flags |= GUC_WA_GAM_CREDITS;
-
/* Wa_14014475959 */
- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
IS_DG2(gt->i915))
flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
+ /* Wa_16019325821 */
+ /* Wa_14019159160 */
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
+ flags |= GUC_WA_RCS_CCS_SWITCHOUT;
+
/*
- * Wa_14012197797:dg2_g10:a0,dg2_g11:a0
- * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12
+ * Wa_14012197797
+ * Wa_22011391025
*
* The same WA bit is used for both and 22011391025 is applicable to
* all DG2.
+ *
+	 * Platforms after DG2 prevent this issue in hardware by stalling
+	 * submissions. With this flag, GuC will schedule so as to avoid such
+	 * stalls.
*/
- if (IS_DG2(gt->i915))
+ if (IS_DG2(gt->i915) ||
+ (CCS_MASK(gt) && GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)))
flags |= GUC_WA_DUAL_QUEUE;
/* Wa_22011802037: graphics version 11/12 */
- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(gt->i915) >= 11 &&
- GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70)))
+ if (intel_engine_reset_needs_wa_22011802037(gt))
flags |= GUC_WA_PRE_PARSER;
- /* Wa_16011777198:dg2 */
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
- flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
-
/*
- * Wa_22012727170:dg2_g10[a0-c0), dg2_g11[a0..)
- * Wa_22012727685:dg2_g11[a0..)
+ * Wa_22012727170
+ * Wa_22012727685
*/
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER))
+ if (IS_DG2_G11(gt->i915))
flags |= GUC_WA_CONTEXT_ISOLATION;
- /* Wa_16015675438 */
- if (!RCS_MASK(gt))
- flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
+ /*
+ * Wa_14018913170: Applicable to all platforms supported by i915 so
+ * don't bother testing for all X/Y/Z platforms explicitly.
+ */
+ if (GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 7, 0))
+ flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6;
return flags;
}
static u32 guc_ctl_devid(struct intel_guc *guc)
{
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
return (INTEL_DEVID(i915) << 16) | INTEL_REVID(i915);
}
@@ -460,6 +489,8 @@ void intel_guc_fini(struct intel_guc *guc)
if (!intel_uc_fw_is_loadable(&guc->fw))
return;
+ flush_work(&guc->dead_guc_worker);
+
if (intel_guc_slpc_is_used(guc))
intel_guc_slpc_fini(&guc->slpc);
@@ -584,6 +615,20 @@ out:
return ret;
}
+int intel_guc_crash_process_msg(struct intel_guc *guc, u32 action)
+{
+ if (action == INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED)
+ guc_err(guc, "Crash dump notification\n");
+ else if (action == INTEL_GUC_ACTION_NOTIFY_EXCEPTION)
+ guc_err(guc, "Exception notification\n");
+ else
+ guc_err(guc, "Unknown crash notification: 0x%04X\n", action);
+
+ queue_work(system_unbound_wq, &guc->dead_guc_worker);
+
+ return 0;
+}
+
int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
const u32 *payload, u32 len)
{
@@ -600,6 +645,9 @@ int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
if (msg & INTEL_GUC_RECV_MSG_EXCEPTION)
guc_err(guc, "Received early exception notification!\n");
+ if (msg & (INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED | INTEL_GUC_RECV_MSG_EXCEPTION))
+ queue_work(system_unbound_wq, &guc->dead_guc_worker);
+
return 0;
}
@@ -639,6 +687,8 @@ int intel_guc_suspend(struct intel_guc *guc)
return 0;
if (intel_guc_submission_is_used(guc)) {
+ flush_work(&guc->dead_guc_worker);
+
/*
* This H2G MMIO command tears down the GuC in two steps. First it will
* generate a G2H CTB for every active context indicating a reset. In
@@ -647,7 +697,7 @@ int intel_guc_suspend(struct intel_guc *guc)
* H2G MMIO command completes.
*
* Don't abort on a failure code from the GuC. Keep going and do the
- * clean up in santize() and re-initialisation on resume and hopefully
+ * clean up in sanitize() and re-initialisation on resume and hopefully
* the error here won't be problematic.
*/
ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
@@ -743,6 +793,14 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
if (IS_ERR(obj))
return ERR_CAST(obj);
+ /*
+ * Wa_22016122933: For Media version 13.0, all Media GT shared
+ * memory needs to be mapped as WC on CPU side and UC (PAT
+ * index 2) on GPU side.
+ */
+ if (intel_gt_needs_wa_22016122933(gt))
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
+
vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
if (IS_ERR(vma))
goto err;
@@ -784,8 +842,8 @@ int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
return PTR_ERR(vma);
vaddr = i915_gem_object_pin_map_unlocked(vma->obj,
- i915_coherent_map_type(guc_to_gt(guc)->i915,
- vma->obj, true));
+ intel_gt_coherent_map_type(guc_to_gt(guc),
+ vma->obj, true));
if (IS_ERR(vaddr)) {
i915_vma_unpin_and_release(&vma, 0);
return PTR_ERR(vaddr);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index bb4dfe707a7d..053780f562c1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -42,6 +42,9 @@ struct intel_guc {
/** @capture: the error-state-capture module's data and objects */
struct intel_guc_state_capture *capture;
+ /** @dbgfs_node: debugfs node */
+ struct dentry *dbgfs_node;
+
/** @sched_engine: Global engine used to submit requests to GuC */
struct i915_sched_engine *sched_engine;
/**
@@ -76,6 +79,18 @@ struct intel_guc {
*/
atomic_t outstanding_submission_g2h;
+ /** @tlb_lookup: xarray to store all pending TLB invalidation requests */
+ struct xarray tlb_lookup;
+
+ /**
+	 * @serial_slot: id of the initial waiter created in tlb_lookup,
+	 * used only when allocation of a new waiter fails.
+ */
+ u32 serial_slot;
+
+ /** @next_seqno: the next id (sequence number) to allocate. */
+ u32 next_seqno;
+
/** @interrupts: pointers to GuC interrupt-managing functions. */
struct {
bool enabled;
@@ -90,61 +105,67 @@ struct intel_guc {
*/
struct {
/**
- * @lock: protects everything in submission_state,
- * ce->guc_id.id, and ce->guc_id.ref when transitioning in and
- * out of zero
+ * @submission_state.lock: protects everything in
+ * submission_state, ce->guc_id.id, and ce->guc_id.ref
+ * when transitioning in and out of zero
*/
spinlock_t lock;
/**
- * @guc_ids: used to allocate new guc_ids, single-lrc
+ * @submission_state.guc_ids: used to allocate new
+ * guc_ids, single-lrc
*/
struct ida guc_ids;
/**
- * @num_guc_ids: Number of guc_ids, selftest feature to be able
- * to reduce this number while testing.
+ * @submission_state.num_guc_ids: Number of guc_ids, selftest
+ * feature to be able to reduce this number while testing.
*/
int num_guc_ids;
/**
- * @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
+ * @submission_state.guc_ids_bitmap: used to allocate
+ * new guc_ids, multi-lrc
*/
unsigned long *guc_ids_bitmap;
/**
- * @guc_id_list: list of intel_context with valid guc_ids but no
- * refs
+ * @submission_state.guc_id_list: list of intel_context
+ * with valid guc_ids but no refs
*/
struct list_head guc_id_list;
/**
- * @guc_ids_in_use: Number single-lrc guc_ids in use
+		 * @submission_state.guc_ids_in_use: Number of single-lrc
+		 * guc_ids in use
*/
unsigned int guc_ids_in_use;
/**
- * @destroyed_contexts: list of contexts waiting to be destroyed
- * (deregistered with the GuC)
+ * @submission_state.destroyed_contexts: list of contexts
+ * waiting to be destroyed (deregistered with the GuC)
*/
struct list_head destroyed_contexts;
/**
- * @destroyed_worker: worker to deregister contexts, need as we
- * need to take a GT PM reference and can't from destroy
- * function as it might be in an atomic context (no sleeping)
+		 * @submission_state.destroyed_worker: worker to deregister
+		 * contexts; needed because we must take a GT PM reference,
+		 * which can't be done from the destroy function as it might
+		 * run in an atomic context (no sleeping)
*/
struct work_struct destroyed_worker;
/**
- * @reset_fail_worker: worker to trigger a GT reset after an
- * engine reset fails
+ * @submission_state.reset_fail_worker: worker to trigger
+ * a GT reset after an engine reset fails
*/
struct work_struct reset_fail_worker;
/**
- * @reset_fail_mask: mask of engines that failed to reset
+ * @submission_state.reset_fail_mask: mask of engines that
+ * failed to reset
*/
intel_engine_mask_t reset_fail_mask;
/**
- * @sched_disable_delay_ms: schedule disable delay, in ms, for
- * contexts
+ * @submission_state.sched_disable_delay_ms: schedule
+ * disable delay, in ms, for contexts
*/
unsigned int sched_disable_delay_ms;
/**
- * @sched_disable_gucid_threshold: threshold of min remaining available
- * guc_ids before we start bypassing the schedule disable delay
+ * @submission_state.sched_disable_gucid_threshold:
+ * threshold of min remaining available guc_ids before
+ * we start bypassing the schedule disable delay
*/
unsigned int sched_disable_gucid_threshold;
} submission_state;
@@ -183,10 +204,10 @@ struct intel_guc {
struct guc_mmio_reg *ads_regset;
/** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
u32 ads_golden_ctxt_size;
+ /** @ads_waklv_size: size of workaround KLVs */
+ u32 ads_waklv_size;
/** @ads_capture_size: size of register lists in the ADS used for error capture */
u32 ads_capture_size;
- /** @ads_engine_usage_size: size of engine usage in the ADS */
- u32 ads_engine_usage_size;
/** @lrc_desc_pool_v69: object allocated to hold the GuC LRC descriptor pool */
struct i915_vma *lrc_desc_pool_v69;
@@ -228,49 +249,75 @@ struct intel_guc {
*/
struct {
/**
- * @lock: Lock protecting the below fields and the engine stats.
+ * @timestamp.lock: Lock protecting the below fields and
+ * the engine stats.
*/
spinlock_t lock;
/**
- * @gt_stamp: 64 bit extended value of the GT timestamp.
+ * @timestamp.gt_stamp: 64-bit extended value of the GT
+ * timestamp.
*/
u64 gt_stamp;
/**
- * @ping_delay: Period for polling the GT timestamp for
- * overflow.
+ * @timestamp.ping_delay: Period for polling the GT
+ * timestamp for overflow.
*/
unsigned long ping_delay;
/**
- * @work: Periodic work to adjust GT timestamp, engine and
- * context usage for overflows.
+ * @timestamp.work: Periodic work to adjust GT timestamp,
+ * engine and context usage for overflows.
*/
struct delayed_work work;
/**
- * @shift: Right shift value for the gpm timestamp
+ * @timestamp.shift: Right shift value for the gpm timestamp
*/
u32 shift;
/**
- * @last_stat_jiffies: jiffies at last actual stats collection time
- * We use this timestamp to ensure we don't oversample the
- * stats because runtime power management events can trigger
- * stats collection at much higher rates than required.
+ * @timestamp.last_stat_jiffies: jiffies at last actual
+ * stats collection time. We use this timestamp to ensure
+ * we don't oversample the stats because runtime power
+ * management events can trigger stats collection at much
+ * higher rates than required.
*/
unsigned long last_stat_jiffies;
} timestamp;
+ /**
+ * @dead_guc_worker: Asynchronous worker thread for forcing a GuC reset.
+ * Specifically used when the G2H handler wants to issue a reset. Resets
+ * require flushing the G2H queue. So, the G2H processing itself must not
+ * trigger a reset directly. Instead, go via this worker.
+ */
+ struct work_struct dead_guc_worker;
+ /**
+ * @last_dead_guc_jiffies: timestamp of previous 'dead guc' occurrence
+ * used to prevent a fundamentally broken system from continuously
+ * reloading the GuC.
+ */
+ unsigned long last_dead_guc_jiffies;
+
#ifdef CONFIG_DRM_I915_SELFTEST
/**
* @number_guc_id_stolen: The number of guc_ids that have been stolen
*/
int number_guc_id_stolen;
+ /**
+ * @fast_response_selftest: Backdoor to CT handler for fast response selftest
+ */
+ u32 fast_response_selftest;
#endif
};
+struct intel_guc_tlb_wait {
+ struct wait_queue_head wq;
+ bool busy;
+};
+
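+/*
+ * Expected waiter flow (a sketch; the send side is not part of this hunk):
+ * allocate a slot in guc->tlb_lookup, send the H2G invalidation carrying
+ * the slot's seqno, then sleep until the G2H handler clears busy:
+ *
+ *	wait_event_timeout(wait->wq, !wait->busy, timeout);
+ */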
/*
* GuC version number components are only 8-bit, so converting to a 32bit 8.8.8
* integer works.
@@ -278,6 +325,7 @@ struct intel_guc {
#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))
#define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch)
#define GUC_SUBMIT_VER(guc) MAKE_GUC_VER_STRUCT((guc)->submission_version)
+#define GUC_FIRMWARE_VER(guc) MAKE_GUC_VER_STRUCT((guc)->fw.file_selected.ver)
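+/* e.g. MAKE_GUC_VER(70, 10, 0) == 0x00460a00, so versions compare as plain integers */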
static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
{
@@ -473,6 +521,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len);
int intel_guc_error_capture_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len);
+int intel_guc_crash_process_msg(struct intel_guc *guc, u32 action);
struct intel_engine_cs *
intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance);
@@ -496,4 +545,10 @@ void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p);
int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc);
+bool intel_guc_tlb_invalidation_is_available(struct intel_guc *guc);
+int intel_guc_invalidate_tlb_engines(struct intel_guc *guc);
+int intel_guc_invalidate_tlb_guc(struct intel_guc *guc);
+int intel_guc_tlb_invalidation_done(struct intel_guc *guc,
+ const u32 *payload, u32 len);
+void wake_up_all_tlb_invalidate(struct intel_guc *guc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 69ce06faf8cd..46fabbfc775e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -46,6 +46,10 @@
* +---------------------------------------+
* | padding |
* +---------------------------------------+ <== 4K aligned
+ * | w/a KLVs |
+ * +---------------------------------------+
+ * | padding |
+ * +---------------------------------------+ <== 4K aligned
* | capture lists |
* +---------------------------------------+
* | padding |
@@ -88,6 +92,11 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc)
return PAGE_ALIGN(guc->ads_golden_ctxt_size);
}
+static u32 guc_ads_waklv_size(struct intel_guc *guc)
+{
+ return PAGE_ALIGN(guc->ads_waklv_size);
+}
+
static u32 guc_ads_capture_size(struct intel_guc *guc)
{
return PAGE_ALIGN(guc->ads_capture_size);
@@ -113,7 +122,7 @@ static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc)
return PAGE_ALIGN(offset);
}
-static u32 guc_ads_capture_offset(struct intel_guc *guc)
+static u32 guc_ads_waklv_offset(struct intel_guc *guc)
{
u32 offset;
@@ -123,6 +132,16 @@ static u32 guc_ads_capture_offset(struct intel_guc *guc)
return PAGE_ALIGN(offset);
}
+static u32 guc_ads_capture_offset(struct intel_guc *guc)
+{
+ u32 offset;
+
+ offset = guc_ads_waklv_offset(guc) +
+ guc_ads_waklv_size(guc);
+
+ return PAGE_ALIGN(offset);
+}
+
static u32 guc_ads_private_data_offset(struct intel_guc *guc)
{
u32 offset;
@@ -377,8 +396,13 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
CCS_MASK(engine->gt))
ret |= GUC_MMIO_REG_ADD(gt, regset, GEN12_RCU_MODE, true);
+ /*
+	 * Some of the WA registers are MCR registers. As it is safe to
+	 * use the MCR form for non-MCR registers too, for simplicity all
+	 * WA registers are added in MCR form.
+ */
for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
- ret |= GUC_MMIO_REG_ADD(gt, regset, wa->reg, wa->masked_reg);
+ ret |= GUC_MCR_REG_ADD(gt, regset, wa->mcr_reg, wa->masked_reg);
/* Be extra paranoid and include all whitelist registers. */
for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
@@ -388,19 +412,19 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
/* add in local MOCS registers */
for (i = 0; i < LNCFCMOCS_REG_COUNT; i++)
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
ret |= GUC_MCR_REG_ADD(gt, regset, XEHP_LNCFCMOCS(i), false);
else
ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
if (GRAPHICS_VER(engine->i915) >= 12) {
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL0, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL1, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL2, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL3, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL4, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL5, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL6, false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL0)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL1)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL2)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL3)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL4)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL5)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL6)), false);
}
return ret ? -1 : 0;
@@ -498,7 +522,7 @@ static void fill_engine_enable_masks(struct intel_gt *gt,
#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32))
#define XEHP_LR_HW_CONTEXT_SIZE (96 * sizeof(u32))
-#define LR_HW_CONTEXT_SZ(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50) ? \
+#define LR_HW_CONTEXT_SZ(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55) ? \
XEHP_LR_HW_CONTEXT_SIZE : \
LR_HW_CONTEXT_SIZE)
#define LRC_SKIP_SIZE(i915) (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SZ(i915))
@@ -643,6 +667,39 @@ static void guc_init_golden_context(struct intel_guc *guc)
GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
}
+static u32 guc_get_capture_engine_mask(struct iosys_map *info_map, u32 capture_class)
+{
+ u32 mask;
+
+ switch (capture_class) {
+ case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE:
+ mask = info_map_read(info_map, engine_enabled_masks[GUC_RENDER_CLASS]);
+ mask |= info_map_read(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]);
+ break;
+
+ case GUC_CAPTURE_LIST_CLASS_VIDEO:
+ mask = info_map_read(info_map, engine_enabled_masks[GUC_VIDEO_CLASS]);
+ break;
+
+ case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE:
+ mask = info_map_read(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]);
+ break;
+
+ case GUC_CAPTURE_LIST_CLASS_BLITTER:
+ mask = info_map_read(info_map, engine_enabled_masks[GUC_BLITTER_CLASS]);
+ break;
+
+ case GUC_CAPTURE_LIST_CLASS_GSC_OTHER:
+ mask = info_map_read(info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]);
+ break;
+
+ default:
+ mask = 0;
+ }
+
+ return mask;
+}
+
static int
guc_capture_prep_lists(struct intel_guc *guc)
{
@@ -678,9 +735,10 @@ guc_capture_prep_lists(struct intel_guc *guc)
for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
+ u32 engine_mask = guc_get_capture_engine_mask(&info_map, j);
		/* null list if we don't have said engine or list */
- if (!info_map_read(&info_map, engine_enabled_masks[j])) {
+ if (!engine_mask) {
if (ads_is_mapped) {
ads_blob_write(guc, ads.capture_class[i][j], null_ggtt);
ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt);
@@ -757,6 +815,73 @@ engine_instance_list:
return PAGE_ALIGN(total_size);
}
+static void guc_waklv_enable_simple(struct intel_guc *guc, u32 *offset, u32 *remain, u32 klv_id)
+{
+ u32 size;
+ u32 klv_entry[] = {
+ /* 16:16 key/length */
+ FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
+ FIELD_PREP(GUC_KLV_0_LEN, 0),
+ /* 0 dwords data */
+ };
+
+ size = sizeof(klv_entry);
+ GEM_BUG_ON(*remain < size);
+
+ iosys_map_memcpy_to(&guc->ads_map, *offset, klv_entry, size);
+ *offset += size;
+ *remain -= size;
+}
+
+static void guc_waklv_init(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ u32 offset, addr_ggtt, remain, size;
+
+ if (!intel_uc_uses_guc_submission(&gt->uc))
+ return;
+
+ if (GUC_FIRMWARE_VER(guc) < MAKE_GUC_VER(70, 10, 0))
+ return;
+
+ GEM_BUG_ON(iosys_map_is_null(&guc->ads_map));
+ offset = guc_ads_waklv_offset(guc);
+ remain = guc_ads_waklv_size(guc);
+
+ /* Wa_14019159160 */
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
+ guc_waklv_enable_simple(guc, &offset, &remain,
+ GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE);
+ guc_waklv_enable_simple(guc, &offset, &remain,
+ GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE);
+ }
+
+ /* Wa_16021333562 */
+ if ((GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 21, 1)) &&
+ (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) ||
+ IS_MEDIA_GT_IP_RANGE(gt, IP_VER(13, 0), IP_VER(13, 0)) ||
+ IS_DG2(gt->i915)))
+ guc_waklv_enable_simple(guc, &offset, &remain,
+ GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED);
+
+ size = guc_ads_waklv_size(guc) - remain;
+ if (!size)
+ return;
+
+ offset = guc_ads_waklv_offset(guc);
+ addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
+
+ ads_blob_write(guc, ads.wa_klv_addr_lo, addr_ggtt);
+ ads_blob_write(guc, ads.wa_klv_addr_hi, 0);
+ ads_blob_write(guc, ads.wa_klv_size, size);
+}
+
+static int guc_prep_waklv(struct intel_guc *guc)
+{
+ /* Fudge something chunky for now: */
+ return PAGE_SIZE;
+}
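+/*
+ * The w/a KLVs above are all zero-length, i.e. a bare key dword (key in
+ * bits 31:16, data-dword count in bits 15:0). A KLV carrying data would
+ * append its payload after that header, roughly (a sketch with a made-up
+ * key and a single data dword):
+ *
+ *	u32 klv[] = {
+ *		FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
+ *		FIELD_PREP(GUC_KLV_0_LEN, 1),
+ *		data_dword,
+ *	};
+ */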
+
static void __guc_ads_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -804,6 +929,9 @@ static void __guc_ads_init(struct intel_guc *guc)
/* MMIO save/restore list */
guc_mmio_reg_state_init(guc);
+ /* Workaround KLV list */
+ guc_waklv_init(guc);
+
/* Private Data */
ads_blob_write(guc, ads.private_data, base +
guc_ads_private_data_offset(guc));
@@ -847,6 +975,12 @@ int intel_guc_ads_create(struct intel_guc *guc)
return ret;
guc->ads_capture_size = ret;
+ /* And don't forget the workaround KLVs: */
+ ret = guc_prep_waklv(guc);
+ if (ret < 0)
+ return ret;
+ guc->ads_waklv_size = ret;
+
/* Now the total size can be determined: */
size = guc_ads_blob_size(guc);
@@ -922,7 +1056,7 @@ u32 intel_guc_engine_usage_offset(struct intel_guc *guc)
struct iosys_map intel_guc_engine_usage_record_map(struct intel_engine_cs *engine)
{
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
u8 guc_class = engine_class_to_guc_class(engine->class);
size_t offset = offsetof(struct __guc_ads_blob,
engine_usage.engines[guc_class][ilog2(engine->logical_mask)]);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
index fc3b994626a4..9547fff672bd 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
@@ -15,6 +15,7 @@
#include "guc_capture_fwif.h"
#include "intel_guc_capture.h"
#include "intel_guc_fwif.h"
+#include "intel_guc_print.h"
#include "i915_drv.h"
#include "i915_gpu_error.h"
#include "i915_irq.h"
@@ -29,13 +30,15 @@
#define COMMON_BASE_GLOBAL \
{ FORCEWAKE_MT, 0, 0, "FORCEWAKE" }
-#define COMMON_GEN9BASE_GLOBAL \
- { GEN8_FAULT_TLB_DATA0, 0, 0, "GEN8_FAULT_TLB_DATA0" }, \
- { GEN8_FAULT_TLB_DATA1, 0, 0, "GEN8_FAULT_TLB_DATA1" }, \
+#define COMMON_GEN8BASE_GLOBAL \
{ ERROR_GEN6, 0, 0, "ERROR_GEN6" }, \
{ DONE_REG, 0, 0, "DONE_REG" }, \
{ HSW_GTT_CACHE_EN, 0, 0, "HSW_GTT_CACHE_EN" }
+#define GEN8_GLOBAL \
+ { GEN8_FAULT_TLB_DATA0, 0, 0, "GEN8_FAULT_TLB_DATA0" }, \
+ { GEN8_FAULT_TLB_DATA1, 0, 0, "GEN8_FAULT_TLB_DATA1" }
+
#define COMMON_GEN12BASE_GLOBAL \
{ GEN12_FAULT_TLB_DATA0, 0, 0, "GEN12_FAULT_TLB_DATA0" }, \
{ GEN12_FAULT_TLB_DATA1, 0, 0, "GEN12_FAULT_TLB_DATA1" }, \
@@ -48,6 +51,7 @@
{ RING_ESR(0), 0, 0, "ESR" }, \
{ RING_DMA_FADD(0), 0, 0, "RING_DMA_FADD_LDW" }, \
{ RING_DMA_FADD_UDW(0), 0, 0, "RING_DMA_FADD_UDW" }, \
+ { RING_EIR(0), 0, 0, "EIR" }, \
{ RING_IPEIR(0), 0, 0, "IPEIR" }, \
{ RING_IPEHR(0), 0, 0, "IPEHR" }, \
{ RING_INSTPS(0), 0, 0, "INSTPS" }, \
@@ -77,9 +81,6 @@
{ GEN8_RING_PDP_LDW(0, 3), 0, 0, "PDP3_LDW" }, \
{ GEN8_RING_PDP_UDW(0, 3), 0, 0, "PDP3_UDW" }
-#define COMMON_BASE_HAS_EU \
- { EIR, 0, 0, "EIR" }
-
#define COMMON_BASE_RENDER \
{ GEN7_SC_INSTDONE, 0, 0, "GEN7_SC_INSTDONE" }
@@ -93,66 +94,63 @@
{ GEN12_SFC_DONE(2), 0, 0, "SFC_DONE[2]" }, \
{ GEN12_SFC_DONE(3), 0, 0, "SFC_DONE[3]" }
-/* XE_LPD - Global */
-static const struct __guc_mmio_reg_descr xe_lpd_global_regs[] = {
+/* XE_LP Global */
+static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
COMMON_BASE_GLOBAL,
- COMMON_GEN9BASE_GLOBAL,
+ COMMON_GEN8BASE_GLOBAL,
COMMON_GEN12BASE_GLOBAL,
};
-/* XE_LPD - Render / Compute Per-Class */
-static const struct __guc_mmio_reg_descr xe_lpd_rc_class_regs[] = {
- COMMON_BASE_HAS_EU,
+/* XE_LP Render / Compute Per-Class */
+static const struct __guc_mmio_reg_descr xe_lp_rc_class_regs[] = {
COMMON_BASE_RENDER,
COMMON_GEN12BASE_RENDER,
};
-/* GEN9/XE_LPD - Render / Compute Per-Engine-Instance */
-static const struct __guc_mmio_reg_descr xe_lpd_rc_inst_regs[] = {
+/* GEN8+ Render / Compute Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr gen8_rc_inst_regs[] = {
COMMON_BASE_ENGINE_INSTANCE,
};
-/* GEN9/XE_LPD - Media Decode/Encode Per-Engine-Instance */
-static const struct __guc_mmio_reg_descr xe_lpd_vd_inst_regs[] = {
+/* GEN8+ Media Decode/Encode Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr gen8_vd_inst_regs[] = {
COMMON_BASE_ENGINE_INSTANCE,
};
-/* XE_LPD - Video Enhancement Per-Class */
-static const struct __guc_mmio_reg_descr xe_lpd_vec_class_regs[] = {
+/* XE_LP Video Enhancement Per-Class */
+static const struct __guc_mmio_reg_descr xe_lp_vec_class_regs[] = {
COMMON_GEN12BASE_VEC,
};
-/* GEN9/XE_LPD - Video Enhancement Per-Engine-Instance */
-static const struct __guc_mmio_reg_descr xe_lpd_vec_inst_regs[] = {
+/* GEN8+ Video Enhancement Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr gen8_vec_inst_regs[] = {
COMMON_BASE_ENGINE_INSTANCE,
};
-/* GEN9/XE_LPD - Blitter Per-Engine-Instance */
-static const struct __guc_mmio_reg_descr xe_lpd_blt_inst_regs[] = {
+/* GEN8+ Blitter Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr gen8_blt_inst_regs[] = {
COMMON_BASE_ENGINE_INSTANCE,
};
-/* XE_LPD - GSC Per-Engine-Instance */
-static const struct __guc_mmio_reg_descr xe_lpd_gsc_inst_regs[] = {
+/* XE_LP - GSC Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_lp_gsc_inst_regs[] = {
COMMON_BASE_ENGINE_INSTANCE,
};
-/* GEN9 - Global */
-static const struct __guc_mmio_reg_descr default_global_regs[] = {
+/* GEN8 - Global */
+static const struct __guc_mmio_reg_descr gen8_global_regs[] = {
COMMON_BASE_GLOBAL,
- COMMON_GEN9BASE_GLOBAL,
+ COMMON_GEN8BASE_GLOBAL,
+ GEN8_GLOBAL,
};
-static const struct __guc_mmio_reg_descr default_rc_class_regs[] = {
- COMMON_BASE_HAS_EU,
+static const struct __guc_mmio_reg_descr gen8_rc_class_regs[] = {
COMMON_BASE_RENDER,
};
/*
- * Empty lists:
- * GEN9/XE_LPD - Blitter Per-Class
- * GEN9/XE_LPD - Media Decode/Encode Per-Class
- * GEN9 - VEC Class
+ * Empty list to prevent warnings about unknown class/instance types
+ * as not all class/instance types have entries on all platforms.
*/
static const struct __guc_mmio_reg_descr empty_regs_list[] = {
};
@@ -170,37 +168,33 @@ static const struct __guc_mmio_reg_descr empty_regs_list[] = {
}
/* List of lists */
-static const struct __guc_mmio_reg_descr_group default_lists[] = {
- MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0),
- MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS),
- MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS),
- MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS),
- MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS),
- MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS),
- MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS),
- MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS),
- MAKE_REGLIST(xe_lpd_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS),
- MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_BLITTER_CLASS),
- MAKE_REGLIST(xe_lpd_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS),
- MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_GSC_OTHER_CLASS),
- MAKE_REGLIST(xe_lpd_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_GSC_OTHER_CLASS),
+static const struct __guc_mmio_reg_descr_group gen8_lists[] = {
+ MAKE_REGLIST(gen8_global_regs, PF, GLOBAL, 0),
+ MAKE_REGLIST(gen8_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(gen8_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
+ MAKE_REGLIST(gen8_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(gen8_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
+ MAKE_REGLIST(gen8_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
{}
};
-static const struct __guc_mmio_reg_descr_group xe_lpd_lists[] = {
- MAKE_REGLIST(xe_lpd_global_regs, PF, GLOBAL, 0),
- MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS),
- MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS),
- MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS),
- MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS),
- MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS),
- MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS),
- MAKE_REGLIST(xe_lpd_vec_class_regs, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS),
- MAKE_REGLIST(xe_lpd_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS),
- MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_BLITTER_CLASS),
- MAKE_REGLIST(xe_lpd_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS),
- MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_GSC_OTHER_CLASS),
- MAKE_REGLIST(xe_lpd_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_GSC_OTHER_CLASS),
+static const struct __guc_mmio_reg_descr_group xe_lp_lists[] = {
+ MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
+ MAKE_REGLIST(xe_lp_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(gen8_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
+ MAKE_REGLIST(gen8_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
+ MAKE_REGLIST(xe_lp_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(gen8_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
+ MAKE_REGLIST(gen8_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
+ MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
{}
};
@@ -256,11 +250,15 @@ struct __ext_steer_reg {
i915_mcr_reg_t reg;
};
-static const struct __ext_steer_reg xe_extregs[] = {
+static const struct __ext_steer_reg gen8_extregs[] = {
{"GEN8_SAMPLER_INSTDONE", GEN8_SAMPLER_INSTDONE},
{"GEN8_ROW_INSTDONE", GEN8_ROW_INSTDONE}
};
+static const struct __ext_steer_reg xehpg_extregs[] = {
+ {"XEHPG_INSTDONE_GEOM_SVG", XEHPG_INSTDONE_GEOM_SVG}
+};
+
static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
const struct __ext_steer_reg *extlist,
int slice_id, int subslice_id)
@@ -291,87 +289,30 @@ __alloc_ext_regs(struct __guc_mmio_reg_descr_group *newlist,
}
static void
-guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc,
- const struct __guc_mmio_reg_descr_group *lists)
+guc_capture_alloc_steered_lists(struct intel_guc *guc,
+ const struct __guc_mmio_reg_descr_group *lists)
{
struct intel_gt *gt = guc_to_gt(guc);
int slice, subslice, iter, i, num_steer_regs, num_tot_regs = 0;
const struct __guc_mmio_reg_descr_group *list;
struct __guc_mmio_reg_descr_group *extlists;
struct __guc_mmio_reg_descr *extarray;
- struct sseu_dev_info *sseu;
+ bool has_xehpg_extregs;
- /* In XE_LPD we only have steered registers for the render-class */
+ /* steered registers currently only exist for the render-class */
list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF,
- GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, GUC_RENDER_CLASS);
+ GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
+ GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE);
/* skip if extlists was previously allocated */
if (!list || guc->capture->extlists)
return;
- num_steer_regs = ARRAY_SIZE(xe_extregs);
-
- sseu = &gt->info.sseu;
- for_each_ss_steering(iter, gt, slice, subslice)
- num_tot_regs += num_steer_regs;
-
- if (!num_tot_regs)
- return;
-
- /* allocate an extra for an end marker */
- extlists = kcalloc(2, sizeof(struct __guc_mmio_reg_descr_group), GFP_KERNEL);
- if (!extlists)
- return;
+ has_xehpg_extregs = GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 55);
- if (__alloc_ext_regs(&extlists[0], list, num_tot_regs)) {
- kfree(extlists);
- return;
- }
-
- extarray = extlists[0].extlist;
- for_each_ss_steering(iter, gt, slice, subslice) {
- for (i = 0; i < num_steer_regs; ++i) {
- __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice);
- ++extarray;
- }
- }
-
- guc->capture->extlists = extlists;
-}
-
-static const struct __ext_steer_reg xehpg_extregs[] = {
- {"XEHPG_INSTDONE_GEOM_SVG", XEHPG_INSTDONE_GEOM_SVG}
-};
-
-static bool __has_xehpg_extregs(u32 ipver)
-{
- return (ipver >= IP_VER(12, 55));
-}
-
-static void
-guc_capture_alloc_steered_lists_xe_hpg(struct intel_guc *guc,
- const struct __guc_mmio_reg_descr_group *lists,
- u32 ipver)
-{
- struct intel_gt *gt = guc_to_gt(guc);
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
- struct sseu_dev_info *sseu;
- int slice, subslice, i, iter, num_steer_regs, num_tot_regs = 0;
- const struct __guc_mmio_reg_descr_group *list;
- struct __guc_mmio_reg_descr_group *extlists;
- struct __guc_mmio_reg_descr *extarray;
-
- /* In XE_LP / HPG we only have render-class steering registers during error-capture */
- list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF,
- GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, GUC_RENDER_CLASS);
- /* skip if extlists was previously allocated */
- if (!list || guc->capture->extlists)
- return;
-
- num_steer_regs = ARRAY_SIZE(xe_extregs);
- if (__has_xehpg_extregs(ipver))
+ num_steer_regs = ARRAY_SIZE(gen8_extregs);
+ if (has_xehpg_extregs)
num_steer_regs += ARRAY_SIZE(xehpg_extregs);
- sseu = &gt->info.sseu;
for_each_ss_steering(iter, gt, slice, subslice)
num_tot_regs += num_steer_regs;
@@ -390,11 +331,12 @@ guc_capture_alloc_steered_lists_xe_hpg(struct intel_guc *guc,
extarray = extlists[0].extlist;
for_each_ss_steering(iter, gt, slice, subslice) {
- for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) {
- __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice);
+ for (i = 0; i < ARRAY_SIZE(gen8_extregs); ++i) {
+ __fill_ext_reg(extarray, &gen8_extregs[i], slice, subslice);
++extarray;
}
- if (__has_xehpg_extregs(ipver)) {
+
+ if (has_xehpg_extregs) {
for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) {
__fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice);
++extarray;
@@ -402,34 +344,30 @@ guc_capture_alloc_steered_lists_xe_hpg(struct intel_guc *guc,
}
}
- drm_dbg(&i915->drm, "GuC-capture found %d-ext-regs.\n", num_tot_regs);
+ guc_dbg(guc, "capture found %d ext-regs.\n", num_tot_regs);
guc->capture->extlists = extlists;
}
static const struct __guc_mmio_reg_descr_group *
guc_capture_get_device_reglist(struct intel_guc *guc)
{
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
-
- if (GRAPHICS_VER(i915) > 11) {
- /*
- * For certain engine classes, there are slice and subslice
- * level registers requiring steering. We allocate and populate
- * these at init time based on hw config add it as an extension
- * list at the end of the pre-populated render list.
- */
- if (IS_DG2(i915))
- guc_capture_alloc_steered_lists_xe_hpg(guc, xe_lpd_lists, IP_VER(12, 55));
- else if (IS_XEHPSDV(i915))
- guc_capture_alloc_steered_lists_xe_hpg(guc, xe_lpd_lists, IP_VER(12, 50));
- else
- guc_capture_alloc_steered_lists_xe_lpd(guc, xe_lpd_lists);
+ struct drm_i915_private *i915 = guc_to_i915(guc);
+ const struct __guc_mmio_reg_descr_group *lists;
- return xe_lpd_lists;
- }
+ if (GRAPHICS_VER(i915) >= 12)
+ lists = xe_lp_lists;
+ else
+ lists = gen8_lists;
- /* if GuC submission is enabled on a non-POR platform, just use a common baseline */
- return default_lists;
+ /*
+ * For certain engine classes, there are slice and subslice
+ * level registers requiring steering. We allocate and populate
+	 * these at init time based on the hw config and add them as an
+	 * extension list at the end of the pre-populated render list.
+ */
+ guc_capture_alloc_steered_lists(guc, lists);
+
+ return lists;
}
static const char *
@@ -453,17 +391,15 @@ static const char *
__stringify_engclass(u32 class)
{
switch (class) {
- case GUC_RENDER_CLASS:
- return "Render";
- case GUC_VIDEO_CLASS:
+ case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE:
+ return "Render/Compute";
+ case GUC_CAPTURE_LIST_CLASS_VIDEO:
return "Video";
- case GUC_VIDEOENHANCE_CLASS:
+ case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE:
return "VideoEnhance";
- case GUC_BLITTER_CLASS:
+ case GUC_CAPTURE_LIST_CLASS_BLITTER:
return "Blitter";
- case GUC_COMPUTE_CLASS:
- return "Compute";
- case GUC_GSC_OTHER_CLASS:
+ case GUC_CAPTURE_LIST_CLASS_GSC_OTHER:
return "GSC-Other";
default:
break;
@@ -477,7 +413,6 @@ guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
struct guc_mmio_reg *ptr, u16 num_entries)
{
u32 i = 0, j = 0;
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
const struct __guc_mmio_reg_descr_group *reglists = guc->capture->reglists;
struct __guc_mmio_reg_descr_group *extlists = guc->capture->extlists;
const struct __guc_mmio_reg_descr_group *match;
@@ -509,8 +444,7 @@ guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
}
}
if (i < num_entries)
- drm_dbg(&i915->drm, "GuC-capture: Init reglist short %d out %d.\n",
- (int)i, (int)num_entries);
+ guc_dbg(guc, "Got short capture reglist init: %d out %d.\n", i, num_entries);
return 0;
}
@@ -540,12 +474,11 @@ guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
size_t *size, bool is_purpose_est)
{
struct intel_guc_state_capture *gc = guc->capture;
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid];
int num_regs;
if (!gc->reglists) {
- drm_warn(&i915->drm, "GuC-capture: No reglist on this device\n");
+ guc_warn(guc, "No capture reglist for this device\n");
return -ENODEV;
}
@@ -557,9 +490,9 @@ guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
if (!is_purpose_est && owner == GUC_CAPTURE_LIST_INDEX_PF &&
!guc_capture_get_one_list(gc->reglists, owner, type, classid)) {
if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL)
- drm_warn(&i915->drm, "Missing GuC-Err-Cap reglist Global!\n");
+ guc_warn(guc, "Missing capture reglist: global!\n");
else
- drm_warn(&i915->drm, "Missing GuC-Err-Cap reglist %s(%u):%s(%u)!\n",
+ guc_warn(guc, "Missing capture reglist: %s(%u):%s(%u)!\n",
__stringify_type(type), type,
__stringify_engclass(classid), classid);
return -ENODATA;
@@ -592,7 +525,6 @@ intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classi
{
struct intel_guc_state_capture *gc = guc->capture;
struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid];
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
struct guc_debug_capture_list *listnode;
int ret, num_regs;
u8 *caplist, *tmp;
@@ -623,7 +555,7 @@ intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classi
caplist = kzalloc(size, GFP_KERNEL);
if (!caplist) {
- drm_dbg(&i915->drm, "GuC-capture: failed to alloc cached caplist");
+ guc_dbg(guc, "Failed to alloc cached register capture list");
return -ENOMEM;
}
@@ -653,7 +585,6 @@ intel_guc_capture_getnullheader(struct intel_guc *guc,
void **outptr, size_t *size)
{
struct intel_guc_state_capture *gc = guc->capture;
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
int tmp = sizeof(u32) * 4;
void *null_header;
@@ -665,7 +596,7 @@ intel_guc_capture_getnullheader(struct intel_guc *guc,
null_header = kzalloc(tmp, GFP_KERNEL);
if (!null_header) {
- drm_dbg(&i915->drm, "GuC-capture: failed to alloc cached nulllist");
+ guc_dbg(guc, "Failed to alloc cached register capture null list");
return -ENOMEM;
}
@@ -727,7 +658,6 @@ guc_capture_output_min_size_est(struct intel_guc *guc)
static void check_guc_capture_size(struct intel_guc *guc)
{
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
int min_size = guc_capture_output_min_size_est(guc);
int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
u32 buffer_size = intel_guc_log_section_size_capture(&guc->log);
@@ -741,13 +671,13 @@ static void check_guc_capture_size(struct intel_guc *guc)
* INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE.
*/
if (min_size < 0)
- drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n",
+ guc_warn(guc, "Failed to calculate error state capture buffer minimum size: %d!\n",
min_size);
else if (min_size > buffer_size)
- drm_warn(&i915->drm, "GuC error state capture buffer maybe small: %d < %d\n",
+ guc_warn(guc, "Error state capture buffer maybe small: %d < %d\n",
buffer_size, min_size);
else if (spare_size > buffer_size)
- drm_dbg(&i915->drm, "GuC error state capture buffer lacks spare size: %d < %d (min = %d)\n",
+ guc_dbg(guc, "Error state capture buffer lacks spare size: %d < %d (min = %d)\n",
buffer_size, spare_size, min_size);
}
@@ -848,7 +778,6 @@ static int
guc_capture_log_remove_dw(struct intel_guc *guc, struct __guc_capture_bufstate *buf,
u32 *dw)
{
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
int tries = 2;
int avail = 0;
u32 *src_data;
@@ -865,7 +794,7 @@ guc_capture_log_remove_dw(struct intel_guc *guc, struct __guc_capture_bufstate *
return 4;
}
if (avail)
- drm_dbg(&i915->drm, "GuC-Cap-Logs not dword aligned, skipping.\n");
+ guc_dbg(guc, "Register capture log not dword aligned, skipping.\n");
buf->rd = 0;
}
@@ -1118,13 +1047,12 @@ static void
__guc_capture_create_prealloc_nodes(struct intel_guc *guc)
{
struct __guc_capture_parsed_output *node = NULL;
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
int i;
for (i = 0; i < PREALLOC_NODES_MAX_COUNT; ++i) {
node = guc_capture_alloc_one_node(guc);
if (!node) {
- drm_warn(&i915->drm, "GuC Capture pre-alloc-cache failure\n");
+ guc_warn(guc, "Register capture pre-alloc-cache failure\n");
/* dont free the priors, use what we got and cleanup at shutdown */
return;
}
@@ -1169,9 +1097,8 @@ guc_capture_create_prealloc_nodes(struct intel_guc *guc)
static int
guc_capture_extract_reglists(struct intel_guc *guc, struct __guc_capture_bufstate *buf)
{
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
- struct guc_state_capture_group_header_t ghdr = {0};
- struct guc_state_capture_header_t hdr = {0};
+ struct guc_state_capture_group_header_t ghdr = {};
+ struct guc_state_capture_header_t hdr = {};
struct __guc_capture_parsed_output *node = NULL;
struct guc_mmio_reg *regs = NULL;
int i, numlists, numregs, ret = 0;
@@ -1183,7 +1110,7 @@ guc_capture_extract_reglists(struct intel_guc *guc, struct __guc_capture_bufstat
if (!i)
return -ENODATA;
if (i % sizeof(u32)) {
- drm_warn(&i915->drm, "GuC Capture new entries unaligned\n");
+ guc_warn(guc, "Got mis-aligned register capture entries\n");
ret = -EIO;
goto bailout;
}
@@ -1301,7 +1228,7 @@ guc_capture_extract_reglists(struct intel_guc *guc, struct __guc_capture_bufstat
break;
}
if (datatype != GUC_CAPTURE_LIST_TYPE_GLOBAL)
- drm_dbg(&i915->drm, "GuC Capture missing global dump: %08x!\n",
+ guc_dbg(guc, "Register capture missing global dump: %08x!\n",
datatype);
}
node->is_partial = is_partial;
@@ -1322,7 +1249,7 @@ guc_capture_extract_reglists(struct intel_guc *guc, struct __guc_capture_bufstat
numregs = FIELD_GET(CAP_HDR_NUM_MMIOS, hdr.num_mmios);
if (numregs > guc->capture->max_mmio_per_node) {
- drm_dbg(&i915->drm, "GuC Capture list extraction clipped by prealloc!\n");
+ guc_dbg(guc, "Register capture list extraction clipped by prealloc!\n");
numregs = guc->capture->max_mmio_per_node;
}
node->reginfo[datatype].num_regs = numregs;
@@ -1367,7 +1294,6 @@ static void __guc_capture_process_output(struct intel_guc *guc)
{
unsigned int buffer_size, read_offset, write_offset, full_count;
struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
struct guc_log_buffer_state log_buf_state_local;
struct guc_log_buffer_state *log_buf_state;
struct __guc_capture_bufstate buf;
@@ -1403,7 +1329,8 @@ static void __guc_capture_process_output(struct intel_guc *guc)
write_offset = buffer_size;
} else if (unlikely((read_offset > buffer_size) ||
(write_offset > buffer_size))) {
- drm_err(&i915->drm, "invalid GuC log capture buffer state!\n");
+ guc_err(guc, "Register capture buffer in invalid state: read = 0x%X, size = 0x%X!\n",
+ read_offset, buffer_size);
/* copy whole buffer as offsets are unreliable */
read_offset = 0;
write_offset = buffer_size;
@@ -1510,7 +1437,7 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf,
if (!cap || !ee->engine)
return -ENODEV;
- guc = &ee->engine->gt->uc.guc;
+ guc = gt_to_guc(ee->engine->gt);
i915_error_printf(ebuf, "global --- GuC Error Capture on %s command stream:\n",
ee->engine->name);
@@ -1571,6 +1498,27 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf,
#endif //CONFIG_DRM_I915_CAPTURE_ERROR
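+/*
+ * Pull IPEHR and INSTDONE out of the GuC-provided capture node so the
+ * error-state "ecode" fingerprint can be derived the same way as with
+ * MMIO-based capture.
+ */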
+static void guc_capture_find_ecode(struct intel_engine_coredump *ee)
+{
+ struct gcap_reg_list_info *reginfo;
+ struct guc_mmio_reg *regs;
+ i915_reg_t reg_ipehr = RING_IPEHR(0);
+ i915_reg_t reg_instdone = RING_INSTDONE(0);
+ int i;
+
+ if (!ee->guc_capture_node)
+ return;
+
+ reginfo = ee->guc_capture_node->reginfo + GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE;
+ regs = reginfo->regs;
+ for (i = 0; i < reginfo->num_regs; i++) {
+ if (regs[i].offset == reg_ipehr.reg)
+ ee->ipehr = regs[i].value;
+ else if (regs[i].offset == reg_instdone.reg)
+ ee->instdone.instdone = regs[i].value;
+ }
+}
+
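
The scan above is a linear walk of the engine-instance register list keyed on
MMIO offset; the same pattern generalizes to any captured register. A minimal
sketch of such a lookup (hypothetical helper, not part of this patch):

static bool guc_capture_lookup_reg(const struct gcap_reg_list_info *reginfo,
				   u32 offset, u32 *value)
{
	int i;

	/* Same linear scan as guc_capture_find_ecode() above */
	for (i = 0; i < reginfo->num_regs; i++) {
		if (reginfo->regs[i].offset == offset) {
			*value = reginfo->regs[i].value;
			return true;
		}
	}
	return false;
}
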
void intel_guc_capture_free_node(struct intel_engine_coredump *ee)
{
if (!ee || !ee->guc_capture_node)
@@ -1581,23 +1529,52 @@ void intel_guc_capture_free_node(struct intel_engine_coredump *ee)
ee->guc_capture_node = NULL;
}
+bool intel_guc_capture_is_matching_engine(struct intel_gt *gt,
+ struct intel_context *ce,
+ struct intel_engine_cs *engine)
+{
+ struct __guc_capture_parsed_output *n;
+ struct intel_guc *guc;
+
+ if (!gt || !ce || !engine)
+ return false;
+
+ guc = gt_to_guc(gt);
+ if (!guc->capture)
+ return false;
+
+ /*
+ * Look for a matching GuC-reported error capture node in
+ * the internal output link-list, based on lrca, guc-id and engine
+ * identification.
+ */
+ list_for_each_entry(n, &guc->capture->outlist, link) {
+ if (n->eng_inst == GUC_ID_TO_ENGINE_INSTANCE(engine->guc_id) &&
+ n->eng_class == GUC_ID_TO_ENGINE_CLASS(engine->guc_id) &&
+ n->guc_id == ce->guc_id.id &&
+ (n->lrca & CTX_GTT_ADDRESS_MASK) == (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
+ return true;
+ }
+
+ return false;
+}
+
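
The predicate is a cheap existence check; a hedged usage sketch for an
error-capture path (calling context assumed):

	/* Only try to claim a node if GuC actually reported one */
	if (intel_guc_capture_is_matching_engine(gt, ce, engine))
		intel_guc_capture_get_matching_node(gt, ee, ce);
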
void intel_guc_capture_get_matching_node(struct intel_gt *gt,
struct intel_engine_coredump *ee,
struct intel_context *ce)
{
struct __guc_capture_parsed_output *n, *ntmp;
- struct drm_i915_private *i915;
struct intel_guc *guc;
if (!gt || !ee || !ce)
return;
- i915 = gt->i915;
- guc = &gt->uc.guc;
+ guc = gt_to_guc(gt);
if (!guc->capture)
return;
GEM_BUG_ON(ee->guc_capture_node);
+
/*
* Look for a matching GuC reported error capture node from
* the internal output link-list based on lrca, guc-id and engine
@@ -1606,16 +1583,18 @@ void intel_guc_capture_get_matching_node(struct intel_gt *gt,
list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
if (n->eng_inst == GUC_ID_TO_ENGINE_INSTANCE(ee->engine->guc_id) &&
n->eng_class == GUC_ID_TO_ENGINE_CLASS(ee->engine->guc_id) &&
- n->guc_id && n->guc_id == ce->guc_id.id &&
- (n->lrca & CTX_GTT_ADDRESS_MASK) && (n->lrca & CTX_GTT_ADDRESS_MASK) ==
- (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) {
+ n->guc_id == ce->guc_id.id &&
+ (n->lrca & CTX_GTT_ADDRESS_MASK) == (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) {
list_del(&n->link);
ee->guc_capture_node = n;
ee->guc_capture = guc->capture;
+ guc_capture_find_ecode(ee);
return;
}
}
- drm_dbg(&i915->drm, "GuC capture can't match ee to node\n");
+
+ guc_warn(guc, "No register capture node found for 0x%04X / 0x%08X\n",
+ ce->guc_id.id, ce->lrc.lrca);
}
void intel_guc_capture_process(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
index fbd3713c7832..302256d45431 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
@@ -11,6 +11,7 @@
struct drm_i915_error_state_buf;
struct guc_gt_system_info;
struct intel_engine_coredump;
+struct intel_engine_cs;
struct intel_context;
struct intel_gt;
struct intel_guc;
@@ -20,6 +21,8 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *m,
const struct intel_engine_coredump *ee);
void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee,
struct intel_context *ce);
+bool intel_guc_capture_is_matching_engine(struct intel_gt *gt, struct intel_context *ce,
+ struct intel_engine_cs *engine);
void intel_guc_capture_process(struct intel_guc *guc);
int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
void **outptr);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 1803a633ed64..2c651ec024ef 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -5,14 +5,39 @@
#include <linux/circ_buf.h>
#include <linux/ktime.h>
-#include <linux/time64.h>
#include <linux/string_helpers.h>
+#include <linux/time64.h>
#include <linux/timekeeping.h>
#include "i915_drv.h"
+#include "i915_wait_util.h"
#include "intel_guc_ct.h"
#include "intel_guc_print.h"
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
+enum {
+ CT_DEAD_ALIVE = 0,
+ CT_DEAD_SETUP,
+ CT_DEAD_WRITE,
+ CT_DEAD_DEADLOCK,
+ CT_DEAD_H2G_HAS_ROOM,
+ CT_DEAD_READ,
+ CT_DEAD_PROCESS_FAILED,
+};
+
+static void ct_dead_ct_worker_func(struct work_struct *w);
+
+#define CT_DEAD(ct, reason) \
+ do { \
+ if (!(ct)->dead_ct_reported) { \
+ (ct)->dead_ct_reason |= 1 << CT_DEAD_##reason; \
+ queue_work(system_unbound_wq, &(ct)->dead_ct_worker); \
+ } \
+ } while (0)
+#else
+#define CT_DEAD(ct, reason) do { } while (0)
+#endif
+
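
Each reason sets one bit in dead_ct_reason, so multiple failures can
accumulate before the worker reports. A hypothetical decoder for the value
logged below (bit N maps to enum value N; CT_DEAD_ALIVE is 0 and is never
passed to CT_DEAD()):

static const char * const ct_dead_reasons[] = {
	"alive", "setup", "write", "deadlock",
	"h2g_has_room", "read", "process_failed",
};

static void ct_print_dead_reasons(struct intel_guc_ct *ct)
{
	unsigned int i;

	for (i = 1; i < ARRAY_SIZE(ct_dead_reasons); i++)
		if (ct->dead_ct_reason & BIT(i))
			pr_info("CT dead reason: %s\n", ct_dead_reasons[i]);
}
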
static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct)
{
return container_of(ct, struct intel_guc, ct);
@@ -72,13 +97,40 @@ struct ct_request {
struct ct_incoming_msg {
struct list_head link;
u32 size;
- u32 msg[];
+ u32 msg[] __counted_by(size);
};
enum { CTB_SEND = 0, CTB_RECV = 1 };
enum { CTB_OWNER_HOST = 0 };
+/*
+ * Some H2G commands involve a synchronous response that the driver needs
+ * to wait for. In such cases, a timeout is required to prevent the driver
+ * from waiting forever in the case of an error (either no error response
+ * is defined in the protocol or something has died and requires a reset).
+ * The specific command may be defined as having a time-bound response, but
+ * the CT is a queue and that time guarantee only starts from the point
+ * when the command reaches the head of the queue and is processed by GuC.
+ *
+ * Ideally there would be a helper to report the progress of a given
+ * command through the CT. However, that would require a significant
+ * amount of work in the CT layer. In the meantime, provide a reasonable
+ * estimate of the worst-case time for the entire queue to drain, and
+ * therefore how long a caller should wait before giving up on a
+ * request. The current estimate is based on empirical
+ * measurement of a test that fills the buffer with context creation and
+ * destruction requests as they seem to be the slowest operation.
+ */
+long intel_guc_ct_max_queue_time_jiffies(void)
+{
+ /*
+ * A 4KB buffer full of context destroy commands takes a little
+ * over a second to process so bump that to 2s to be super safe.
+ */
+ return (CTB_H2G_BUFFER_SIZE * HZ) / SZ_2K;
+}
+
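
The helper exists to bound synchronous H2G waits; a hedged caller sketch
(ct_request_completed() is a hypothetical predicate, error handling
illustrative):

	long timeout = intel_guc_ct_max_queue_time_jiffies();

	if (!wait_event_timeout(ct->wq, ct_request_completed(req), timeout))
		err = -ETIMEDOUT;	/* queue failed to drain in the worst case */
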
static void ct_receive_tasklet_func(struct tasklet_struct *t);
static void ct_incoming_request_worker_func(struct work_struct *w);
@@ -93,6 +145,9 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct)
spin_lock_init(&ct->requests.lock);
INIT_LIST_HEAD(&ct->requests.pending);
INIT_LIST_HEAD(&ct->requests.incoming);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
+ INIT_WORK(&ct->dead_ct_worker, ct_dead_ct_worker_func);
+#endif
INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func);
tasklet_setup(&ct->receive_tasklet, ct_receive_tasklet_func);
init_waitqueue_head(&ct->wq);
@@ -211,7 +266,7 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
u32 *cmds;
int err;
- err = i915_inject_probe_error(guc_to_gt(guc)->i915, -ENXIO);
+ err = i915_inject_probe_error(guc_to_i915(guc), -ENXIO);
if (err)
return err;
@@ -319,11 +374,16 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
ct->enabled = true;
ct->stall_time = KTIME_MAX;
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
+ ct->dead_ct_reported = false;
+ ct->dead_ct_reason = CT_DEAD_ALIVE;
+#endif
return 0;
err_out:
CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
+ CT_DEAD(ct, SETUP);
return err;
}
@@ -344,6 +404,24 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct)
}
}
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static void ct_track_lost_and_found(struct intel_guc_ct *ct, u32 fence, u32 action)
+{
+ unsigned int lost = fence % ARRAY_SIZE(ct->requests.lost_and_found);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+ unsigned long entries[SZ_32];
+ unsigned int n;
+
+ n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+
+ /* May be called under spinlock, so avoid sleeping */
+ ct->requests.lost_and_found[lost].stack = stack_depot_save(entries, n, GFP_NOWAIT);
+#endif
+ ct->requests.lost_and_found[lost].fence = fence;
+ ct->requests.lost_and_found[lost].action = action;
+}
+#endif
+
static u32 ct_get_next_fence(struct intel_guc_ct *ct)
{
/* For now it's trivial */
@@ -394,11 +472,11 @@ static int ct_write(struct intel_guc_ct *ct,
FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence);
- type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_EVENT :
+ type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_FAST_REQUEST :
GUC_HXG_TYPE_REQUEST;
hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, type) |
- FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
- GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION |
+ GUC_HXG_REQUEST_MSG_0_DATA0, action[0]);
CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n",
tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]);
@@ -415,6 +493,11 @@ static int ct_write(struct intel_guc_ct *ct,
}
GEM_BUG_ON(tail > size);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+ ct_track_lost_and_found(ct, fence,
+ FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0]));
+#endif
+
/*
* make sure H2G buffer update and LRC tail update (if this triggering a
* submission) are visible before updating the descriptor tail
@@ -434,6 +517,7 @@ static int ct_write(struct intel_guc_ct *ct,
corrupted:
CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
desc->head, desc->tail, desc->status);
+ CT_DEAD(ct, WRITE);
ctb->broken = true;
return -EPIPE;
}
@@ -504,6 +588,7 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct)
CT_ERROR(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head);
CT_ERROR(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail);
+ CT_DEAD(ct, DEADLOCK);
ct->ctbs.send.broken = true;
}
@@ -552,6 +637,7 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
head, ctb->size);
desc->status |= GUC_CTB_STATUS_OVERFLOW;
ctb->broken = true;
+ CT_DEAD(ct, H2G_HAS_ROOM);
return false;
}
@@ -640,7 +726,7 @@ static int ct_send(struct intel_guc_ct *ct,
GEM_BUG_ON(!ct->enabled);
GEM_BUG_ON(!len);
- GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
+ GEM_BUG_ON(len > GUC_CTB_HXG_MSG_MAX_LEN - GUC_CTB_HDR_LEN);
GEM_BUG_ON(!response_buf && response_buf_size);
might_sleep();
@@ -902,15 +988,55 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
/* now update descriptor */
WRITE_ONCE(desc->head, head);
+ intel_guc_write_barrier(ct_to_guc(ct));
+
return available - len;
corrupted:
CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
desc->head, desc->tail, desc->status);
ctb->broken = true;
+ CT_DEAD(ct, READ);
return -EPIPE;
}
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
+{
+ unsigned int n;
+ char *buf = NULL;
+ bool found = false;
+
+ lockdep_assert_held(&ct->requests.lock);
+
+ for (n = 0; n < ARRAY_SIZE(ct->requests.lost_and_found); n++) {
+ if (ct->requests.lost_and_found[n].fence != fence)
+ continue;
+ found = true;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+ buf = kmalloc(SZ_4K, GFP_NOWAIT);
+ if (buf && stack_depot_snprint(ct->requests.lost_and_found[n].stack,
+ buf, SZ_4K, 0)) {
+ CT_ERROR(ct, "Fence %u was used by action %#04x sent at\n%s",
+ fence, ct->requests.lost_and_found[n].action, buf);
+ break;
+ }
+#endif
+ CT_ERROR(ct, "Fence %u was used by action %#04x\n",
+ fence, ct->requests.lost_and_found[n].action);
+ break;
+ }
+ kfree(buf);
+ return found;
+}
+#else
+static bool ct_check_lost_and_found(struct intel_guc_ct *ct, u32 fence)
+{
+ return false;
+}
+#endif
+
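
The write side (ct_track_lost_and_found() above) and this read side share a
small ring indexed by fence modulo the array size, so only the most recent
SZ_16 fences remain attributable:

	/* Ring slot for a given fence (see lost_and_found[SZ_16] below) */
	unsigned int slot = fence % ARRAY_SIZE(ct->requests.lost_and_found);

When ct_handle_response() cannot find a fence among the pending requests, the
ring lets the error message name the action that consumed it.
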
static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *response)
{
u32 len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, response->msg[0]);
@@ -951,13 +1077,23 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
found = true;
break;
}
+
+#ifdef CONFIG_DRM_I915_SELFTEST
+ if (!found && ct_to_guc(ct)->fast_response_selftest) {
+ CT_DEBUG(ct, "Assuming unsolicited response due to FAST_REQUEST selftest\n");
+ ct_to_guc(ct)->fast_response_selftest++;
+ found = true;
+ }
+#endif
+
if (!found) {
- CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence);
- CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", fence,
- ct->requests.last_fence);
- list_for_each_entry(req, &ct->requests.pending, link)
- CT_ERROR(ct, "request %u awaits response\n",
- req->fence);
+ CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
+ len, hxg[0], fence, ct->requests.last_fence);
+ if (!ct_check_lost_and_found(ct, fence)) {
+ list_for_each_entry(req, &ct->requests.pending, link)
+ CT_ERROR(ct, "request %u awaits response\n",
+ req->fence);
+ }
err = -ENOKEY;
}
spin_unlock_irqrestore(&ct->requests.lock, flags);
@@ -1013,12 +1149,11 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
ret = 0;
break;
case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED:
- CT_ERROR(ct, "Received GuC crash dump notification!\n");
- ret = 0;
- break;
case INTEL_GUC_ACTION_NOTIFY_EXCEPTION:
- CT_ERROR(ct, "Received GuC exception notification!\n");
- ret = 0;
+ ret = intel_guc_crash_process_msg(guc, action);
+ break;
+ case INTEL_GUC_ACTION_TLB_INVALIDATION_DONE:
+ ret = intel_guc_tlb_invalidation_done(guc, payload, len);
break;
default:
ret = -EOPNOTSUPP;
@@ -1057,6 +1192,7 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
if (unlikely(err)) {
CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n",
ERR_PTR(err), 4 * request->size, request->msg);
+ CT_DEAD(ct, PROCESS_FAILED);
ct_free_msg(request);
}
@@ -1090,9 +1226,17 @@ static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *requ
switch (action) {
case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+ case INTEL_GUC_ACTION_TLB_INVALIDATION_DONE:
g2h_release_space(ct, request->size);
}
+ /*
+ * TLB invalidation responses must be handled immediately as processing
+ * of other G2H notifications may be blocked by an invalidation request.
+ */
+ if (action == INTEL_GUC_ACTION_TLB_INVALIDATION_DONE)
+ return ct_process_request(ct, request);
+
spin_lock_irqsave(&ct->requests.lock, flags);
list_add_tail(&request->link, &ct->requests.incoming);
spin_unlock_irqrestore(&ct->requests.lock, flags);
@@ -1181,9 +1325,16 @@ static int ct_receive(struct intel_guc_ct *ct)
static void ct_try_receive_message(struct intel_guc_ct *ct)
{
+ struct intel_guc *guc = ct_to_guc(ct);
int ret;
- if (GEM_WARN_ON(!ct->enabled))
+ if (!ct->enabled) {
+ GEM_WARN_ON(!guc_to_gt(guc)->uc.reset_in_progress);
+ return;
+ }
+
+ /* When interrupts are disabled, no message handling is expected */
+ if (!guc->interrupts.enabled)
return;
ret = ct_receive(ct);
@@ -1233,3 +1384,22 @@ void intel_guc_ct_print_info(struct intel_guc_ct *ct,
drm_printf(p, "Tail: %u\n",
ct->ctbs.recv.desc->tail);
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
+static void ct_dead_ct_worker_func(struct work_struct *w)
+{
+ struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, dead_ct_worker);
+ struct intel_guc *guc = ct_to_guc(ct);
+
+ if (ct->dead_ct_reported)
+ return;
+
+ if (i915_error_injected())
+ return;
+
+ ct->dead_ct_reported = true;
+
+ guc_info(guc, "CTB is dead - reason=0x%X\n", ct->dead_ct_reason);
+ intel_klog_error_capture(guc_to_gt(guc), (intel_engine_mask_t)~0U);
+}
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index f709a19c7e21..e9a6ec4e6d38 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -8,6 +8,7 @@
#include <linux/interrupt.h>
#include <linux/spinlock.h>
+#include <linux/stackdepot.h>
#include <linux/workqueue.h>
#include <linux/ktime.h>
#include <linux/wait.h>
@@ -81,12 +82,30 @@ struct intel_guc_ct {
struct list_head incoming; /* incoming requests */
struct work_struct worker; /* handler for incoming requests */
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+ struct {
+ u16 fence;
+ u16 action;
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
+ depot_stack_handle_t stack;
+#endif
+ } lost_and_found[SZ_16];
+#endif
} requests;
/** @stall_time: time of first time a CTB submission is stalled */
ktime_t stall_time;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
+ int dead_ct_reason;
+ bool dead_ct_reported;
+ struct work_struct dead_ct_worker;
+#endif
};
+long intel_guc_ct_max_queue_time_jiffies(void);
+
void intel_guc_ct_init_early(struct intel_guc_ct *ct);
int intel_guc_ct_init(struct intel_guc_ct *ct);
void intel_guc_ct_fini(struct intel_guc_ct *ct);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 69133420c78b..b1bda1b84f0a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -12,9 +12,12 @@
#include "gt/intel_gt.h"
#include "gt/intel_gt_mcr.h"
#include "gt/intel_gt_regs.h"
+#include "gt/intel_rps.h"
+
+#include "i915_drv.h"
+#include "i915_wait_util.h"
#include "intel_guc_fw.h"
#include "intel_guc_print.h"
-#include "i915_drv.h"
static void guc_prepare_xfer(struct intel_gt *gt)
{
@@ -25,7 +28,7 @@ static void guc_prepare_xfer(struct intel_gt *gt)
GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
GUC_ENABLE_MIA_CLOCK_GATING;
- if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 55))
shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES |
GUC_ENABLE_MIA_CACHING;
@@ -45,6 +48,14 @@ static void guc_prepare_xfer(struct intel_gt *gt)
/* allows for 5us (in 10ns units) before GT can go to RC6 */
intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF);
}
+
+ /*
+ * Starting from IP 12.50 we need to enable the mirroring of GuC
+ * internal state to debug registers. This is always enabled on previous
+ * IPs.
+ */
+ if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 50))
+ intel_uncore_rmw(uncore, GUC_SHIM_CONTROL2, 0, GUC_ENABLE_DEBUG_REG);
}
static int guc_xfer_rsa_mmio(struct intel_uc_fw *guc_fw,
@@ -88,31 +99,82 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
/*
* Read the GuC status register (GUC_STATUS) and store it in the
* specified location; then return a boolean indicating whether
- * the value matches either of two values representing completion
- * of the GuC boot process.
+ * the value matches either completion or a known failure code.
*
* This is used for polling the GuC status in a wait_for()
* loop below.
*/
-static inline bool guc_ready(struct intel_uncore *uncore, u32 *status)
+static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool *success)
{
u32 val = intel_uncore_read(uncore, GUC_STATUS);
u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val);
+ u32 br_val = REG_FIELD_GET(GS_BOOTROM_MASK, val);
*status = val;
- return uk_val == INTEL_GUC_LOAD_STATUS_READY;
+ switch (uk_val) {
+ case INTEL_GUC_LOAD_STATUS_READY:
+ *success = true;
+ return true;
+
+ case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH:
+ case INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH:
+ case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE:
+ case INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR:
+ case INTEL_GUC_LOAD_STATUS_DPC_ERROR:
+ case INTEL_GUC_LOAD_STATUS_EXCEPTION:
+ case INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID:
+ case INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID:
+ case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
+ case INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR:
+ *success = false;
+ return true;
+ }
+
+ switch (br_val) {
+ case INTEL_BOOTROM_STATUS_NO_KEY_FOUND:
+ case INTEL_BOOTROM_STATUS_RSA_FAILED:
+ case INTEL_BOOTROM_STATUS_PAVPC_FAILED:
+ case INTEL_BOOTROM_STATUS_WOPCM_FAILED:
+ case INTEL_BOOTROM_STATUS_LOADLOC_FAILED:
+ case INTEL_BOOTROM_STATUS_JUMP_FAILED:
+ case INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED:
+ case INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT:
+ case INTEL_BOOTROM_STATUS_EXCEPTION:
+ case INTEL_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE:
+ *success = false;
+ return true;
+ }
+
+ return false;
}
+/*
+ * Use a longer timeout for debug builds so that problems can be detected
+ * and analysed. But a shorter timeout for releases so that users don't
+ * wait forever to find out there is a problem. Note that the only reason
+ * an end user should hit the timeout is in case of extreme thermal throttling.
+ * And a system that is that hot during boot is probably dead anyway!
+ */
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+#define GUC_LOAD_RETRY_LIMIT 20
+#else
+#define GUC_LOAD_RETRY_LIMIT 3
+#endif
+
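
Combined with the 1000 ms wait_for() per iteration in guc_wait_ucode() below,
this caps the total load wait at roughly GUC_LOAD_RETRY_LIMIT seconds: about
20 s on debug builds and 3 s on release builds.
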
static int guc_wait_ucode(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct intel_uncore *uncore = gt->uncore;
+ ktime_t before, after, delta;
+ bool success;
u32 status;
- int ret;
+ int ret, count;
+ u64 delta_ms;
+ u32 before_freq;
/*
* Wait for the GuC to start up.
- * NB: Docs recommend not using the interrupt for completion.
+ *
* Measurements indicate this should take no more than 20ms
* (assuming the GT clock is at maximum frequency). So, a
* timeout here indicates that the GuC has failed and is unusable.
@@ -126,29 +188,95 @@ static int guc_wait_ucode(struct intel_guc *guc)
* issues to be resolved. In the meantime bump the timeout to
* 200ms. Even at slowest clock, this should be sufficient. And
* in the working case, a larger timeout makes no difference.
+ *
+ * IFWI updates have also been seen to cause sporadic failures due to
+ * the requested frequency not being granted and thus the firmware
+ * load is attempted at minimum frequency. That can lead to load times
+ * in the seconds range. However, there is a limit on how long an
+ * individual wait_for() can wait. So wrap it in a loop.
*/
- ret = wait_for(guc_ready(uncore, &status), 200);
- if (ret) {
- guc_info(guc, "load failed: status = 0x%08X\n", status);
- guc_info(guc, "load failed: status: Reset = %d, "
- "BootROM = 0x%02X, UKernel = 0x%02X, "
- "MIA = 0x%02X, Auth = 0x%02X\n",
- REG_FIELD_GET(GS_MIA_IN_RESET, status),
+ before_freq = intel_rps_read_actual_frequency(&gt->rps);
+ before = ktime_get();
+ for (count = 0; count < GUC_LOAD_RETRY_LIMIT; count++) {
+ ret = wait_for(guc_load_done(uncore, &status, &success), 1000);
+ if (!ret || !success)
+ break;
+
+ guc_dbg(guc, "load still in progress, count = %d, freq = %dMHz, status = 0x%08X [0x%02X/%02X]\n",
+ count, intel_rps_read_actual_frequency(&gt->rps), status,
REG_FIELD_GET(GS_BOOTROM_MASK, status),
- REG_FIELD_GET(GS_UKERNEL_MASK, status),
- REG_FIELD_GET(GS_MIA_MASK, status),
- REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+ REG_FIELD_GET(GS_UKERNEL_MASK, status));
+ }
+ after = ktime_get();
+ delta = ktime_sub(after, before);
+ delta_ms = ktime_to_ms(delta);
+ if (ret || !success) {
+ u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status);
+ u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status);
+
+ guc_info(guc, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz, ret = %d\n",
+ status, delta_ms, intel_rps_read_actual_frequency(&gt->rps), ret);
+ guc_info(guc, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
+ REG_FIELD_GET(GS_MIA_IN_RESET, status),
+ bootrom, ukernel,
+ REG_FIELD_GET(GS_MIA_MASK, status),
+ REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+
+ switch (bootrom) {
+ case INTEL_BOOTROM_STATUS_NO_KEY_FOUND:
+ guc_info(guc, "invalid key requested, header = 0x%08X\n",
+ intel_uncore_read(uncore, GUC_HEADER_INFO));
+ ret = -ENOEXEC;
+ break;
- if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
+ case INTEL_BOOTROM_STATUS_RSA_FAILED:
guc_info(guc, "firmware signature verification failed\n");
ret = -ENOEXEC;
+ break;
+
+ case INTEL_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE:
+ guc_info(guc, "firmware production part check failure\n");
+ ret = -ENOEXEC;
+ break;
}
- if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == INTEL_GUC_LOAD_STATUS_EXCEPTION) {
+ switch (ukernel) {
+ case INTEL_GUC_LOAD_STATUS_EXCEPTION:
guc_info(guc, "firmware exception. EIP: %#x\n",
intel_uncore_read(uncore, SOFT_SCRATCH(13)));
ret = -ENXIO;
+ break;
+
+ case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
+ guc_info(guc, "illegal register in save/restore workaround list\n");
+ ret = -EPERM;
+ break;
+
+ case INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR:
+ guc_info(guc, "invalid w/a KLV entry\n");
+ ret = -EINVAL;
+ break;
+
+ case INTEL_GUC_LOAD_STATUS_HWCONFIG_START:
+ guc_info(guc, "still extracting hwconfig table.\n");
+ ret = -ETIMEDOUT;
+ break;
}
+
+ /* Uncommon/unexpected error, see earlier status code print for details */
+ if (ret == 0)
+ ret = -ENXIO;
+ } else if (delta_ms > 200) {
+ guc_warn(guc, "excessive init time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n",
+ delta_ms, status, count, ret);
+ guc_warn(guc, "excessive init time: [freq = %dMHz -> %dMHz vs %dMHz, perf_limit_reasons = 0x%08X]\n",
+ before_freq, intel_rps_read_actual_frequency(&gt->rps),
+ intel_rps_get_requested_frequency(&gt->rps),
+ intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt)));
+ } else {
+ guc_dbg(guc, "init took %lldms, freq = %dMHz -> %dMHz vs %dMHz, status = 0x%08X, count = %d, ret = %d\n",
+ delta_ms, before_freq, intel_rps_read_actual_frequency(&gt->rps),
+ intel_rps_get_requested_frequency(&gt->rps), status, count, ret);
}
return ret;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 4ae5fc2f6002..eded00f0c7e1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -22,6 +22,7 @@
/* Payload length only i.e. don't include G2H header length */
#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 2
#define G2H_LEN_DW_DEREGISTER_CONTEXT 1
+#define G2H_LEN_DW_INVALIDATE_TLB 1
#define GUC_CONTEXT_DISABLE 0
#define GUC_CONTEXT_ENABLE 1
@@ -35,13 +36,6 @@
#define GUC_MAX_CONTEXT_ID 65535
#define GUC_INVALID_CONTEXT_ID GUC_MAX_CONTEXT_ID
-#define GUC_RENDER_ENGINE 0
-#define GUC_VIDEO_ENGINE 1
-#define GUC_BLITTER_ENGINE 2
-#define GUC_VIDEOENHANCE_ENGINE 3
-#define GUC_VIDEO_ENGINE2 4
-#define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1)
-
#define GUC_RENDER_CLASS 0
#define GUC_VIDEO_CLASS 1
#define GUC_VIDEOENHANCE_CLASS 2
@@ -102,13 +96,16 @@
#define GUC_WA_GAM_CREDITS BIT(10)
#define GUC_WA_DUAL_QUEUE BIT(11)
#define GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13)
-#define GUC_WA_CONTEXT_ISOLATION BIT(15)
#define GUC_WA_PRE_PARSER BIT(14)
+#define GUC_WA_CONTEXT_ISOLATION BIT(15)
+#define GUC_WA_RCS_CCS_SWITCHOUT BIT(16)
#define GUC_WA_HOLD_CCS_SWITCHOUT BIT(17)
#define GUC_WA_POLLCS BIT(18)
#define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21)
+#define GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22)
#define GUC_CTL_FEATURE 2
+#define GUC_CTL_ENABLE_GUC_PXP_CTL BIT(1)
#define GUC_CTL_ENABLE_SLPC BIT(2)
#define GUC_CTL_DISABLE_SCHEDULER BIT(14)
@@ -299,7 +296,7 @@ struct guc_update_scheduling_policy_header {
} __packed;
/*
- * Can't dynmically allocate memory for the scheduling policy KLV because
+ * Can't dynamically allocate memory for the scheduling policy KLV because
* it will be sent from within the reset path. Need a fixed size lump on
* the stack instead :(.
*
@@ -411,6 +408,15 @@ enum guc_capture_type {
GUC_CAPTURE_LIST_TYPE_MAX,
};
+/* Class indices for capture_class and capture_instance arrays */
+enum {
+ GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0,
+ GUC_CAPTURE_LIST_CLASS_VIDEO = 1,
+ GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE = 2,
+ GUC_CAPTURE_LIST_CLASS_BLITTER = 3,
+ GUC_CAPTURE_LIST_CLASS_GSC_OTHER = 4,
+};
+
/* GuC Additional Data Struct */
struct guc_ads {
struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
@@ -426,7 +432,10 @@ struct guc_ads {
u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX];
- u32 reserved[14];
+ u32 wa_klv_addr_lo;
+ u32 wa_klv_addr_hi;
+ u32 wa_klv_size;
+ u32 reserved[11];
} __packed;
/* Engine usage stats */
@@ -451,7 +460,7 @@ enum guc_log_buffer_type {
GUC_MAX_LOG_BUFFER
};
-/**
+/*
* struct guc_log_buffer_state - GuC log buffer state
*
* Below state structure is used for coordination of retrieval of GuC firmware
@@ -490,32 +499,6 @@ struct guc_log_buffer_state {
u32 version;
} __packed;
-struct guc_ctx_report {
- u32 report_return_status;
- u32 reserved1[64];
- u32 affected_count;
- u32 reserved2[2];
-} __packed;
-
-/* GuC Shared Context Data Struct */
-struct guc_shared_ctx_data {
- u32 addr_of_last_preempted_data_low;
- u32 addr_of_last_preempted_data_high;
- u32 addr_of_last_preempted_data_high_tmp;
- u32 padding;
- u32 is_mapped_to_proxy;
- u32 proxy_ctx_id;
- u32 engine_reset_ctx_id;
- u32 media_reset_count;
- u32 reserved1[8];
- u32 uk_last_ctx_switch_reason;
- u32 was_reset;
- u32 lrca_gpu_addr;
- u64 execlist_ctx;
- u32 reserved2[66];
- struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM];
-} __packed;
-
/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
enum intel_guc_recv_message {
INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
index 4781fccc2687..868195c33f5b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
@@ -7,6 +7,7 @@
#include "gt/intel_hwconfig.h"
#include "i915_drv.h"
#include "i915_memcpy.h"
+#include "intel_guc_print.h"
/*
* GuC has a blob containing hardware configuration information (HWConfig).
@@ -42,6 +43,8 @@ static int __guc_action_get_hwconfig(struct intel_guc *guc,
};
int ret;
+ guc_dbg(guc, "Querying HW config table: size = %d, offset = 0x%08X\n",
+ ggtt_size, ggtt_offset);
ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
if (ret == -ENXIO)
return -ENOENT;
@@ -94,7 +97,7 @@ static int guc_hwconfig_fill_buffer(struct intel_guc *guc, struct intel_hwconfig
static bool has_table(struct drm_i915_private *i915)
{
- if (IS_ALDERLAKE_P(i915) && !IS_ADLP_N(i915))
+ if (IS_ALDERLAKE_P(i915) && !IS_ALDERLAKE_P_N(i915))
return true;
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
return true;
@@ -102,7 +105,7 @@ static bool has_table(struct drm_i915_private *i915)
return false;
}
-/**
+/*
* intel_guc_hwconfig_init - Initialize the HWConfig
*
* Retrieve the HWConfig table from the GuC and save it locally.
@@ -111,7 +114,7 @@ static bool has_table(struct drm_i915_private *i915)
static int guc_hwconfig_init(struct intel_gt *gt)
{
struct intel_hwconfig *hwconfig = &gt->info.hwconfig;
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
int ret;
if (!has_table(gt->i915))
@@ -136,7 +139,7 @@ static int guc_hwconfig_init(struct intel_gt *gt)
return 0;
}
-/**
+/*
* intel_gt_init_hwconfig - Initialize the HWConfig if available
*
* Retrieve the HWConfig table if available on the current platform.
@@ -149,7 +152,7 @@ int intel_gt_init_hwconfig(struct intel_gt *gt)
return guc_hwconfig_init(gt);
}
-/**
+/*
* intel_gt_fini_hwconfig - Finalize the HWConfig
*
* Free up the memory allocation holding the table.
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index c3792ddeec80..cdff48920ee6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -6,6 +6,8 @@
#include <linux/debugfs.h>
#include <linux/string_helpers.h>
+#include <drm/drm_managed.h>
+
#include "gt/intel_gt.h"
#include "i915_drv.h"
#include "i915_irq.h"
@@ -14,11 +16,11 @@
#include "intel_guc_log.h"
#include "intel_guc_print.h"
-#if defined(CONFIG_DRM_I915_DEBUG_GUC)
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_2M
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_16M
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
-#elif defined(CONFIG_DRM_I915_DEBUG_GEM)
+#elif IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_1M
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_2M
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
@@ -220,8 +222,7 @@ static int guc_action_control_log(struct intel_guc *guc, bool enable,
*/
static int subbuf_start_callback(struct rchan_buf *buf,
void *subbuf,
- void *prev_subbuf,
- size_t prev_padding)
+ void *prev_subbuf)
{
/*
* Use no-overwrite mode by default, where relay will stop accepting
@@ -333,8 +334,7 @@ bool intel_guc_check_log_buf_overflow(struct intel_guc_log *log,
log->stats[type].sampled_overflow += 16;
}
- dev_notice_ratelimited(guc_to_gt(log_to_guc(log))->i915->drm.dev,
- "GuC log buffer overflow\n");
+ guc_notice_ratelimited(log_to_guc(log), "log buffer overflow\n");
}
return overflow;
@@ -513,7 +513,11 @@ static void guc_log_relay_unmap(struct intel_guc_log *log)
void intel_guc_log_init_early(struct intel_guc_log *log)
{
- mutex_init(&log->relay.lock);
+ struct intel_guc *guc = log_to_guc(log);
+ struct drm_i915_private *i915 = guc_to_i915(guc);
+
+ drmm_mutex_init(&i915->drm, &log->relay.lock);
+ drmm_mutex_init(&i915->drm, &log->guc_lock);
INIT_WORK(&log->relay.flush_work, copy_debug_logs_work);
log->relay.started = false;
}
@@ -521,7 +525,7 @@ void intel_guc_log_init_early(struct intel_guc_log *log)
static int guc_log_relay_create(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
- struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
struct rchan *guc_log_relay_chan;
size_t n_subbufs, subbuf_size;
int ret;
@@ -543,10 +547,13 @@ static int guc_log_relay_create(struct intel_guc_log *log)
*/
n_subbufs = 8;
+ if (!guc->dbgfs_node)
+ return -ENOENT;
+
guc_log_relay_chan = relay_open("guc_log",
- dev_priv->drm.primary->debugfs_root,
+ guc->dbgfs_node,
subbuf_size, n_subbufs,
- &relay_callbacks, dev_priv);
+ &relay_callbacks, i915);
if (!guc_log_relay_chan) {
guc_err(guc, "Couldn't create relay channel for logging\n");
@@ -571,7 +578,7 @@ static void guc_log_relay_destroy(struct intel_guc_log *log)
static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
- struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
intel_wakeref_t wakeref;
_guc_log_copy_debuglogs_for_relay(log);
@@ -580,14 +587,14 @@ static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
* Generally device is expected to be active only at this
* time, so get/put should be really quick.
*/
- with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
guc_action_flush_log_complete(guc);
}
static u32 __get_default_log_level(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
/* A negative value means "use platform/config default" */
if (i915->params.guc_log_level < 0) {
@@ -662,7 +669,7 @@ void intel_guc_log_destroy(struct intel_guc_log *log)
int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
{
struct intel_guc *guc = log_to_guc(log);
- struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
intel_wakeref_t wakeref;
int ret = 0;
@@ -676,12 +683,12 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
if (level < GUC_LOG_LEVEL_DISABLED || level > GUC_LOG_LEVEL_MAX)
return -EINVAL;
- mutex_lock(&dev_priv->drm.struct_mutex);
+ mutex_lock(&log->guc_lock);
if (log->level == level)
goto out_unlock;
- with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
ret = guc_action_control_log(guc,
GUC_LOG_LEVEL_IS_VERBOSE(level),
GUC_LOG_LEVEL_IS_ENABLED(level),
@@ -694,7 +701,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
log->level = level;
out_unlock:
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ mutex_unlock(&log->guc_lock);
return ret;
}
@@ -794,7 +801,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
static void guc_log_relay_stop(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
if (!log->relay.started)
return;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index 02127703be80..13cb93ad0710 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -42,6 +42,14 @@ enum {
struct intel_guc_log {
u32 level;
+ /*
+ * Protects concurrent access and modification of intel_guc_log->level.
+ *
+ * This lock replaces the legacy struct_mutex usage in the
+ * intel_guc_log system.
+ */
+ struct mutex guc_lock;
+
/* Allocation settings */
struct {
s32 bytes; /* Size in bytes */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_print.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_print.h
index e75989d4ba06..2465d05638b4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_print.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_print.h
@@ -30,6 +30,9 @@
#define guc_err_ratelimited(_guc, _fmt, ...) \
guc_printk((_guc), err_ratelimited, _fmt, ##__VA_ARGS__)
+#define guc_notice_ratelimited(_guc, _fmt, ...) \
+ guc_printk((_guc), notice_ratelimited, _fmt, ##__VA_ARGS__)
+
#define guc_probe_error(_guc, _fmt, ...) \
guc_printk((_guc), probe_error, _fmt, ##__VA_ARGS__)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
index b5855091cf6a..9df7927304ae 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
@@ -6,25 +6,15 @@
#include <linux/string_helpers.h>
#include "intel_guc_rc.h"
+#include "intel_guc_print.h"
#include "gt/intel_gt.h"
#include "i915_drv.h"
static bool __guc_rc_supported(struct intel_guc *guc)
{
- struct intel_gt *gt = guc_to_gt(guc);
-
- /*
- * Wa_14017073508: mtl
- * Do not enable gucrc to avoid additional interrupts which
- * may disrupt pcode wa.
- */
- if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
- gt->type == GT_MEDIA)
- return false;
-
/* GuC RC is unavailable for pre-Gen12 */
return guc->submission_supported &&
- GRAPHICS_VER(gt->i915) >= 12;
+ GRAPHICS_VER(guc_to_i915(guc)) >= 12;
}
static bool __guc_rc_selected(struct intel_guc *guc)
@@ -70,13 +60,12 @@ static int __guc_rc_control(struct intel_guc *guc, bool enable)
ret = guc_action_control_gucrc(guc, enable);
if (ret) {
- i915_probe_error(guc_to_gt(guc)->i915, "Failed to %s GuC RC (%pe)\n",
- str_enable_disable(enable), ERR_PTR(ret));
+ guc_probe_error(guc, "Failed to %s RC (%pe)\n",
+ str_enable_disable(enable), ERR_PTR(ret));
return ret;
}
- drm_info(&gt->i915->drm, "GuC RC: %s\n",
- str_enabled_disabled(enable));
+ guc_info(guc, "RC %s\n", str_enabled_disabled(enable));
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
index 9915de32e894..f73dab527547 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
@@ -18,8 +18,6 @@
#define GS_MIA_IN_RESET (0x01 << GS_RESET_SHIFT)
#define GS_BOOTROM_SHIFT 1
#define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT)
-#define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT)
-#define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT)
#define GS_UKERNEL_SHIFT 8
#define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT)
#define GS_MIA_SHIFT 16
@@ -32,6 +30,8 @@
#define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT)
#define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT)
+#define GUC_HEADER_INFO _MMIO(0xc014)
+
#define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4)
#define SOFT_SCRATCH_COUNT 16
@@ -96,6 +96,7 @@
#define GUC_GEN10_SHIM_WC_ENABLE (1<<21)
#define GUC_SHIM_CONTROL2 _MMIO(0xc068)
+#define GUC_ENABLE_DEBUG_REG (1<<11)
#define GUC_IS_PRIVILEGED (1<<29)
#define GSC_LOADS_HUC (1<<30)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 63464933cbce..fa9af08f9708 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -3,16 +3,48 @@
* Copyright © 2021 Intel Corporation
*/
-#include <drm/drm_cache.h>
#include <linux/string_helpers.h>
+#include <drm/drm_cache.h>
+
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_rps.h"
+
#include "i915_drv.h"
#include "i915_reg.h"
+#include "i915_wait_util.h"
+#include "intel_guc_print.h"
#include "intel_guc_slpc.h"
#include "intel_mchbar_regs.h"
-#include "gt/intel_gt.h"
-#include "gt/intel_gt_regs.h"
-#include "gt/intel_rps.h"
+
+/**
+ * DOC: SLPC - Dynamic Frequency management
+ *
+ * Single Loop Power Control (SLPC) is a GuC algorithm that manages
+ * GT frequency based on busyness and how KMD initializes it. SLPC is
+ * almost completely in control after initialization except for a few
+ * scenarios mentioned below.
+ *
+ * KMD uses the concept of waitboost to ramp frequency to RP0 when there
+ * are pending submissions for a context. It achieves this by sending GuC a
+ * request to update the min frequency to RP0. Waitboost is disabled
+ * when the request retires.
+ *
+ * Another form of frequency control happens through per-context hints.
+ * A context can be marked as low latency during creation. That will ensure
+ * that SLPC uses an aggressive frequency ramp when that context is active.
+ *
+ * Power profiles add another level of control to these mechanisms.
+ * When the power-saving profile is chosen, SLPC will use conservative
+ * thresholds to ramp frequency, thus saving power. KMD will disable
+ * waitboosts as well, which achieves further power savings. The base
+ * profile is the default and ensures balanced performance for any workload.
+ *
+ * Lastly, users have some level of control through sysfs, where min/max
+ * frequency values can be altered and the use of efficient freq
+ * can be toggled.
+ */
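
The knobs described above map onto the setters added in this patch; a hedged
sketch of applying them (calling context and values assumed):

	/* Pin the floor to RPn by ignoring the efficient frequency */
	intel_guc_slpc_set_ignore_eff_freq(slpc, true);

	/* Aggressive ramping tuned for compute-style submissions */
	intel_guc_slpc_set_strategy(slpc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);

	/* Conservative ramp thresholds, waitboost disabled by KMD */
	intel_guc_slpc_set_power_profile(slpc, SLPC_POWER_PROFILES_POWER_SAVING);
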
static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc)
{
@@ -33,7 +65,7 @@ static bool __detect_slpc_supported(struct intel_guc *guc)
{
/* GuC SLPC is unavailable for pre-Gen12 */
return guc->submission_supported &&
- GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12;
+ GRAPHICS_VER(guc_to_i915(guc)) >= 12;
}
static bool __guc_slpc_selected(struct intel_guc *guc)
@@ -137,17 +169,6 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
return ret > 0 ? -EPROTO : ret;
}
-static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id)
-{
- u32 request[] = {
- GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
- SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1),
- id,
- };
-
- return intel_guc_send(guc, request, ARRAY_SIZE(request));
-}
-
static bool slpc_is_running(struct intel_guc_slpc *slpc)
{
return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING;
@@ -171,14 +192,12 @@ static int guc_action_slpc_query(struct intel_guc *guc, u32 offset)
static int slpc_query_task_state(struct intel_guc_slpc *slpc)
{
struct intel_guc *guc = slpc_to_guc(slpc);
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
u32 offset = intel_guc_ggtt_offset(guc, slpc->vma);
int ret;
ret = guc_action_slpc_query(guc, offset);
if (unlikely(ret))
- i915_probe_error(i915, "Failed to query task state (%pe)\n",
- ERR_PTR(ret));
+ guc_probe_error(guc, "Failed to query task state: %pe\n", ERR_PTR(ret));
drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES);
@@ -188,32 +207,22 @@ static int slpc_query_task_state(struct intel_guc_slpc *slpc)
static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value)
{
struct intel_guc *guc = slpc_to_guc(slpc);
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
int ret;
GEM_BUG_ON(id >= SLPC_MAX_PARAM);
ret = guc_action_slpc_set_param(guc, id, value);
if (ret)
- i915_probe_error(i915, "Failed to set param %d to %u (%pe)\n",
- id, value, ERR_PTR(ret));
+ guc_probe_error(guc, "Failed to set param %d to %u: %pe\n",
+ id, value, ERR_PTR(ret));
return ret;
}
-static int slpc_unset_param(struct intel_guc_slpc *slpc, u8 id)
-{
- struct intel_guc *guc = slpc_to_guc(slpc);
-
- GEM_BUG_ON(id >= SLPC_MAX_PARAM);
-
- return guc_action_slpc_unset_param(guc, id);
-}
-
static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
{
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
struct intel_guc *guc = slpc_to_guc(slpc);
+ struct drm_i915_private *i915 = slpc_to_i915(slpc);
intel_wakeref_t wakeref;
int ret = 0;
@@ -236,9 +245,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
freq);
if (ret)
- drm_notice(&i915->drm,
- "Failed to send set_param for min freq(%d): (%d)\n",
- freq, ret);
+ guc_notice(guc, "Failed to send set_param for min freq(%d): %pe\n",
+ freq, ERR_PTR(ret));
}
return ret;
@@ -267,7 +275,6 @@ static void slpc_boost_work(struct work_struct *work)
int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
{
struct intel_guc *guc = slpc_to_guc(slpc);
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
int err;
@@ -275,14 +282,13 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr);
if (unlikely(err)) {
- i915_probe_error(i915,
- "Failed to allocate SLPC struct (err=%pe)\n",
- ERR_PTR(err));
+ guc_probe_error(guc, "Failed to allocate SLPC struct: %pe\n", ERR_PTR(err));
return err;
}
slpc->max_freq_softlimit = 0;
slpc->min_freq_softlimit = 0;
+ slpc->ignore_eff_freq = false;
slpc->min_is_rpmax = false;
slpc->boost_freq = 0;
@@ -290,6 +296,8 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
slpc->num_boosts = 0;
slpc->media_ratio_mode = SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL;
+ slpc->power_profile = SLPC_POWER_PROFILES_BASE;
+
mutex_init(&slpc->lock);
INIT_WORK(&slpc->boost_work, slpc_boost_work);
@@ -338,7 +346,6 @@ static int guc_action_slpc_reset(struct intel_guc *guc, u32 offset)
static int slpc_reset(struct intel_guc_slpc *slpc)
{
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
struct intel_guc *guc = slpc_to_guc(slpc);
u32 offset = intel_guc_ggtt_offset(guc, slpc->vma);
int ret;
@@ -346,15 +353,14 @@ static int slpc_reset(struct intel_guc_slpc *slpc)
ret = guc_action_slpc_reset(guc, offset);
if (unlikely(ret < 0)) {
- i915_probe_error(i915, "SLPC reset action failed (%pe)\n",
- ERR_PTR(ret));
+ guc_probe_error(guc, "SLPC reset action failed: %pe\n", ERR_PTR(ret));
return ret;
}
if (!ret) {
if (wait_for(slpc_is_running(slpc), SLPC_RESET_TIMEOUT_MS)) {
- i915_probe_error(i915, "SLPC not enabled! State = %s\n",
- slpc_get_state_string(slpc));
+ guc_probe_error(guc, "SLPC not enabled! State = %s\n",
+ slpc_get_state_string(slpc));
return -EIO;
}
}
@@ -384,21 +390,29 @@ static u32 slpc_decode_max_freq(struct intel_guc_slpc *slpc)
GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER);
}
-static void slpc_shared_data_reset(struct slpc_shared_data *data)
+static void slpc_shared_data_reset(struct intel_guc_slpc *slpc)
{
- memset(data, 0, sizeof(struct slpc_shared_data));
+ struct drm_i915_private *i915 = slpc_to_i915(slpc);
+ struct slpc_shared_data *data = slpc->vaddr;
+ memset(data, 0, sizeof(struct slpc_shared_data));
data->header.size = sizeof(struct slpc_shared_data);
/* Enable only GTPERF task, disable others */
slpc_mem_set_enabled(data, SLPC_PARAM_TASK_ENABLE_GTPERF,
SLPC_PARAM_TASK_DISABLE_GTPERF);
- slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_BALANCER,
- SLPC_PARAM_TASK_DISABLE_BALANCER);
+ /*
+ * Don't allow balancer related algorithms on platforms before
+ * Xe_LPG, where GuC started to restrict it to TDP limited scenarios.
+ */
+ if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 70)) {
+ slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_BALANCER,
+ SLPC_PARAM_TASK_DISABLE_BALANCER);
- slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_DCC,
- SLPC_PARAM_TASK_DISABLE_DCC);
+ slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_DCC,
+ SLPC_PARAM_TASK_DISABLE_DCC);
+ }
}
/**
@@ -465,6 +479,36 @@ int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val)
return ret;
}
+int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val)
+{
+ struct drm_i915_private *i915 = slpc_to_i915(slpc);
+ intel_wakeref_t wakeref;
+ int ret;
+
+ mutex_lock(&slpc->lock);
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+ ret = slpc_set_param(slpc,
+ SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
+ val);
+ if (ret) {
+ guc_probe_error(slpc_to_guc(slpc), "Failed to set efficient freq(%d): %pe\n",
+ val, ERR_PTR(ret));
+ } else {
+ slpc->ignore_eff_freq = val;
+
+ /* Set min to RPn when we disable efficient freq */
+ if (val)
+ ret = slpc_set_param(slpc,
+ SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+ slpc->min_freq);
+ }
+
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+ mutex_unlock(&slpc->lock);
+ return ret;
+}
+
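
Note the coupling: disabling the efficient frequency also reissues the min
frequency as RPn, since the efficient frequency may previously have acted as
the effective floor. A hedged sysfs-style usage sketch:

	/* Toggle efficient-frequency handling off; min falls back to RPn */
	err = intel_guc_slpc_set_ignore_eff_freq(slpc, true);
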
/**
* intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC.
* @slpc: pointer to intel_guc_slpc.
@@ -490,16 +534,6 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)
mutex_lock(&slpc->lock);
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
- /* Ignore efficient freq if lower min freq is requested */
- ret = slpc_set_param(slpc,
- SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
- val < slpc->rp1_freq);
- if (ret) {
- i915_probe_error(i915, "Failed to toggle efficient freq (%pe)\n",
- ERR_PTR(ret));
- goto out;
- }
-
ret = slpc_set_param(slpc,
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
val);
@@ -507,7 +541,6 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)
if (!ret)
slpc->min_freq_softlimit = val;
-out:
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
mutex_unlock(&slpc->lock);
@@ -545,6 +578,20 @@ int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val)
return ret;
}
+int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val)
+{
+ struct drm_i915_private *i915 = slpc_to_i915(slpc);
+ intel_wakeref_t wakeref;
+ int ret = 0;
+
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ ret = slpc_set_param(slpc,
+ SLPC_PARAM_STRATEGIES,
+ val);
+
+ return ret;
+}
+
int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val)
{
struct drm_i915_private *i915 = slpc_to_i915(slpc);
@@ -561,6 +608,34 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val)
return ret;
}
+int intel_guc_slpc_set_power_profile(struct intel_guc_slpc *slpc, u32 val)
+{
+ struct drm_i915_private *i915 = slpc_to_i915(slpc);
+ intel_wakeref_t wakeref;
+ int ret = 0;
+
+ if (val > SLPC_POWER_PROFILES_POWER_SAVING)
+ return -EINVAL;
+
+ mutex_lock(&slpc->lock);
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+ ret = slpc_set_param(slpc,
+ SLPC_PARAM_POWER_PROFILE,
+ val);
+ if (ret)
+ guc_err(slpc_to_guc(slpc),
+ "Failed to set power profile to %d: %pe\n",
+ val, ERR_PTR(ret));
+ else
+ slpc->power_profile = val;
+
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+ mutex_unlock(&slpc->lock);
+
+ return ret;
+}
+
void intel_guc_pm_intrmsk_enable(struct intel_gt *gt)
{
u32 pm_intrmsk_mbz = 0;
@@ -597,11 +672,10 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
return ret;
if (!slpc->min_freq_softlimit) {
- ret = intel_guc_slpc_get_min_freq(slpc, &slpc->min_freq_softlimit);
- if (unlikely(ret))
- return ret;
+ /* Min softlimit is initialized to RPn */
+ slpc->min_freq_softlimit = slpc->min_freq;
slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit;
- } else if (slpc->min_freq_softlimit != slpc->min_freq) {
+ } else {
return intel_guc_slpc_set_min_freq(slpc,
slpc->min_freq_softlimit);
}
@@ -611,15 +685,12 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc)
{
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
int slpc_min_freq;
int ret;
ret = intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq);
if (ret) {
- drm_err(&i915->drm,
- "Failed to get min freq: (%d)\n",
- ret);
+ guc_err(slpc_to_guc(slpc), "Failed to get min freq: %pe\n", ERR_PTR(ret));
return false;
}
@@ -664,52 +735,6 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
slpc->boost_freq = slpc->rp0_freq;
}
-/**
- * intel_guc_slpc_override_gucrc_mode() - override GUCRC mode
- * @slpc: pointer to intel_guc_slpc.
- * @mode: new value of the mode.
- *
- * This function will override the GUCRC mode.
- *
- * Return: 0 on success, non-zero error code on failure.
- */
-int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode)
-{
- int ret;
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
- intel_wakeref_t wakeref;
-
- if (mode >= SLPC_GUCRC_MODE_MAX)
- return -EINVAL;
-
- with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
- ret = slpc_set_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE, mode);
- if (ret)
- drm_err(&i915->drm,
- "Override gucrc mode %d failed %d\n",
- mode, ret);
- }
-
- return ret;
-}
-
-int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc)
-{
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
- intel_wakeref_t wakeref;
- int ret = 0;
-
- with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
- ret = slpc_unset_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE);
- if (ret)
- drm_err(&i915->drm,
- "Unsetting gucrc mode failed %d\n",
- ret);
- }
-
- return ret;
-}
-
/*
* intel_guc_slpc_enable() - Start SLPC
* @slpc: pointer to intel_guc_slpc.
@@ -725,17 +750,16 @@ int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc)
*/
int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
{
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
+ struct intel_guc *guc = slpc_to_guc(slpc);
int ret;
GEM_BUG_ON(!slpc->vma);
- slpc_shared_data_reset(slpc->vaddr);
+ slpc_shared_data_reset(slpc);
ret = slpc_reset(slpc);
if (unlikely(ret < 0)) {
- i915_probe_error(i915, "SLPC Reset event returned (%pe)\n",
- ERR_PTR(ret));
+ guc_probe_error(guc, "SLPC Reset event returned: %pe\n", ERR_PTR(ret));
return ret;
}
@@ -743,7 +767,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
if (unlikely(ret < 0))
return ret;
- intel_guc_pm_intrmsk_enable(to_gt(i915));
+ intel_guc_pm_intrmsk_enable(slpc_to_gt(slpc));
slpc_get_rp_values(slpc);
@@ -753,22 +777,33 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
/* Set SLPC max limit to RP0 */
ret = slpc_use_fused_rp0(slpc);
if (unlikely(ret)) {
- i915_probe_error(i915, "Failed to set SLPC max to RP0 (%pe)\n",
- ERR_PTR(ret));
+ guc_probe_error(guc, "Failed to set SLPC max to RP0: %pe\n", ERR_PTR(ret));
return ret;
}
+ /* Set cached value of ignore efficient freq */
+ intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
+
/* Revert SLPC min/max to softlimits if necessary */
ret = slpc_set_softlimits(slpc);
if (unlikely(ret)) {
- i915_probe_error(i915, "Failed to set SLPC softlimits (%pe)\n",
- ERR_PTR(ret));
+ guc_probe_error(guc, "Failed to set SLPC softlimits: %pe\n", ERR_PTR(ret));
return ret;
}
/* Set cached media freq ratio mode */
intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode);
+ /* Enable SLPC Optimized Strategy for compute */
+ intel_guc_slpc_set_strategy(slpc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);
+
+ /* Set cached value of power_profile */
+ ret = intel_guc_slpc_set_power_profile(slpc, slpc->power_profile);
+ if (unlikely(ret)) {
+ guc_probe_error(guc, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret));
+ return ret;
+ }
+
return 0;
}
@@ -832,12 +867,31 @@ int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p
drm_printf(p, "\tSLPC state: %s\n", slpc_get_state_string(slpc));
drm_printf(p, "\tGTPERF task active: %s\n",
str_yes_no(slpc_tasks->status & SLPC_GTPERF_TASK_ENABLED));
+ drm_printf(p, "\tDCC enabled: %s\n",
+ str_yes_no(slpc_tasks->status &
+ SLPC_DCC_TASK_ENABLED));
+ drm_printf(p, "\tDCC in: %s\n",
+ str_yes_no(slpc_tasks->status & SLPC_IN_DCC));
+ drm_printf(p, "\tBalancer enabled: %s\n",
+ str_yes_no(slpc_tasks->status &
+ SLPC_BALANCER_ENABLED));
+ drm_printf(p, "\tIBC enabled: %s\n",
+ str_yes_no(slpc_tasks->status &
+ SLPC_IBC_TASK_ENABLED));
+ drm_printf(p, "\tBalancer IA LMT enabled: %s\n",
+ str_yes_no(slpc_tasks->status &
+ SLPC_BALANCER_IA_LMT_ENABLED));
+ drm_printf(p, "\tBalancer IA LMT active: %s\n",
+ str_yes_no(slpc_tasks->status &
+ SLPC_BALANCER_IA_LMT_ACTIVE));
drm_printf(p, "\tMax freq: %u MHz\n",
slpc_decode_max_freq(slpc));
drm_printf(p, "\tMin freq: %u MHz\n",
slpc_decode_min_freq(slpc));
drm_printf(p, "\twaitboosts: %u\n",
slpc->num_boosts);
+ drm_printf(p, "\tBoosts outstanding: %u\n",
+ atomic_read(&slpc->num_waiters));
}
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 17ed515f6a85..fc9f761b4372 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -44,7 +44,8 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val);
void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
-int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc);
-int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode);
+int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val);
+int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val);
+int intel_guc_slpc_set_power_profile(struct intel_guc_slpc *slpc, u32 val);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
index a6ef53b04e04..83673b10ac4e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
@@ -31,6 +31,10 @@ struct intel_guc_slpc {
/* frequency softlimits */
u32 min_freq_softlimit;
u32 max_freq_softlimit;
+ bool ignore_eff_freq;
+
+ /* Base or power saving */
+ u32 power_profile;
/* cached media ratio mode */
u32 media_ratio_mode;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 53f3ed3244d5..68f2b8d363ac 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -25,15 +25,16 @@
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"
+#include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
+#include "i915_trace.h"
+#include "i915_wait_util.h"
#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_print.h"
#include "intel_guc_submission.h"
-#include "i915_drv.h"
-#include "i915_reg.h"
-#include "i915_trace.h"
-
/**
* DOC: GuC-based command submission
*
@@ -235,6 +236,13 @@ set_context_destroyed(struct intel_context *ce)
ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
}
+static inline void
+clr_context_destroyed(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state &= ~SCHED_STATE_DESTROYED;
+}
+
static inline bool context_pending_disable(struct intel_context *ce)
{
return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
@@ -390,7 +398,7 @@ static inline void set_context_guc_id_invalid(struct intel_context *ce)
static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
{
- return &ce->engine->gt->uc.guc;
+ return gt_to_guc(ce->engine->gt);
}
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
@@ -612,6 +620,8 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
u32 g2h_len_dw,
bool loop)
{
+ int ret;
+
/*
* We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
* so we don't handle the case where we don't get a reply because we
@@ -622,7 +632,11 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
if (g2h_len_dw)
atomic_inc(&guc->outstanding_submission_g2h);
- return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+ ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+ if (ret && g2h_len_dw)
+ atomic_dec(&guc->outstanding_submission_g2h);
+
+ return ret;
}
int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
@@ -1106,7 +1120,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
if (deregister)
guc_signal_context_fence(ce);
if (destroyed) {
- intel_gt_pm_put_async(guc_to_gt(guc));
+ intel_gt_pm_put_async_untracked(guc_to_gt(guc));
release_guc_id(guc, ce);
__guc_context_destroy(ce);
}
@@ -1209,7 +1223,7 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
* determine validity of these values. Instead we read the values multiple times
* until they are consistent. In test runs, 3 attempts results in consistent
* values. The upper bound is set to 6 attempts and may need to be tuned as per
- * any new occurences.
+ * any new occurrences.
*/
static void __get_engine_usage_record(struct intel_engine_cs *engine,
u32 *last_in, u32 *id, u32 *total)
@@ -1229,10 +1243,25 @@ static void __get_engine_usage_record(struct intel_engine_cs *engine,
} while (++i < 6);
}
+static void __set_engine_usage_record(struct intel_engine_cs *engine,
+ u32 last_in, u32 id, u32 total)
+{
+ struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
+
+#define record_write(map_, field_, val_) \
+ iosys_map_wr_field(map_, 0, struct guc_engine_usage_record, field_, val_)
+
+ record_write(&rec_map, last_switch_in_stamp, last_in);
+ record_write(&rec_map, current_context_index, id);
+ record_write(&rec_map, total_runtime, total);
+
+#undef record_write
+}
+
static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
struct intel_engine_guc_stats *stats = &engine->stats.guc;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
u32 last_switch, ctx_id, total;
lockdep_assert_held(&guc->timestamp.lock);
@@ -1256,15 +1285,12 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
static u32 gpm_timestamp_shift(struct intel_gt *gt)
{
intel_wakeref_t wakeref;
- u32 reg, shift;
+ u32 reg;
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
- shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
- GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
-
- return 3 - shift;
+ return 3 - REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
}
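
REG_FIELD_GET() is the register-mask wrapper around FIELD_GET(), so the conversion above is behavior-preserving; for a contiguous mask the two forms are equivalent:

	/* open-coded form removed above */
	shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
		GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;

	/* REG_FIELD_GET() derives the shift from the mask alone */
	shift = REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);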
static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
@@ -1297,11 +1323,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
struct intel_gt *gt = engine->gt;
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
u64 total, gt_stamp_saved;
unsigned long flags;
u32 reset_count;
bool in_reset;
+ intel_wakeref_t wakeref;
spin_lock_irqsave(&guc->timestamp.lock, flags);
@@ -1324,7 +1351,8 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
* start_gt_clk is derived from GuC state. To get a consistent
* view of activity, we query the GuC state only if gt is awake.
*/
- if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
+ wakeref = in_reset ? NULL : intel_gt_pm_get_if_awake(gt);
+ if (wakeref) {
stats_saved = *stats;
gt_stamp_saved = guc->timestamp.gt_stamp;
/*
@@ -1333,7 +1361,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
*/
guc_update_engine_gt_clks(engine);
guc_update_pm_timestamp(guc, now);
- intel_gt_pm_put_async(gt);
+ intel_gt_pm_put_async(gt, wakeref);
if (i915_reset_count(gpu_error) != reset_count) {
*stats = stats_saved;
guc->timestamp.gt_stamp = gt_stamp_saved;
@@ -1347,9 +1375,63 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
total += intel_gt_clock_interval_to_ns(gt, clk);
}
+ if (total > stats->total)
+ stats->total = total;
+
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
- return ns_to_ktime(total);
+ return ns_to_ktime(stats->total);
+}
+
+static void guc_enable_busyness_worker(struct intel_guc *guc)
+{
+ mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay);
+}
+
+static void guc_cancel_busyness_worker(struct intel_guc *guc)
+{
+ /*
+ * There are many different call stacks that can get here. Some of them
+ * hold the reset mutex. The busyness worker also attempts to acquire the
+ * reset mutex. Synchronously flushing a worker thread requires acquiring
+ * the worker mutex. Lockdep sees this as a conflict. It thinks that the
+ * flush can deadlock because it holds the worker mutex while waiting for
+ * the reset mutex, but another thread is holding the reset mutex and might
+ * attempt to use other worker functions.
+ *
+ * In practice, this scenario does not exist because the busyness worker
+ * does not block waiting for the reset mutex. It does a try-lock on it and
+ * immediately exits if the lock is already held. Unfortunately, the mutex
+ * in question (I915_RESET_BACKOFF) is an i915 implementation which has lockdep
+ * annotation but not to the extent of explaining the 'might lock' is also a
+ * 'does not need to lock'. So one option would be to add more complex lockdep
+ * annotations to ignore the issue (if at all possible). A simpler option is to
+ * just not flush synchronously when a rest in progress. Given that the worker
+ * will just early exit and re-schedule itself anyway, there is no advantage
+ * to running it immediately.
+ *
+ * If a reset is not in progress, then the synchronous flush may be required.
+ * As noted many call stacks lead here, some during suspend and driver unload
+ * which do require a synchronous flush to make sure the worker is stopped
+ * before memory is freed.
+ *
+ * Trying to pass a 'need_sync' or 'in_reset' flag all the way down through
+ * every possible call stack is unfeasible. It would be too intrusive to many
+ * areas that really don't care about the GuC backend. However, there is the
+ * I915_RESET_BACKOFF flag and the gt->reset.mutex can be tested for is_locked.
+ * So just use those. Note that testing both is required due to the hideously
+ * complex nature of the i915 driver's reset code paths.
+ *
+ * And note that in the case of a reset occurring during driver unload
+ * (wedged_on_fini), skipping the cancel in reset_prepare/reset_fini (when the
+ * reset flag/mutex are set) is fine because there is another explicit cancel in
+ * intel_guc_submission_fini (when the reset flag/mutex are not set).
+ */
+ if (mutex_is_locked(&guc_to_gt(guc)->reset.mutex) ||
+ test_bit(I915_RESET_BACKOFF, &guc_to_gt(guc)->reset.flags))
+ cancel_delayed_work(&guc->timestamp.work);
+ else
+ cancel_delayed_work_sync(&guc->timestamp.work);
}
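
The cycle lockdep reports can be pictured as follows (a schematic of the two paths, not actual code):

	/*
	 * cancel path (this function):     busyness worker:
	 *   holds gt->reset.mutex            running, i.e. holds the work "lock"
	 *   cancel_delayed_work_sync()       mutex_trylock(&gt->reset.mutex)
	 *     -> waits on the work "lock"      -> bails out if already held
	 *
	 * Lockdep models the trylock as a full acquisition, which closes a
	 * lock cycle that cannot actually occur at runtime.
	 */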
static void __reset_guc_busyness_stats(struct intel_guc *guc)
@@ -1360,19 +1442,43 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
unsigned long flags;
ktime_t unused;
- cancel_delayed_work_sync(&guc->timestamp.work);
-
spin_lock_irqsave(&guc->timestamp.lock, flags);
guc_update_pm_timestamp(guc, &unused);
for_each_engine(engine, gt, id) {
+ struct intel_engine_guc_stats *stats = &engine->stats.guc;
+
guc_update_engine_gt_clks(engine);
- engine->stats.guc.prev_total = 0;
+
+ /*
+ * If resetting a running context, accumulate the active
+ * time as well since there will be no context switch.
+ */
+ if (stats->running) {
+ u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
+
+ stats->total_gt_clks += clk;
+ }
+ stats->prev_total = 0;
+ stats->running = 0;
}
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}
+static void __update_guc_busyness_running_state(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned long flags;
+
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+ for_each_engine(engine, gt, id)
+ engine->stats.guc.running = false;
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+
static void __update_guc_busyness_stats(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -1392,16 +1498,67 @@ static void __update_guc_busyness_stats(struct intel_guc *guc)
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}
+static void __guc_context_update_stats(struct intel_context *ce)
+{
+ struct intel_guc *guc = ce_to_guc(ce);
+ unsigned long flags;
+
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+ lrc_update_runtime(ce);
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+
+static void guc_context_update_stats(struct intel_context *ce)
+{
+ if (!intel_context_pin_if_active(ce))
+ return;
+
+ __guc_context_update_stats(ce);
+ intel_context_unpin(ce);
+}
+
static void guc_timestamp_ping(struct work_struct *wrk)
{
struct intel_guc *guc = container_of(wrk, typeof(*guc),
timestamp.work.work);
struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_context *ce;
intel_wakeref_t wakeref;
+ unsigned long index;
int srcu, ret;
/*
+ * Ideally the busyness worker should take a gt pm wakeref because the
+ * worker only needs to be active while gt is awake. However, the
+ * gt_park path cancels the worker synchronously and this complicates
+ * the flow if the worker is also running at the same time. The cancel
+ * waits for the worker and when the worker releases the wakeref, that
+ * would call gt_park and would lead to a deadlock.
+ *
+ * The resolution is to take the global pm wakeref if runtime pm is
+ * already active. If not, we don't need to update the busyness stats as
+ * the stats would already be updated when the gt was parked.
+ *
+ * Note:
+ * - We do not requeue the worker if we cannot take a reference to runtime
+ * pm since intel_guc_busyness_unpark would requeue the worker in the
+ * resume path.
+ *
+ * - If the gt was parked for longer than the time it takes the GT timestamp
+ * to roll over, we ignore those rollovers since we don't care about
+ * tracking the exact GT time. We only care about rollovers while the gt
+ * is active and running workloads.
+ *
+ * - There is a window of time between gt_park and runtime suspend,
+ * where the worker may run. This is acceptable since the worker will
+ * not find any new data to update busyness.
+ */
+ wakeref = intel_runtime_pm_get_if_active(&gt->i915->runtime_pm);
+ if (!wakeref)
+ return;
+
+ /*
* Synchronize with gt reset to make sure the worker does not
* corrupt the engine/guc stats. NB: can't actually block waiting
* for a reset to complete as the reset requires flushing out
@@ -1409,19 +1566,27 @@ static void guc_timestamp_ping(struct work_struct *wrk)
*/
ret = intel_gt_reset_trylock(gt, &srcu);
if (ret)
- return;
+ goto err_trylock;
- with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
- __update_guc_busyness_stats(guc);
+ __update_guc_busyness_stats(guc);
+
+ /* adjust context stats for overflow */
+ xa_for_each(&guc->context_lookup, index, ce)
+ guc_context_update_stats(ce);
intel_gt_reset_unlock(gt, srcu);
- mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
- guc->timestamp.ping_delay);
+ guc_enable_busyness_worker(guc);
+
+err_trylock:
+ intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
}
static int guc_action_enable_usage_stats(struct intel_guc *guc)
{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
u32 offset = intel_guc_engine_usage_offset(guc);
u32 action[] = {
INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
@@ -1429,38 +1594,50 @@ static int guc_action_enable_usage_stats(struct intel_guc *guc)
0,
};
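+ /*
+ * Seed every usage record with an invalid context id: 0xffffffff is
+ * assumed to act as the "no context running" sentinel here, so the
+ * first busyness sample does not mistake stale buffer contents for a
+ * live context.
+ */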
+ for_each_engine(engine, gt, id)
+ __set_engine_usage_record(engine, 0, 0xffffffff, 0);
+
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
-static void guc_init_engine_stats(struct intel_guc *guc)
+static int guc_init_engine_stats(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
intel_wakeref_t wakeref;
+ int ret;
- mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
- guc->timestamp.ping_delay);
+ with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+ ret = guc_action_enable_usage_stats(guc);
- with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
- int ret = guc_action_enable_usage_stats(guc);
+ if (ret)
+ guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret));
+ else
+ guc_enable_busyness_worker(guc);
- if (ret)
- guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret));
- }
+ return ret;
+}
+
+static void guc_fini_engine_stats(struct intel_guc *guc)
+{
+ guc_cancel_busyness_worker(guc);
}
void intel_guc_busyness_park(struct intel_gt *gt)
{
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
if (!guc_submission_initialized(guc))
return;
+ /* Assume no engines are running and set running state to false */
+ __update_guc_busyness_running_state(guc);
+
/*
* There is a race with suspend flow where the worker runs after suspend
* and causes an unclaimed register access warning. Cancel the worker
* synchronously here.
*/
- cancel_delayed_work_sync(&guc->timestamp.work);
+ guc_cancel_busyness_worker(guc);
/*
* Before parking, we should sample engine busyness stats if we need to.
@@ -1477,7 +1654,7 @@ void intel_guc_busyness_park(struct intel_gt *gt)
void intel_guc_busyness_unpark(struct intel_gt *gt)
{
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
unsigned long flags;
ktime_t unused;
@@ -1487,8 +1664,7 @@ void intel_guc_busyness_unpark(struct intel_gt *gt)
spin_lock_irqsave(&guc->timestamp.lock, flags);
guc_update_pm_timestamp(guc, &unused);
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
- mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
- guc->timestamp.ping_delay);
+ guc_enable_busyness_worker(guc);
}
static inline bool
@@ -1539,6 +1715,11 @@ static void guc_flush_submissions(struct intel_guc *guc)
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
+void intel_guc_submission_flush_work(struct intel_guc *guc)
+{
+ flush_work(&guc->submission_state.destroyed_worker);
+}
+
static void guc_flush_destroyed_contexts(struct intel_guc *guc);
void intel_guc_submission_reset_prepare(struct intel_guc *guc)
@@ -1557,6 +1738,10 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
spin_lock_irq(guc_to_gt(guc)->irq_lock);
spin_unlock_irq(guc_to_gt(guc)->irq_lock);
+ /* Flush tasklet */
+ tasklet_disable(&guc->ct.receive_tasklet);
+ tasklet_enable(&guc->ct.receive_tasklet);
+
guc_flush_submissions(guc);
guc_flush_destroyed_contexts(guc);
flush_work(&guc->ct.requests.worker);
@@ -1615,16 +1800,14 @@ static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
{
- if (!IS_GRAPHICS_VER(engine->i915, 11, 12))
- return;
-
- intel_engine_stop_cs(engine);
-
/*
* Wa_22011802037: In addition to stopping the cs, we need
* to wait for any pending mi force wakeups
*/
- intel_engine_wait_for_pending_mi_fw(engine);
+ if (intel_engine_reset_needs_wa_22011802037(engine->gt)) {
+ intel_engine_stop_cs(engine);
+ intel_engine_wait_for_pending_mi_fw(engine);
+ }
}
static void guc_reset_nop(struct intel_engine_cs *engine)
@@ -1727,6 +1910,20 @@ next_context:
intel_context_put(parent);
}
+void wake_up_all_tlb_invalidate(struct intel_guc *guc)
+{
+ struct intel_guc_tlb_wait *wait;
+ unsigned long i;
+
+ if (!intel_guc_tlb_invalidation_is_available(guc))
+ return;
+
+ xa_lock_irq(&guc->tlb_lookup);
+ xa_for_each(&guc->tlb_lookup, i, wait)
+ wake_up(&wait->wq);
+ xa_unlock_irq(&guc->tlb_lookup);
+}
+
void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
{
struct intel_context *ce;
@@ -1852,33 +2049,99 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
/* GuC is blown away, drop all references to contexts */
xa_destroy(&guc->context_lookup);
+
+ /*
+ * Wedged GT won't respond to any TLB invalidation request. Simply
+ * release all the blocked waiters.
+ */
+ wake_up_all_tlb_invalidate(guc);
}
void intel_guc_submission_reset_finish(struct intel_guc *guc)
{
+ int outstanding;
+
/* Reset called during driver load or during wedge? */
if (unlikely(!guc_submission_initialized(guc) ||
+ !intel_guc_is_fw_running(guc) ||
intel_gt_is_wedged(guc_to_gt(guc)))) {
return;
}
/*
* Technically possible for either of these values to be non-zero here,
- * but very unlikely + harmless. Regardless let's add a warn so we can
+ * but very unlikely + harmless. Regardless let's add an error so we can
* see in CI if this happens frequently / a precursor to taking down the
* machine.
*/
- GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
+ outstanding = atomic_read(&guc->outstanding_submission_g2h);
+ if (outstanding)
+ guc_err(guc, "Unexpected outstanding GuC to Host response(s) in reset finish: %d\n",
+ outstanding);
atomic_set(&guc->outstanding_submission_g2h, 0);
intel_guc_global_policies_update(guc);
enable_submission(guc);
intel_gt_unpark_heartbeats(guc_to_gt(guc));
+
+ /*
+ * The full GT reset will have cleared the TLB caches and flushed the
+ * G2H message queue; we can release all the blocked waiters.
+ */
+ wake_up_all_tlb_invalidate(guc);
}
static void destroyed_worker_func(struct work_struct *w);
static void reset_fail_worker_func(struct work_struct *w);
+bool intel_guc_tlb_invalidation_is_available(struct intel_guc *guc)
+{
+ return HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915) &&
+ intel_guc_is_ready(guc);
+}
+
+static int init_tlb_lookup(struct intel_guc *guc)
+{
+ struct intel_guc_tlb_wait *wait;
+ int err;
+
+ if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915))
+ return 0;
+
+ xa_init_flags(&guc->tlb_lookup, XA_FLAGS_ALLOC);
+
+ wait = kzalloc(sizeof(*wait), GFP_KERNEL);
+ if (!wait)
+ return -ENOMEM;
+
+ init_waitqueue_head(&wait->wq);
+
+ /* Preallocate a shared id for use under memory pressure. */
+ err = xa_alloc_cyclic_irq(&guc->tlb_lookup, &guc->serial_slot, wait,
+ xa_limit_32b, &guc->next_seqno, GFP_KERNEL);
+ if (err < 0) {
+ kfree(wait);
+ return err;
+ }
+
+ return 0;
+}
+
+static void fini_tlb_lookup(struct intel_guc *guc)
+{
+ struct intel_guc_tlb_wait *wait;
+
+ if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915))
+ return;
+
+ wait = xa_load(&guc->tlb_lookup, guc->serial_slot);
+ if (wait && wait->busy)
+ guc_err(guc, "Unexpected busy item in tlb_lookup on fini\n");
+ kfree(wait);
+
+ xa_destroy(&guc->tlb_lookup);
+}
+
/*
* Set up the memory resources to be shared with the GuC (via the GGTT)
* at firmware loading time.
@@ -1897,11 +2160,15 @@ int intel_guc_submission_init(struct intel_guc *guc)
return ret;
}
+ ret = init_tlb_lookup(guc);
+ if (ret)
+ goto destroy_pool;
+
guc->submission_state.guc_ids_bitmap =
bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
if (!guc->submission_state.guc_ids_bitmap) {
ret = -ENOMEM;
- goto destroy_pool;
+ goto destroy_tlb;
}
guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
@@ -1910,9 +2177,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
return 0;
+destroy_tlb:
+ fini_tlb_lookup(guc);
destroy_pool:
guc_lrc_desc_pool_destroy_v69(guc);
-
return ret;
}
@@ -1921,10 +2189,12 @@ void intel_guc_submission_fini(struct intel_guc *guc)
if (!guc->submission_initialized)
return;
+ guc_fini_engine_stats(guc);
guc_flush_destroyed_contexts(guc);
guc_lrc_desc_pool_destroy_v69(guc);
i915_sched_engine_put(guc->sched_engine);
bitmap_free(guc->submission_state.guc_ids_bitmap);
+ fini_tlb_lookup(guc);
guc->submission_initialized = false;
}
@@ -1978,7 +2248,7 @@ static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
static void guc_submit_request(struct i915_request *rq)
{
struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
- struct intel_guc *guc = &rq->engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(rq->engine->gt);
unsigned long flags;
/* Will be called from irq-context when using foreign fences. */
@@ -2004,11 +2274,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
order_base_2(ce->parallel.number_children
+ 1));
else
- ret = ida_simple_get(&guc->submission_state.guc_ids,
- NUMBER_MULTI_LRC_GUC_ID(guc),
- guc->submission_state.num_guc_ids,
- GFP_KERNEL | __GFP_RETRY_MAYFAIL |
- __GFP_NOWARN);
+ ret = ida_alloc_range(&guc->submission_state.guc_ids,
+ NUMBER_MULTI_LRC_GUC_ID(guc),
+ guc->submission_state.num_guc_ids - 1,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (unlikely(ret < 0))
return ret;
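
Note the range convention changes with the API: ida_simple_get() treated its 'end' argument as exclusive, while ida_alloc_range() takes an inclusive 'max', hence the new '- 1'. Schematically:

	/* old: allocates an id from [start, end) */
	id = ida_simple_get(ida, start, end, gfp);

	/* new: allocates an id from [min, max], inclusive */
	id = ida_alloc_range(ida, min, max, gfp);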
@@ -2031,8 +2300,8 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
+ 1));
} else {
--guc->submission_state.guc_ids_in_use;
- ida_simple_remove(&guc->submission_state.guc_ids,
- ce->guc_id.id);
+ ida_free(&guc->submission_state.guc_ids,
+ ce->guc_id.id);
}
clr_ctx_id_mapping(guc, ce->guc_id.id);
set_context_guc_id_invalid(ce);
@@ -2429,6 +2698,7 @@ MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
+MAKE_CONTEXT_POLICY_ADD(slpc_ctx_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_CONTEXT_POLICY_ADD
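
Each MAKE_CONTEXT_POLICY_ADD(name, ID) line generates a typed helper that appends one key/length/value (KLV) entry to the policy H2G buffer. Schematically, the new entry expands to roughly the following (field and macro names abbreviated from the real macro defined earlier in this file):

	static void
	__guc_context_policy_add_slpc_ctx_freq_req(struct context_policy *policy, u32 data)
	{
		/* append a {key = SLPM_GT_FREQUENCY, len = 1, value = data} KLV */
		policy->h2g.klv[policy->count].kl =
			FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_SLPM_GT_FREQUENCY) |
			FIELD_PREP(GUC_KLV_0_LEN, 1);
		policy->h2g.klv[policy->count].value = data;
		policy->count++;
	}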
@@ -2444,10 +2714,11 @@ static int __guc_context_set_context_policies(struct intel_guc *guc,
static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
struct context_policy policy;
u32 execution_quantum;
u32 preemption_timeout;
+ u32 slpc_ctx_freq_req = 0;
unsigned long flags;
int ret;
@@ -2459,11 +2730,15 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
execution_quantum = engine->props.timeslice_duration_ms * 1000;
preemption_timeout = engine->props.preempt_timeout_ms * 1000;
+ if (ce->flags & BIT(CONTEXT_LOW_LATENCY))
+ slpc_ctx_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
+
__guc_context_policy_start_klv(&policy, ce->guc_id.id);
__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+ __guc_context_policy_add_slpc_ctx_freq_req(&policy, slpc_ctx_freq_req);
if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
__guc_context_policy_add_preempt_to_idle(&policy, 1);
@@ -2520,7 +2795,7 @@ static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
static void prepare_context_registration_info_v69(struct intel_context *ce)
{
struct intel_engine_cs *engine = ce->engine;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
u32 ctx_id = ce->guc_id.id;
struct guc_lrc_desc_v69 *desc;
struct intel_context *child;
@@ -2589,7 +2864,7 @@ static void prepare_context_registration_info_v70(struct intel_context *ce,
struct guc_ctxt_registration_info *info)
{
struct intel_engine_cs *engine = ce->engine;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
u32 ctx_id = ce->guc_id.id;
GEM_BUG_ON(!engine->mask);
@@ -2626,9 +2901,9 @@ static void prepare_context_registration_info_v70(struct intel_context *ce,
ce->parallel.guc.wqi_tail = 0;
ce->parallel.guc.wqi_head = 0;
- wq_desc_offset = i915_ggtt_offset(ce->state) +
+ wq_desc_offset = (u64)i915_ggtt_offset(ce->state) +
__get_parent_scratch_offset(ce);
- wq_base_offset = i915_ggtt_offset(ce->state) +
+ wq_base_offset = (u64)i915_ggtt_offset(ce->state) +
__get_wq_offset(ce);
info->wq_desc_lo = lower_32_bits(wq_desc_offset);
info->wq_desc_hi = upper_32_bits(wq_desc_offset);
@@ -2652,7 +2927,7 @@ static int try_context_registration(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
intel_wakeref_t wakeref;
u32 ctx_id = ce->guc_id.id;
bool context_registered;
@@ -2733,7 +3008,7 @@ static int __guc_context_pin(struct intel_context *ce,
/*
* GuC context gets pinned in guc_request_alloc. See that function for
- * explaination of why.
+ * explanation of why.
*/
return lrc_pin(ce, engine, vaddr);
@@ -2760,6 +3035,7 @@ static void guc_context_unpin(struct intel_context *ce)
{
struct intel_guc *guc = ce_to_guc(ce);
+ __guc_context_update_stats(ce);
unpin_guc_id(guc, ce);
lrc_unpin(ce);
@@ -3130,12 +3406,13 @@ static void guc_context_close(struct intel_context *ce)
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
}
-static inline void guc_lrc_desc_unpin(struct intel_context *ce)
+static inline int guc_lrc_desc_unpin(struct intel_context *ce)
{
struct intel_guc *guc = ce_to_guc(ce);
struct intel_gt *gt = guc_to_gt(guc);
unsigned long flags;
bool disabled;
+ int ret;
GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
@@ -3146,18 +3423,52 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
spin_lock_irqsave(&ce->guc_state.lock, flags);
disabled = submission_disabled(guc);
if (likely(!disabled)) {
+ /*
+ * Take a gt-pm ref and change context state to be destroyed.
+ * NOTE: a G2H IRQ that comes after will put this gt-pm ref back
+ */
__intel_gt_pm_get(gt);
set_context_destroyed(ce);
clr_context_registered(ce);
}
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
if (unlikely(disabled)) {
release_guc_id(guc, ce);
__guc_context_destroy(ce);
- return;
+ return 0;
}
- deregister_context(ce, ce->guc_id.id);
+ /*
+ * The GuC is active, so let's destroy this context. At this point we can
+ * still be racing with suspend, so we undo everything if the H2G fails in
+ * deregister_context so that a GuC reset will find this context during cleanup.
+ *
+ * There is a race condition where the reset code could have altered
+ * this context's state and done a wakeref put before we try to
+ * deregister it here. So check if the context is still set to be
+ * destroyed before undoing earlier changes, to avoid two wakeref puts
+ * on the same context.
+ */
+ ret = deregister_context(ce, ce->guc_id.id);
+ if (ret) {
+ bool pending_destroyed;
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+ pending_destroyed = context_destroyed(ce);
+ if (pending_destroyed) {
+ set_context_registered(ce);
+ clr_context_destroyed(ce);
+ }
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ /*
+ * As gt-pm is awake at function entry, intel_wakeref_put_async merely
+ * decrements the wakeref immediately; per that function's usage spec,
+ * call it after dropping the lock.
+ */
+ if (pending_destroyed)
+ intel_wakeref_put_async(&gt->wakeref);
+ }
+
+ return ret;
}
static void __guc_context_destroy(struct intel_context *ce)
@@ -3225,7 +3536,22 @@ static void deregister_destroyed_contexts(struct intel_guc *guc)
if (!ce)
break;
- guc_lrc_desc_unpin(ce);
+ if (guc_lrc_desc_unpin(ce)) {
+ /*
+ * This means GuC's CT link severed mid-way which could happen
+ * in suspend-resume corner cases. In this case, put the
+ * context back into the destroyed_contexts list which will
+ * get picked up on the next context deregistration event or
+ * purged in a GuC sanitization event (reset/unload/wedged/...).
+ */
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ list_add_tail(&ce->destroyed_link,
+ &guc->submission_state.destroyed_contexts);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+ /* Bail now since the list might never be emptied if h2gs fail */
+ break;
+ }
+
}
}
@@ -3234,9 +3560,20 @@ static void destroyed_worker_func(struct work_struct *w)
struct intel_guc *guc = container_of(w, struct intel_guc,
submission_state.destroyed_worker);
struct intel_gt *gt = guc_to_gt(guc);
- int tmp;
+ intel_wakeref_t wakeref;
+
+ /*
+ * In rare cases we can get here via async context-free fence-signals that
+ * come very late in the suspend flow or very early in the resume flow. In
+ * these cases, the GuC won't be ready, but simply skipping here is fine:
+ * these pending-destroy contexts are destroyed wholesale at GuC reset time
+ * at the end of suspend, or the worker is picked up again on the next
+ * context-destruction trigger after resume completes.
+ */
+ if (!intel_guc_is_ready(guc))
+ return;
- with_intel_gt_pm(gt, tmp)
+ with_intel_gt_pm(gt, wakeref)
deregister_destroyed_contexts(guc);
}
@@ -3441,6 +3778,7 @@ static void remove_from_context(struct i915_request *rq)
}
static const struct intel_context_ops guc_context_ops = {
+ .flags = COPS_RUNTIME_CYCLES,
.alloc = guc_context_alloc,
.close = guc_context_close,
@@ -3459,6 +3797,8 @@ static const struct intel_context_ops guc_context_ops = {
.sched_disable = guc_context_sched_disable,
+ .update_stats = guc_context_update_stats,
+
.reset = lrc_reset,
.destroy = guc_context_destroy,
@@ -3714,6 +4054,7 @@ static int guc_virtual_context_alloc(struct intel_context *ce)
}
static const struct intel_context_ops virtual_guc_context_ops = {
+ .flags = COPS_RUNTIME_CYCLES,
.alloc = guc_virtual_context_alloc,
.close = guc_context_close,
@@ -3731,6 +4072,7 @@ static const struct intel_context_ops virtual_guc_context_ops = {
.exit = guc_virtual_context_exit,
.sched_disable = guc_context_sched_disable,
+ .update_stats = guc_context_update_stats,
.destroy = guc_context_destroy,
@@ -3995,20 +4337,25 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq,
u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
/* Short circuit function */
- if (prio < I915_PRIORITY_NORMAL ||
- rq->guc_prio == GUC_PRIO_FINI ||
- (rq->guc_prio != GUC_PRIO_INIT &&
- !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
+ if (prio < I915_PRIORITY_NORMAL)
return;
spin_lock(&ce->guc_state.lock);
- if (rq->guc_prio != GUC_PRIO_FINI) {
- if (rq->guc_prio != GUC_PRIO_INIT)
- sub_context_inflight_prio(ce, rq->guc_prio);
- rq->guc_prio = new_guc_prio;
- add_context_inflight_prio(ce, rq->guc_prio);
- update_context_prio(ce);
- }
+
+ if (rq->guc_prio == GUC_PRIO_FINI)
+ goto exit;
+
+ if (!new_guc_prio_higher(rq->guc_prio, new_guc_prio))
+ goto exit;
+
+ if (rq->guc_prio != GUC_PRIO_INIT)
+ sub_context_inflight_prio(ce, rq->guc_prio);
+
+ rq->guc_prio = new_guc_prio;
+ add_context_inflight_prio(ce, rq->guc_prio);
+ update_context_prio(ce);
+
+exit:
spin_unlock(&ce->guc_state.lock);
}
@@ -4102,9 +4449,11 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
engine->submit_request = guc_submit_request;
}
-static inline void guc_kernel_context_pin(struct intel_guc *guc,
- struct intel_context *ce)
+static inline int guc_kernel_context_pin(struct intel_guc *guc,
+ struct intel_context *ce)
{
+ int ret;
+
/*
* Note: we purposefully do not check the returns below because
* the registration can only fail if a reset is just starting.
@@ -4112,16 +4461,24 @@ static inline void guc_kernel_context_pin(struct intel_guc *guc,
* isn't happening and even it did this code would be run again.
*/
- if (context_guc_id_invalid(ce))
- pin_guc_id(guc, ce);
+ if (context_guc_id_invalid(ce)) {
+ ret = pin_guc_id(guc, ce);
+
+ if (ret < 0)
+ return ret;
+ }
if (!test_bit(CONTEXT_GUC_INIT, &ce->flags))
guc_context_init(ce);
- try_context_registration(ce, true);
+ ret = try_context_registration(ce, true);
+ if (ret)
+ unpin_guc_id(guc, ce);
+
+ return ret;
}
-static inline void guc_init_lrc_mapping(struct intel_guc *guc)
+static inline int guc_init_submission(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct intel_engine_cs *engine;
@@ -4148,9 +4505,17 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc)
struct intel_context *ce;
list_for_each_entry(ce, &engine->pinned_contexts_list,
- pinned_contexts_link)
- guc_kernel_context_pin(guc, ce);
+ pinned_contexts_link) {
+ int ret = guc_kernel_context_pin(guc, ce);
+
+ if (ret) {
+ /* No point in trying to clean up as i915 will wedge on failure */
+ return ret;
+ }
+ }
}
+
+ return 0;
}
static void guc_release(struct intel_engine_cs *engine)
@@ -4204,9 +4569,15 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
/* Wa_14014475959:dg2 */
if (engine->class == COMPUTE_CLASS)
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
+ if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
IS_DG2(engine->i915))
- engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+ engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
+
+ /* Wa_16019325821 */
+ /* Wa_14019159160 */
+ if ((engine->class == COMPUTE_CLASS || engine->class == RENDER_CLASS) &&
+ IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
+ engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
/*
* TODO: GuC supports timeslicing and semaphores as well, but they're
@@ -4217,7 +4588,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
*/
engine->emit_bb_start = gen8_emit_bb_start;
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
engine->emit_bb_start = xehp_emit_bb_start;
}
@@ -4259,7 +4630,7 @@ static void guc_sched_engine_destroy(struct kref *kref)
int intel_guc_submission_setup(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
/*
* The setup relies on several assumptions (e.g. irqs always enabled)
@@ -4393,42 +4764,69 @@ static int guc_init_global_schedule_policy(struct intel_guc *guc)
return ret;
}
-void intel_guc_submission_enable(struct intel_guc *guc)
+static void guc_route_semaphores(struct intel_guc *guc, bool to_guc)
{
struct intel_gt *gt = guc_to_gt(guc);
+ u32 val;
- /* Enable and route to GuC */
- if (GRAPHICS_VER(gt->i915) >= 12)
- intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES,
- GUC_SEM_INTR_ROUTE_TO_GUC |
- GUC_SEM_INTR_ENABLE_ALL);
+ if (GRAPHICS_VER(gt->i915) < 12)
+ return;
- guc_init_lrc_mapping(guc);
- guc_init_engine_stats(guc);
- guc_init_global_schedule_policy(guc);
+ if (to_guc)
+ val = GUC_SEM_INTR_ROUTE_TO_GUC | GUC_SEM_INTR_ENABLE_ALL;
+ else
+ val = 0;
+
+ intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, val);
}
-void intel_guc_submission_disable(struct intel_guc *guc)
+int intel_guc_submission_enable(struct intel_guc *guc)
{
- struct intel_gt *gt = guc_to_gt(guc);
+ int ret;
+
+ /* Semaphore interrupt enable and route to GuC */
+ guc_route_semaphores(guc, true);
+
+ ret = guc_init_submission(guc);
+ if (ret)
+ goto fail_sem;
+
+ ret = guc_init_engine_stats(guc);
+ if (ret)
+ goto fail_sem;
- /* Note: By the time we're here, GuC may have already been reset */
+ ret = guc_init_global_schedule_policy(guc);
+ if (ret)
+ goto fail_stats;
+
+ return 0;
+
+fail_stats:
+ guc_fini_engine_stats(guc);
+fail_sem:
+ guc_route_semaphores(guc, false);
+ return ret;
+}
+
+/* Note: By the time we're here, GuC may have already been reset */
+void intel_guc_submission_disable(struct intel_guc *guc)
+{
+ guc_cancel_busyness_worker(guc);
- /* Disable and route to host */
- if (GRAPHICS_VER(gt->i915) >= 12)
- intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 0x0);
+ /* Semaphore interrupt disable and route to host */
+ guc_route_semaphores(guc, false);
}
static bool __guc_submission_supported(struct intel_guc *guc)
{
/* GuC submission is unavailable for pre-Gen11 */
return intel_guc_is_supported(guc) &&
- GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
+ GRAPHICS_VER(guc_to_i915(guc)) >= 11;
}
static bool __guc_submission_selected(struct intel_guc *guc)
{
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
if (!intel_guc_submission_is_supported(guc))
return false;
@@ -4504,6 +4902,154 @@ g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
return ce;
}
+static void wait_wake_outstanding_tlb_g2h(struct intel_guc *guc, u32 seqno)
+{
+ struct intel_guc_tlb_wait *wait;
+ unsigned long flags;
+
+ xa_lock_irqsave(&guc->tlb_lookup, flags);
+ wait = xa_load(&guc->tlb_lookup, seqno);
+
+ if (wait)
+ wake_up(&wait->wq);
+ else
+ guc_dbg(guc,
+ "Stale TLB invalidation response with seqno %d\n", seqno);
+
+ xa_unlock_irqrestore(&guc->tlb_lookup, flags);
+}
+
+int intel_guc_tlb_invalidation_done(struct intel_guc *guc,
+ const u32 *payload, u32 len)
+{
+ if (len < 1)
+ return -EPROTO;
+
+ wait_wake_outstanding_tlb_g2h(guc, payload[0]);
+ return 0;
+}
+
+static long must_wait_woken(struct wait_queue_entry *wq_entry, long timeout)
+{
+ /*
+ * This is equivalent to wait_woken() with the exception that
+ * we do not wake up early if the kthread task has been completed.
+ * As we are called from page reclaim in any task context,
+ * we may be invoked from stopped kthreads, but we *must*
+ * complete the wait from the HW.
+ */
+ do {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (wq_entry->flags & WQ_FLAG_WOKEN)
+ break;
+
+ timeout = schedule_timeout(timeout);
+ } while (timeout);
+
+ /* See wait_woken() and woken_wake_function() */
+ __set_current_state(TASK_RUNNING);
+ smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN);
+
+ return timeout;
+}
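
must_wait_woken() pairs with woken_wake_function() exactly as wait_woken() does; the caller pattern, as used by guc_send_invalidate_tlb() below, is:

	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	add_wait_queue(&wq->wq, &wait);
	/* ... issue the H2G request ... */
	timeout = must_wait_woken(&wait, timeout); /* sleeps until wake_up() or expiry */
	remove_wait_queue(&wq->wq, &wait);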
+
+static bool intel_gt_is_enabled(const struct intel_gt *gt)
+{
+ /* Check if GT is wedged or suspended */
+ if (intel_gt_is_wedged(gt) || !intel_irqs_enabled(gt->i915))
+ return false;
+ return true;
+}
+
+static int guc_send_invalidate_tlb(struct intel_guc *guc,
+ enum intel_guc_tlb_invalidation_type type)
+{
+ struct intel_guc_tlb_wait _wq, *wq = &_wq;
+ struct intel_gt *gt = guc_to_gt(guc);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ int err;
+ u32 seqno;
+ u32 action[] = {
+ INTEL_GUC_ACTION_TLB_INVALIDATION,
+ 0,
+ REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_TYPE_MASK, type) |
+ REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_MODE_MASK,
+ INTEL_GUC_TLB_INVAL_MODE_HEAVY) |
+ INTEL_GUC_TLB_INVAL_FLUSH_CACHE,
+ };
+ u32 size = ARRAY_SIZE(action);
+
+ /*
+ * Early guard against GT enablement. TLB invalidation should not be
+ * attempted if the GT is disabled due to suspend/wedge.
+ */
+ if (!intel_gt_is_enabled(gt))
+ return -EINVAL;
+
+ init_waitqueue_head(&_wq.wq);
+
+ if (xa_alloc_cyclic_irq(&guc->tlb_lookup, &seqno, wq,
+ xa_limit_32b, &guc->next_seqno,
+ GFP_ATOMIC | __GFP_NOWARN) < 0) {
+ /* Under severe memory pressure? Serialise TLB allocations */
+ xa_lock_irq(&guc->tlb_lookup);
+ wq = xa_load(&guc->tlb_lookup, guc->serial_slot);
+ wait_event_lock_irq(wq->wq,
+ !READ_ONCE(wq->busy),
+ guc->tlb_lookup.xa_lock);
+ /*
+ * Update wq->busy under lock to ensure only one waiter can
+ * issue the TLB invalidation command using the serial slot at a
+ * time. The condition is set to true before releasing the lock
+ * so that other callers continue to wait until woken up again.
+ */
+ wq->busy = true;
+ xa_unlock_irq(&guc->tlb_lookup);
+
+ seqno = guc->serial_slot;
+ }
+
+ action[1] = seqno;
+
+ add_wait_queue(&wq->wq, &wait);
+
+ /* This is a critical reclaim path and thus we must loop here. */
+ err = intel_guc_send_busy_loop(guc, action, size, G2H_LEN_DW_INVALIDATE_TLB, true);
+ if (err)
+ goto out;
+
+ /*
+ * Late guard against GT enablement. It is not an error for the TLB
+ * invalidation to time out if the GT is disabled during the process
+ * due to suspend/wedge. In fact, the TLB invalidation is cancelled
+ * in this case.
+ */
+ if (!must_wait_woken(&wait, intel_guc_ct_max_queue_time_jiffies()) &&
+ intel_gt_is_enabled(gt)) {
+ guc_err(guc,
+ "TLB invalidation response timed out for seqno %u\n", seqno);
+ err = -ETIME;
+ }
+out:
+ remove_wait_queue(&wq->wq, &wait);
+ if (seqno != guc->serial_slot)
+ xa_erase_irq(&guc->tlb_lookup, seqno);
+
+ return err;
+}
+
+/* Send a H2G command to invalidate the TLBs at engine level and beyond. */
+int intel_guc_invalidate_tlb_engines(struct intel_guc *guc)
+{
+ return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_ENGINES);
+}
+
+/* Send a H2G command to invalidate the GuC's internal TLB. */
+int intel_guc_invalidate_tlb_guc(struct intel_guc *guc)
+{
+ return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_GUC);
+}
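
A typical caller gates on availability first; a sketch only, since the real call sites live in the GGTT/PPGTT code and the fallback name below is hypothetical:

	if (intel_guc_tlb_invalidation_is_available(guc))
		err = intel_guc_invalidate_tlb_engines(guc);
	else
		err = mmio_invalidate_full(gt); /* hypothetical non-GuC fallback */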
+
int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
const u32 *msg,
u32 len)
@@ -4545,7 +5091,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
intel_context_put(ce);
} else if (context_destroyed(ce)) {
/* Context has been destroyed */
- intel_gt_pm_put_async(guc_to_gt(guc));
+ intel_gt_pm_put_async_untracked(guc_to_gt(guc));
release_guc_id(guc, ce);
__guc_context_destroy(ce);
}
@@ -4638,13 +5184,38 @@ static void capture_error_state(struct intel_guc *guc,
{
struct intel_gt *gt = guc_to_gt(guc);
struct drm_i915_private *i915 = gt->i915;
- struct intel_engine_cs *engine = __context_to_physical_engine(ce);
intel_wakeref_t wakeref;
+ intel_engine_mask_t engine_mask;
+
+ if (intel_engine_is_virtual(ce->engine)) {
+ struct intel_engine_cs *e;
+ intel_engine_mask_t tmp, virtual_mask = ce->engine->mask;
+
+ engine_mask = 0;
+ for_each_engine_masked(e, ce->engine->gt, virtual_mask, tmp) {
+ bool match = intel_guc_capture_is_matching_engine(gt, ce, e);
+
+ if (match) {
+ intel_engine_set_hung_context(e, ce);
+ engine_mask |= e->mask;
+ i915_increase_reset_engine_count(&i915->gpu_error,
+ e);
+ }
+ }
+
+ if (!engine_mask) {
+ guc_warn(guc, "No matching physical engine capture for virtual engine context 0x%04X / %s",
+ ce->guc_id.id, ce->engine->name);
+ engine_mask = ~0U;
+ }
+ } else {
+ intel_engine_set_hung_context(ce->engine, ce);
+ engine_mask = ce->engine->mask;
+ i915_increase_reset_engine_count(&i915->gpu_error, ce->engine);
+ }
- intel_engine_set_hung_context(engine, ce);
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
- i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
- atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
+ i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
}
static void guc_context_replay(struct intel_context *ce)
@@ -4658,18 +5229,19 @@ static void guc_context_replay(struct intel_context *ce)
static void guc_handle_context_reset(struct intel_guc *guc,
struct intel_context *ce)
{
+ bool capture = intel_context_is_schedulable(ce);
+
trace_intel_context_reset(ce);
- drm_dbg(&guc_to_gt(guc)->i915->drm, "Got GuC reset of 0x%04X, exiting = %d, banned = %d\n",
- ce->guc_id.id, test_bit(CONTEXT_EXITING, &ce->flags),
- test_bit(CONTEXT_BANNED, &ce->flags));
+ guc_dbg(guc, "%s context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n",
+ capture ? "Got" : "Ignoring",
+ ce->guc_id.id, ce->engine->name,
+ str_yes_no(intel_context_is_exiting(ce)),
+ str_yes_no(intel_context_is_banned(ce)));
- if (likely(intel_context_is_schedulable(ce))) {
+ if (capture) {
capture_error_state(guc, ce);
guc_context_replay(ce);
- } else {
- guc_info(guc, "Ignoring context reset notification of exiting context 0x%04X on %s",
- ce->guc_id.id, ce->engine->name);
}
}
@@ -4817,7 +5389,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
void intel_guc_find_hung_context(struct intel_engine_cs *engine)
{
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
struct intel_context *ce;
struct i915_request *rq;
unsigned long index;
@@ -4879,7 +5451,7 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
struct i915_request *hung_rq,
struct drm_printer *m)
{
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
struct intel_context *ce;
unsigned long index;
unsigned long flags;
@@ -4971,12 +5543,20 @@ static inline void guc_log_context(struct drm_printer *p,
{
drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
- drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
- ce->ring->head,
- ce->lrc_reg_state[CTX_RING_HEAD]);
- drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
- ce->ring->tail,
- ce->lrc_reg_state[CTX_RING_TAIL]);
+ if (intel_context_pin_if_active(ce)) {
+ drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
+ ce->ring->head,
+ ce->lrc_reg_state[CTX_RING_HEAD]);
+ drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
+ ce->ring->tail,
+ ce->lrc_reg_state[CTX_RING_TAIL]);
+ intel_context_unpin(ce);
+ } else {
+ drm_printf(p, "\t\tLRC Head: Internal %u, Memory not pinned\n",
+ ce->ring->head);
+ drm_printf(p, "\t\tLRC Tail: Internal %u, Memory not pinned\n",
+ ce->ring->tail);
+ }
drm_printf(p, "\t\tContext Pin Count: %u\n",
atomic_read(&ce->pin_count));
drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
@@ -5331,7 +5911,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
if (!ve)
return ERR_PTR(-ENOMEM);
- guc = &siblings[0]->gt->uc.guc;
+ guc = gt_to_guc(siblings[0]->gt);
ve->base.i915 = siblings[0]->i915;
ve->base.gt = siblings[0]->gt;
@@ -5355,6 +5935,9 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+ BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES);
+ ve->base.mask = VIRTUAL_ENGINES;
+
intel_context_init(&ve->context, &ve->base);
for (n = 0; n < count; n++) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index 5a95a9f0a8e3..b6df75622d3b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -15,7 +15,7 @@ struct intel_engine_cs;
void intel_guc_submission_init_early(struct intel_guc *guc);
int intel_guc_submission_init(struct intel_guc *guc);
-void intel_guc_submission_enable(struct intel_guc *guc);
+int intel_guc_submission_enable(struct intel_guc *guc);
void intel_guc_submission_disable(struct intel_guc *guc);
void intel_guc_submission_fini(struct intel_guc *guc);
int intel_guc_preempt_work_create(struct intel_guc *guc);
@@ -38,6 +38,8 @@ int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
bool interruptible,
long timeout);
+void intel_guc_submission_flush_work(struct intel_guc *guc);
+
static inline bool intel_guc_submission_is_supported(struct intel_guc *guc)
{
return guc->submission_supported;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 410905da8e97..456d3372eef8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -6,9 +6,13 @@
#include <linux/types.h>
#include "gt/intel_gt.h"
+#include "gt/intel_rps.h"
#include "intel_guc_reg.h"
#include "intel_huc.h"
+#include "intel_huc_print.h"
#include "i915_drv.h"
+#include "i915_reg.h"
+#include "pxp/intel_pxp_cmd_interface_43.h"
#include <linux/device/bus.h>
#include <linux/mei_aux.h>
@@ -21,15 +25,25 @@
* capabilities by adding HuC specific commands to batch buffers.
*
* The kernel driver is only responsible for loading the HuC firmware and
- * triggering its security authentication, which is performed by the GuC on
- * older platforms and by the GSC on newer ones. For the GuC to correctly
- * perform the authentication, the HuC binary must be loaded before the GuC one.
+ * triggering its security authentication. This is done differently depending
+ * on the platform:
+ *
+ * - older platforms (from Gen9 to most Gen12s): the load is performed via DMA
+ * and the authentication via GuC
+ * - DG2: load and authentication are both performed via GSC.
+ * - MTL and newer platforms: the load is performed via DMA (same as with
+ * older non-DG2 platforms), while the authentication is done in two steps,
+ * a first auth for clear-media workloads via GuC and a second one for all
+ * workloads via GSC.
+ *
+ * On platforms where the GuC does the authentication, to correctly do so the
+ * HuC binary must be loaded before the GuC one.
* Loading the HuC is optional; however, not using the HuC might negatively
* impact power usage and/or performance of media workloads, depending on the
* use-cases.
* HuC must be reloaded on events that cause the WOPCM to lose its contents
- * (S3/S4, FLR); GuC-authenticated HuC must also be reloaded on GuC/GT reset,
- * while GSC-managed HuC will survive that.
+ * (S3/S4, FLR); on older platforms the HuC must also be reloaded on GuC/GT
+ * reset, while on newer ones it will survive that.
*
* See https://github.com/intel/media-driver for the latest details on HuC
* functionality.
@@ -105,13 +119,11 @@ static enum hrtimer_restart huc_delayed_load_timer_callback(struct hrtimer *hrti
{
struct intel_huc *huc = container_of(hrtimer, struct intel_huc, delayed_load.timer);
- if (!intel_huc_is_authenticated(huc)) {
+ if (!intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GSC)) {
if (huc->delayed_load.status == INTEL_HUC_WAITING_ON_GSC)
- drm_notice(&huc_to_gt(huc)->i915->drm,
- "timed out waiting for MEI GSC init to load HuC\n");
+ huc_notice(huc, "timed out waiting for MEI GSC\n");
else if (huc->delayed_load.status == INTEL_HUC_WAITING_ON_PXP)
- drm_notice(&huc_to_gt(huc)->i915->drm,
- "timed out waiting for MEI PXP init to load HuC\n");
+ huc_notice(huc, "timed out waiting for MEI PXP\n");
else
MISSING_CASE(huc->delayed_load.status);
@@ -125,7 +137,7 @@ static void huc_delayed_load_start(struct intel_huc *huc)
{
ktime_t delay;
- GEM_BUG_ON(intel_huc_is_authenticated(huc));
+ GEM_BUG_ON(intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GSC));
/*
* On resume we don't have to wait for MEI-GSC to be re-probed, but we
@@ -174,8 +186,7 @@ static int gsc_notifier(struct notifier_block *nb, unsigned long action, void *d
case BUS_NOTIFY_DRIVER_NOT_BOUND: /* mei driver fails to be bound */
case BUS_NOTIFY_UNBIND_DRIVER: /* mei driver about to be unbound */
- drm_info(&huc_to_gt(huc)->i915->drm,
- "mei driver not bound, disabling HuC load\n");
+ huc_info(huc, "MEI driver not bound, disabling load\n");
gsc_init_error(huc);
break;
}
@@ -183,7 +194,7 @@ static int gsc_notifier(struct notifier_block *nb, unsigned long action, void *d
return 0;
}
-void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus)
+void intel_huc_register_gsc_notifier(struct intel_huc *huc, const struct bus_type *bus)
{
int ret;
@@ -193,14 +204,13 @@ void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus
huc->delayed_load.nb.notifier_call = gsc_notifier;
ret = bus_register_notifier(bus, &huc->delayed_load.nb);
if (ret) {
- drm_err(&huc_to_gt(huc)->i915->drm,
- "failed to register GSC notifier\n");
+ huc_err(huc, "failed to register GSC notifier %pe\n", ERR_PTR(ret));
huc->delayed_load.nb.notifier_call = NULL;
gsc_init_error(huc);
}
}
-void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus)
+void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, const struct bus_type *bus)
{
if (!huc->delayed_load.nb.notifier_call)
return;
@@ -221,8 +231,8 @@ static void delayed_huc_load_init(struct intel_huc *huc)
sw_fence_dummy_notify);
i915_sw_fence_commit(&huc->delayed_load.fence);
- hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- huc->delayed_load.timer.function = huc_delayed_load_timer_callback;
+ hrtimer_setup(&huc->delayed_load.timer, huc_delayed_load_timer_callback, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
}
static void delayed_huc_load_fini(struct intel_huc *huc)
@@ -235,6 +245,13 @@ static void delayed_huc_load_fini(struct intel_huc *huc)
i915_sw_fence_fini(&huc->delayed_load.fence);
}
+int intel_huc_sanitize(struct intel_huc *huc)
+{
+ delayed_huc_load_complete(huc);
+ intel_uc_fw_sanitize(&huc->fw);
+ return 0;
+}
+
static bool vcs_supported(struct intel_gt *gt)
{
intel_engine_mask_t mask = gt->info.engine_mask;
@@ -251,7 +268,7 @@ static bool vcs_supported(struct intel_gt *gt)
GEM_BUG_ON(!gt_is_root(gt) && !gt->info.engine_mask);
if (gt_is_root(gt))
- mask = RUNTIME_INFO(gt->i915)->platform_engine_mask;
+ mask = INTEL_INFO(gt->i915)->platform_engine_mask;
else
mask = gt->info.engine_mask;
@@ -263,7 +280,7 @@ void intel_huc_init_early(struct intel_huc *huc)
struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
struct intel_gt *gt = huc_to_gt(huc);
- intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC);
+ intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC, true);
/*
* we always init the fence as already completed, even if HuC is not
@@ -280,128 +297,227 @@ void intel_huc_init_early(struct intel_huc *huc)
}
if (GRAPHICS_VER(i915) >= 11) {
- huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO;
- huc->status.mask = HUC_LOAD_SUCCESSFUL;
- huc->status.value = HUC_LOAD_SUCCESSFUL;
+ huc->status[INTEL_HUC_AUTH_BY_GUC].reg = GEN11_HUC_KERNEL_LOAD_INFO;
+ huc->status[INTEL_HUC_AUTH_BY_GUC].mask = HUC_LOAD_SUCCESSFUL;
+ huc->status[INTEL_HUC_AUTH_BY_GUC].value = HUC_LOAD_SUCCESSFUL;
+ } else {
+ huc->status[INTEL_HUC_AUTH_BY_GUC].reg = HUC_STATUS2;
+ huc->status[INTEL_HUC_AUTH_BY_GUC].mask = HUC_FW_VERIFIED;
+ huc->status[INTEL_HUC_AUTH_BY_GUC].value = HUC_FW_VERIFIED;
+ }
+
+ if (IS_DG2(i915)) {
+ huc->status[INTEL_HUC_AUTH_BY_GSC].reg = GEN11_HUC_KERNEL_LOAD_INFO;
+ huc->status[INTEL_HUC_AUTH_BY_GSC].mask = HUC_LOAD_SUCCESSFUL;
+ huc->status[INTEL_HUC_AUTH_BY_GSC].value = HUC_LOAD_SUCCESSFUL;
} else {
- huc->status.reg = HUC_STATUS2;
- huc->status.mask = HUC_FW_VERIFIED;
- huc->status.value = HUC_FW_VERIFIED;
+ huc->status[INTEL_HUC_AUTH_BY_GSC].reg = HECI_FWSTS(MTL_GSC_HECI1_BASE, 5);
+ huc->status[INTEL_HUC_AUTH_BY_GSC].mask = HECI1_FWSTS5_HUC_AUTH_DONE;
+ huc->status[INTEL_HUC_AUTH_BY_GSC].value = HECI1_FWSTS5_HUC_AUTH_DONE;
}
}
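
With the status[] table split per authentication type, the authentication check used throughout this file reduces to a table lookup. A minimal sketch of how intel_huc_is_authenticated() is assumed to consume it:

	bool intel_huc_is_authenticated(struct intel_huc *huc,
					enum intel_huc_authentication_type type)
	{
		struct intel_uncore *uncore = huc_to_gt(huc)->uncore;
		intel_wakeref_t wakeref;
		u32 status = 0;

		with_intel_runtime_pm(uncore->rpm, wakeref)
			status = intel_uncore_read(uncore, huc->status[type].reg);

		return (status & huc->status[type].mask) == huc->status[type].value;
	}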
+void intel_huc_fini_late(struct intel_huc *huc)
+{
+ delayed_huc_load_fini(huc);
+}
+
#define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy")
static int check_huc_loading_mode(struct intel_huc *huc)
{
struct intel_gt *gt = huc_to_gt(huc);
- bool fw_needs_gsc = intel_huc_is_loaded_by_gsc(huc);
- bool hw_uses_gsc = false;
+ bool gsc_enabled = huc->fw.has_gsc_headers;
/*
* The fuse for HuC load via GSC is only valid on platforms that have
* GuC deprivilege.
*/
if (HAS_GUC_DEPRIVILEGE(gt->i915))
- hw_uses_gsc = intel_uncore_read(gt->uncore, GUC_SHIM_CONTROL2) &
- GSC_LOADS_HUC;
-
- if (fw_needs_gsc != hw_uses_gsc) {
- drm_err(&gt->i915->drm,
- "mismatch between HuC FW (%s) and HW (%s) load modes\n",
- HUC_LOAD_MODE_STRING(fw_needs_gsc),
- HUC_LOAD_MODE_STRING(hw_uses_gsc));
+ huc->loaded_via_gsc = intel_uncore_read(gt->uncore, GUC_SHIM_CONTROL2) &
+ GSC_LOADS_HUC;
+
+ if (huc->loaded_via_gsc && !gsc_enabled) {
+ huc_err(huc, "HW requires a GSC-enabled blob, but we found a legacy one\n");
return -ENOEXEC;
}
- /* make sure we can access the GSC via the mei driver if we need it */
- if (!(IS_ENABLED(CONFIG_INTEL_MEI_PXP) && IS_ENABLED(CONFIG_INTEL_MEI_GSC)) &&
- fw_needs_gsc) {
- drm_info(&gt->i915->drm,
- "Can't load HuC due to missing MEI modules\n");
- return -EIO;
+ /*
+ * On newer platforms we have GSC-enabled binaries but we load the HuC
+ * via DMA. To do so we need to find the location of the legacy-style
+ * binary inside the GSC-enabled one, which we do at fetch time. Make
+ * sure that we were able to do so if the fuse says we need to load via
+ * DMA and the binary is GSC-enabled.
+ */
+ if (!huc->loaded_via_gsc && gsc_enabled && !huc->fw.dma_start_offset) {
+ huc_err(huc, "HW in DMA mode, but we have an incompatible GSC-enabled blob\n");
+ return -ENOEXEC;
+ }
+
+ /*
+ * If the HuC is loaded via GSC, we need to be able to access the GSC.
+ * On DG2 this is done via the mei components, while on newer platforms
+	 * it is done via the GSCCS.
+ */
+ if (huc->loaded_via_gsc) {
+ if (IS_DG2(gt->i915)) {
+ if (!IS_ENABLED(CONFIG_INTEL_MEI_PXP) ||
+ !IS_ENABLED(CONFIG_INTEL_MEI_GSC)) {
+ huc_info(huc, "can't load due to missing mei modules\n");
+ return -EIO;
+ }
+ } else {
+ if (!HAS_ENGINE(gt, GSC0)) {
+ huc_info(huc, "can't load due to missing GSCCS\n");
+ return -EIO;
+ }
+ }
}
- drm_dbg(&gt->i915->drm, "GSC loads huc=%s\n", str_yes_no(fw_needs_gsc));
+ huc_dbg(huc, "loaded by GSC = %s\n", str_yes_no(huc->loaded_via_gsc));
return 0;
}
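
Taken together, the checks in check_huc_loading_mode() form a small decision table over the load fuse (loaded_via_gsc), the blob format (has_gsc_headers) and, for DMA loads of GSC-enabled blobs, a non-zero dma_start_offset. A hedged standalone sketch of that table (check_mode() and its parameters are illustrative, not the driver's API):

    #include <errno.h>
    #include <stdbool.h>

    /* Returns 0 if the HW load mode and blob format are compatible. */
    static int check_mode(bool hw_loads_via_gsc, bool blob_has_gsc_headers,
                          unsigned int dma_start_offset)
    {
        if (hw_loads_via_gsc && !blob_has_gsc_headers)
            return -ENOEXEC;  /* GSC load fused on, but legacy blob */
        if (!hw_loads_via_gsc && blob_has_gsc_headers && !dma_start_offset)
            return -ENOEXEC;  /* DMA load, but no embedded legacy image */
        return 0;             /* MEI/GSCCS availability checked separately */
    }
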
int intel_huc_init(struct intel_huc *huc)
{
- struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
+ struct intel_gt *gt = huc_to_gt(huc);
int err;
err = check_huc_loading_mode(huc);
if (err)
goto out;
+ if (HAS_ENGINE(gt, GSC0)) {
+ struct i915_vma *vma;
+
+ vma = intel_guc_allocate_vma(gt_to_guc(gt), PXP43_HUC_AUTH_INOUT_SIZE * 2);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ huc_info(huc, "Failed to allocate heci pkt\n");
+ goto out;
+ }
+
+ huc->heci_pkt = vma;
+ }
+
err = intel_uc_fw_init(&huc->fw);
if (err)
- goto out;
+ goto out_pkt;
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOADABLE);
return 0;
+out_pkt:
+ if (huc->heci_pkt)
+ i915_vma_unpin_and_release(&huc->heci_pkt, 0);
out:
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_INIT_FAIL);
- drm_info(&i915->drm, "HuC init failed with %d\n", err);
+ huc_info(huc, "initialization failed %pe\n", ERR_PTR(err));
return err;
}
void intel_huc_fini(struct intel_huc *huc)
{
- /*
- * the fence is initialized in init_early, so we need to clean it up
- * even if HuC loading is off.
- */
- delayed_huc_load_fini(huc);
+ if (huc->heci_pkt)
+ i915_vma_unpin_and_release(&huc->heci_pkt, 0);
if (intel_uc_fw_is_loadable(&huc->fw))
intel_uc_fw_fini(&huc->fw);
}
-void intel_huc_suspend(struct intel_huc *huc)
+static const char *auth_mode_string(struct intel_huc *huc,
+ enum intel_huc_authentication_type type)
{
- if (!intel_uc_fw_is_loadable(&huc->fw))
- return;
+ bool partial = huc->fw.has_gsc_headers && type == INTEL_HUC_AUTH_BY_GUC;
- /*
- * in the unlikely case that we're suspending before the GSC has
- * completed its loading sequence, just stop waiting. We'll restart
- * on resume.
- */
- delayed_huc_load_complete(huc);
+ return partial ? "clear media" : "all workloads";
}
-int intel_huc_wait_for_auth_complete(struct intel_huc *huc)
+/*
+ * Use a longer timeout for debug builds so that problems can be detected
+ * and analysed, but a shorter timeout for releases so that users don't
+ * wait forever to find out there is a problem. Note that the only reason
+ * an end user should hit the timeout is in case of extreme thermal throttling.
+ * And a system that is that hot during boot is probably dead anyway!
+ */
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+#define HUC_LOAD_RETRY_LIMIT 20
+#else
+#define HUC_LOAD_RETRY_LIMIT 3
+#endif
+
+int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
+ enum intel_huc_authentication_type type)
{
struct intel_gt *gt = huc_to_gt(huc);
- int ret;
+ struct intel_uncore *uncore = gt->uncore;
+ ktime_t before, after, delta;
+ int ret, count;
+ u64 delta_ms;
+ u32 before_freq;
- ret = __intel_wait_for_register(gt->uncore,
- huc->status.reg,
- huc->status.mask,
- huc->status.value,
- 2, 50, NULL);
+ /*
+	 * The KMD requests maximum frequency during driver load; however, thermal
+ * throttling can force the frequency down to minimum (although the board
+ * really should never get that hot in real life!). IFWI issues have been
+ * seen to cause sporadic failures to grant the higher frequency. And at
+ * minimum frequency, the authentication time can be in the seconds range.
+ * Note that there is a limit on how long an individual wait_for() can wait.
+ * So wrap it in a loop.
+ */
+ before_freq = intel_rps_read_actual_frequency(&gt->rps);
+ before = ktime_get();
+ for (count = 0; count < HUC_LOAD_RETRY_LIMIT; count++) {
+ ret = __intel_wait_for_register(gt->uncore,
+ huc->status[type].reg,
+ huc->status[type].mask,
+ huc->status[type].value,
+ 2, 1000, NULL);
+ if (!ret)
+ break;
+
+ huc_dbg(huc, "auth still in progress, count = %d, freq = %dMHz, status = 0x%08X\n",
+ count, intel_rps_read_actual_frequency(&gt->rps),
+ huc->status[type].reg.reg);
+ }
+ after = ktime_get();
+ delta = ktime_sub(after, before);
+ delta_ms = ktime_to_ms(delta);
+
+ if (delta_ms > 50) {
+ huc_warn(huc, "excessive auth time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n",
+ delta_ms, huc->status[type].reg.reg, count, ret);
+ huc_warn(huc, "excessive auth time: [freq = %dMHz -> %dMHz vs %dMHz, perf_limit_reasons = 0x%08X]\n",
+ before_freq, intel_rps_read_actual_frequency(&gt->rps),
+ intel_rps_get_requested_frequency(&gt->rps),
+ intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt)));
+ } else {
+ huc_dbg(huc, "auth took %lldms, freq = %dMHz -> %dMHz vs %dMHz, status = 0x%08X, count = %d, ret = %d\n",
+ delta_ms, before_freq, intel_rps_read_actual_frequency(&gt->rps),
+ intel_rps_get_requested_frequency(&gt->rps),
+ huc->status[type].reg.reg, count, ret);
+ }
/* mark the load process as complete even if the wait failed */
delayed_huc_load_complete(huc);
if (ret) {
- drm_err(&gt->i915->drm, "HuC: Firmware not verified %d\n", ret);
+ huc_err(huc, "firmware not verified for %s: %pe\n",
+ auth_mode_string(huc, type), ERR_PTR(ret));
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return ret;
}
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
- drm_info(&gt->i915->drm, "HuC authenticated\n");
+ huc_info(huc, "authenticated for %s\n", auth_mode_string(huc, type));
return 0;
}
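
The loop exists because one __intel_wait_for_register() call is bounded (2us fast / 1000ms slow in the call above), while authentication on a thermally-throttled part can take seconds. The general shape of wrapping a bounded wait in a retry loop, as a hedged standalone sketch (poll_once() is a stand-in, not a kernel API):

    #include <stdbool.h>

    #define RETRY_LIMIT 3   /* 20 on CONFIG_DRM_I915_DEBUG_GEM builds above */

    static int attempts;
    /* Stand-in for one bounded register wait; succeeds on the 2nd window. */
    static bool poll_once(void) { return ++attempts == 2; }

    static int wait_with_retries(void)
    {
        for (int count = 0; count < RETRY_LIMIT; count++) {
            if (poll_once())
                return 0;   /* condition met inside this bounded window */
            /* the real code logs freq/status here so a slow auth is visible */
        }
        return -1;          /* out of retries: report the failure */
    }
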
/**
* intel_huc_auth() - Authenticate HuC uCode
* @huc: intel_huc structure
+ * @type: authentication type (via GuC or via GSC)
*
* Called after HuC and GuC firmware loading during intel_uc_init_hw().
*
@@ -409,53 +525,76 @@ int intel_huc_wait_for_auth_complete(struct intel_huc *huc)
* passing the offset of the RSA signature to intel_guc_auth_huc(). It then
* waits for up to 50ms for firmware verification ACK.
*/
-int intel_huc_auth(struct intel_huc *huc)
+int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type)
{
struct intel_gt *gt = huc_to_gt(huc);
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
int ret;
if (!intel_uc_fw_is_loaded(&huc->fw))
return -ENOEXEC;
- /* GSC will do the auth */
+ /* GSC will do the auth with the load */
if (intel_huc_is_loaded_by_gsc(huc))
return -ENODEV;
+ if (intel_huc_is_authenticated(huc, type))
+ return -EEXIST;
+
ret = i915_inject_probe_error(gt->i915, -ENXIO);
if (ret)
goto fail;
- GEM_BUG_ON(intel_uc_fw_is_running(&huc->fw));
-
- ret = intel_guc_auth_huc(guc, intel_guc_ggtt_offset(guc, huc->fw.rsa_data));
- if (ret) {
- DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret);
- goto fail;
+ switch (type) {
+ case INTEL_HUC_AUTH_BY_GUC:
+ ret = intel_guc_auth_huc(guc, intel_guc_ggtt_offset(guc, huc->fw.rsa_data));
+ break;
+ case INTEL_HUC_AUTH_BY_GSC:
+ ret = intel_huc_fw_auth_via_gsccs(huc);
+ break;
+ default:
+ MISSING_CASE(type);
+ ret = -EINVAL;
}
+ if (ret)
+ goto fail;
/* Check authentication status, it should be done by now */
- ret = intel_huc_wait_for_auth_complete(huc);
+ ret = intel_huc_wait_for_auth_complete(huc, type);
if (ret)
goto fail;
return 0;
fail:
- i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret);
+ huc_probe_error(huc, "%s authentication failed %pe\n",
+ auth_mode_string(huc, type), ERR_PTR(ret));
return ret;
}
-bool intel_huc_is_authenticated(struct intel_huc *huc)
+bool intel_huc_is_authenticated(struct intel_huc *huc,
+ enum intel_huc_authentication_type type)
{
struct intel_gt *gt = huc_to_gt(huc);
intel_wakeref_t wakeref;
u32 status = 0;
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
- status = intel_uncore_read(gt->uncore, huc->status.reg);
+ status = intel_uncore_read(gt->uncore, huc->status[type].reg);
- return (status & huc->status.mask) == huc->status.value;
+ return (status & huc->status[type].mask) == huc->status[type].value;
+}
+
+static bool huc_is_fully_authenticated(struct intel_huc *huc)
+{
+ struct intel_uc_fw *huc_fw = &huc->fw;
+
+ if (!huc_fw->has_gsc_headers)
+ return intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GUC);
+ else if (intel_huc_is_loaded_by_gsc(huc) || HAS_ENGINE(huc_to_gt(huc), GSC0))
+ return intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GSC);
+ else
+ return false;
}
/**
@@ -470,7 +609,9 @@ bool intel_huc_is_authenticated(struct intel_huc *huc)
*/
int intel_huc_check_status(struct intel_huc *huc)
{
- switch (__intel_uc_fw_status(&huc->fw)) {
+ struct intel_uc_fw *huc_fw = &huc->fw;
+
+ switch (__intel_uc_fw_status(huc_fw)) {
case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
return -ENODEV;
case INTEL_UC_FIRMWARE_DISABLED:
@@ -487,7 +628,17 @@ int intel_huc_check_status(struct intel_huc *huc)
break;
}
- return intel_huc_is_authenticated(huc);
+ /*
+ * GSC-enabled binaries loaded via DMA are first partially
+ * authenticated by GuC and then fully authenticated by GSC
+ */
+ if (huc_is_fully_authenticated(huc))
+ return 1; /* full auth */
+ else if (huc_fw->has_gsc_headers && !intel_huc_is_loaded_by_gsc(huc) &&
+ intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GUC))
+ return 2; /* clear media only */
+ else
+ return 0;
}
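
intel_huc_check_status() therefore returns a tri-state on success: 0 (not authenticated), 1 (fully authenticated, all media workloads) or 2 (GuC-authenticated only, i.e. clear-media workloads), with a negative errno for firmware states that preclude authentication. A sketch of how a caller might decode it (the caller is hypothetical; the mapping comes from the code above):

    static const char *huc_status_str(int status)
    {
        switch (status) {
        case 1:  return "authenticated for all workloads";
        case 2:  return "authenticated for clear media only";
        case 0:  return "not authenticated";
        default: return "error";    /* negative errno from the fw state */
        }
    }
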
static bool huc_has_delayed_load(struct intel_huc *huc)
@@ -501,7 +652,10 @@ void intel_huc_update_auth_status(struct intel_huc *huc)
if (!intel_uc_fw_is_loadable(&huc->fw))
return;
- if (intel_huc_is_authenticated(huc))
+ if (!huc->fw.has_gsc_headers)
+ return;
+
+ if (huc_is_fully_authenticated(huc))
intel_uc_fw_change_status(&huc->fw,
INTEL_UC_FIRMWARE_RUNNING);
else if (huc_has_delayed_load(huc))
@@ -534,5 +688,5 @@ void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p)
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
drm_printf(p, "HuC status: 0x%08x\n",
- intel_uncore_read(gt->uncore, huc->status.reg));
+ intel_uncore_read(gt->uncore, huc->status[INTEL_HUC_AUTH_BY_GUC].reg));
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index 52db03620c60..921ad4b1687f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -15,6 +15,7 @@
#include <linux/hrtimer.h>
struct bus_type;
+struct i915_vma;
enum intel_huc_delayed_load_status {
INTEL_HUC_WAITING_ON_GSC = 0,
@@ -22,6 +23,12 @@ enum intel_huc_delayed_load_status {
INTEL_HUC_DELAYED_LOAD_ERROR,
};
+enum intel_huc_authentication_type {
+ INTEL_HUC_AUTH_BY_GUC = 0,
+ INTEL_HUC_AUTH_BY_GSC,
+ INTEL_HUC_AUTH_MAX_MODES
+};
+
struct intel_huc {
/* Generic uC firmware management */
struct intel_uc_fw fw;
@@ -31,7 +38,7 @@ struct intel_huc {
i915_reg_t reg;
u32 mask;
u32 value;
- } status;
+ } status[INTEL_HUC_AUTH_MAX_MODES];
struct {
struct i915_sw_fence fence;
@@ -39,26 +46,28 @@ struct intel_huc {
struct notifier_block nb;
enum intel_huc_delayed_load_status status;
} delayed_load;
+
+ /* for load via GSCCS */
+ struct i915_vma *heci_pkt;
+
+ bool loaded_via_gsc;
};
+int intel_huc_sanitize(struct intel_huc *huc);
void intel_huc_init_early(struct intel_huc *huc);
+void intel_huc_fini_late(struct intel_huc *huc);
int intel_huc_init(struct intel_huc *huc);
void intel_huc_fini(struct intel_huc *huc);
-void intel_huc_suspend(struct intel_huc *huc);
-int intel_huc_auth(struct intel_huc *huc);
-int intel_huc_wait_for_auth_complete(struct intel_huc *huc);
+int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type);
+int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
+ enum intel_huc_authentication_type type);
+bool intel_huc_is_authenticated(struct intel_huc *huc,
+ enum intel_huc_authentication_type type);
int intel_huc_check_status(struct intel_huc *huc);
void intel_huc_update_auth_status(struct intel_huc *huc);
-bool intel_huc_is_authenticated(struct intel_huc *huc);
-void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
-void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
-
-static inline int intel_huc_sanitize(struct intel_huc *huc)
-{
- intel_uc_fw_sanitize(&huc->fw);
- return 0;
-}
+void intel_huc_register_gsc_notifier(struct intel_huc *huc, const struct bus_type *bus);
+void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, const struct bus_type *bus);
static inline bool intel_huc_is_supported(struct intel_huc *huc)
{
@@ -78,13 +87,13 @@ static inline bool intel_huc_is_used(struct intel_huc *huc)
static inline bool intel_huc_is_loaded_by_gsc(const struct intel_huc *huc)
{
- return huc->fw.loaded_via_gsc;
+ return huc->loaded_via_gsc;
}
static inline bool intel_huc_wait_required(struct intel_huc *huc)
{
return intel_huc_is_used(huc) && intel_huc_is_loaded_by_gsc(huc) &&
- !intel_huc_is_authenticated(huc);
+ !intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GSC);
}
void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index 534b0aa43316..b648238cc675 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -5,10 +5,231 @@
#include "gt/intel_gsc.h"
#include "gt/intel_gt.h"
+#include "intel_gsc_binary_headers.h"
+#include "intel_gsc_uc_heci_cmd_submit.h"
#include "intel_huc.h"
#include "intel_huc_fw.h"
+#include "intel_huc_print.h"
#include "i915_drv.h"
#include "pxp/intel_pxp_huc.h"
+#include "pxp/intel_pxp_cmd_interface_43.h"
+
+struct mtl_huc_auth_msg_in {
+ struct intel_gsc_mtl_header header;
+ struct pxp43_new_huc_auth_in huc_in;
+} __packed;
+
+struct mtl_huc_auth_msg_out {
+ struct intel_gsc_mtl_header header;
+ struct pxp43_huc_auth_out huc_out;
+} __packed;
+
+int intel_huc_fw_auth_via_gsccs(struct intel_huc *huc)
+{
+ struct intel_gt *gt = huc_to_gt(huc);
+ struct drm_i915_gem_object *obj;
+ struct mtl_huc_auth_msg_in *msg_in;
+ struct mtl_huc_auth_msg_out *msg_out;
+ void *pkt_vaddr;
+ u64 pkt_offset;
+ int retry = 5;
+ int err = 0;
+
+ if (!huc->heci_pkt)
+ return -ENODEV;
+
+ obj = huc->heci_pkt->obj;
+ pkt_offset = i915_ggtt_offset(huc->heci_pkt);
+
+ pkt_vaddr = i915_gem_object_pin_map_unlocked(obj,
+ intel_gt_coherent_map_type(gt, obj, true));
+ if (IS_ERR(pkt_vaddr))
+ return PTR_ERR(pkt_vaddr);
+
+ msg_in = pkt_vaddr;
+ msg_out = pkt_vaddr + PXP43_HUC_AUTH_INOUT_SIZE;
+
+ intel_gsc_uc_heci_cmd_emit_mtl_header(&msg_in->header,
+ HECI_MEADDRESS_PXP,
+ sizeof(*msg_in), 0);
+
+ msg_in->huc_in.header.api_version = PXP_APIVER(4, 3);
+ msg_in->huc_in.header.command_id = PXP43_CMDID_NEW_HUC_AUTH;
+ msg_in->huc_in.header.status = 0;
+ msg_in->huc_in.header.buffer_len = sizeof(msg_in->huc_in) -
+ sizeof(msg_in->huc_in.header);
+ msg_in->huc_in.huc_base_address = huc->fw.vma_res.start;
+ msg_in->huc_in.huc_size = huc->fw.obj->base.size;
+
+ do {
+ err = intel_gsc_uc_heci_cmd_submit_packet(&gt->uc.gsc,
+ pkt_offset, sizeof(*msg_in),
+ pkt_offset + PXP43_HUC_AUTH_INOUT_SIZE,
+ PXP43_HUC_AUTH_INOUT_SIZE);
+ if (err) {
+ huc_err(huc, "failed to submit GSC request to auth: %d\n", err);
+ goto out_unpin;
+ }
+
+ if (msg_out->header.flags & GSC_OUTFLAG_MSG_PENDING) {
+ msg_in->header.gsc_message_handle = msg_out->header.gsc_message_handle;
+ err = -EBUSY;
+ msleep(50);
+ }
+ } while (--retry && err == -EBUSY);
+
+ if (err)
+ goto out_unpin;
+
+ if (msg_out->header.message_size != sizeof(*msg_out)) {
+ huc_err(huc, "invalid GSC reply length %u [expected %zu]\n",
+ msg_out->header.message_size, sizeof(*msg_out));
+ err = -EPROTO;
+ goto out_unpin;
+ }
+
+ /*
+ * The GSC will return PXP_STATUS_OP_NOT_PERMITTED if the HuC is already
+ * loaded. If the same error is ever returned with HuC not loaded we'll
+ * still catch it when we check the authentication bit later.
+ */
+ if (msg_out->huc_out.header.status != PXP_STATUS_SUCCESS &&
+ msg_out->huc_out.header.status != PXP_STATUS_OP_NOT_PERMITTED) {
+ huc_err(huc, "auth failed with GSC error = 0x%x\n",
+ msg_out->huc_out.header.status);
+ err = -EIO;
+ goto out_unpin;
+ }
+
+out_unpin:
+ i915_gem_object_unpin_map(obj);
+ return err;
+}
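
The GSC_OUTFLAG_MSG_PENDING handling is a resume handshake: when the reply isn't ready, the GSC returns a message handle that must be copied into the next submission until the real reply arrives. A hedged standalone model of that pattern (all names hypothetical):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdint.h>

    struct reply { bool pending; uint64_t handle; };

    static int calls;
    /* Stand-in for one HECI submission; reply is "pending" the first time. */
    static int submit(uint64_t handle, struct reply *r)
    {
        (void)handle;
        r->pending = ++calls < 2;
        r->handle = 42;
        return 0;
    }

    static int submit_with_pending_retry(void)
    {
        uint64_t handle = 0;
        struct reply r;
        int retry = 5, err;

        do {
            err = submit(handle, &r);
            if (err)
                return err;        /* transport failure: give up */
            if (r.pending) {
                handle = r.handle; /* resume token for the resubmit */
                err = -EBUSY;      /* the real code also sleeps 50ms */
            } else {
                err = 0;
            }
        } while (--retry && err == -EBUSY);

        return err;
    }
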
+
+static bool css_valid(const void *data, size_t size)
+{
+ const struct uc_css_header *css = data;
+
+ if (unlikely(size < sizeof(struct uc_css_header)))
+ return false;
+
+ if (css->module_type != 0x6)
+ return false;
+
+ if (css->module_vendor != PCI_VENDOR_ID_INTEL)
+ return false;
+
+ return true;
+}
+
+static inline u32 entry_offset(const struct intel_gsc_cpd_entry *entry)
+{
+ return entry->offset & INTEL_GSC_CPD_ENTRY_OFFSET_MASK;
+}
+
+int intel_huc_fw_get_binary_info(struct intel_uc_fw *huc_fw, const void *data, size_t size)
+{
+ struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw);
+ const struct intel_gsc_cpd_header_v2 *header = data;
+ const struct intel_gsc_cpd_entry *entry;
+ size_t min_size = sizeof(*header);
+ int i;
+
+ if (!huc_fw->has_gsc_headers) {
+ huc_err(huc, "Invalid FW type for GSC header parsing!\n");
+ return -EINVAL;
+ }
+
+ if (size < sizeof(*header)) {
+ huc_err(huc, "FW too small! %zu < %zu\n", size, min_size);
+ return -ENODATA;
+ }
+
+ /*
+ * The GSC-enabled HuC binary starts with a directory header, followed
+ * by a series of entries. Each entry is identified by a name and
+ * points to a specific section of the binary containing the relevant
+ * data. The entries we're interested in are:
+ * - "HUCP.man": points to the GSC manifest header for the HuC, which
+ * contains the version info.
+ * - "huc_fw": points to the legacy-style binary that can be used for
+ *              load via DMA. This entry only contains a valid CSS
+ * on binaries for platforms that support 2-step HuC load
+ *              via DMA and auth via GSC (like MTL).
+ *
+ * --------------------------------------------------
+ * [ intel_gsc_cpd_header_v2 ]
+ * --------------------------------------------------
+ * [ intel_gsc_cpd_entry[] ]
+ * [ entry1 ]
+ * [ ... ]
+ * [ entryX ]
+ * [ "HUCP.man" ]
+ * [ ... ]
+ * [ offset >----------------------------]------o
+ * [ ... ] |
+ * [ entryY ] |
+ * [ "huc_fw" ] |
+ * [ ... ] |
+ * [ offset >----------------------------]----------o
+ * -------------------------------------------------- | |
+ * | |
+ * -------------------------------------------------- | |
+ * [ intel_gsc_manifest_header ]<-----o |
+ * [ ... ] |
+ * [ intel_gsc_version fw_version ] |
+ * [ ... ] |
+ * -------------------------------------------------- |
+ * |
+ * -------------------------------------------------- |
+ * [ data[] ]<---------o
+ * [ ... ]
+ * [ ... ]
+ * --------------------------------------------------
+ */
+
+ if (header->header_marker != INTEL_GSC_CPD_HEADER_MARKER) {
+ huc_err(huc, "invalid marker for CPD header: 0x%08x!\n",
+ header->header_marker);
+ return -EINVAL;
+ }
+
+ /* we only have binaries with header v2 and entry v1 for now */
+ if (header->header_version != 2 || header->entry_version != 1) {
+ huc_err(huc, "invalid CPD header/entry version %u:%u!\n",
+ header->header_version, header->entry_version);
+ return -EINVAL;
+ }
+
+ if (header->header_length < sizeof(struct intel_gsc_cpd_header_v2)) {
+ huc_err(huc, "invalid CPD header length %u!\n",
+ header->header_length);
+ return -EINVAL;
+ }
+
+ min_size = header->header_length + sizeof(*entry) * header->num_of_entries;
+ if (size < min_size) {
+ huc_err(huc, "FW too small! %zu < %zu\n", size, min_size);
+ return -ENODATA;
+ }
+
+ entry = data + header->header_length;
+
+ for (i = 0; i < header->num_of_entries; i++, entry++) {
+ if (strcmp(entry->name, "HUCP.man") == 0)
+ intel_uc_fw_version_from_gsc_manifest(&huc_fw->file_selected.ver,
+ data + entry_offset(entry));
+
+ if (strcmp(entry->name, "huc_fw") == 0) {
+ u32 offset = entry_offset(entry);
+
+ if (offset < size && css_valid(data + offset, size - offset))
+ huc_fw->dma_start_offset = offset;
+ }
+ }
+
+ return 0;
+}
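
The parser above dereferences nothing it hasn't bounds-checked first: the marker, header/entry versions, header_length and the full entry array must fit inside the blob before the walk starts. A minimal standalone model of such a guarded directory walk (structures simplified, not the real CPD layout):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    struct cpd_hdr { uint32_t marker, num_entries, hdr_len; };
    struct cpd_ent { char name[12]; uint32_t offset; };

    /* Returns the payload offset of the named entry, or 0 if absent. */
    static uint32_t cpd_find(const void *data, size_t size, const char *name)
    {
        const struct cpd_hdr *h = data;
        const struct cpd_ent *e;

        if (size < sizeof(*h) ||
            size < h->hdr_len + (size_t)h->num_entries * sizeof(*e))
            return 0;   /* entry table doesn't fit: reject the blob */

        e = (const void *)((const char *)data + h->hdr_len);
        for (uint32_t i = 0; i < h->num_entries; i++, e++)
            if (!strncmp(e->name, name, sizeof(e->name)))
                return e->offset;
        return 0;
    }
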
int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
{
@@ -25,7 +246,7 @@ int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
* component gets re-bound and this function called again. If so, just
* mark the HuC as loaded.
*/
- if (intel_huc_is_authenticated(huc)) {
+ if (intel_huc_is_authenticated(huc, INTEL_HUC_AUTH_BY_GSC)) {
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
return 0;
}
@@ -38,7 +259,7 @@ int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_TRANSFERRED);
- return intel_huc_wait_for_auth_complete(huc);
+ return intel_huc_wait_for_auth_complete(huc, INTEL_HUC_AUTH_BY_GSC);
}
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
index db42e238b45f..307ab45e6b09 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
@@ -7,8 +7,12 @@
#define _INTEL_HUC_FW_H_
struct intel_huc;
+struct intel_uc_fw;
+
+#include <linux/types.h>
int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc);
+int intel_huc_fw_auth_via_gsccs(struct intel_huc *huc);
int intel_huc_fw_upload(struct intel_huc *huc);
-
+int intel_huc_fw_get_binary_info(struct intel_uc_fw *huc_fw, const void *data, size_t size);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_print.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_print.h
new file mode 100644
index 000000000000..915d310ee1df
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_print.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_HUC_PRINT__
+#define __INTEL_HUC_PRINT__
+
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_print.h"
+
+#define huc_printk(_huc, _level, _fmt, ...) \
+ gt_##_level(huc_to_gt(_huc), "HuC: " _fmt, ##__VA_ARGS__)
+#define huc_err(_huc, _fmt, ...) huc_printk((_huc), err, _fmt, ##__VA_ARGS__)
+#define huc_warn(_huc, _fmt, ...) huc_printk((_huc), warn, _fmt, ##__VA_ARGS__)
+#define huc_notice(_huc, _fmt, ...) huc_printk((_huc), notice, _fmt, ##__VA_ARGS__)
+#define huc_info(_huc, _fmt, ...) huc_printk((_huc), info, _fmt, ##__VA_ARGS__)
+#define huc_dbg(_huc, _fmt, ...) huc_printk((_huc), dbg, _fmt, ##__VA_ARGS__)
+#define huc_probe_error(_huc, _fmt, ...) huc_printk((_huc), probe_error, _fmt, ##__VA_ARGS__)
+
+#endif /* __INTEL_HUC_PRINT__ */
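
These wrappers only prepend a "HuC: " prefix and forward to the matching gt-level printer, so a call such as the first line below expands roughly to the second:

    huc_warn(huc, "excessive auth time: %lldms!\n", delta_ms);
    /* ~> gt_warn(huc_to_gt(huc), "HuC: excessive auth time: %lldms!\n", delta_ms); */
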
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index de7f987cf611..4a3493e8d433 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -18,6 +18,7 @@
#include "intel_uc.h"
#include "i915_drv.h"
+#include "i915_hwmon.h"
static const struct intel_uc_ops uc_ops_off;
static const struct intel_uc_ops uc_ops_on;
@@ -42,17 +43,13 @@ static void uc_expand_default_options(struct intel_uc *uc)
}
/* Intermediate platforms are HuC authentication only */
- if (IS_ALDERLAKE_S(i915) && !IS_ADLS_RPLS(i915)) {
+ if (IS_ALDERLAKE_S(i915) && !IS_RAPTORLAKE_S(i915)) {
i915->params.enable_guc = ENABLE_GUC_LOAD_HUC;
return;
}
/* Default: enable HuC authentication and GuC submission */
i915->params.enable_guc = ENABLE_GUC_LOAD_HUC | ENABLE_GUC_SUBMISSION;
-
- /* XEHPSDV and PVC do not use HuC */
- if (IS_XEHPSDV(i915) || IS_PONTEVECCHIO(i915))
- i915->params.enable_guc &= ~ENABLE_GUC_LOAD_HUC;
}
/* Reset GuC providing us with fresh state for both GuC and HuC.
@@ -83,15 +80,15 @@ static int __intel_uc_reset_hw(struct intel_uc *uc)
static void __confirm_options(struct intel_uc *uc)
{
- struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
+ struct intel_gt *gt = uc_to_gt(uc);
+ struct drm_i915_private *i915 = gt->i915;
- drm_dbg(&i915->drm,
- "enable_guc=%d (guc:%s submission:%s huc:%s slpc:%s)\n",
- i915->params.enable_guc,
- str_yes_no(intel_uc_wants_guc(uc)),
- str_yes_no(intel_uc_wants_guc_submission(uc)),
- str_yes_no(intel_uc_wants_huc(uc)),
- str_yes_no(intel_uc_wants_guc_slpc(uc)));
+ gt_dbg(gt, "enable_guc=%d (guc:%s submission:%s huc:%s slpc:%s)\n",
+ i915->params.enable_guc,
+ str_yes_no(intel_uc_wants_guc(uc)),
+ str_yes_no(intel_uc_wants_guc_submission(uc)),
+ str_yes_no(intel_uc_wants_huc(uc)),
+ str_yes_no(intel_uc_wants_guc_slpc(uc)));
if (i915->params.enable_guc == 0) {
GEM_BUG_ON(intel_uc_wants_guc(uc));
@@ -102,26 +99,17 @@ static void __confirm_options(struct intel_uc *uc)
}
if (!intel_uc_supports_guc(uc))
- drm_info(&i915->drm,
- "Incompatible option enable_guc=%d - %s\n",
- i915->params.enable_guc, "GuC is not supported!");
-
- if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC &&
- !intel_uc_supports_huc(uc))
- drm_info(&i915->drm,
- "Incompatible option enable_guc=%d - %s\n",
- i915->params.enable_guc, "HuC is not supported!");
+ gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
+ i915->params.enable_guc, "GuC is not supported!");
if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION &&
!intel_uc_supports_guc_submission(uc))
- drm_info(&i915->drm,
- "Incompatible option enable_guc=%d - %s\n",
- i915->params.enable_guc, "GuC submission is N/A");
+ gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
+ i915->params.enable_guc, "GuC submission is N/A");
if (i915->params.enable_guc & ~ENABLE_GUC_MASK)
- drm_info(&i915->drm,
- "Incompatible option enable_guc=%d - %s\n",
- i915->params.enable_guc, "undocumented flag");
+ gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
+ i915->params.enable_guc, "undocumented flag");
}
void intel_uc_init_early(struct intel_uc *uc)
@@ -143,10 +131,12 @@ void intel_uc_init_early(struct intel_uc *uc)
void intel_uc_init_late(struct intel_uc *uc)
{
intel_guc_init_late(&uc->guc);
+ intel_gsc_uc_load_start(&uc->gsc);
}
void intel_uc_driver_late_release(struct intel_uc *uc)
{
+ intel_huc_fini_late(&uc->huc);
}
/**
@@ -434,6 +424,9 @@ static bool uc_is_wopcm_locked(struct intel_uc *uc)
static int __uc_check_hw(struct intel_uc *uc)
{
+ if (uc->fw_table_invalid)
+ return -EIO;
+
if (!intel_uc_supports_guc(uc))
return 0;
@@ -464,6 +457,7 @@ static int __uc_init_hw(struct intel_uc *uc)
struct intel_guc *guc = &uc->guc;
struct intel_huc *huc = &uc->huc;
int ret, attempts;
+ bool pl1en = false;
GEM_BUG_ON(!intel_uc_supports_guc(uc));
GEM_BUG_ON(!intel_uc_wants_guc(uc));
@@ -494,6 +488,9 @@ static int __uc_init_hw(struct intel_uc *uc)
else
attempts = 1;
+ /* Disable a potentially low PL1 power limit to allow freq to be raised */
+ i915_hwmon_power_max_disable(gt->i915, &pl1en);
+
intel_rps_raise_unslice(&uc_to_gt(uc)->rps);
while (attempts--) {
@@ -503,7 +500,7 @@ static int __uc_init_hw(struct intel_uc *uc)
*/
ret = __uc_sanitize(uc);
if (ret)
- goto err_out;
+ goto err_rps;
intel_huc_fw_upload(huc);
intel_guc_ads_reset(guc);
@@ -516,7 +513,7 @@ static int __uc_init_hw(struct intel_uc *uc)
ERR_PTR(ret), attempts);
}
- /* Did we succeded or run out of retries? */
+ /* Did we succeed or run out of retries? */
if (ret)
goto err_log_capture;
@@ -533,10 +530,13 @@ static int __uc_init_hw(struct intel_uc *uc)
if (intel_huc_is_loaded_by_gsc(huc))
intel_huc_update_auth_status(huc);
else
- intel_huc_auth(huc);
+ intel_huc_auth(huc, INTEL_HUC_AUTH_BY_GUC);
- if (intel_uc_uses_guc_submission(uc))
- intel_guc_submission_enable(guc);
+ if (intel_uc_uses_guc_submission(uc)) {
+ ret = intel_guc_submission_enable(guc);
+ if (ret)
+ goto err_log_capture;
+ }
if (intel_uc_uses_guc_slpc(uc)) {
ret = intel_guc_slpc_enable(&guc->slpc);
@@ -547,12 +547,10 @@ static int __uc_init_hw(struct intel_uc *uc)
intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
}
- intel_gsc_uc_load_start(&uc->gsc);
+ i915_hwmon_power_max_restore(gt->i915, pl1en);
- gt_info(gt, "GuC submission %s\n",
- str_enabled_disabled(intel_uc_uses_guc_submission(uc)));
- gt_info(gt, "GuC SLPC %s\n",
- str_enabled_disabled(intel_uc_uses_guc_slpc(uc)));
+ guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc)));
+ guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc)));
return 0;
@@ -563,10 +561,12 @@ err_submission:
intel_guc_submission_disable(guc);
err_log_capture:
__uc_capture_load_err_log(uc);
-err_out:
+err_rps:
/* Return GT back to RPn */
intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
+ i915_hwmon_power_max_restore(gt->i915, pl1en);
+err_out:
__uc_sanitize(uc);
if (!ret) {
@@ -634,10 +634,14 @@ void intel_uc_reset_finish(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
+ /*
+ * NB: The wedge code path results in prepare -> prepare -> finish -> finish.
+ * So this function is sometimes called with the in-progress flag not set.
+ */
uc->reset_in_progress = false;
/* Firmware expected to be running when this function is called */
- if (intel_guc_is_fw_running(guc) && intel_uc_uses_guc_submission(uc))
+ if (intel_uc_uses_guc_submission(uc))
intel_guc_submission_reset_finish(guc);
}
@@ -678,13 +682,17 @@ void intel_uc_suspend(struct intel_uc *uc)
int err;
/* flush the GSC worker */
- intel_gsc_uc_suspend(&uc->gsc);
+ intel_gsc_uc_flush_work(&uc->gsc);
+
+ wake_up_all_tlb_invalidate(guc);
if (!intel_guc_is_ready(guc)) {
guc->interrupts.enabled = false;
return;
}
+ intel_guc_submission_flush_work(guc);
+
with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) {
err = intel_guc_suspend(guc);
if (err)
@@ -692,6 +700,12 @@ void intel_uc_suspend(struct intel_uc *uc)
}
}
+static void __uc_resume_mappings(struct intel_uc *uc)
+{
+ intel_uc_fw_resume_mapping(&uc->guc.fw);
+ intel_uc_fw_resume_mapping(&uc->huc.fw);
+}
+
static int __uc_resume(struct intel_uc *uc, bool enable_communication)
{
struct intel_guc *guc = &uc->guc;
@@ -720,6 +734,13 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication)
return err;
}
+ intel_gsc_uc_resume(&uc->gsc);
+
+ if (intel_guc_tlb_invalidation_is_available(guc)) {
+ intel_guc_invalidate_tlb_engines(guc);
+ intel_guc_invalidate_tlb_guc(guc);
+ }
+
return 0;
}
@@ -757,4 +778,6 @@ static const struct intel_uc_ops uc_ops_on = {
.init_hw = __uc_init_hw,
.fini_hw = __uc_fini_hw,
+
+ .resume_mappings = __uc_resume_mappings,
};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index 5d0f1bcc381e..014bb7d83689 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -24,6 +24,7 @@ struct intel_uc_ops {
void (*fini)(struct intel_uc *uc);
int (*init_hw)(struct intel_uc *uc);
void (*fini_hw)(struct intel_uc *uc);
+ void (*resume_mappings)(struct intel_uc *uc);
};
struct intel_uc {
@@ -36,6 +37,7 @@ struct intel_uc {
struct drm_i915_gem_object *load_err_log;
bool reset_in_progress;
+ bool fw_table_invalid;
};
void intel_uc_init_early(struct intel_uc *uc);
@@ -113,6 +115,7 @@ intel_uc_ops_function(init, init, int, 0);
intel_uc_ops_function(fini, fini, void, );
intel_uc_ops_function(init_hw, init_hw, int, 0);
intel_uc_ops_function(fini_hw, fini_hw, void, );
+intel_uc_ops_function(resume_mappings, resume_mappings, void, );
#undef intel_uc_ops_function
#endif
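
Judging by the existing intel_uc_ops_function() uses above, the new resume_mappings entry should expand to a guarded wrapper along these lines (a sketch of the presumed expansion, not copied from the header):

    static inline void intel_uc_resume_mappings(struct intel_uc *uc)
    {
        if (uc->ops->resume_mappings)
            uc->ops->resume_mappings(uc);
    }
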
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
index 284d6fbc2d08..6d541c866edb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
@@ -10,6 +10,7 @@
#include "gt/intel_gt_debugfs.h"
#include "intel_guc_debugfs.h"
+#include "intel_gsc_uc_debugfs.h"
#include "intel_huc_debugfs.h"
#include "intel_uc.h"
#include "intel_uc_debugfs.h"
@@ -54,8 +55,11 @@ void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root)
if (IS_ERR(root))
return;
+ uc->guc.dbgfs_node = root;
+
intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), uc);
+ intel_gsc_uc_debugfs_register(&uc->gsc, root);
intel_guc_debugfs_register(&uc->guc, root);
intel_huc_debugfs_register(&uc->huc, root);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 65672ff82605..e848a04a80dc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -11,11 +11,21 @@
#include <drm/drm_print.h>
#include "gem/i915_gem_lmem.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_print.h"
+#include "intel_gsc_binary_headers.h"
+#include "intel_gsc_fw.h"
#include "intel_uc_fw.h"
#include "intel_uc_fw_abi.h"
#include "i915_drv.h"
#include "i915_reg.h"
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+#define UNEXPECTED gt_probe_error
+#else
+#define UNEXPECTED gt_notice
+#endif
+
static inline struct intel_gt *
____uc_fw_to_gt(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type)
{
@@ -44,11 +54,10 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
enum intel_uc_fw_status status)
{
uc_fw->__status = status;
- drm_dbg(&__uc_fw_to_gt(uc_fw)->i915->drm,
- "%s firmware -> %s\n",
- intel_uc_fw_type_repr(uc_fw->type),
- status == INTEL_UC_FIRMWARE_SELECTED ?
- uc_fw->file_selected.path : intel_uc_fw_status_repr(status));
+ gt_dbg(__uc_fw_to_gt(uc_fw), "%s firmware -> %s\n",
+ intel_uc_fw_type_repr(uc_fw->type),
+ status == INTEL_UC_FIRMWARE_SELECTED ?
+ uc_fw->file_selected.path : intel_uc_fw_status_repr(status));
}
#endif
@@ -79,14 +88,15 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* security fixes, etc. to be enabled.
*/
#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_maj, guc_mmp) \
- fw_def(DG2, 0, guc_maj(dg2, 70, 5)) \
- fw_def(ALDERLAKE_P, 0, guc_maj(adlp, 70, 5)) \
+ fw_def(METEORLAKE, 0, guc_maj(mtl, 70, 12, 1)) \
+ fw_def(DG2, 0, guc_maj(dg2, 70, 12, 1)) \
+ fw_def(ALDERLAKE_P, 0, guc_maj(adlp, 70, 12, 1)) \
fw_def(ALDERLAKE_P, 0, guc_mmp(adlp, 70, 1, 1)) \
fw_def(ALDERLAKE_P, 0, guc_mmp(adlp, 69, 0, 3)) \
- fw_def(ALDERLAKE_S, 0, guc_maj(tgl, 70, 5)) \
+ fw_def(ALDERLAKE_S, 0, guc_maj(tgl, 70, 12, 1)) \
fw_def(ALDERLAKE_S, 0, guc_mmp(tgl, 70, 1, 1)) \
fw_def(ALDERLAKE_S, 0, guc_mmp(tgl, 69, 0, 3)) \
- fw_def(DG1, 0, guc_maj(dg1, 70, 5)) \
+ fw_def(DG1, 0, guc_maj(dg1, 70, 5, 1)) \
fw_def(ROCKETLAKE, 0, guc_mmp(tgl, 70, 1, 1)) \
fw_def(TIGERLAKE, 0, guc_mmp(tgl, 70, 1, 1)) \
fw_def(JASPERLAKE, 0, guc_mmp(ehl, 70, 1, 1)) \
@@ -101,6 +111,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
fw_def(SKYLAKE, 0, guc_mmp(skl, 70, 1, 1))
#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp, huc_gsc) \
+ fw_def(METEORLAKE, 0, huc_gsc(mtl)) \
fw_def(DG2, 0, huc_gsc(dg2)) \
fw_def(ALDERLAKE_P, 0, huc_raw(tgl)) \
fw_def(ALDERLAKE_P, 0, huc_mmp(tgl, 7, 9, 3)) \
@@ -121,6 +132,17 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
fw_def(SKYLAKE, 0, huc_mmp(skl, 2, 0, 0))
/*
+ * The GSC FW has multiple versions (see intel_gsc_uc.h for details); since what
+ * we care about is the interface, we use the compatibility version in the
+ * binary names.
+ * Same as with the GuC, a major version bump indicates a
+ * backward-incompatible change, while a minor version bump indicates a
+ * backward-compatible one, so we use only the former in the file name.
+ */
+#define INTEL_GSC_FIRMWARE_DEFS(fw_def, gsc_def) \
+ fw_def(METEORLAKE, 0, gsc_def(mtl, 1, 0))
+
+/*
* Set of macros for producing a list of filenames from the above table.
*/
#define __MAKE_UC_FW_PATH_BLANK(prefix_, name_) \
@@ -140,7 +162,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
__stringify(patch_) ".bin"
/* Minor for internal driver use, not part of file name */
-#define MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_) \
+#define MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_, patch_) \
__MAKE_UC_FW_PATH_MAJOR(prefix_, "guc", major_)
#define MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \
@@ -155,6 +177,9 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
#define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \
__MAKE_UC_FW_PATH_MMP(prefix_, "huc", major_, minor_, patch_)
+#define MAKE_GSC_FW_PATH(prefix_, major_, minor_) \
+ __MAKE_UC_FW_PATH_MAJOR(prefix_, "gsc", major_)
+
/*
* All blobs need to be declared via MODULE_FIRMWARE().
* This first expansion of the table macros is solely to provide
@@ -165,6 +190,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP)
INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP, MAKE_HUC_FW_PATH_GSC)
+INTEL_GSC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GSC_FW_PATH)
/*
* The next expansion of the table macros (in __uc_fw_auto_select below) provides
@@ -179,7 +205,7 @@ struct __packed uc_fw_blob {
u8 major;
u8 minor;
u8 patch;
- bool loaded_via_gsc;
+ bool has_gsc_headers;
};
#define UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
@@ -190,15 +216,15 @@ struct __packed uc_fw_blob {
#define UC_FW_BLOB_NEW(major_, minor_, patch_, gsc_, path_) \
{ UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
- .legacy = false, .loaded_via_gsc = gsc_ }
+ .legacy = false, .has_gsc_headers = gsc_ }
#define UC_FW_BLOB_OLD(major_, minor_, patch_, path_) \
{ UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
.legacy = true }
-#define GUC_FW_BLOB(prefix_, major_, minor_) \
- UC_FW_BLOB_NEW(major_, minor_, 0, false, \
- MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_))
+#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \
+ UC_FW_BLOB_NEW(major_, minor_, patch_, false, \
+ MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_, patch_))
#define GUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \
UC_FW_BLOB_OLD(major_, minor_, patch_, \
@@ -214,6 +240,10 @@ struct __packed uc_fw_blob {
#define HUC_FW_BLOB_GSC(prefix_) \
UC_FW_BLOB_NEW(0, 0, 0, true, MAKE_HUC_FW_PATH_GSC(prefix_))
+#define GSC_FW_BLOB(prefix_, major_, minor_) \
+ UC_FW_BLOB_NEW(major_, minor_, 0, true, \
+ MAKE_GSC_FW_PATH(prefix_, major_, minor_))
+
struct __packed uc_fw_platform_requirement {
enum intel_platform p;
u8 rev; /* first platform rev using this FW */
@@ -232,20 +262,27 @@ struct fw_blobs_by_type {
u32 count;
};
+static const struct uc_fw_platform_requirement blobs_guc[] = {
+ INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP)
+};
+
+static const struct uc_fw_platform_requirement blobs_huc[] = {
+ INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC)
+};
+
+static const struct uc_fw_platform_requirement blobs_gsc[] = {
+ INTEL_GSC_FIRMWARE_DEFS(MAKE_FW_LIST, GSC_FW_BLOB)
+};
+
+static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = {
+ [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
+ [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
+ [INTEL_UC_FW_TYPE_GSC] = { blobs_gsc, ARRAY_SIZE(blobs_gsc) },
+};
+
static void
__uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
{
- static const struct uc_fw_platform_requirement blobs_guc[] = {
- INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP)
- };
- static const struct uc_fw_platform_requirement blobs_huc[] = {
- INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC)
- };
- static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = {
- [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
- [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
- };
- static bool verified[INTEL_UC_FW_NUM_TYPES];
const struct uc_fw_platform_requirement *fw_blobs;
enum intel_platform p = INTEL_INFO(i915)->platform;
u32 fw_count;
@@ -254,20 +291,12 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
bool found;
/*
- * GSC FW support is still not fully in place, so we're not defining
- * the FW blob yet because we don't want the driver to attempt to load
- * it until we're ready for it.
- */
- if (uc_fw->type == INTEL_UC_FW_TYPE_GSC)
- return;
-
- /*
* The only difference between the ADL GuC FWs is the HWConfig support.
* ADL-N does not support HWConfig, so we should use the same binary as
* ADL-S, otherwise the GuC might attempt to fetch a config table that
* does not exist.
*/
- if (IS_ADLP_N(i915))
+ if (IS_ALDERLAKE_P_N(i915))
p = INTEL_ALDERLAKE_S;
GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
@@ -285,6 +314,11 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
continue;
if (uc_fw->file_selected.path) {
+ /*
+			 * We're continuing an earlier search after a found blob failed to load.
+ * Once the previously chosen path has been found, clear it out
+ * and let the search continue from there.
+ */
if (uc_fw->file_selected.path == blob->path)
uc_fw->file_selected.path = NULL;
@@ -295,7 +329,8 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
uc_fw->file_wanted.path = blob->path;
uc_fw->file_wanted.ver.major = blob->major;
uc_fw->file_wanted.ver.minor = blob->minor;
- uc_fw->loaded_via_gsc = blob->loaded_via_gsc;
+ uc_fw->file_wanted.ver.patch = blob->patch;
+ uc_fw->has_gsc_headers = blob->has_gsc_headers;
found = true;
break;
}
@@ -304,76 +339,111 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
/* Failed to find a match for the last attempt?! */
uc_fw->file_selected.path = NULL;
}
+}
- /* make sure the list is ordered as expected */
- if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST) && !verified[uc_fw->type]) {
- verified[uc_fw->type] = true;
+static bool validate_fw_table_type(struct drm_i915_private *i915, enum intel_uc_fw_type type)
+{
+ const struct uc_fw_platform_requirement *fw_blobs;
+ u32 fw_count;
+ int i, j;
- for (i = 1; i < fw_count; i++) {
- /* Next platform is good: */
- if (fw_blobs[i].p < fw_blobs[i - 1].p)
+ if (type >= ARRAY_SIZE(blobs_all)) {
+ drm_err(&i915->drm, "No blob array for %s\n", intel_uc_fw_type_repr(type));
+ return false;
+ }
+
+ fw_blobs = blobs_all[type].blobs;
+ fw_count = blobs_all[type].count;
+
+ if (!fw_count)
+ return true;
+
+ /* make sure the list is ordered as expected */
+ for (i = 1; i < fw_count; i++) {
+ /* Versionless file names must be unique per platform: */
+ for (j = i + 1; j < fw_count; j++) {
+ /* Same platform? */
+ if (fw_blobs[i].p != fw_blobs[j].p)
continue;
- /* Next platform revision is good: */
- if (fw_blobs[i].p == fw_blobs[i - 1].p &&
- fw_blobs[i].rev < fw_blobs[i - 1].rev)
+ if (fw_blobs[i].blob.path != fw_blobs[j].blob.path)
continue;
- /* Platform/revision must be in order: */
- if (fw_blobs[i].p != fw_blobs[i - 1].p ||
- fw_blobs[i].rev != fw_blobs[i - 1].rev)
- goto bad;
+ drm_err(&i915->drm, "Duplicate %s blobs: %s r%u %s%d.%d.%d [%s] matches %s%d.%d.%d [%s]\n",
+ intel_uc_fw_type_repr(type),
+ intel_platform_name(fw_blobs[j].p), fw_blobs[j].rev,
+ fw_blobs[j].blob.legacy ? "L" : "v",
+ fw_blobs[j].blob.major, fw_blobs[j].blob.minor,
+ fw_blobs[j].blob.patch, fw_blobs[j].blob.path,
+ fw_blobs[i].blob.legacy ? "L" : "v",
+ fw_blobs[i].blob.major, fw_blobs[i].blob.minor,
+ fw_blobs[i].blob.patch, fw_blobs[i].blob.path);
+ }
- /* Next major version is good: */
- if (fw_blobs[i].blob.major < fw_blobs[i - 1].blob.major)
- continue;
+ /* Next platform is good: */
+ if (fw_blobs[i].p < fw_blobs[i - 1].p)
+ continue;
- /* New must be before legacy: */
- if (!fw_blobs[i].blob.legacy && fw_blobs[i - 1].blob.legacy)
- goto bad;
+ /* Next platform revision is good: */
+ if (fw_blobs[i].p == fw_blobs[i - 1].p &&
+ fw_blobs[i].rev < fw_blobs[i - 1].rev)
+ continue;
- /* New to legacy also means 0.0 to X.Y (HuC), or X.0 to X.Y (GuC) */
- if (fw_blobs[i].blob.legacy && !fw_blobs[i - 1].blob.legacy) {
- if (!fw_blobs[i - 1].blob.major)
- continue;
+ /* Platform/revision must be in order: */
+ if (fw_blobs[i].p != fw_blobs[i - 1].p ||
+ fw_blobs[i].rev != fw_blobs[i - 1].rev)
+ goto bad;
- if (fw_blobs[i].blob.major == fw_blobs[i - 1].blob.major)
- continue;
- }
+ /* Next major version is good: */
+ if (fw_blobs[i].blob.major < fw_blobs[i - 1].blob.major)
+ continue;
- /* Major versions must be in order: */
- if (fw_blobs[i].blob.major != fw_blobs[i - 1].blob.major)
- goto bad;
+ /* New must be before legacy: */
+ if (!fw_blobs[i].blob.legacy && fw_blobs[i - 1].blob.legacy)
+ goto bad;
- /* Next minor version is good: */
- if (fw_blobs[i].blob.minor < fw_blobs[i - 1].blob.minor)
+ /* New to legacy also means 0.0 to X.Y (HuC), or X.0 to X.Y (GuC) */
+ if (fw_blobs[i].blob.legacy && !fw_blobs[i - 1].blob.legacy) {
+ if (!fw_blobs[i - 1].blob.major)
continue;
- /* Minor versions must be in order: */
- if (fw_blobs[i].blob.minor != fw_blobs[i - 1].blob.minor)
- goto bad;
-
- /* Patch versions must be in order: */
- if (fw_blobs[i].blob.patch <= fw_blobs[i - 1].blob.patch)
+ if (fw_blobs[i].blob.major == fw_blobs[i - 1].blob.major)
continue;
+ }
-bad:
- drm_err(&i915->drm, "Invalid %s blob order: %s r%u %s%d.%d.%d comes before %s r%u %s%d.%d.%d\n",
- intel_uc_fw_type_repr(uc_fw->type),
- intel_platform_name(fw_blobs[i - 1].p), fw_blobs[i - 1].rev,
- fw_blobs[i - 1].blob.legacy ? "L" : "v",
- fw_blobs[i - 1].blob.major,
- fw_blobs[i - 1].blob.minor,
- fw_blobs[i - 1].blob.patch,
- intel_platform_name(fw_blobs[i].p), fw_blobs[i].rev,
- fw_blobs[i].blob.legacy ? "L" : "v",
- fw_blobs[i].blob.major,
- fw_blobs[i].blob.minor,
- fw_blobs[i].blob.patch);
+ /* Major versions must be in order: */
+ if (fw_blobs[i].blob.major != fw_blobs[i - 1].blob.major)
+ goto bad;
- uc_fw->file_selected.path = NULL;
- }
+ /* Next minor version is good: */
+ if (fw_blobs[i].blob.minor < fw_blobs[i - 1].blob.minor)
+ continue;
+
+ /* Minor versions must be in order: */
+ if (fw_blobs[i].blob.minor != fw_blobs[i - 1].blob.minor)
+ goto bad;
+
+ /* Patch versions must be in order and unique: */
+ if (fw_blobs[i].blob.patch < fw_blobs[i - 1].blob.patch)
+ continue;
+
+bad:
+ drm_err(&i915->drm, "Invalid %s blob order: %s r%u %s%d.%d.%d comes before %s r%u %s%d.%d.%d\n",
+ intel_uc_fw_type_repr(type),
+ intel_platform_name(fw_blobs[i - 1].p), fw_blobs[i - 1].rev,
+ fw_blobs[i - 1].blob.legacy ? "L" : "v",
+ fw_blobs[i - 1].blob.major,
+ fw_blobs[i - 1].blob.minor,
+ fw_blobs[i - 1].blob.patch,
+ intel_platform_name(fw_blobs[i].p), fw_blobs[i].rev,
+ fw_blobs[i].blob.legacy ? "L" : "v",
+ fw_blobs[i].blob.major,
+ fw_blobs[i].blob.minor,
+ fw_blobs[i].blob.patch);
+ return false;
}
+
+ return true;
}
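
The invariant validate_fw_table_type() enforces is the one the old inline selftest checked, plus uniqueness of versionless paths: entries sorted by platform, then revision, then descending major/minor/patch, with new-style blobs listed before legacy ones. A hedged sketch of that ordering as a standalone comparator (fields simplified; the legacy-transition special cases from the real check are omitted):

    #include <stdbool.h>

    struct blob { int platform, rev, major, minor, patch; bool legacy; };

    /* Returns true if a may legally precede b in the firmware table. */
    static bool ordered(const struct blob *a, const struct blob *b)
    {
        if (a->platform != b->platform) return b->platform < a->platform;
        if (a->rev != b->rev)           return b->rev < a->rev;
        if (a->major != b->major)       return b->major < a->major;
        if (a->legacy != b->legacy)     return !a->legacy;  /* new before legacy */
        if (a->minor != b->minor)       return b->minor < a->minor;
        return b->patch < a->patch;     /* patch versions strictly descending */
    }
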
static const char *__override_guc_firmware_path(struct drm_i915_private *i915)
@@ -417,18 +487,32 @@ static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc
}
}
+void intel_uc_fw_version_from_gsc_manifest(struct intel_uc_fw_ver *ver,
+ const void *data)
+{
+ const struct intel_gsc_manifest_header *manifest = data;
+
+ ver->major = manifest->fw_version.major;
+ ver->minor = manifest->fw_version.minor;
+ ver->patch = manifest->fw_version.hotfix;
+ ver->build = manifest->fw_version.build;
+}
+
/**
* intel_uc_fw_init_early - initialize the uC object and select the firmware
* @uc_fw: uC firmware
* @type: type of uC
+ * @needs_ggtt_mapping: whether the FW needs to be GGTT mapped for loading
*
* Initialize the state of our uC object and relevant tracking and select the
* firmware to fetch and load.
*/
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
- enum intel_uc_fw_type type)
+ enum intel_uc_fw_type type,
+ bool needs_ggtt_mapping)
{
- struct drm_i915_private *i915 = ____uc_fw_to_gt(uc_fw, type)->i915;
+ struct intel_gt *gt = ____uc_fw_to_gt(uc_fw, type);
+ struct drm_i915_private *i915 = gt->i915;
/*
* we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status
@@ -439,8 +523,15 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
GEM_BUG_ON(uc_fw->file_selected.path);
uc_fw->type = type;
+ uc_fw->needs_ggtt_mapping = needs_ggtt_mapping;
if (HAS_GT_UC(i915)) {
+ if (!validate_fw_table_type(i915, type)) {
+ gt->uc.fw_table_invalid = true;
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_NOT_SUPPORTED);
+ return;
+ }
+
__uc_fw_auto_select(i915, uc_fw);
__uc_fw_user_override(i915, uc_fw);
}
@@ -488,20 +579,6 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e)
}
}
-static int check_gsc_manifest(const struct firmware *fw,
- struct intel_uc_fw *uc_fw)
-{
- u32 *dw = (u32 *)fw->data;
- u32 version_hi = dw[HUC_GSC_VERSION_HI_DW];
- u32 version_lo = dw[HUC_GSC_VERSION_LO_DW];
-
- uc_fw->file_selected.ver.major = FIELD_GET(HUC_GSC_MAJOR_VER_HI_MASK, version_hi);
- uc_fw->file_selected.ver.minor = FIELD_GET(HUC_GSC_MINOR_VER_HI_MASK, version_hi);
- uc_fw->file_selected.ver.patch = FIELD_GET(HUC_GSC_PATCH_VER_LO_MASK, version_lo);
-
- return 0;
-}
-
static void uc_unpack_css_version(struct intel_uc_fw_ver *ver, u32 css_value)
{
/* Get version numbers from the CSS header */
@@ -558,32 +635,30 @@ static void guc_read_css_info(struct intel_uc_fw *uc_fw, struct uc_css_header *c
uc_fw->private_data_size = css->private_data_size;
}
-static int check_ccs_header(struct intel_gt *gt,
- const struct firmware *fw,
- struct intel_uc_fw *uc_fw)
+static int __check_ccs_header(struct intel_gt *gt,
+ const void *fw_data, size_t fw_size,
+ struct intel_uc_fw *uc_fw)
{
- struct drm_i915_private *i915 = gt->i915;
struct uc_css_header *css;
size_t size;
/* Check the size of the blob before examining buffer contents */
- if (unlikely(fw->size < sizeof(struct uc_css_header))) {
- drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
- fw->size, sizeof(struct uc_css_header));
+ if (unlikely(fw_size < sizeof(struct uc_css_header))) {
+ gt_warn(gt, "%s firmware %s: invalid size: %zu < %zu\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
+ fw_size, sizeof(struct uc_css_header));
return -ENODATA;
}
- css = (struct uc_css_header *)fw->data;
+ css = (struct uc_css_header *)fw_data;
/* Check integrity of size values inside CSS header */
size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
css->exponent_size_dw) * sizeof(u32);
if (unlikely(size != sizeof(struct uc_css_header))) {
- drm_warn(&i915->drm,
- "%s firmware %s: unexpected header size: %zu != %zu\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
- fw->size, sizeof(struct uc_css_header));
+ gt_warn(gt, "%s firmware %s: unexpected header size: %zu != %zu\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
+ fw_size, sizeof(struct uc_css_header));
return -EPROTO;
}
@@ -595,19 +670,19 @@ static int check_ccs_header(struct intel_gt *gt,
/* At least, it should have header, uCode and RSA. Size of all three. */
size = sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->rsa_size;
- if (unlikely(fw->size < size)) {
- drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
- fw->size, size);
+ if (unlikely(fw_size < size)) {
+ gt_warn(gt, "%s firmware %s: invalid size: %zu < %zu\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
+ fw_size, size);
return -ENOEXEC;
}
/* Sanity check whether this fw is not larger than whole WOPCM memory */
size = __intel_uc_fw_get_upload_size(uc_fw);
if (unlikely(size >= gt->wopcm.size)) {
- drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu > %zu\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
- size, (size_t)gt->wopcm.size);
+ gt_warn(gt, "%s firmware %s: invalid size: %zu > %zu\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
+ size, (size_t)gt->wopcm.size);
return -E2BIG;
}
@@ -619,14 +694,53 @@ static int check_ccs_header(struct intel_gt *gt,
return 0;
}
+static int check_gsc_manifest(struct intel_gt *gt,
+ const struct firmware *fw,
+ struct intel_uc_fw *uc_fw)
+{
+ int ret;
+
+ switch (uc_fw->type) {
+ case INTEL_UC_FW_TYPE_HUC:
+ ret = intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size);
+ if (ret)
+ return ret;
+ break;
+ case INTEL_UC_FW_TYPE_GSC:
+ ret = intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size);
+ if (ret)
+ return ret;
+ break;
+ default:
+ MISSING_CASE(uc_fw->type);
+ return -EINVAL;
+ }
+
+ if (uc_fw->dma_start_offset) {
+ u32 delta = uc_fw->dma_start_offset;
+
+ __check_ccs_header(gt, fw->data + delta, fw->size - delta, uc_fw);
+ }
+
+ return 0;
+}
+
+static int check_ccs_header(struct intel_gt *gt,
+ const struct firmware *fw,
+ struct intel_uc_fw *uc_fw)
+{
+ return __check_ccs_header(gt, fw->data, fw->size, uc_fw);
+}
+
static bool is_ver_8bit(struct intel_uc_fw_ver *ver)
{
return ver->major < 0xFF && ver->minor < 0xFF && ver->patch < 0xFF;
}
-static bool guc_check_version_range(struct intel_uc_fw *uc_fw)
+static int guc_check_version_range(struct intel_uc_fw *uc_fw)
{
struct intel_guc *guc = container_of(uc_fw, struct intel_guc, fw);
+ struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
/*
* GuC version number components are defined as being 8-bits.
@@ -635,24 +749,24 @@ static bool guc_check_version_range(struct intel_uc_fw *uc_fw)
*/
if (!is_ver_8bit(&uc_fw->file_selected.ver)) {
- drm_warn(&__uc_fw_to_gt(uc_fw)->i915->drm, "%s firmware: invalid file version: 0x%02X:%02X:%02X\n",
- intel_uc_fw_type_repr(uc_fw->type),
- uc_fw->file_selected.ver.major,
- uc_fw->file_selected.ver.minor,
- uc_fw->file_selected.ver.patch);
- return false;
+ gt_warn(gt, "%s firmware: invalid file version: 0x%02X:%02X:%02X\n",
+ intel_uc_fw_type_repr(uc_fw->type),
+ uc_fw->file_selected.ver.major,
+ uc_fw->file_selected.ver.minor,
+ uc_fw->file_selected.ver.patch);
+ return -EINVAL;
}
if (!is_ver_8bit(&guc->submission_version)) {
- drm_warn(&__uc_fw_to_gt(uc_fw)->i915->drm, "%s firmware: invalid submit version: 0x%02X:%02X:%02X\n",
- intel_uc_fw_type_repr(uc_fw->type),
- guc->submission_version.major,
- guc->submission_version.minor,
- guc->submission_version.patch);
- return false;
+ gt_warn(gt, "%s firmware: invalid submit version: 0x%02X:%02X:%02X\n",
+ intel_uc_fw_type_repr(uc_fw->type),
+ guc->submission_version.major,
+ guc->submission_version.minor,
+ guc->submission_version.patch);
+ return -EINVAL;
}
- return true;
+ return i915_inject_probe_error(gt->i915, -EINVAL);
}
static int check_fw_header(struct intel_gt *gt,
@@ -661,12 +775,8 @@ static int check_fw_header(struct intel_gt *gt,
{
int err = 0;
- /* GSC FW version is queried after the FW is loaded */
- if (uc_fw->type == INTEL_UC_FW_TYPE_GSC)
- return 0;
-
- if (uc_fw->loaded_via_gsc)
- err = check_gsc_manifest(fw, uc_fw);
+ if (uc_fw->has_gsc_headers)
+ err = check_gsc_manifest(gt, fw, uc_fw);
else
err = check_ccs_header(gt, fw, uc_fw);
if (err)
@@ -686,11 +796,10 @@ static int try_firmware_load(struct intel_uc_fw *uc_fw, const struct firmware **
if (err)
return err;
- if ((*fw)->size > INTEL_UC_RSVD_GGTT_PER_FW) {
- drm_err(&gt->i915->drm,
- "%s firmware %s: size (%zuKB) exceeds max supported size (%uKB)\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
- (*fw)->size / SZ_1K, INTEL_UC_RSVD_GGTT_PER_FW / SZ_1K);
+ if (uc_fw->needs_ggtt_mapping && (*fw)->size > INTEL_UC_RSVD_GGTT_PER_FW) {
+ gt_err(gt, "%s firmware %s: size (%zuKB) exceeds max supported size (%uKB)\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
+ (*fw)->size / SZ_1K, INTEL_UC_RSVD_GGTT_PER_FW / SZ_1K);
/* try to find another blob to load */
release_firmware(*fw);
@@ -701,6 +810,80 @@ static int try_firmware_load(struct intel_uc_fw *uc_fw, const struct firmware **
return 0;
}
+static int check_mtl_huc_guc_compatibility(struct intel_gt *gt,
+ struct intel_uc_fw_file *huc_selected)
+{
+ struct intel_uc_fw_file *guc_selected = &gt_to_guc(gt)->fw.file_selected;
+ struct intel_uc_fw_ver *huc_ver = &huc_selected->ver;
+ struct intel_uc_fw_ver *guc_ver = &guc_selected->ver;
+ bool new_huc, new_guc;
+
+ /* we can only do this check after having fetched both GuC and HuC */
+ GEM_BUG_ON(!huc_selected->path || !guc_selected->path);
+
+ /*
+ * Due to changes in the authentication flow for MTL, HuC 8.5.1 or newer
+ * requires GuC 70.7.0 or newer. Older HuC binaries will instead require
+ * GuC < 70.7.0.
+ */
+ new_huc = huc_ver->major > 8 ||
+ (huc_ver->major == 8 && huc_ver->minor > 5) ||
+ (huc_ver->major == 8 && huc_ver->minor == 5 && huc_ver->patch >= 1);
+
+ new_guc = guc_ver->major > 70 ||
+ (guc_ver->major == 70 && guc_ver->minor >= 7);
+
+ if (new_huc != new_guc) {
+ UNEXPECTED(gt, "HuC %u.%u.%u is incompatible with GuC %u.%u.%u\n",
+ huc_ver->major, huc_ver->minor, huc_ver->patch,
+ guc_ver->major, guc_ver->minor, guc_ver->patch);
+ gt_info(gt, "MTL GuC 70.7.0+ and HuC 8.5.1+ don't work with older releases\n");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
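
To make the pairing rule concrete, the two booleans combine as follows for some sample version pairs (derived directly from the predicates above; the specific minor/patch numbers are illustrative):

	HuC 8.5.1 + GuC 70.7.0  ->  new_huc == new_guc == true   ->  0 (compatible)
	HuC 8.5.0 + GuC 70.6.3  ->  new_huc == new_guc == false  ->  0 (compatible)
	HuC 8.5.1 + GuC 70.6.3  ->  new_huc != new_guc           ->  -ENOEXEC
	HuC 8.4.9 + GuC 70.7.0  ->  new_huc != new_guc           ->  -ENOEXEC
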
+
+int intel_uc_check_file_version(struct intel_uc_fw *uc_fw, bool *old_ver)
+{
+ struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
+ struct intel_uc_fw_file *wanted = &uc_fw->file_wanted;
+ struct intel_uc_fw_file *selected = &uc_fw->file_selected;
+ int ret;
+
+ /*
+ * MTL has a compatibility quirk: early GuC/HuC binaries do not
+ * work with newer ones (and vice versa). This is specific to MTL
+ * and is not expected to extend to other platforms.
+ */
+ if (IS_METEORLAKE(gt->i915) && uc_fw->type == INTEL_UC_FW_TYPE_HUC) {
+ ret = check_mtl_huc_guc_compatibility(gt, selected);
+ if (ret)
+ return ret;
+ }
+
+ if (!wanted->ver.major || !selected->ver.major)
+ return 0;
+
+ /* Check that the file's major version matches the wanted one */
+ if (selected->ver.major != wanted->ver.major) {
+ UNEXPECTED(gt, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+ intel_uc_fw_type_repr(uc_fw->type), selected->path,
+ selected->ver.major, selected->ver.minor,
+ wanted->ver.major, wanted->ver.minor);
+ if (!intel_uc_fw_is_overridden(uc_fw))
+ return -ENOEXEC;
+ } else if (old_ver) {
+ if (selected->ver.minor < wanted->ver.minor)
+ *old_ver = true;
+ else if ((selected->ver.minor == wanted->ver.minor) &&
+ (selected->ver.patch < wanted->ver.patch))
+ *old_ver = true;
+ }
+
+ return 0;
+}
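
A minimal sketch of the calling convention, mirroring the intel_uc_fw_fetch() hunk further down: the old_ver out-parameter is optional and only reports a minor/patch shortfall, while a major mismatch is fatal unless the user overrode the firmware path. The warn helper is hypothetical:

	/* Sketch under the assumptions stated above. */
	static int fetch_and_check(struct intel_uc_fw *uc_fw)
	{
		bool old_ver = false;
		int err;

		err = intel_uc_check_file_version(uc_fw, &old_ver);
		if (err)
			return err;	/* major mismatch, not user-overridden */

		if (old_ver)
			warn_about_old_firmware(uc_fw);	/* hypothetical helper */

		return 0;
	}
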
+
/**
* intel_uc_fw_fetch - fetch uC firmware
* @uc_fw: uC firmware
@@ -762,40 +945,32 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
if (err)
goto fail;
- if (uc_fw->type == INTEL_UC_FW_TYPE_GUC && !guc_check_version_range(uc_fw))
- goto fail;
-
- if (uc_fw->file_wanted.ver.major && uc_fw->file_selected.ver.major) {
- /* Check the file's major version was as it claimed */
- if (uc_fw->file_selected.ver.major != uc_fw->file_wanted.ver.major) {
- drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
- uc_fw->file_selected.ver.major, uc_fw->file_selected.ver.minor,
- uc_fw->file_wanted.ver.major, uc_fw->file_wanted.ver.minor);
- if (!intel_uc_fw_is_overridden(uc_fw)) {
- err = -ENOEXEC;
- goto fail;
- }
- } else {
- if (uc_fw->file_selected.ver.minor < uc_fw->file_wanted.ver.minor)
- old_ver = true;
- }
+ if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) {
+ err = guc_check_version_range(uc_fw);
+ if (err)
+ goto fail;
}
+ err = intel_uc_check_file_version(uc_fw, &old_ver);
+ if (err)
+ goto fail;
+
if (old_ver && uc_fw->file_selected.ver.major) {
/* Preserve the version that was really wanted */
memcpy(&uc_fw->file_wanted, &file_ideal, sizeof(uc_fw->file_wanted));
- drm_notice(&i915->drm,
- "%s firmware %s (%d.%d) is recommended, but only %s (%d.%d) was found\n",
+ UNEXPECTED(gt, "%s firmware %s (%d.%d.%d) is recommended, but only %s (%d.%d.%d) was found\n",
intel_uc_fw_type_repr(uc_fw->type),
uc_fw->file_wanted.path,
- uc_fw->file_wanted.ver.major, uc_fw->file_wanted.ver.minor,
+ uc_fw->file_wanted.ver.major,
+ uc_fw->file_wanted.ver.minor,
+ uc_fw->file_wanted.ver.patch,
uc_fw->file_selected.path,
- uc_fw->file_selected.ver.major, uc_fw->file_selected.ver.minor);
- drm_info(&i915->drm,
- "Consider updating your linux-firmware pkg or downloading from %s\n",
- INTEL_UC_FIRMWARE_URL);
+ uc_fw->file_selected.ver.major,
+ uc_fw->file_selected.ver.minor,
+ uc_fw->file_selected.ver.patch);
+ gt_info(gt, "Consider updating your linux-firmware pkg or downloading from %s\n",
+ INTEL_UC_FIRMWARE_URL);
}
if (HAS_LMEM(i915)) {
@@ -823,10 +998,10 @@ fail:
INTEL_UC_FIRMWARE_MISSING :
INTEL_UC_FIRMWARE_ERROR);
- i915_probe_error(i915, "%s firmware %s: fetch failed with error %d\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, err);
- drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n",
- intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL);
+ gt_probe_error(gt, "%s firmware %s: fetch failed %pe\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, ERR_PTR(err));
+ gt_info(gt, "%s firmware(s) can be downloaded from %s\n",
+ intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL);
release_firmware(fw); /* OK even if fw is NULL */
return err;
@@ -864,35 +1039,46 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
{
struct drm_i915_gem_object *obj = uc_fw->obj;
struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
- struct i915_vma_resource *dummy = &uc_fw->dummy;
+ struct i915_vma_resource *vma_res = &uc_fw->vma_res;
u32 pte_flags = 0;
- dummy->start = uc_fw_ggtt_offset(uc_fw);
- dummy->node_size = obj->base.size;
- dummy->bi.pages = obj->mm.pages;
+ if (!uc_fw->needs_ggtt_mapping)
+ return;
+
+ vma_res->start = uc_fw_ggtt_offset(uc_fw);
+ vma_res->node_size = obj->base.size;
+ vma_res->bi.pages = obj->mm.pages;
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
/* uc_fw->obj cache domains were not controlled across suspend */
if (i915_gem_object_has_struct_page(obj))
- drm_clflush_sg(dummy->bi.pages);
+ drm_clflush_sg(vma_res->bi.pages);
if (i915_gem_object_is_lmem(obj))
pte_flags |= PTE_LM;
if (ggtt->vm.raw_insert_entries)
- ggtt->vm.raw_insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags);
+ ggtt->vm.raw_insert_entries(&ggtt->vm, vma_res,
+ i915_gem_get_pat_index(ggtt->vm.i915,
+ I915_CACHE_NONE),
+ pte_flags);
else
- ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags);
+ ggtt->vm.insert_entries(&ggtt->vm, vma_res,
+ i915_gem_get_pat_index(ggtt->vm.i915,
+ I915_CACHE_NONE),
+ pte_flags);
}
static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
{
- struct drm_i915_gem_object *obj = uc_fw->obj;
struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
- u64 start = uc_fw_ggtt_offset(uc_fw);
+ struct i915_vma_resource *vma_res = &uc_fw->vma_res;
+
+ if (!vma_res->node_size)
+ return;
- ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size);
+ ggtt->vm.clear_range(&ggtt->vm, vma_res->start, vma_res->node_size);
}
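
The two early returns keep bind and unbind safe to call unconditionally: vma_res->node_size is only populated by uc_fw_bind_ggtt(), so the unbind path degrades to a no-op when the firmware was never mapped (for example on fini after a fetch failure). A sketch of the invariant:

	/* Sketch only: whether uc_fw_bind_ggtt() ever populated vma_res. */
	static inline bool uc_fw_was_bound(const struct intel_uc_fw *uc_fw)
	{
		return uc_fw->vma_res.node_size != 0;
	}
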
static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
@@ -909,7 +1095,7 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
/* Set the source address for the uCode */
- offset = uc_fw_ggtt_offset(uc_fw);
+ offset = uc_fw->vma_res.start + uc_fw->dma_start_offset;
GEM_BUG_ON(upper_32_bits(offset) & 0xFFFF0000);
intel_uncore_write_fw(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset));
intel_uncore_write_fw(uncore, DMA_ADDR_0_HIGH, upper_32_bits(offset));
@@ -930,11 +1116,11 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
_MASKED_BIT_ENABLE(dma_flags | START_DMA));
/* Wait for DMA to finish */
- ret = intel_wait_for_register_fw(uncore, DMA_CTRL, START_DMA, 0, 100);
+ ret = intel_wait_for_register_fw(uncore, DMA_CTRL, START_DMA, 0, 100, NULL);
if (ret)
- drm_err(&gt->i915->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
- intel_uc_fw_type_repr(uc_fw->type),
- intel_uncore_read_fw(uncore, DMA_CTRL));
+ gt_err(gt, "DMA for %s fw failed, DMA_CTRL=%u\n",
+ intel_uc_fw_type_repr(uc_fw->type),
+ intel_uncore_read_fw(uncore, DMA_CTRL));
/* Disable the bits once DMA is over */
intel_uncore_write_fw(uncore, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags));
@@ -950,9 +1136,8 @@ int intel_uc_fw_mark_load_failed(struct intel_uc_fw *uc_fw, int err)
GEM_BUG_ON(!intel_uc_fw_is_loadable(uc_fw));
- i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
- err);
+ gt_probe_error(gt, "Failed to load %s firmware %s %pe\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, ERR_PTR(err));
intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return err;
@@ -984,9 +1169,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
return -ENOEXEC;
/* Call custom loader */
- uc_fw_bind_ggtt(uc_fw);
err = uc_fw_xfer(uc_fw, dst_offset, dma_flags);
- uc_fw_unbind_ggtt(uc_fw);
if (err)
goto fail;
@@ -1032,12 +1215,12 @@ static int uc_fw_rsa_data_create(struct intel_uc_fw *uc_fw)
* since its GGTT offset will be GuC accessible.
*/
GEM_BUG_ON(uc_fw->rsa_size > PAGE_SIZE);
- vma = intel_guc_allocate_vma(&gt->uc.guc, PAGE_SIZE);
+ vma = intel_guc_allocate_vma(gt_to_guc(gt), PAGE_SIZE);
if (IS_ERR(vma))
return PTR_ERR(vma);
vaddr = i915_gem_object_pin_map_unlocked(vma->obj,
- i915_coherent_map_type(gt->i915, vma->obj, true));
+ intel_gt_coherent_map_type(gt, vma->obj, true));
if (IS_ERR(vaddr)) {
i915_vma_unpin_and_release(&vma, 0);
err = PTR_ERR(vaddr);
@@ -1078,18 +1261,20 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
err = i915_gem_object_pin_pages_unlocked(uc_fw->obj);
if (err) {
- DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n",
- intel_uc_fw_type_repr(uc_fw->type), err);
+ gt_dbg(__uc_fw_to_gt(uc_fw), "%s fw pin-pages failed %pe\n",
+ intel_uc_fw_type_repr(uc_fw->type), ERR_PTR(err));
goto out;
}
err = uc_fw_rsa_data_create(uc_fw);
if (err) {
- DRM_DEBUG_DRIVER("%s fw rsa data creation failed, err=%d\n",
- intel_uc_fw_type_repr(uc_fw->type), err);
+ gt_dbg(__uc_fw_to_gt(uc_fw), "%s fw rsa data creation failed %pe\n",
+ intel_uc_fw_type_repr(uc_fw->type), ERR_PTR(err));
goto out_unpin;
}
+ uc_fw_bind_ggtt(uc_fw);
+
return 0;
out_unpin:
@@ -1100,6 +1285,7 @@ out:
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
{
+ uc_fw_unbind_ggtt(uc_fw);
uc_fw_rsa_data_destroy(uc_fw);
if (i915_gem_object_has_pinned_pages(uc_fw->obj))
@@ -1108,6 +1294,17 @@ void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE);
}
+void intel_uc_fw_resume_mapping(struct intel_uc_fw *uc_fw)
+{
+ if (!intel_uc_fw_is_available(uc_fw))
+ return;
+
+ if (!i915_gem_object_has_pinned_pages(uc_fw->obj))
+ return;
+
+ uc_fw_bind_ggtt(uc_fw);
+}
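
GGTT contents are not preserved across suspend, and because this mapping bypasses the normal vma machinery nothing re-creates it automatically, hence the explicit resume hook. A hedged sketch of the expected call site (the real caller lives in the uc resume path, outside this diff):

	static void uc_resume_mappings(struct intel_uc *uc)
	{
		intel_uc_fw_resume_mapping(&uc->guc.fw);
		intel_uc_fw_resume_mapping(&uc->huc.fw);
	}
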
+
/**
* intel_uc_fw_cleanup_fetch - cleanup uC firmware
* @uc_fw: uC firmware
@@ -1137,7 +1334,7 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
{
struct intel_memory_region *mr = uc_fw->obj->mm.region;
u32 size = min_t(u32, uc_fw->rsa_size, max_len);
- u32 offset = sizeof(struct uc_css_header) + uc_fw->ucode_size;
+ u32 offset = uc_fw->dma_start_offset + sizeof(struct uc_css_header) + uc_fw->ucode_size;
struct sgt_iter iter;
size_t count = 0;
int idx;
@@ -1152,16 +1349,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) {
u32 len = min_t(u32, size, PAGE_SIZE - offset);
- void *vaddr;
if (idx > 0) {
idx--;
continue;
}
- vaddr = kmap_atomic(page);
- memcpy(dst, vaddr + offset, len);
- kunmap_atomic(vaddr);
+ memcpy_from_page(dst, page, offset, len);
offset = 0;
dst += len;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 6ba00e6b3975..ac7b3aad2222 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -70,6 +70,7 @@ struct intel_uc_fw_ver {
u32 major;
u32 minor;
u32 patch;
+ u32 build;
};
/*
@@ -99,20 +100,28 @@ struct intel_uc_fw {
struct drm_i915_gem_object *obj;
/**
- * @dummy: A vma used in binding the uc fw to ggtt. We can't define this
- * vma on the stack as it can lead to a stack overflow, so we define it
- * here. Safe to have 1 copy per uc fw because the binding is single
- * threaded as it done during driver load (inherently single threaded)
- * or during a GT reset (mutex guarantees single threaded).
+ * @needs_ggtt_mapping: indicates whether the fw object needs to be
+ * pinned to ggtt. If true, the fw is pinned at init time and unpinned
+ * during driver unload.
*/
- struct i915_vma_resource dummy;
+ bool needs_ggtt_mapping;
+
+ /**
+ * @vma_res: A vma resource used in binding the uc fw to ggtt. The fw is
+ * pinned in a reserved area of the ggtt (above the maximum address
+ * usable by GuC); therefore, we can't use the normal vma functions to
+ * do the pinning and we instead use this resource to do so.
+ */
+ struct i915_vma_resource vma_res;
struct i915_vma *rsa_data;
u32 rsa_size;
u32 ucode_size;
u32 private_data_size;
- bool loaded_via_gsc;
+ u32 dma_start_offset;
+
+ bool has_gsc_headers;
};
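
Read together with the .c hunks above, the new fields give each firmware object this lifecycle (summary sketch, not part of the patch):

	intel_uc_fw_init()     pin pages, create RSA data, uc_fw_bind_ggtt()
	suspend/resume         intel_uc_fw_resume_mapping() re-binds if still pinned
	intel_uc_fw_fini()     uc_fw_unbind_ggtt(), destroy RSA data, unpin pages
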
/*
@@ -249,6 +258,11 @@ static inline bool intel_uc_fw_is_running(struct intel_uc_fw *uc_fw)
return __intel_uc_fw_status(uc_fw) == INTEL_UC_FIRMWARE_RUNNING;
}
+static inline bool intel_uc_fw_is_in_error(struct intel_uc_fw *uc_fw)
+{
+ return intel_uc_fw_status_to_error(__intel_uc_fw_status(uc_fw)) != 0;
+}
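
A hedged usage sketch for the new helper (hypothetical caller; intel_uc_fw_status_to_error() presumably maps terminal states such as MISSING or LOAD_FAIL to negative errnos):

	/* Bail out instead of retrying once the blob is in a terminal state. */
	if (intel_uc_fw_is_in_error(uc_fw))
		return intel_uc_fw_status_to_error(__intel_uc_fw_status(uc_fw));
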
+
static inline bool intel_uc_fw_is_overridden(const struct intel_uc_fw *uc_fw)
{
return uc_fw->user_overridden;
@@ -281,13 +295,18 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
return __intel_uc_fw_get_upload_size(uc_fw);
}
+void intel_uc_fw_version_from_gsc_manifest(struct intel_uc_fw_ver *ver,
+ const void *data);
+int intel_uc_check_file_version(struct intel_uc_fw *uc_fw, bool *old_ver);
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
- enum intel_uc_fw_type type);
+ enum intel_uc_fw_type type,
+ bool needs_ggtt_mapping);
int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw);
void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw);
int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags);
int intel_uc_fw_init(struct intel_uc_fw *uc_fw);
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw);
+void intel_uc_fw_resume_mapping(struct intel_uc_fw *uc_fw);
size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len);
int intel_uc_fw_mark_load_failed(struct intel_uc_fw *uc_fw, int err);
void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
index 646fa8aa6cf1..7fe405126249 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
@@ -84,10 +84,4 @@ struct uc_css_header {
} __packed;
static_assert(sizeof(struct uc_css_header) == 128);
-#define HUC_GSC_VERSION_HI_DW 44
-#define HUC_GSC_MAJOR_VER_HI_MASK (0xFF << 0)
-#define HUC_GSC_MINOR_VER_HI_MASK (0xFF << 16)
-#define HUC_GSC_VERSION_LO_DW 45
-#define HUC_GSC_PATCH_VER_LO_MASK (0xFF << 0)
-
#endif /* _INTEL_UC_FW_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index e28518fe8b90..68feb55654f7 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -3,6 +3,8 @@
* Copyright © 2021 Intel Corporation
*/
+#include "gt/intel_gt_print.h"
+#include "intel_guc_print.h"
#include "selftests/igt_spinner.h"
#include "selftests/intel_scheduler_helpers.h"
@@ -65,7 +67,7 @@ static int intel_guc_scrub_ctbs(void *arg)
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
ret = PTR_ERR(ce);
- drm_err(&gt->i915->drm, "Failed to create context, %d: %d\n", i, ret);
+ gt_err(gt, "Failed to create context %d: %pe\n", i, ce);
goto err;
}
@@ -86,7 +88,7 @@ static int intel_guc_scrub_ctbs(void *arg)
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
- drm_err(&gt->i915->drm, "Failed to create request, %d: %d\n", i, ret);
+ gt_err(gt, "Failed to create request %d: %pe\n", i, rq);
goto err;
}
@@ -96,7 +98,7 @@ static int intel_guc_scrub_ctbs(void *arg)
for (i = 0; i < 3; ++i) {
ret = i915_request_wait(last[i], 0, HZ);
if (ret < 0) {
- drm_err(&gt->i915->drm, "Last request failed to complete: %d\n", ret);
+ gt_err(gt, "Last request failed to complete: %pe\n", ERR_PTR(ret));
goto err;
}
i915_request_put(last[i]);
@@ -113,7 +115,7 @@ static int intel_guc_scrub_ctbs(void *arg)
/* GT will not idle if G2H are lost */
ret = intel_gt_wait_for_idle(gt, HZ);
if (ret < 0) {
- drm_err(&gt->i915->drm, "GT failed to idle: %d\n", ret);
+ gt_err(gt, "GT failed to idle: %pe\n", ERR_PTR(ret));
goto err;
}
@@ -142,7 +144,7 @@ err:
static int intel_guc_steal_guc_ids(void *arg)
{
struct intel_gt *gt = arg;
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
int ret, sv, context_index = 0;
intel_wakeref_t wakeref;
struct intel_engine_cs *engine;
@@ -153,7 +155,7 @@ static int intel_guc_steal_guc_ids(void *arg)
ce = kcalloc(GUC_MAX_CONTEXT_ID, sizeof(*ce), GFP_KERNEL);
if (!ce) {
- drm_err(&gt->i915->drm, "Context array allocation failed\n");
+ guc_err(guc, "Context array allocation failed\n");
return -ENOMEM;
}
@@ -166,25 +168,25 @@ static int intel_guc_steal_guc_ids(void *arg)
ce[context_index] = intel_context_create(engine);
if (IS_ERR(ce[context_index])) {
ret = PTR_ERR(ce[context_index]);
+ guc_err(guc, "Failed to create context: %pe\n", ce[context_index]);
ce[context_index] = NULL;
- drm_err(&gt->i915->drm, "Failed to create context: %d\n", ret);
goto err_wakeref;
}
ret = igt_spinner_init(&spin, engine->gt);
if (ret) {
- drm_err(&gt->i915->drm, "Failed to create spinner: %d\n", ret);
+ guc_err(guc, "Failed to create spinner: %pe\n", ERR_PTR(ret));
goto err_contexts;
}
spin_rq = igt_spinner_create_request(&spin, ce[context_index],
MI_ARB_CHECK);
if (IS_ERR(spin_rq)) {
ret = PTR_ERR(spin_rq);
- drm_err(&gt->i915->drm, "Failed to create spinner request: %d\n", ret);
+ guc_err(guc, "Failed to create spinner request: %pe\n", spin_rq);
goto err_contexts;
}
ret = request_add_spin(spin_rq, &spin);
if (ret) {
- drm_err(&gt->i915->drm, "Failed to add Spinner request: %d\n", ret);
+ guc_err(guc, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
goto err_spin_rq;
}
@@ -192,9 +194,9 @@ static int intel_guc_steal_guc_ids(void *arg)
while (ret != -EAGAIN) {
ce[++context_index] = intel_context_create(engine);
if (IS_ERR(ce[context_index])) {
- ret = PTR_ERR(ce[context_index--]);
- ce[context_index] = NULL;
- drm_err(&gt->i915->drm, "Failed to create context: %d\n", ret);
+ ret = PTR_ERR(ce[context_index]);
+ guc_err(guc, "Failed to create context: %pe\n", ce[context_index]);
+ ce[context_index--] = NULL;
goto err_spin_rq;
}
@@ -202,9 +204,9 @@ static int intel_guc_steal_guc_ids(void *arg)
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
rq = NULL;
- if (ret != -EAGAIN) {
- drm_err(&gt->i915->drm, "Failed to create request, %d: %d\n",
- context_index, ret);
+ if ((ret != -EAGAIN) || !last) {
+ guc_err(guc, "Failed to create %srequest %d: %pe\n",
+ last ? "" : "first ", context_index, ERR_PTR(ret));
goto err_spin_rq;
}
} else {
@@ -218,7 +220,7 @@ static int intel_guc_steal_guc_ids(void *arg)
igt_spinner_end(&spin);
ret = intel_selftest_wait_for_rq(spin_rq);
if (ret) {
- drm_err(&gt->i915->drm, "Spin request failed to complete: %d\n", ret);
+ guc_err(guc, "Spin request failed to complete: %pe\n", ERR_PTR(ret));
i915_request_put(last);
goto err_spin_rq;
}
@@ -230,7 +232,7 @@ static int intel_guc_steal_guc_ids(void *arg)
ret = i915_request_wait(last, 0, HZ * 30);
i915_request_put(last);
if (ret < 0) {
- drm_err(&gt->i915->drm, "Last request failed to complete: %d\n", ret);
+ guc_err(guc, "Last request failed to complete: %pe\n", ERR_PTR(ret));
goto err_spin_rq;
}
@@ -238,7 +240,7 @@ static int intel_guc_steal_guc_ids(void *arg)
rq = nop_user_request(ce[context_index], NULL);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
- drm_err(&gt->i915->drm, "Failed to steal guc_id, %d: %d\n", context_index, ret);
+ guc_err(guc, "Failed to steal guc_id %d: %pe\n", context_index, rq);
goto err_spin_rq;
}
@@ -246,20 +248,20 @@ static int intel_guc_steal_guc_ids(void *arg)
ret = i915_request_wait(rq, 0, HZ);
i915_request_put(rq);
if (ret < 0) {
- drm_err(&gt->i915->drm, "Request with stolen guc_id failed to complete: %d\n", ret);
+ guc_err(guc, "Request with stolen guc_id failed to complete: %pe\n", ERR_PTR(ret));
goto err_spin_rq;
}
/* Wait for idle */
ret = intel_gt_wait_for_idle(gt, HZ * 30);
if (ret < 0) {
- drm_err(&gt->i915->drm, "GT failed to idle: %d\n", ret);
+ guc_err(guc, "GT failed to idle: %pe\n", ERR_PTR(ret));
goto err_spin_rq;
}
/* Verify a guc_id was stolen */
if (guc->number_guc_id_stolen == number_guc_id_stolen) {
- drm_err(&gt->i915->drm, "No guc_id was stolen");
+ guc_err(guc, "No guc_id was stolen");
ret = -EINVAL;
} else {
ret = 0;
@@ -284,11 +286,126 @@ err_wakeref:
return ret;
}
+/*
+ * Send a context schedule H2G message with an invalid context id.
+ * This should generate a GUC_RESULT_INVALID_CONTEXT response.
+ */
+static int bad_h2g(struct intel_guc *guc)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_SCHED_CONTEXT,
+ 0x12345678,
+ };
+
+ return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0);
+}
+
+/*
+ * Set a spinner running to make sure the system is alive and active,
+ * then send a bad but asynchronous H2G command and wait to see if an
+ * error response is returned. If no response is received or if the
+ * spinner dies then the test will fail.
+ */
+#define FAST_RESPONSE_TIMEOUT_MS 1000
+static int intel_guc_fast_request(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_context *ce;
+ struct igt_spinner spin;
+ struct i915_request *rq;
+ intel_wakeref_t wakeref;
+ struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt);
+ bool spinning = false;
+ int ret = 0;
+
+ if (!engine)
+ return 0;
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ ret = PTR_ERR(ce);
+ gt_err(gt, "Failed to create spinner request: %pe\n", ce);
+ goto err_pm;
+ }
+
+ ret = igt_spinner_init(&spin, engine->gt);
+ if (ret) {
+ gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret));
+ intel_context_put(ce);
+ goto err_pm;
+ }
+ spinning = true;
+
+ rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ gt_err(gt, "Failed to create spinner request: %pe\n", rq);
+ goto err_spin;
+ }
+
+ ret = request_add_spin(rq, &spin);
+ if (ret) {
+ gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
+ goto err_rq;
+ }
+
+ gt->uc.guc.fast_response_selftest = 1;
+
+ ret = bad_h2g(&gt->uc.guc);
+ if (ret) {
+ gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret));
+ goto err_rq;
+ }
+
+ ret = wait_for(gt->uc.guc.fast_response_selftest != 1 || i915_request_completed(rq),
+ FAST_RESPONSE_TIMEOUT_MS);
+ if (ret) {
+ gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret));
+ goto err_rq;
+ }
+
+ if (i915_request_completed(rq)) {
+ gt_err(gt, "Spinner died waiting for fast request error!\n");
+ ret = -EIO;
+ goto err_rq;
+ }
+
+ if (gt->uc.guc.fast_response_selftest != 2) {
+ gt_err(gt, "Unexpected fast response count: %d\n",
+ gt->uc.guc.fast_response_selftest);
+ ret = -EIO;
+ goto err_rq;
+ }
+
+ igt_spinner_end(&spin);
+ spinning = false;
+
+ ret = intel_selftest_wait_for_rq(rq);
+ if (ret) {
+ gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret));
+ goto err_rq;
+ }
+
+err_rq:
+ i915_request_put(rq);
+
+err_spin:
+ if (spinning)
+ igt_spinner_end(&spin);
+ igt_spinner_fini(&spin);
+
+err_pm:
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ return ret;
+}
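
The counter protocol behind fast_response_selftest, as the test above relies on it (the handler side is an assumption; it lives in the CT response code outside this diff):

	/*
	 * test arms:      guc->fast_response_selftest = 1;
	 * CT handler:     on the expected error response,
	 *                 guc->fast_response_selftest++;
	 * test verifies:  counter == 2  =>  exactly one response seen
	 */
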
+
int intel_guc_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(intel_guc_scrub_ctbs),
SUBTEST(intel_guc_steal_guc_ids),
+ SUBTEST(intel_guc_fast_request),
};
struct intel_gt *gt = to_gt(i915);
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
index d91b58f70403..83801c992488 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
@@ -3,6 +3,7 @@
* Copyright © 2022 Intel Corporation
*/
+#include "gt/intel_gt_print.h"
#include "selftests/igt_spinner.h"
#include "selftests/igt_reset.h"
#include "selftests/intel_scheduler_helpers.h"
@@ -45,7 +46,7 @@ static int intel_hang_guc(void *arg)
ctx = kernel_context(gt->i915, NULL);
if (IS_ERR(ctx)) {
- drm_err(&gt->i915->drm, "Failed get kernel context: %ld\n", PTR_ERR(ctx));
+ gt_err(gt, "Failed get kernel context: %pe\n", ctx);
return PTR_ERR(ctx);
}
@@ -54,7 +55,7 @@ static int intel_hang_guc(void *arg)
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
ret = PTR_ERR(ce);
- drm_err(&gt->i915->drm, "Failed to create spinner request: %d\n", ret);
+ gt_err(gt, "Failed to create spinner request: %pe\n", ce);
goto err;
}
@@ -63,42 +64,42 @@ static int intel_hang_guc(void *arg)
old_beat = engine->props.heartbeat_interval_ms;
ret = intel_engine_set_heartbeat(engine, BEAT_INTERVAL);
if (ret) {
- drm_err(&gt->i915->drm, "Failed to boost heatbeat interval: %d\n", ret);
+ gt_err(gt, "Failed to boost heartbeat interval: %pe\n", ERR_PTR(ret));
goto err;
}
ret = igt_spinner_init(&spin, engine->gt);
if (ret) {
- drm_err(&gt->i915->drm, "Failed to create spinner: %d\n", ret);
+ gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret));
goto err;
}
- rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+ rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
intel_context_put(ce);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
- drm_err(&gt->i915->drm, "Failed to create spinner request: %d\n", ret);
+ gt_err(gt, "Failed to create spinner request: %pe\n", rq);
goto err_spin;
}
ret = request_add_spin(rq, &spin);
if (ret) {
i915_request_put(rq);
- drm_err(&gt->i915->drm, "Failed to add Spinner request: %d\n", ret);
+ gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
goto err_spin;
}
ret = intel_reset_guc(gt);
if (ret) {
i915_request_put(rq);
- drm_err(&gt->i915->drm, "Failed to reset GuC, ret = %d\n", ret);
+ gt_err(gt, "Failed to reset GuC: %pe\n", ERR_PTR(ret));
goto err_spin;
}
guc_status = intel_uncore_read(gt->uncore, GUC_STATUS);
if (!(guc_status & GS_MIA_IN_RESET)) {
i915_request_put(rq);
- drm_err(&gt->i915->drm, "GuC failed to reset: status = 0x%08X\n", guc_status);
+ gt_err(gt, "Failed to reset GuC: status = 0x%08X\n", guc_status);
ret = -EIO;
goto err_spin;
}
@@ -107,12 +108,12 @@ static int intel_hang_guc(void *arg)
ret = intel_selftest_wait_for_rq(rq);
i915_request_put(rq);
if (ret) {
- drm_err(&gt->i915->drm, "Request failed to complete: %d\n", ret);
+ gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret));
goto err_spin;
}
if (i915_reset_count(global) == reset_count) {
- drm_err(&gt->i915->drm, "Failed to record a GPU reset\n");
+ gt_err(gt, "Failed to record a GPU reset\n");
ret = -EINVAL;
goto err_spin;
}
@@ -132,7 +133,7 @@ err_spin:
ret = intel_selftest_wait_for_rq(rq);
i915_request_put(rq);
if (ret) {
- drm_err(&gt->i915->drm, "No-op failed to complete: %d\n", ret);
+ gt_err(gt, "No-op failed to complete: %pe\n", ERR_PTR(ret));
goto err;
}
}
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
index d17982c36d25..a40e7c32e613 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
@@ -3,6 +3,7 @@
* Copyright © 2019 Intel Corporation
*/
+#include "gt/intel_gt_print.h"
#include "selftests/igt_spinner.h"
#include "selftests/igt_reset.h"
#include "selftests/intel_scheduler_helpers.h"
@@ -115,30 +116,30 @@ static int __intel_guc_multi_lrc_basic(struct intel_gt *gt, unsigned int class)
parent = multi_lrc_create_parent(gt, class, 0);
if (IS_ERR(parent)) {
- drm_err(&gt->i915->drm, "Failed creating contexts: %ld", PTR_ERR(parent));
+ gt_err(gt, "Failed creating contexts: %pe\n", parent);
return PTR_ERR(parent);
} else if (!parent) {
- drm_dbg(&gt->i915->drm, "Not enough engines in class: %d", class);
+ gt_dbg(gt, "Not enough engines in class: %d\n", class);
return 0;
}
rq = multi_lrc_nop_request(parent);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
- drm_err(&gt->i915->drm, "Failed creating requests: %d", ret);
+ gt_err(gt, "Failed creating requests: %pe\n", rq);
goto out;
}
ret = intel_selftest_wait_for_rq(rq);
if (ret)
- drm_err(&gt->i915->drm, "Failed waiting on request: %d", ret);
+ gt_err(gt, "Failed waiting on request: %pe\n", ERR_PTR(ret));
i915_request_put(rq);
if (ret >= 0) {
ret = intel_gt_wait_for_idle(gt, HZ * 5);
if (ret < 0)
- drm_err(&gt->i915->drm, "GT failed to idle: %d\n", ret);
+ gt_err(gt, "GT failed to idle: %pe\n", ERR_PTR(ret));
}
out: