summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c140
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.h21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c17
5 files changed, 117 insertions, 79 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 23857cc08eca..2702ad4c26c8 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -165,14 +165,60 @@ static u32 preparser_disable(bool state)
return MI_ARB_CHECK | 1 << 8 | state;
}
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
+static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine)
{
- u32 gsi_offset = gt->uncore->gsi_offset;
+ switch (engine->id) {
+ case RCS0:
+ return GEN12_CCS_AUX_INV;
+ case BCS0:
+ return GEN12_BCS0_AUX_INV;
+ case VCS0:
+ return GEN12_VD0_AUX_INV;
+ case VCS2:
+ return GEN12_VD2_AUX_INV;
+ case VECS0:
+ return GEN12_VE0_AUX_INV;
+ case CCS0:
+ return GEN12_CCS0_AUX_INV;
+ default:
+ return INVALID_MMIO_REG;
+ }
+}
+
+static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine)
+{
+ i915_reg_t reg = gen12_get_aux_inv_reg(engine);
+
+ if (IS_PONTEVECCHIO(engine->i915))
+ return false;
+
+ /*
+ * So far platforms supported by i915 having flat ccs do not require
+ * AUX invalidation. Check also whether the engine requires it.
+ */
+ return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915);
+}
+
+u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
+{
+ i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine);
+ u32 gsi_offset = engine->gt->uncore->gsi_offset;
+
+ if (!gen12_needs_ccs_aux_inv(engine))
+ return cs;
*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
*cs++ = AUX_INV;
- *cs++ = MI_NOOP;
+
+ *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
+ MI_SEMAPHORE_REGISTER_POLL |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
+ *cs++ = 0;
+ *cs++ = 0;
return cs;
}
@@ -202,8 +248,13 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
struct intel_engine_cs *engine = rq->engine;
- if (mode & EMIT_FLUSH) {
- u32 flags = 0;
+ /*
+ * On Aux CCS platforms the invalidation of the Aux
+ * table requires quiescing memory traffic beforehand
+ */
+ if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
+ u32 bit_group_0 = 0;
+ u32 bit_group_1 = 0;
int err;
u32 *cs;
@@ -211,32 +262,40 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
if (err)
return err;
- flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
- flags |= PIPE_CONTROL_FLUSH_L3;
- flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
- flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+
+ /*
+ * When required, in MTL and beyond platforms we
+ * need to set the CCS_FLUSH bit in the pipe control
+ */
+ if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
+ bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
+
+ bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
+ bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
/* Wa_1409600907:tgl,adl-p */
- flags |= PIPE_CONTROL_DEPTH_STALL;
- flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_FLUSH_ENABLE;
+ bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
+ bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+ bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_STORE_DATA_INDEX;
- flags |= PIPE_CONTROL_QW_WRITE;
+ bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX;
+ bit_group_1 |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_CS_STALL;
+ bit_group_1 |= PIPE_CONTROL_CS_STALL;
if (!HAS_3D_PIPELINE(engine->i915))
- flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
+ bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
else if (engine->class == COMPUTE_CLASS)
- flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
+ bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen12_emit_pipe_control(cs,
- PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
- flags, LRC_PPHWSP_SCRATCH_ADDR);
+ cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1,
+ LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(rq, cs);
}
@@ -267,10 +326,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
else if (engine->class == COMPUTE_CLASS)
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
- if (!HAS_FLAT_CCS(rq->engine->i915))
- count = 8 + 4;
- else
- count = 8;
+ count = 8;
+ if (gen12_needs_ccs_aux_inv(rq->engine))
+ count += 8;
cs = intel_ring_begin(rq, count);
if (IS_ERR(cs))
@@ -285,11 +343,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
- if (!HAS_FLAT_CCS(rq->engine->i915)) {
- /* hsdes: 1809175790 */
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_GFX_CCS_AUX_NV);
- }
+ cs = gen12_emit_aux_table_inv(engine, cs);
*cs++ = preparser_disable(false);
intel_ring_advance(rq, cs);
@@ -300,21 +354,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
- intel_engine_mask_t aux_inv = 0;
- u32 cmd, *cs;
+ u32 cmd = 4;
+ u32 *cs;
- cmd = 4;
if (mode & EMIT_INVALIDATE) {
cmd += 2;
- if (!HAS_FLAT_CCS(rq->engine->i915) &&
- (rq->engine->class == VIDEO_DECODE_CLASS ||
- rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
- aux_inv = rq->engine->mask &
- ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
- if (aux_inv)
- cmd += 4;
- }
+ if (gen12_needs_ccs_aux_inv(rq->engine))
+ cmd += 8;
}
cs = intel_ring_begin(rq, cmd);
@@ -338,6 +385,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
cmd |= MI_INVALIDATE_TLB;
if (rq->engine->class == VIDEO_DECODE_CLASS)
cmd |= MI_INVALIDATE_BSD;
+
+ if (gen12_needs_ccs_aux_inv(rq->engine) &&
+ rq->engine->class == COPY_ENGINE_CLASS)
+ cmd |= MI_FLUSH_DW_CCS;
}
*cs++ = cmd;
@@ -345,14 +396,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
*cs++ = 0; /* upper addr */
*cs++ = 0; /* value */
- if (aux_inv) { /* hsdes: 1809175790 */
- if (rq->engine->class == VIDEO_DECODE_CLASS)
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_VD0_AUX_NV);
- else
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_VE0_AUX_NV);
- }
+ cs = gen12_emit_aux_table_inv(rq->engine, cs);
if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(false);
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
index 655e5c00ddc2..867ba697aceb 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
@@ -13,6 +13,7 @@
#include "intel_gt_regs.h"
#include "intel_gpu_commands.h"
+struct intel_engine_cs;
struct intel_gt;
struct i915_request;
@@ -46,28 +47,32 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg);
+u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs);
static inline u32 *
-__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+__gen8_emit_pipe_control(u32 *batch, u32 bit_group_0,
+ u32 bit_group_1, u32 offset)
{
memset(batch, 0, 6 * sizeof(u32));
- batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0;
- batch[1] = flags1;
+ batch[0] = GFX_OP_PIPE_CONTROL(6) | bit_group_0;
+ batch[1] = bit_group_1;
batch[2] = offset;
return batch + 6;
}
-static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+static inline u32 *gen8_emit_pipe_control(u32 *batch,
+ u32 bit_group_1, u32 offset)
{
- return __gen8_emit_pipe_control(batch, 0, flags, offset);
+ return __gen8_emit_pipe_control(batch, 0, bit_group_1, offset);
}
-static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 bit_group_0,
+ u32 bit_group_1, u32 offset)
{
- return __gen8_emit_pipe_control(batch, flags0, flags1, offset);
+ return __gen8_emit_pipe_control(batch, bit_group_0,
+ bit_group_1, offset);
}
static inline u32 *
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 5d143e2a8db0..2bd8d98d2110 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -121,6 +121,7 @@
#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */
+#define MI_SEMAPHORE_REGISTER_POLL (1 << 16)
#define MI_SEMAPHORE_POLL (1 << 15)
#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
@@ -299,6 +300,7 @@
#define PIPE_CONTROL_QW_WRITE (1<<14)
#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
#define PIPE_CONTROL_DEPTH_STALL (1<<13)
+#define PIPE_CONTROL_CCS_FLUSH (1<<13) /* MTL+ */
#define PIPE_CONTROL_WRITE_FLUSH (1<<12)
#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 718cb2c80f79..2cdfb2f713d0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -332,9 +332,11 @@
#define GEN8_PRIVATE_PAT_HI _MMIO(0x40e0 + 4)
#define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4)
#define BSD_HWS_PGA_GEN7 _MMIO(0x4180)
-#define GEN12_GFX_CCS_AUX_NV _MMIO(0x4208)
-#define GEN12_VD0_AUX_NV _MMIO(0x4218)
-#define GEN12_VD1_AUX_NV _MMIO(0x4228)
+
+#define GEN12_CCS_AUX_INV _MMIO(0x4208)
+#define GEN12_VD0_AUX_INV _MMIO(0x4218)
+#define GEN12_VE0_AUX_INV _MMIO(0x4238)
+#define GEN12_BCS0_AUX_INV _MMIO(0x4248)
#define GEN8_RTCR _MMIO(0x4260)
#define GEN8_M1TCR _MMIO(0x4264)
@@ -342,14 +344,12 @@
#define GEN8_BTCR _MMIO(0x426c)
#define GEN8_VTCR _MMIO(0x4270)
-#define GEN12_VD2_AUX_NV _MMIO(0x4298)
-#define GEN12_VD3_AUX_NV _MMIO(0x42a8)
-#define GEN12_VE0_AUX_NV _MMIO(0x4238)
-
#define BLT_HWS_PGA_GEN7 _MMIO(0x4280)
-#define GEN12_VE1_AUX_NV _MMIO(0x42b8)
+#define GEN12_VD2_AUX_INV _MMIO(0x4298)
+#define GEN12_CCS0_AUX_INV _MMIO(0x42c8)
#define AUX_INV REG_BIT(0)
+
#define VEBOX_HWS_PGA_GEN7 _MMIO(0x4380)
#define GEN12_AUX_ERR_DBG _MMIO(0x43f4)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a4ec20aaafe2..9477c2422321 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1364,10 +1364,7 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
IS_DG2_G11(ce->engine->i915))
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
- /* hsdes: 1809175790 */
- if (!HAS_FLAT_CCS(ce->engine->i915))
- cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_GFX_CCS_AUX_NV);
+ cs = gen12_emit_aux_table_inv(ce->engine, cs);
/* Wa_16014892111 */
if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
@@ -1392,17 +1389,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
0);
- /* hsdes: 1809175790 */
- if (!HAS_FLAT_CCS(ce->engine->i915)) {
- if (ce->engine->class == VIDEO_DECODE_CLASS)
- cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_VD0_AUX_NV);
- else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
- cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_VE0_AUX_NV);
- }
-
- return cs;
+ return gen12_emit_aux_table_inv(ce->engine, cs);
}
static void