diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt/gen8_engine_cs.c')
| -rw-r--r-- | drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 391 |
1 files changed, 306 insertions, 85 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 94e0a5669f90..071c1cc45257 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -3,10 +3,13 @@ * Copyright © 2014 Intel Corporation */ +#include <drm/drm_print.h> + #include "gen8_engine_cs.h" -#include "i915_drv.h" -#include "intel_lrc.h" +#include "intel_engine_regs.h" #include "intel_gpu_commands.h" +#include "intel_gt.h" +#include "intel_lrc.h" #include "intel_ring.h" int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode) @@ -38,11 +41,11 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode) * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL * pipe control. */ - if (GRAPHICS_VER(rq->engine->i915) == 9) + if (GRAPHICS_VER(rq->i915) == 9) vf_flush_wa = true; /* WaForGAMHang:kbl */ - if (IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_B0)) + if (IS_KABYLAKE(rq->i915) && IS_GRAPHICS_STEP(rq->i915, 0, STEP_C0)) dc_flush_wa = true; } @@ -164,73 +167,155 @@ static u32 preparser_disable(bool state) return MI_ARB_CHECK | 1 << 8 | state; } -static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine) +static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine) { - static const i915_reg_t vd[] = { - GEN12_VD0_AUX_NV, - GEN12_VD1_AUX_NV, - GEN12_VD2_AUX_NV, - GEN12_VD3_AUX_NV, - }; - - static const i915_reg_t ve[] = { - GEN12_VE0_AUX_NV, - GEN12_VE1_AUX_NV, - }; - - if (engine->class == VIDEO_DECODE_CLASS) - return vd[engine->instance]; + switch (engine->id) { + case RCS0: + return GEN12_CCS_AUX_INV; + case BCS0: + return GEN12_BCS0_AUX_INV; + case VCS0: + return GEN12_VD0_AUX_INV; + case VCS2: + return GEN12_VD2_AUX_INV; + case VECS0: + return GEN12_VE0_AUX_INV; + case CCS0: + return GEN12_CCS0_AUX_INV; + default: + return INVALID_MMIO_REG; + } +} - if (engine->class == VIDEO_ENHANCEMENT_CLASS) - return ve[engine->instance]; +static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine) +{ + i915_reg_t reg = gen12_get_aux_inv_reg(engine); - GEM_BUG_ON("unknown aux_inv reg\n"); - return INVALID_MMIO_REG; + /* + * So far platforms supported by i915 having flat ccs do not require + * AUX invalidation. Check also whether the engine requires it. + */ + return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915); } -static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs) +u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) { - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(inv_reg); + i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine); + u32 gsi_offset = engine->gt->uncore->gsi_offset; + + if (!gen12_needs_ccs_aux_inv(engine)) + return cs; + + *cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; + *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset; *cs++ = AUX_INV; - *cs++ = MI_NOOP; + + *cs++ = MI_SEMAPHORE_WAIT_TOKEN | + MI_SEMAPHORE_REGISTER_POLL | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset; + *cs++ = 0; + *cs++ = 0; return cs; } +static int mtl_dummy_pipe_control(struct i915_request *rq) +{ + /* Wa_14016712196 */ + if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || + IS_DG2(rq->i915)) { + u32 *cs; + + /* dummy PIPE_CONTROL + depth flush */ + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + cs = gen12_emit_pipe_control(cs, + 0, + PIPE_CONTROL_DEPTH_CACHE_FLUSH, + LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(rq, cs); + } + + return 0; +} + int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) { - if (mode & EMIT_FLUSH) { - u32 flags = 0; + struct intel_engine_cs *engine = rq->engine; + + /* + * On Aux CCS platforms the invalidation of the Aux + * table requires quiescing memory traffic beforehand + */ + if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) { + u32 bit_group_0 = 0; + u32 bit_group_1 = 0; + int err; u32 *cs; - flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; - flags |= PIPE_CONTROL_FLUSH_L3; - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - /* Wa_1409600907:tgl */ - flags |= PIPE_CONTROL_DEPTH_STALL; - flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; - flags |= PIPE_CONTROL_FLUSH_ENABLE; + err = mtl_dummy_pipe_control(rq); + if (err) + return err; - flags |= PIPE_CONTROL_STORE_DATA_INDEX; - flags |= PIPE_CONTROL_QW_WRITE; + bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH; - flags |= PIPE_CONTROL_CS_STALL; + /* + * When required, in MTL and beyond platforms we + * need to set the CCS_FLUSH bit in the pipe control + */ + if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70)) + bit_group_0 |= PIPE_CONTROL_CCS_FLUSH; + + /* + * L3 fabric flush is needed for AUX CCS invalidation + * which happens as part of pipe-control so we can + * ignore PIPE_CONTROL_FLUSH_L3. Also PIPE_CONTROL_FLUSH_L3 + * deals with Protected Memory which is not needed for + * AUX CCS invalidation and lead to unwanted side effects. + */ + if ((mode & EMIT_FLUSH) && + GRAPHICS_VER_FULL(rq->i915) < IP_VER(12, 70)) + bit_group_1 |= PIPE_CONTROL_FLUSH_L3; + + bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH; + bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + /* Wa_1409600907:tgl,adl-p */ + bit_group_1 |= PIPE_CONTROL_DEPTH_STALL; + bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE; + bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE; + + bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX; + bit_group_1 |= PIPE_CONTROL_QW_WRITE; + + bit_group_1 |= PIPE_CONTROL_CS_STALL; + + if (!HAS_3D_PIPELINE(engine->i915)) + bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS; + else if (engine->class == COMPUTE_CLASS) + bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen12_emit_pipe_control(cs, - PIPE_CONTROL0_HDC_PIPELINE_FLUSH, - flags, LRC_PPHWSP_SCRATCH_ADDR); + cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1, + LRC_PPHWSP_SCRATCH_ADDR); intel_ring_advance(rq, cs); } if (mode & EMIT_INVALIDATE) { u32 flags = 0; - u32 *cs; + u32 *cs, count; + int err; + + err = mtl_dummy_pipe_control(rq); + if (err) + return err; flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TLB_INVALIDATE; @@ -245,7 +330,16 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) flags |= PIPE_CONTROL_CS_STALL; - cs = intel_ring_begin(rq, 8 + 4); + if (!HAS_3D_PIPELINE(engine->i915)) + flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; + else if (engine->class == COMPUTE_CLASS) + flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; + + count = 8; + if (gen12_needs_ccs_aux_inv(rq->engine)) + count += 8; + + cs = intel_ring_begin(rq, count); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -258,8 +352,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); - /* hsdes: 1809175790 */ - cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs); + cs = gen12_emit_aux_table_inv(engine, cs); *cs++ = preparser_disable(false); intel_ring_advance(rq, cs); @@ -270,16 +363,15 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) { - intel_engine_mask_t aux_inv = 0; - u32 cmd, *cs; + u32 cmd = 4; + u32 *cs; - cmd = 4; - if (mode & EMIT_INVALIDATE) + if (mode & EMIT_INVALIDATE) { cmd += 2; - if (mode & EMIT_INVALIDATE) - aux_inv = rq->engine->mask & ~BIT(BCS0); - if (aux_inv) - cmd += 2 * hweight8(aux_inv) + 2; + + if (gen12_needs_ccs_aux_inv(rq->engine)) + cmd += 8; + } cs = intel_ring_begin(rq, cmd); if (IS_ERR(cs)) @@ -302,6 +394,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) cmd |= MI_INVALIDATE_TLB; if (rq->engine->class == VIDEO_DECODE_CLASS) cmd |= MI_INVALIDATE_BSD; + + if (gen12_needs_ccs_aux_inv(rq->engine) && + rq->engine->class == COPY_ENGINE_CLASS) + cmd |= MI_FLUSH_DW_CCS; } *cs++ = cmd; @@ -309,18 +405,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) *cs++ = 0; /* upper addr */ *cs++ = 0; /* value */ - if (aux_inv) { /* hsdes: 1809175790 */ - struct intel_engine_cs *engine; - unsigned int tmp; - - *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv)); - for_each_engine_masked(engine, rq->engine->gt, - aux_inv, tmp) { - *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine)); - *cs++ = AUX_INV; - } - *cs++ = MI_NOOP; - } + cs = gen12_emit_aux_table_inv(rq->engine, cs); if (mode & EMIT_INVALIDATE) *cs++ = preparser_disable(false); @@ -395,6 +480,61 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq) return 0; } +static int __xehp_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags, + u32 arb) +{ + struct intel_context *ce = rq->context; + u32 wa_offset = lrc_indirect_bb(ce); + u32 *cs; + + GEM_BUG_ON(!ce->wa_bb_page); + + cs = intel_ring_begin(rq, 12); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_ARB_ON_OFF | arb; + + *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)); + *cs++ = wa_offset + DG2_PREDICATE_RESULT_WA; + *cs++ = 0; + + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + + /* Fixup stray MI_SET_PREDICATE as it prevents us executing the ring */ + *cs++ = MI_BATCH_BUFFER_START_GEN8; + *cs++ = wa_offset + DG2_PREDICATE_RESULT_BB; + *cs++ = 0; + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + + intel_ring_advance(rq, cs); + + return 0; +} + +int xehp_emit_bb_start_noarb(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) +{ + return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE); +} + +int xehp_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) +{ + return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE); +} + int gen8_emit_bb_start_noarb(struct i915_request *rq, u64 offset, u32 len, const unsigned int flags) @@ -506,7 +646,8 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - if (intel_engine_has_semaphores(rq->engine)) + if (intel_engine_has_semaphores(rq->engine) && + !intel_uc_uses_guc_submission(&rq->engine->gt->uc)) cs = emit_preempt_busywait(rq, cs); rq->tail = intel_ring_offset(rq, cs); @@ -528,6 +669,8 @@ u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TLB_INVALIDATE | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE, @@ -545,15 +688,21 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TLB_INVALIDATE | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); + + /*XXX: Look at gen8_emit_fini_breadcrumb_rcs */ cs = gen8_emit_ggtt_write_rcs(cs, rq->fence.seqno, hwsp_offset(rq), - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE); + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL); return gen8_emit_fini_breadcrumb_tail(rq, cs); } @@ -592,15 +741,63 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs) return cs; } +/* Wa_14014475959:dg2 */ +/* Wa_16019325821 */ +/* Wa_14019159160 */ +#define HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET 0x540 +static u32 hold_switchout_semaphore_offset(struct i915_request *rq) +{ + return i915_ggtt_offset(rq->context->state) + + (LRC_PPHWSP_PN * PAGE_SIZE) + HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET; +} + +/* Wa_14014475959:dg2 */ +/* Wa_16019325821 */ +/* Wa_14019159160 */ +static u32 *hold_switchout_emit_wa_busywait(struct i915_request *rq, u32 *cs) +{ + int i; + + *cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL | + MI_ATOMIC_MOVE; + *cs++ = hold_switchout_semaphore_offset(rq); + *cs++ = 0; + *cs++ = 1; + + /* + * When MI_ATOMIC_INLINE_DATA set this command must be 11 DW + (1 NOP) + * to align. 4 DWs above + 8 filler DWs here. + */ + for (i = 0; i < 8; ++i) + *cs++ = 0; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = hold_switchout_semaphore_offset(rq); + *cs++ = 0; + + return cs; +} + static __always_inline u32* gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) { *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - if (intel_engine_has_semaphores(rq->engine)) + if (intel_engine_has_semaphores(rq->engine) && + !intel_uc_uses_guc_submission(&rq->engine->gt->uc)) cs = gen12_emit_preempt_busywait(rq, cs); + /* Wa_14014475959:dg2 */ + /* Wa_16019325821 */ + /* Wa_14019159160 */ + if (intel_engine_uses_wa_hold_switchout(rq->engine)) + cs = hold_switchout_emit_wa_busywait(rq, cs); + rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -616,19 +813,43 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { + struct drm_i915_private *i915 = rq->i915; + struct intel_gt *gt = rq->engine->gt; + u32 flags = (PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TLB_INVALIDATE | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); + + if (GRAPHICS_VER_FULL(rq->i915) < IP_VER(12, 70)) + flags |= PIPE_CONTROL_FLUSH_L3; + + /* Wa_14016712196 */ + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915)) + /* dummy PIPE_CONTROL + depth flush */ + cs = gen12_emit_pipe_control(cs, 0, + PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); + + if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 55)) + /* Wa_1409600907 */ + flags |= PIPE_CONTROL_DEPTH_STALL; + + if (!HAS_3D_PIPELINE(rq->i915)) + flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; + else if (rq->engine->class == COMPUTE_CLASS) + flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; + + cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0); + + /*XXX: Look at gen8_emit_fini_breadcrumb_rcs */ cs = gen12_emit_ggtt_write_rcs(cs, rq->fence.seqno, hwsp_offset(rq), - PIPE_CONTROL0_HDC_PIPELINE_FLUSH, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - /* Wa_1409600907:tgl */ - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE); + 0, + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL); return gen12_emit_fini_breadcrumb_tail(rq, cs); } |
