diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_lrc.c')
| -rw-r--r-- | drivers/gpu/drm/i915/gt/intel_lrc.c | 273 |
1 files changed, 166 insertions, 107 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a4ec20aaafe2..e8927ad49142 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3,6 +3,8 @@ * Copyright © 2014 Intel Corporation */ +#include <drm/drm_print.h> + #include "gem/i915_gem_lmem.h" #include "gen8_engine_cs.h" @@ -546,47 +548,6 @@ static const u8 gen12_rcs_offsets[] = { END }; -static const u8 xehp_rcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - END -}; - static const u8 dg2_rcs_offsets[] = { NOP(1), LRI(15, POSTED), @@ -695,8 +656,6 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) return mtl_rcs_offsets; else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return dg2_rcs_offsets; - else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) - return xehp_rcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_rcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 11) @@ -719,7 +678,7 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return 0x70; else if (GRAPHICS_VER(engine->i915) >= 12) return 0x60; @@ -733,7 +692,7 @@ static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) static int lrc_ring_bb_offset(const struct intel_engine_cs *engine) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return 0x80; else if (GRAPHICS_VER(engine->i915) >= 12) return 0x70; @@ -748,7 +707,7 @@ static int lrc_ring_bb_offset(const struct intel_engine_cs *engine) static int lrc_ring_gpr0(const struct intel_engine_cs *engine) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return 0x84; else if (GRAPHICS_VER(engine->i915) >= 12) return 0x74; @@ -794,8 +753,7 @@ static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) { - - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) /* * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL * simply to match the RCS context image layout. @@ -829,6 +787,18 @@ lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) } static void +lrc_setup_bb_per_ctx(u32 *regs, + const struct intel_engine_cs *engine, + u32 ctx_bb_ggtt_addr) +{ + GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1); + regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = + ctx_bb_ggtt_addr | + PER_CTX_BB_FORCE | + PER_CTX_BB_VALID; +} + +static void lrc_setup_indirect_ctx(u32 *regs, const struct intel_engine_cs *engine, u32 ctx_bb_ggtt_addr, @@ -845,6 +815,29 @@ lrc_setup_indirect_ctx(u32 *regs, lrc_ring_indirect_offset_default(engine) << 6; } +static bool ctx_needs_runalone(const struct intel_context *ce) +{ + struct i915_gem_context *gem_ctx; + bool ctx_is_protected = false; + + /* + * Wa_14019159160 - Case 2. + * On some platforms, protected contexts require setting + * the LRC run-alone bit or else the encryption/decryption will not happen. + * NOTE: Case 2 only applies to PXP use-case of said workaround. + */ + if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 70) && + (ce->engine->class == COMPUTE_CLASS || ce->engine->class == RENDER_CLASS)) { + rcu_read_lock(); + gem_ctx = rcu_dereference(ce->gem_context); + if (gem_ctx) + ctx_is_protected = gem_ctx->uses_protected_content; + rcu_read_unlock(); + } + + return ctx_is_protected; +} + static void init_common_regs(u32 * const regs, const struct intel_context *ce, const struct intel_engine_cs *engine, @@ -860,6 +853,9 @@ static void init_common_regs(u32 * const regs, if (GRAPHICS_VER(engine->i915) < 11) ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | CTX_CTRL_RS_CTX_ENABLE); + /* Wa_14019159160 - Case 2.*/ + if (ctx_needs_runalone(ce)) + ctl |= _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_RUNALONE_MODE); regs[CTX_CONTEXT_CONTROL] = ctl; regs[CTX_TIMESTAMP] = ce->stats.runtime.last; @@ -997,7 +993,13 @@ static u32 context_wa_bb_offset(const struct intel_context *ce) return PAGE_SIZE * ce->wa_bb_page; } -static u32 *context_indirect_bb(const struct intel_context *ce) +/* + * per_ctx below determines which WABB section is used. + * When true, the function returns the location of the + * PER_CTX_BB. When false, the function returns the + * location of the INDIRECT_CTX. + */ +static u32 *context_wabb(const struct intel_context *ce, bool per_ctx) { void *ptr; @@ -1006,6 +1008,7 @@ static u32 *context_indirect_bb(const struct intel_context *ce) ptr = ce->lrc_reg_state; ptr -= LRC_STATE_OFFSET; /* back to start of context image */ ptr += context_wa_bb_offset(ce); + ptr += per_ctx ? PAGE_SIZE : 0; return ptr; } @@ -1018,9 +1021,8 @@ void lrc_init_state(struct intel_context *ce, set_redzone(state, engine); - if (engine->default_state) { - shmem_read(engine->default_state, 0, - state, engine->context_size); + if (ce->default_state) { + shmem_read(ce->default_state, 0, state, engine->context_size); __set_bit(CONTEXT_VALID_BIT, &ce->flags); inhibit = false; } @@ -1082,7 +1084,8 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) if (GRAPHICS_VER(engine->i915) >= 12) { ce->wa_bb_page = context_size / PAGE_SIZE; - context_size += PAGE_SIZE; + /* INDIRECT_CTX and PER_CTX_BB need separate pages. */ + context_size += PAGE_SIZE * 2; } if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) { @@ -1092,10 +1095,19 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) obj = i915_gem_object_create_lmem(engine->i915, context_size, I915_BO_ALLOC_PM_VOLATILE); - if (IS_ERR(obj)) + if (IS_ERR(obj)) { obj = i915_gem_object_create_shmem(engine->i915, context_size); - if (IS_ERR(obj)) - return ERR_CAST(obj); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + /* + * Wa_22016122933: For Media version 13.0, all Media GT shared + * memory needs to be mapped as WC on CPU side and UC (PAT + * index 2) on GPU side. + */ + if (intel_gt_needs_wa_22016122933(engine->gt)) + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + } vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { @@ -1122,6 +1134,9 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) GEM_BUG_ON(ce->state); + if (!intel_context_has_own_state(ce)) + ce->default_state = engine->default_state; + vma = __lrc_alloc_state(ce, engine); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -1184,9 +1199,9 @@ lrc_pre_pin(struct intel_context *ce, GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); *vaddr = i915_gem_object_pin_map(ce->state->obj, - i915_coherent_map_type(ce->engine->i915, - ce->state->obj, - false) | + intel_gt_coherent_map_type(ce->engine->gt, + ce->state->obj, + false) | I915_MAP_OVERRIDE); return PTR_ERR_OR_ZERO(*vaddr); @@ -1308,29 +1323,6 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) } /* - * On DG2 during context restore of a preempted context in GPGPU mode, - * RCS restore hang is detected. This is extremely timing dependent. - * To address this below sw wabb is implemented for DG2 A steppings. - */ -static u32 * -dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs) -{ - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base)); - *cs++ = 0x21; - - *cs++ = MI_LOAD_REGISTER_REG; - *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); - *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1); - - *cs++ = MI_LOAD_REGISTER_REG; - *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); - *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2); - - return cs; -} - -/* * The bspec's tuning guide asks us to program a vertical watermark value of * 0x3FF. However this register is not saved/restored properly by the * hardware, so we're required to apply the desired value via INDIRECT_CTX @@ -1348,30 +1340,34 @@ dg2_emit_draw_watermark_setting(u32 *cs) } static u32 * +gen12_invalidate_state_cache(u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2); + *cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE); + return cs; +} + +static u32 * gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) { cs = gen12_emit_timestamp_wa(ce, cs); cs = gen12_emit_cmd_buf_wa(ce, cs); cs = gen12_emit_restore_scratch(ce, cs); - /* Wa_22011450934:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0)) - cs = dg2_emit_rcs_hang_wabb(ce, cs); - /* Wa_16013000631:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(ce->engine->i915)) + if (IS_DG2_G11(ce->engine->i915)) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); - /* hsdes: 1809175790 */ - if (!HAS_FLAT_CCS(ce->engine->i915)) - cs = gen12_emit_aux_table_inv(ce->engine->gt, - cs, GEN12_GFX_CCS_AUX_NV); + cs = gen12_emit_aux_table_inv(ce->engine, cs); + + /* Wa_18022495364 */ + if (IS_GFX_GT_IP_RANGE(ce->engine->gt, IP_VER(12, 0), IP_VER(12, 10))) + cs = gen12_invalidate_state_cache(cs); /* Wa_16014892111 */ - if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_DG2(ce->engine->i915)) cs = dg2_emit_draw_watermark_setting(cs); @@ -1385,32 +1381,94 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_restore_scratch(ce, cs); /* Wa_16013000631:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(ce->engine->i915)) + if (IS_DG2_G11(ce->engine->i915)) if (ce->engine->class == COMPUTE_CLASS) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); - /* hsdes: 1809175790 */ - if (!HAS_FLAT_CCS(ce->engine->i915)) { - if (ce->engine->class == VIDEO_DECODE_CLASS) - cs = gen12_emit_aux_table_inv(ce->engine->gt, - cs, GEN12_VD0_AUX_NV); - else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS) - cs = gen12_emit_aux_table_inv(ce->engine->gt, - cs, GEN12_VE0_AUX_NV); - } + return gen12_emit_aux_table_inv(ce->engine, cs); +} + +static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs) +{ + struct intel_gt *gt = ce->engine->gt; + int mocs = gt->mocs.uc_index << 1; + + /** + * Wa_16018031267 / Wa_16018063123 requires that SW forces the + * main copy engine arbitration into round robin mode. We + * additionally need to submit the following WABB blt command + * to produce 4 subblits with each subblit generating 0 byte + * write requests as WABB: + * + * XY_FASTCOLOR_BLT + * BG0 -> 5100000E + * BG1 -> 0000003F (Dest pitch) + * BG2 -> 00000000 (X1, Y1) = (0, 0) + * BG3 -> 00040001 (X2, Y2) = (1, 4) + * BG4 -> scratch + * BG5 -> scratch + * BG6-12 -> 00000000 + * BG13 -> 20004004 (Surf. Width= 2,Surf. Height = 5 ) + * BG14 -> 00000010 (Qpitch = 4) + * BG15 -> 00000000 + */ + *cs++ = XY_FAST_COLOR_BLT_CMD | (16 - 2); + *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 0x3f; + *cs++ = 0; + *cs++ = 4 << 16 | 1; + *cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd.vma)); + *cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd.vma)); + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0x20004004; + *cs++ = 0x10; + *cs++ = 0; + + return cs; +} + +static u32 * +xehp_emit_per_ctx_bb(const struct intel_context *ce, u32 *cs) +{ + /* Wa_16018031267, Wa_16018063123 */ + if (NEEDS_FASTCOLOR_BLT_WABB(ce->engine)) + cs = xehp_emit_fastcolor_blt_wabb(ce, cs); return cs; } static void +setup_per_ctx_bb(const struct intel_context *ce, + const struct intel_engine_cs *engine, + u32 *(*emit)(const struct intel_context *, u32 *)) +{ + /* Place PER_CTX_BB on next page after INDIRECT_CTX */ + u32 * const start = context_wabb(ce, true); + u32 *cs; + + cs = emit(ce, start); + + /* PER_CTX_BB must manually terminate */ + *cs++ = MI_BATCH_BUFFER_END; + + GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs)); + lrc_setup_bb_per_ctx(ce->lrc_reg_state, engine, + lrc_indirect_bb(ce) + PAGE_SIZE); +} + +static void setup_indirect_ctx_bb(const struct intel_context *ce, const struct intel_engine_cs *engine, u32 *(*emit)(const struct intel_context *, u32 *)) { - u32 * const start = context_indirect_bb(ce); + u32 * const start = context_wabb(ce, false); u32 *cs; cs = emit(ce, start); @@ -1509,6 +1567,7 @@ u32 lrc_update_regs(const struct intel_context *ce, /* Mutually exclusive wrt to global indirect bb */ GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size); setup_indirect_ctx_bb(ce, engine, fn); + setup_per_ctx_bb(ce, engine, xehp_emit_per_ctx_bb); } return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE; |
