diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_ring_ops.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_ring_ops.c | 89 |
1 files changed, 65 insertions, 24 deletions
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 5b2b37b59813..9f327f27c072 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -7,9 +7,9 @@ #include <generated/xe_wa_oob.h> +#include "instructions/xe_gpu_commands.h" #include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" -#include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" #include "xe_exec_queue_types.h" @@ -17,6 +17,7 @@ #include "xe_lrc.h" #include "xe_macros.h" #include "xe_sched_job.h" +#include "xe_sriov.h" #include "xe_vm_types.h" #include "xe_vm.h" #include "xe_wa.h" @@ -79,6 +80,16 @@ static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) return i; } +static int emit_flush_dw(u32 *dw, int i) +{ + dw[i++] = MI_FLUSH_DW | MI_FLUSH_IMM_DW; + dw[i++] = 0; + dw[i++] = 0; + dw[i++] = 0; + + return i; +} + static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb, u32 *dw, int i) { @@ -210,7 +221,23 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, static u32 get_ppgtt_flag(struct xe_sched_job *job) { - return job->q->vm ? BIT(8) : 0; + if (job->q->vm && !job->ggtt) + return BIT(8); + + return 0; +} + +static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) +{ + dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT | + MI_COPY_MEM_MEM_DST_GGTT; + dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); + dw[i++] = 0; + dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc); + dw[i++] = 0; + dw[i++] = MI_NOOP; + + return i; } /* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */ @@ -221,6 +248,8 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc u32 ppgtt_flag = get_ppgtt_flag(job); struct xe_gt *gt = job->q->gt; + i = emit_copy_timestamp(lrc, dw, i); + if (job->ring_ops_flush_tlb) { dw[i++] = preparser_disable(true); i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), @@ -233,10 +262,12 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); - if (job->user_fence.used) + if (job->user_fence.used) { + i = emit_flush_dw(dw, i); i = emit_store_imm_ppgtt_posted(job->user_fence.addr, job->user_fence.value, dw, i); + } i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); @@ -270,6 +301,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, struct xe_device *xe = gt_to_xe(gt); bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; + i = emit_copy_timestamp(lrc, dw, i); + dw[i++] = preparser_disable(true); /* hsdes: 1809175790 */ @@ -292,10 +325,12 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); - if (job->user_fence.used) + if (job->user_fence.used) { + i = emit_flush_dw(dw, i); i = emit_store_imm_ppgtt_posted(job->user_fence.addr, job->user_fence.value, dw, i); + } i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); @@ -317,6 +352,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); u32 mask_flags = 0; + i = emit_copy_timestamp(lrc, dw, i); + dw[i++] = preparser_disable(true); if (lacks_render) mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; @@ -360,19 +397,23 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, { u32 dw[MAX_JOB_SIZE_DW], i = 0; + i = emit_copy_timestamp(lrc, dw, i); + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, dw, i); dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */ - i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i); + i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i); - /* XXX: Do we need this? Leaving for now. */ - dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(0, dw, i); - dw[i++] = preparser_disable(false); + if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) { + /* XXX: Do we need this? Leaving for now. */ + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(0, dw, i); + dw[i++] = preparser_disable(false); + } - i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i); + i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i); dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW; @@ -393,9 +434,9 @@ static void emit_job_gen12_gsc(struct xe_sched_job *job) xe_gt_assert(gt, job->q->width <= 1); /* no parallel submission for GSCCS */ - __emit_job_gen12_simple(job, job->q->lrc, - job->batch_addr[0], - xe_sched_job_seqno(job)); + __emit_job_gen12_simple(job, job->q->lrc[0], + job->ptrs[0].batch_addr, + xe_sched_job_lrc_seqno(job)); } static void emit_job_gen12_copy(struct xe_sched_job *job) @@ -403,15 +444,15 @@ static void emit_job_gen12_copy(struct xe_sched_job *job) int i; if (xe_sched_job_is_migration(job->q)) { - emit_migration_job_gen12(job, job->q->lrc, - xe_sched_job_seqno(job)); + emit_migration_job_gen12(job, job->q->lrc[0], + xe_sched_job_lrc_seqno(job)); return; } for (i = 0; i < job->q->width; ++i) - __emit_job_gen12_simple(job, job->q->lrc + i, - job->batch_addr[i], - xe_sched_job_seqno(job)); + __emit_job_gen12_simple(job, job->q->lrc[i], + job->ptrs[i].batch_addr, + xe_sched_job_lrc_seqno(job)); } static void emit_job_gen12_video(struct xe_sched_job *job) @@ -420,9 +461,9 @@ static void emit_job_gen12_video(struct xe_sched_job *job) /* FIXME: Not doing parallel handshake for now */ for (i = 0; i < job->q->width; ++i) - __emit_job_gen12_video(job, job->q->lrc + i, - job->batch_addr[i], - xe_sched_job_seqno(job)); + __emit_job_gen12_video(job, job->q->lrc[i], + job->ptrs[i].batch_addr, + xe_sched_job_lrc_seqno(job)); } static void emit_job_gen12_render_compute(struct xe_sched_job *job) @@ -430,9 +471,9 @@ static void emit_job_gen12_render_compute(struct xe_sched_job *job) int i; for (i = 0; i < job->q->width; ++i) - __emit_job_gen12_render_compute(job, job->q->lrc + i, - job->batch_addr[i], - xe_sched_job_seqno(job)); + __emit_job_gen12_render_compute(job, job->q->lrc[i], + job->ptrs[i].batch_addr, + xe_sched_job_lrc_seqno(job)); } static const struct xe_ring_ops ring_ops_gen12_gsc = { |