path: root/drivers/gpu/drm/xe/xe_ring_ops.c
author     Thomas Hellström <thomas.hellstrom@linux.intel.com>  2023-06-05 15:04:54 +0200
committer  Rodrigo Vivi <rodrigo.vivi@intel.com>                2023-12-19 18:35:20 -0500
commit     85dbfe47d07cddeac959ccc9352c4b0f1683225b (patch)
tree       51df37529303676ec02140aa973218e1ef32ff98 /drivers/gpu/drm/xe/xe_ring_ops.c
parent     5eeb8b443875f2a6f751ed2c77cc410fad6b2e61 (diff)
drm/xe: Invalidate TLB also on bind if in scratch page mode
For scratch table mode we need to cover the case where a scratch PTE might
have been pre-fetched and cached and used instead of that of the newly
bound vma.

For compute vms, invalidate TLB globally using GuC before signalling bind
complete. For !long-running vms, invalidate TLB at batch start.

Also document how TLB invalidation works.

v2:
- Fix a pointer to the comment about TLB invalidation (Jose Souza).
- Add a bool to the vm whether we want to invalidate TLB at batch start.
- Invalidate TLB also on BCS- and video engines at batch start where needed.
- Use BIT() macro instead of explicit shift.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Tested-by: José Roberto de Souza <jose.souza@intel.com> #v1
Reported-by: José Roberto de Souza <jose.souza@intel.com> #v1
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/291
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/291
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
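The vm-side policy that drives the new batch_invalidate_tlb checks in the diff
below is set up outside this file (the diffstat is limited to xe_ring_ops.c, so
that hunk is not shown here). A minimal sketch of the idea, using hypothetical
names rather than the actual xe_vm code:

    #include <stdbool.h>

    /*
     * Hypothetical sketch, not part of this diff: a vm in scratch page mode
     * that is not long-running asks for a TLB invalidation at batch start,
     * so a stale, prefetched scratch PTE cannot be used in place of the PTE
     * of a newly bound vma. Long-running (compute) vms instead get a global
     * GuC TLB invalidation before the bind is signalled complete.
     */
    struct example_vm {
            bool scratch_page;              /* vm uses scratch PTEs */
            bool long_running;              /* compute / long-running vm */
            bool batch_invalidate_tlb;      /* consumed by the ring ops below */
    };

    static void example_set_tlb_policy(struct example_vm *vm)
    {
            vm->batch_invalidate_tlb = vm->scratch_page && !vm->long_running;
    }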
Diffstat (limited to 'drivers/gpu/drm/xe/xe_ring_ops.c')
-rw-r--r--  drivers/gpu/drm/xe/xe_ring_ops.c  47
1 file changed, 36 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 45117a2ab1a0..215606b5fae0 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -15,6 +15,7 @@
#include "xe_macros.h"
#include "xe_sched_job.h"
#include "xe_vm_types.h"
+#include "xe_vm.h"
/*
* 3D-related flags that can't be set on _engines_ that lack access to the 3D
@@ -74,9 +75,11 @@ static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
return i;
}
-static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
+static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb,
+ u32 *dw, int i)
{
- dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+ dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW |
+ (invalidate_tlb ? MI_INVALIDATE_TLB : 0);
dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
dw[i++] = 0;
dw[i++] = value;
@@ -107,7 +110,8 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
return i;
}
-static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i)
+static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
+ int i)
{
u32 flags = PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
@@ -119,6 +123,9 @@ static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i)
PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_STORE_DATA_INDEX;
+ if (invalidate_tlb)
+ flags |= PIPE_CONTROL_TLB_INVALIDATE;
+
flags &= ~mask_flags;
dw[i++] = GFX_OP_PIPE_CONTROL(6);
@@ -170,9 +177,17 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
{
u32 dw[MAX_JOB_SIZE_DW], i = 0;
u32 ppgtt_flag = get_ppgtt_flag(job);
-
- i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
- seqno, dw, i);
+ struct xe_vm *vm = job->engine->vm;
+
+ if (vm->batch_invalidate_tlb) {
+ dw[i++] = preparser_disable(true);
+ i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+ seqno, true, dw, i);
+ dw[i++] = preparser_disable(false);
+ } else {
+ i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+ seqno, dw, i);
+ }
i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
@@ -181,7 +196,7 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
job->user_fence.value,
dw, i);
- i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i);
+ i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
i = emit_user_interrupt(dw, i);
@@ -210,6 +225,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
struct xe_gt *gt = job->engine->gt;
struct xe_device *xe = gt_to_xe(gt);
bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE;
+ struct xe_vm *vm = job->engine->vm;
dw[i++] = preparser_disable(true);
@@ -220,10 +236,16 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
else
i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
}
+
+ if (vm->batch_invalidate_tlb)
+ i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+ seqno, true, dw, i);
+
dw[i++] = preparser_disable(false);
- i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
- seqno, dw, i);
+ if (!vm->batch_invalidate_tlb)
+ i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+ seqno, dw, i);
i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
@@ -232,7 +254,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
job->user_fence.value,
dw, i);
- i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i);
+ i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
i = emit_user_interrupt(dw, i);
@@ -250,6 +272,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
struct xe_gt *gt = job->engine->gt;
struct xe_device *xe = gt_to_xe(gt);
bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
+ struct xe_vm *vm = job->engine->vm;
u32 mask_flags = 0;
dw[i++] = preparser_disable(true);
@@ -257,7 +280,9 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
- i = emit_pipe_invalidate(mask_flags, dw, i);
+
+ /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
+ i = emit_pipe_invalidate(mask_flags, vm->batch_invalidate_tlb, dw, i);
/* hsdes: 1809175790 */
if (has_aux_ccs(xe))