author     Jani Nikula <jani.nikula@intel.com>   2024-06-19 11:38:31 +0300
committer  Jani Nikula <jani.nikula@intel.com>   2024-06-19 11:38:31 +0300
commit     d754ed2821fd9675d203cb73c4afcd593e28b7d0 (patch)
tree       cd16683cd956a7c334d7e1b3baf02e2e7baa729c /drivers/gpu/drm/xe/xe_migrate.c
parent     dcaacff03a9fa2838f936e1009b4b7ad56807152 (diff)
parent     1ddaaa244021aba8496536a6627b4ad2bc0f936a (diff)
Merge drm/drm-next into drm-intel-next
Sync to v6.10-rc3.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'drivers/gpu/drm/xe/xe_migrate.c')
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.c  |  90
1 file changed, 58 insertions(+), 32 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 9f6e9b7f11c8..7e3fb33110d9 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -14,8 +14,8 @@
#include <generated/xe_wa_oob.h>
+#include "instructions/xe_gpu_commands.h"
#include "instructions/xe_mi_commands.h"
-#include "regs/xe_gpu_commands.h"
#include "regs/xe_gtt_defs.h"
#include "tests/xe_test.h"
#include "xe_assert.h"
@@ -34,7 +34,6 @@
#include "xe_sync.h"
#include "xe_trace.h"
#include "xe_vm.h"
-#include "xe_wa.h"
/**
* struct xe_migrate - migrate context.
@@ -70,7 +69,7 @@ struct xe_migrate {
#define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */
#define MAX_CCS_LIMITED_TRANSFER SZ_4M /* XE_PAGE_SIZE * (FIELD_MAX(XE2_CCS_SIZE_MASK) + 1) */
-#define NUM_KERNEL_PDE 17
+#define NUM_KERNEL_PDE 15
#define NUM_PT_SLOTS 32
#define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M
#define MAX_NUM_PTE 512
@@ -138,10 +137,11 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
struct xe_device *xe = tile_to_xe(tile);
u16 pat_index = xe->pat.idx[XE_CACHE_WB];
u8 id = tile->id;
- u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
+ u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level,
+ num_setup = num_level + 1;
u32 map_ofs, level, i;
struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo;
- u64 entry;
+ u64 entry, pt30_ofs;
/* Can't bump NUM_PT_SLOTS too high */
BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE);
@@ -161,10 +161,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
if (IS_ERR(bo))
return PTR_ERR(bo);
- entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, pat_index);
+ /* PT31 reserved for 2M identity map */
+ pt30_ofs = bo->size - 2 * XE_PAGE_SIZE;
+ entry = vm->pt_ops->pde_encode_bo(bo, pt30_ofs, pat_index);
xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
- map_ofs = (num_entries - num_level) * XE_PAGE_SIZE;
+ map_ofs = (num_entries - num_setup) * XE_PAGE_SIZE;
/* Map the entire BO in our level 0 pt */
for (i = 0, level = 0; i < num_entries; level++) {
@@ -235,7 +237,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
}
/* Write PDE's that point to our BO. */
- for (i = 0; i < num_entries - num_level; i++) {
+ for (i = 0; i < map_ofs / PAGE_SIZE; i++) {
entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE,
pat_index);
@@ -253,28 +255,54 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
/* Identity map the entire vram at 256GiB offset */
if (IS_DGFX(xe)) {
u64 pos, ofs, flags;
+ /* XXX: Unclear if this should be usable_size? */
+ u64 vram_limit = xe->mem.vram.actual_physical_size +
+ xe->mem.vram.dpa_base;
level = 2;
ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8;
flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level,
true, 0);
+ xe_assert(xe, IS_ALIGNED(xe->mem.vram.usable_size, SZ_2M));
+
/*
- * Use 1GB pages, it shouldn't matter the physical amount of
- * vram is less, when we don't access it.
+ * Use 1GB pages when possible, last chunk always use 2M
+ * pages as mixing reserved memory (stolen, WOCPM) with a single
+ * mapping is not allowed on certain platforms.
*/
- for (pos = xe->mem.vram.dpa_base;
- pos < xe->mem.vram.actual_physical_size + xe->mem.vram.dpa_base;
- pos += SZ_1G, ofs += 8)
+ for (pos = xe->mem.vram.dpa_base; pos < vram_limit;
+ pos += SZ_1G, ofs += 8) {
+ if (pos + SZ_1G >= vram_limit) {
+ u64 pt31_ofs = bo->size - XE_PAGE_SIZE;
+
+ entry = vm->pt_ops->pde_encode_bo(bo, pt31_ofs,
+ pat_index);
+ xe_map_wr(xe, &bo->vmap, ofs, u64, entry);
+
+ flags = vm->pt_ops->pte_encode_addr(xe, 0,
+ pat_index,
+ level - 1,
+ true, 0);
+
+ for (ofs = pt31_ofs; pos < vram_limit;
+ pos += SZ_2M, ofs += 8)
+ xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
+ break; /* Ensure pos == vram_limit assert correct */
+ }
+
xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
+ }
+
+ xe_assert(xe, pos == vram_limit);
}
/*
* Example layout created above, with root level = 3:
* [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's
* [PT8]: Kernel PT for VM_BIND, 4 KiB PTE's
- * [PT9...PT28]: Userspace PT's for VM_BIND, 4 KiB PTE's
- * [PT29 = PDE 0] [PT30 = PDE 1] [PT31 = PDE 2]
+ * [PT9...PT27]: Userspace PT's for VM_BIND, 4 KiB PTE's
+ * [PT28 = PDE 0] [PT29 = PDE 1] [PT30 = PDE 2] [PT31 = 2M vram identity map]
*
* This makes the lowest part of the VM point to the pagetables.
* Hence the lowest 2M in the vm should point to itself, with a few writes
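The reworked loop above splits the identity map into 1 GiB entries plus a
2 MiB-grained tail that lands in the reserved PT31 page. A minimal standalone
sketch of that chunking rule (illustrative names; the real code writes PTEs
via xe_map_wr() rather than printing):

/* Map [base, limit) with 1 GiB entries, but always switch to 2 MiB
 * entries for the final chunk so reserved ranges (stolen, WOCPM) never
 * share a huge page with ordinary vram. */
#include <stdint.h>
#include <stdio.h>

#define SZ_2M (2ULL << 20)
#define SZ_1G (1ULL << 30)

static void map_identity(uint64_t base, uint64_t limit)
{
	uint64_t pos;

	for (pos = base; pos < limit; pos += SZ_1G) {
		if (pos + SZ_1G >= limit) {
			/* Final chunk: 2 MiB granularity to the end. */
			for (; pos < limit; pos += SZ_2M)
				printf("2M PTE -> %#llx\n",
				       (unsigned long long)pos);
			break;
		}
		printf("1G PDE -> %#llx\n", (unsigned long long)pos);
	}
}

int main(void)
{
	map_identity(0, 3 * SZ_1G + 4 * SZ_2M); /* odd-sized toy "vram" */
	return 0;
}

Note the >= in the last-chunk test: even an exactly 1 GiB-aligned tail is
mapped with 2 MiB pages, matching the comment in the hunk.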
@@ -300,10 +328,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
}
/*
- * Due to workaround 16017236439, odd instance hardware copy engines are
- * faster than even instance ones.
- * This function returns the mask involving all fast copy engines and the
- * reserved copy engine to be used as logical mask for migrate engine.
* Including the reserved copy engine is required to avoid deadlocks due to
* migrate jobs servicing the faults gets stuck behind the job that faulted.
*/
@@ -317,8 +341,7 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
if (hwe->class != XE_ENGINE_CLASS_COPY)
continue;
- if (!XE_WA(gt, 16017236439) ||
- xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1)
+ if (xe_gt_is_usm_hwe(gt, hwe))
logical_mask |= BIT(hwe->logical_instance);
}
@@ -369,6 +392,10 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
if (!hwe || !logical_mask)
return ERR_PTR(-EINVAL);
+ /*
+ * XXX: Currently only reserving 1 (likely slow) BCS instance on
+ * PVC, may want to revisit if performance is needed.
+ */
m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
EXEC_QUEUE_FLAG_KERNEL |
EXEC_QUEUE_FLAG_PERMANENT |
@@ -385,6 +412,9 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
}
mutex_init(&m->job_mutex);
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&m->job_mutex);
+ fs_reclaim_release(GFP_KERNEL);
err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m);
if (err)
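The fs_reclaim_acquire()/might_lock()/fs_reclaim_release() triple added above
primes lockdep once at init time: it records that job_mutex may be taken in
the reclaim path, so any GFP_KERNEL allocation made while holding the mutex
is flagged immediately rather than on a rare OOM path. A kernel-only sketch
of the same pattern (helper name is illustrative; this builds only in-tree):

#include <linux/gfp.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>

static void prime_reclaim_lockdep(struct mutex *lock)
{
	fs_reclaim_acquire(GFP_KERNEL);	/* enter fictitious reclaim context */
	might_lock(lock);		/* assert: lock may be taken here */
	fs_reclaim_release(GFP_KERNEL);	/* leave fictitious reclaim context */
}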
@@ -809,7 +839,6 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
IS_DGFX(xe) ? dst_is_vram : dst_is_pltt,
src_L0, ccs_ofs, copy_ccs);
- mutex_lock(&m->job_mutex);
job = xe_bb_create_migration_job(m->q, bb,
xe_migrate_batch_base(m, usm),
update_idx);
@@ -829,6 +858,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
goto err_job;
}
+ mutex_lock(&m->job_mutex);
xe_sched_job_arm(job);
dma_fence_put(fence);
fence = dma_fence_get(&job->drm.s_fence->finished);
@@ -846,7 +876,6 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
err_job:
xe_sched_job_put(job);
err:
- mutex_unlock(&m->job_mutex);
xe_bb_free(bb, NULL);
err_sync:
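The hunks above move mutex_lock(&m->job_mutex) from before job creation to
just before xe_sched_job_arm(), so all fallible setup runs unlocked and the
error paths no longer need to unlock; the matching hunks in
xe_migrate_clear() and xe_migrate_update_pgtables() below apply the same
movement. A toy standalone illustration of why the narrowed critical section
simplifies unwinding (names assumed for the demo):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t job_mutex = PTHREAD_MUTEX_INITIALIZER;

static int submit(int fail_setup)
{
	if (fail_setup)
		return -1;	/* unwind without ever touching the lock */

	/* Lock only around the publish step (arm + push in the diff). */
	pthread_mutex_lock(&job_mutex);
	puts("arm + push under the lock");
	pthread_mutex_unlock(&job_mutex);
	return 0;
}

int main(void)
{
	submit(1);	/* error path: no unlock needed */
	submit(0);	/* success path: lock held only briefly */
	return 0;
}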
@@ -936,8 +965,8 @@ static bool has_service_copy_support(struct xe_gt *gt)
* all of the actual service copy engines (BCS1-BCS8) have been fused
* off.
*/
- return gt->info.__engine_mask & GENMASK(XE_HW_ENGINE_BCS8,
- XE_HW_ENGINE_BCS1);
+ return gt->info.engine_mask & GENMASK(XE_HW_ENGINE_BCS8,
+ XE_HW_ENGINE_BCS1);
}
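The fix above switches from the raw __engine_mask to engine_mask, which
reflects fused-off engines, so the test really answers "did any service copy
engine survive fusing". A standalone illustration of the GENMASK-style
presence test (the BCS bit positions are hypothetical, chosen for the demo):

#include <stdint.h>
#include <stdio.h>

/* Bits l..h set, mirroring the kernel's GENMASK(h, l). */
#define GENMASK64(h, l) ((~0ULL >> (63 - (h))) & (~0ULL << (l)))

enum { BCS0 = 0, BCS1 = 1, BCS8 = 8 };	/* hypothetical bit positions */

int main(void)
{
	uint64_t engine_mask = 1ULL << BCS0;	/* only BCS0 left after fusing */
	uint64_t service = GENMASK64(BCS8, BCS1);

	printf("service copy engines present: %s\n",
	       (engine_mask & service) ? "yes" : "no");
	return 0;
}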
static u32 emit_clear_cmd_len(struct xe_gt *gt)
@@ -1046,7 +1075,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
flush_flags = MI_FLUSH_DW_CCS;
}
- mutex_lock(&m->job_mutex);
job = xe_bb_create_migration_job(m->q, bb,
xe_migrate_batch_base(m, usm),
update_idx);
@@ -1069,6 +1097,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
goto err_job;
}
+ mutex_lock(&m->job_mutex);
xe_sched_job_arm(job);
dma_fence_put(fence);
fence = dma_fence_get(&job->drm.s_fence->finished);
@@ -1085,7 +1114,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
err_job:
xe_sched_job_put(job);
err:
- mutex_unlock(&m->job_mutex);
xe_bb_free(bb, NULL);
err_sync:
/* Sync partial copies if any. FIXME: job_mutex? */
@@ -1379,9 +1407,6 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
write_pgtable(tile, bb, 0, &updates[i], pt_update);
}
- if (!q)
- mutex_lock(&m->job_mutex);
-
job = xe_bb_create_migration_job(q ?: m->q, bb,
xe_migrate_batch_base(m, usm),
update_idx);
@@ -1422,6 +1447,9 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
if (err)
goto err_job;
}
+ if (!q)
+ mutex_lock(&m->job_mutex);
+
xe_sched_job_arm(job);
fence = dma_fence_get(&job->drm.s_fence->finished);
xe_sched_job_push(job);
@@ -1437,8 +1465,6 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
err_job:
xe_sched_job_put(job);
err_bb:
- if (!q)
- mutex_unlock(&m->job_mutex);
xe_bb_free(bb, NULL);
err:
drm_suballoc_free(sa_bo, NULL);
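In xe_migrate_update_pgtables() the relocation is conditional: m->job_mutex
only serializes jobs on the shared migrate queue, so callers passing their
own exec queue (q != NULL) skip it and rely on their own ordering. A toy
sketch of that conditional serialization (assumed names):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t shared_q_mutex = PTHREAD_MUTEX_INITIALIZER;

static void push_job(bool private_queue)
{
	if (!private_queue)
		pthread_mutex_lock(&shared_q_mutex);
	puts(private_queue ? "push on private queue" : "push on shared queue");
	if (!private_queue)
		pthread_mutex_unlock(&shared_q_mutex);
}

int main(void)
{
	push_job(false);	/* shared migrate queue: serialized */
	push_job(true);		/* caller-owned queue: caller's problem */
	return 0;
}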