author     Jani Nikula <jani.nikula@intel.com>   2024-06-19 11:38:31 +0300
committer  Jani Nikula <jani.nikula@intel.com>   2024-06-19 11:38:31 +0300
commit     d754ed2821fd9675d203cb73c4afcd593e28b7d0 (patch)
tree       cd16683cd956a7c334d7e1b3baf02e2e7baa729c /drivers/gpu/drm/xe/xe_migrate.c
parent     dcaacff03a9fa2838f936e1009b4b7ad56807152 (diff)
parent     1ddaaa244021aba8496536a6627b4ad2bc0f936a (diff)
Merge drm/drm-next into drm-intel-next
Sync to v6.10-rc3.
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'drivers/gpu/drm/xe/xe_migrate.c')
-rw-r--r--	drivers/gpu/drm/xe/xe_migrate.c	90
1 file changed, 58 insertions(+), 32 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 9f6e9b7f11c8..7e3fb33110d9 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -14,8 +14,8 @@
 
 #include <generated/xe_wa_oob.h>
 
+#include "instructions/xe_gpu_commands.h"
 #include "instructions/xe_mi_commands.h"
-#include "regs/xe_gpu_commands.h"
 #include "regs/xe_gtt_defs.h"
 #include "tests/xe_test.h"
 #include "xe_assert.h"
@@ -34,7 +34,6 @@
 #include "xe_sync.h"
 #include "xe_trace.h"
 #include "xe_vm.h"
-#include "xe_wa.h"
 
 /**
  * struct xe_migrate - migrate context.
@@ -70,7 +69,7 @@ struct xe_migrate {
 
 #define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */
 #define MAX_CCS_LIMITED_TRANSFER SZ_4M /* XE_PAGE_SIZE * (FIELD_MAX(XE2_CCS_SIZE_MASK) + 1) */
-#define NUM_KERNEL_PDE 17
+#define NUM_KERNEL_PDE 15
 #define NUM_PT_SLOTS 32
 #define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M
 #define MAX_NUM_PTE 512
@@ -138,10 +137,11 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	struct xe_device *xe = tile_to_xe(tile);
 	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
 	u8 id = tile->id;
-	u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
+	u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level,
+	    num_setup = num_level + 1;
 	u32 map_ofs, level, i;
 	struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo;
-	u64 entry;
+	u64 entry, pt30_ofs;
 
 	/* Can't bump NUM_PT_SLOTS too high */
 	BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE);
@@ -161,10 +161,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
-	entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, pat_index);
+	/* PT31 reserved for 2M identity map */
+	pt30_ofs = bo->size - 2 * XE_PAGE_SIZE;
+	entry = vm->pt_ops->pde_encode_bo(bo, pt30_ofs, pat_index);
 	xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
 
-	map_ofs = (num_entries - num_level) * XE_PAGE_SIZE;
+	map_ofs = (num_entries - num_setup) * XE_PAGE_SIZE;
 
 	/* Map the entire BO in our level 0 pt */
 	for (i = 0, level = 0; i < num_entries; level++) {
@@ -235,7 +237,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	}
 
 	/* Write PDE's that point to our BO. */
-	for (i = 0; i < num_entries - num_level; i++) {
+	for (i = 0; i < map_ofs / PAGE_SIZE; i++) {
 		entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE,
 						  pat_index);
 
@@ -253,28 +255,54 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	/* Identity map the entire vram at 256GiB offset */
 	if (IS_DGFX(xe)) {
 		u64 pos, ofs, flags;
+		/* XXX: Unclear if this should be usable_size? */
+		u64 vram_limit = xe->mem.vram.actual_physical_size +
+			xe->mem.vram.dpa_base;
 
 		level = 2;
 		ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8;
 		flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level,
 						    true, 0);
 
+		xe_assert(xe, IS_ALIGNED(xe->mem.vram.usable_size, SZ_2M));
+
 		/*
-		 * Use 1GB pages, it shouldn't matter the physical amount of
-		 * vram is less, when we don't access it.
+		 * Use 1GB pages when possible, last chunk always use 2M
+		 * pages as mixing reserved memory (stolen, WOCPM) with a single
+		 * mapping is not allowed on certain platforms.
 		 */
-		for (pos = xe->mem.vram.dpa_base;
-		     pos < xe->mem.vram.actual_physical_size + xe->mem.vram.dpa_base;
-		     pos += SZ_1G, ofs += 8)
+		for (pos = xe->mem.vram.dpa_base; pos < vram_limit;
+		     pos += SZ_1G, ofs += 8) {
+			if (pos + SZ_1G >= vram_limit) {
+				u64 pt31_ofs = bo->size - XE_PAGE_SIZE;
+
+				entry = vm->pt_ops->pde_encode_bo(bo, pt31_ofs,
+								  pat_index);
+				xe_map_wr(xe, &bo->vmap, ofs, u64, entry);
+
+				flags = vm->pt_ops->pte_encode_addr(xe, 0,
+								    pat_index,
+								    level - 1,
+								    true, 0);
+
+				for (ofs = pt31_ofs; pos < vram_limit;
+				     pos += SZ_2M, ofs += 8)
+					xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
+				break;	/* Ensure pos == vram_limit assert correct */
+			}
 			xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
+		}
+
+		xe_assert(xe, pos == vram_limit);
 	}
 
 	/*
 	 * Example layout created above, with root level = 3:
 	 * [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's
 	 * [PT8]: Kernel PT for VM_BIND, 4 KiB PTE's
-	 * [PT9...PT28]: Userspace PT's for VM_BIND, 4 KiB PTE's
-	 * [PT29 = PDE 0] [PT30 = PDE 1] [PT31 = PDE 2]
+	 * [PT9...PT27]: Userspace PT's for VM_BIND, 4 KiB PTE's
+	 * [PT28 = PDE 0] [PT29 = PDE 1] [PT30 = PDE 2] [PT31 = 2M vram identity map]
 	 *
 	 * This makes the lowest part of the VM point to the pagetables.
 	 * Hence the lowest 2M in the vm should point to itself, with a few writes
@@ -300,10 +328,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 }
 
 /*
- * Due to workaround 16017236439, odd instance hardware copy engines are
- * faster than even instance ones.
- * This function returns the mask involving all fast copy engines and the
- * reserved copy engine to be used as logical mask for migrate engine.
  * Including the reserved copy engine is required to avoid deadlocks due to
  * migrate jobs servicing the faults gets stuck behind the job that faulted.
  */
@@ -317,8 +341,7 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
 		if (hwe->class != XE_ENGINE_CLASS_COPY)
 			continue;
 
-		if (!XE_WA(gt, 16017236439) ||
-		    xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1)
+		if (xe_gt_is_usm_hwe(gt, hwe))
 			logical_mask |= BIT(hwe->logical_instance);
 	}
 
@@ -369,6 +392,10 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 		if (!hwe || !logical_mask)
 			return ERR_PTR(-EINVAL);
 
+		/*
+		 * XXX: Currently only reserving 1 (likely slow) BCS instance on
+		 * PVC, may want to revisit if performance is needed.
+		 */
 		m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
 					    EXEC_QUEUE_FLAG_KERNEL |
 					    EXEC_QUEUE_FLAG_PERMANENT |
@@ -385,6 +412,9 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 	}
 
 	mutex_init(&m->job_mutex);
+	fs_reclaim_acquire(GFP_KERNEL);
+	might_lock(&m->job_mutex);
+	fs_reclaim_release(GFP_KERNEL);
 
 	err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m);
 	if (err)
@@ -809,7 +839,6 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 						  IS_DGFX(xe) ? dst_is_vram : dst_is_pltt,
 						  src_L0, ccs_ofs, copy_ccs);
 
-		mutex_lock(&m->job_mutex);
 		job = xe_bb_create_migration_job(m->q, bb,
 						 xe_migrate_batch_base(m, usm),
 						 update_idx);
@@ -829,6 +858,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 			goto err_job;
 		}
 
+		mutex_lock(&m->job_mutex);
 		xe_sched_job_arm(job);
 		dma_fence_put(fence);
 		fence = dma_fence_get(&job->drm.s_fence->finished);
@@ -846,7 +876,6 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 err_job:
 	xe_sched_job_put(job);
 err:
-	mutex_unlock(&m->job_mutex);
 	xe_bb_free(bb, NULL);
 
 err_sync:
@@ -936,8 +965,8 @@ static bool has_service_copy_support(struct xe_gt *gt)
 	 * all of the actual service copy engines (BCS1-BCS8) have been fused
	 * off.
 	 */
-	return gt->info.__engine_mask & GENMASK(XE_HW_ENGINE_BCS8,
-						XE_HW_ENGINE_BCS1);
+	return gt->info.engine_mask & GENMASK(XE_HW_ENGINE_BCS8,
+					      XE_HW_ENGINE_BCS1);
 }
 
 static u32 emit_clear_cmd_len(struct xe_gt *gt)
@@ -1046,7 +1075,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		flush_flags = MI_FLUSH_DW_CCS;
 	}
 
-	mutex_lock(&m->job_mutex);
 	job = xe_bb_create_migration_job(m->q, bb,
 					 xe_migrate_batch_base(m, usm),
 					 update_idx);
@@ -1069,6 +1097,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		goto err_job;
 	}
 
+	mutex_lock(&m->job_mutex);
 	xe_sched_job_arm(job);
 	dma_fence_put(fence);
 	fence = dma_fence_get(&job->drm.s_fence->finished);
@@ -1085,7 +1114,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 err_job:
 	xe_sched_job_put(job);
 err:
-	mutex_unlock(&m->job_mutex);
 	xe_bb_free(bb, NULL);
 err_sync:
 	/* Sync partial copies if any. FIXME: job_mutex? */
@@ -1379,9 +1407,6 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 			write_pgtable(tile, bb, 0, &updates[i], pt_update);
 	}
 
-	if (!q)
-		mutex_lock(&m->job_mutex);
-
 	job = xe_bb_create_migration_job(q ?: m->q, bb,
 					 xe_migrate_batch_base(m, usm),
 					 update_idx);
@@ -1422,6 +1447,9 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 		if (err)
 			goto err_job;
 	}
+	if (!q)
+		mutex_lock(&m->job_mutex);
+
 	xe_sched_job_arm(job);
 	fence = dma_fence_get(&job->drm.s_fence->finished);
 	xe_sched_job_push(job);
@@ -1437,8 +1465,6 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 err_job:
 	xe_sched_job_put(job);
 err_bb:
-	if (!q)
-		mutex_unlock(&m->job_mutex);
 	xe_bb_free(bb, NULL);
 err:
 	drm_suballoc_free(sa_bo, NULL);
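A side note on the job_mutex change in xe_migrate_init() above: wrapping a might_lock() in fs_reclaim_acquire()/fs_reclaim_release() is the usual lockdep priming idiom. It records once, at init time, that the lock may be taken under the memory-reclaim (fence-signalling) path, so lockdep immediately reports any GFP_KERNEL allocation performed while the lock is held, rather than waiting for real memory pressure to expose the inversion; presumably this is also why the diff moves mutex_lock(&m->job_mutex) to just before xe_sched_job_arm(), after job creation and the other fallible, potentially allocating steps. A minimal sketch of the idiom, with made-up my_dev/io_lock names that are not part of this commit:

#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>

struct my_dev {
	struct mutex io_lock;	/* also taken in the reclaim/fence-signalling path */
};

static void my_dev_locking_init(struct my_dev *dev)
{
	mutex_init(&dev->io_lock);

	/*
	 * Prime lockdep: pretend to be in reclaim and record that io_lock
	 * can be acquired there.  From now on, any allocation that may
	 * enter reclaim (GFP_KERNEL) while io_lock is held is reported
	 * immediately instead of only under real memory pressure.
	 */
	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&dev->io_lock);
	fs_reclaim_release(GFP_KERNEL);
}

static void *my_dev_alloc_under_lock(struct my_dev *dev)
{
	void *buf;

	mutex_lock(&dev->io_lock);
	/* Lockdep now flags this as a potential reclaim deadlock. */
	buf = kmalloc(64, GFP_KERNEL);
	mutex_unlock(&dev->io_lock);

	return buf;
}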