summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe/xe_pt.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2024-06-11 09:08:54 +1000
committerDave Airlie <airlied@redhat.com>2024-06-11 09:09:07 +1000
commit7957066ca614b63aa6687e825ccbc215fa4584ea (patch)
treedf0dc7f4f762cab6b59f84463c1a4a0949827c9d /drivers/gpu/drm/xe/xe_pt.c
parent83a7eefedc9b56fe7bfeff13b6c7356688ffa670 (diff)
parent6800e63cf97bae62bca56d8e691544540d945f53 (diff)
Merge tag 'drm-xe-next-2024-06-06' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
UAPI Changes: - Expose the L3 bank mask (Francois) Cross-subsystem Changes: - Update Xe driver maintainers (Oded) Display (i915): - Add missing include to intel_vga.c (Michal Wajdeczko) Driver Changes: - Fix Display (xe-only) detection for ADL-N (Lucas) - Runtime PM fixes that enabled PC-10 and D3Cold (Francois, Rodrigo) - Fix unexpected silent drm backmerge issues (Thomas) - More (a lot more) preparation for SR-IOV support (Michal Wajdeczko) - Devcoredump fixes and improvements (Jose, Tejas, Matt Brost) - Introduce device 'wedged' state (Rodrigo) - Improve debug and info messages (Michal Wajdeczko, Rodrigo, Nirmoy) - Adding or fixing workarounds (Tejas, Shekhar, Lucas, Bommu) - Check result of drmm_mutex_init (Michal Wajdeczko) - Enlarge the critical dma fence area for preempt fences (Matt Auld) - Prevent UAF in VM's rebind work (Matt Auld) - GuC submit related clean-ups and fixes (Matt Brost, Himal, Jonathan, Niranjana) - Prefer local helpers to perform dma reservation locking (Himal) - Spelling and typo fixes (Colin, Francois) - Prep patches for 1 job per VM bind IOCTL (no uapi change yet) (Matt Brost) - Remove uninitialized end var from xe_gt_tlb_invalidation_range (Nirmoy) - GSC related changes targeting LNL support (Daniele) - Fix assert in L3 bank mask generation (Francois) - Perform dma_map when moving system buffer objects to TT (Thomas) - Add helpers for manipulating macro arguments (Michal Wajdeczko) - Refactor default device atomic settings (Nirmoy) - Add debugfs node to dump mocs (Janga) - Use ordered WQ for G2H handler (Matt Brost) - Clean up and fixes in header includes (Michal Wajdeczko) - Prefer flexible-array over deprecated zero-lenght ones (Lucas) - Add Indirect Ring State support (Niranjana) - Fix UBSAN shift-out-of-bounds failure (Shuicheng) - HWMon fixes and additions (Karthik) - Clean-up refactor around probe init functions (Lucas, Michal Wajdeczko) - Fix PCODE init function (Himal) - Only use reserved BCS instances for usm migrate exec 
queue (Matt Brost) - Only zap PTEs as needed (Matt Brost) - Per client usage info (Lucas) - Core hotunplug improvements converting stuff towards devm (Matt Auld) - Don't emit false error if running in execlist mode (Michal Wajdeczko) - Remove unused struct (Dr. David) - Support/debug for slow GuC loads (John Harrison) - Decouple job seqno and lrc seqno (Matt Brost) - Allow migrate vm gpu submissions from reclaim context (Thomas) - Rename drm-client running time to run_ticks and fix a UAF (Umesh) - Check empty pinned BO list with lock held (Nirmoy) - Drop undesired prefix from the platform name (Michal Wajdeczko) - Remove unwanted mutex locking on xe file close (Niranjana) - Replace format-less snprintf() with strscpy() (Arnd) - Other general clean-ups on registers definitions and function names (Michal Wajdeczko) - Add kernel-doc to some xe_lrc interfaces (Niranajana) - Use missing lock in relay_needs_worker (Nirmoy) - Drop redundant W=1 warnings from Makefile (Jani) - Simplify if condition in preempt fences code (Thorsten) - Flush engine buffers before signalling user fence on all engines (Andrzej) - Don't overmap identity VRAM mapping (Matt Brost) - Do not dereference NULL job->fence in trace points (Matt Brost) - Add synchronous gt reset debugfs (Jonathan) - Xe gt_idle fixes (Riana) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/ZmItmuf7vq_xvRjJ@intel.com
Diffstat (limited to 'drivers/gpu/drm/xe/xe_pt.c')
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c69
1 files changed, 56 insertions, 13 deletions
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 5b7930f46cf3..cd60c009b679 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -619,9 +619,40 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
int ret;
- if ((vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) &&
- (is_devmem || !IS_DGFX(xe)))
- xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+ /*
+ * Default atomic expectations for different allocation scenarios are as follows:
+ *
+ * 1. Traditional API: When the VM is not in LR mode:
+ * - Device atomics are expected to function with all allocations.
+ *
+ * 2. Compute/SVM API: When the VM is in LR mode:
+ * - Device atomics are the default behavior when the bo is placed in a single region.
+ * - In all other cases device atomics will be disabled with AE=0 until an application
+ * requests otherwise using an ioctl like madvise.
+ */
+ if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
+ if (xe_vm_in_lr_mode(xe_vma_vm(vma))) {
+ if (bo && xe_bo_has_single_placement(bo))
+ xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+ /*
+ * If a SMEM+LMEM allocation is backed by SMEM, a device
+ * atomic will cause a GPU page fault, after which the
+ * allocation gets migrated to LMEM. Bind such allocations
+ * with device atomics enabled.
+ */
+ else if (is_devmem && !xe_bo_has_single_placement(bo))
+ xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+ } else {
+ xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+ }
+
+ /*
+ * Unset AE if the platform (PVC) doesn't support it on a
+ * system memory (non-devmem) allocation
+ */
+ if (!xe->info.has_device_atomics_on_smem && !is_devmem)
+ xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE;
+ }
if (is_devmem) {
xe_walk.default_pte |= XE_PPGTT_PTE_DM;
@@ -732,7 +763,7 @@ static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
pgoff_t end_offset;
XE_WARN_ON(!*child);
- XE_WARN_ON(!level && xe_child->is_compact);
+ XE_WARN_ON(!level);
/*
* Note that we're called from an entry callback, and we're dealing
@@ -781,8 +812,9 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
.tile = tile,
};
struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+ u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated);
- if (!(vma->tile_present & BIT(tile->id)))
+ if (!(pt_mask & BIT(tile->id)))
return false;
(void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma),
@@ -1075,10 +1107,12 @@ static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
struct invalidation_fence {
struct xe_gt_tlb_invalidation_fence base;
struct xe_gt *gt;
- struct xe_vma *vma;
struct dma_fence *fence;
struct dma_fence_cb cb;
struct work_struct work;
+ u64 start;
+ u64 end;
+ u32 asid;
};
static const char *
@@ -1121,13 +1155,14 @@ static void invalidation_fence_work_func(struct work_struct *w)
container_of(w, struct invalidation_fence, work);
trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
- xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma);
+ xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
+ ifence->end, ifence->asid);
}
static int invalidation_fence_init(struct xe_gt *gt,
struct invalidation_fence *ifence,
struct dma_fence *fence,
- struct xe_vma *vma)
+ u64 start, u64 end, u32 asid)
{
int ret;
@@ -1144,7 +1179,9 @@ static int invalidation_fence_init(struct xe_gt *gt,
dma_fence_get(&ifence->base.base); /* Ref for caller */
ifence->fence = fence;
ifence->gt = gt;
- ifence->vma = vma;
+ ifence->start = start;
+ ifence->end = end;
+ ifence->asid = asid;
INIT_WORK(&ifence->work, invalidation_fence_work_func);
ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
@@ -1295,8 +1332,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
/* TLB invalidation must be done before signaling rebind */
if (ifence) {
- int err = invalidation_fence_init(tile->primary_gt, ifence, fence,
- vma);
+ int err = invalidation_fence_init(tile->primary_gt,
+ ifence, fence,
+ xe_vma_start(vma),
+ xe_vma_end(vma),
+ xe_vma_vm(vma)->usm.asid);
if (err) {
dma_fence_put(fence);
kfree(ifence);
@@ -1405,7 +1445,7 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
XE_WARN_ON(!*child);
- XE_WARN_ON(!level && xe_child->is_compact);
+ XE_WARN_ON(!level);
xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);
@@ -1641,7 +1681,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
dma_fence_wait(fence, false);
/* TLB invalidation must be done before signaling unbind */
- err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma);
+ err = invalidation_fence_init(tile->primary_gt, ifence, fence,
+ xe_vma_start(vma),
+ xe_vma_end(vma),
+ xe_vma_vm(vma)->usm.asid);
if (err) {
dma_fence_put(fence);
kfree(ifence);