Diffstat (limited to 'drivers/gpu/drm/xe/xe_vm.c')
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.c  845
1 file changed, 495 insertions(+), 350 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 4aa3943e6f29..99bf7412475c 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -85,8 +85,8 @@ static bool preempt_fences_waiting(struct xe_vm *vm)
list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
if (!q->compute.pfence ||
- (q->compute.pfence && test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
- &q->compute.pfence->flags))) {
+ test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+ &q->compute.pfence->flags)) {
return true;
}
}
@@ -315,19 +315,23 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
-static void xe_vm_kill(struct xe_vm *vm)
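+/*
+ * Ban the VM and kill all of its preempt exec queues. @unlocked indicates
+ * the caller does not already hold the VM's dma-resv lock, in which case
+ * it is taken (and dropped) here.
+ */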
+static void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
struct xe_exec_queue *q;
lockdep_assert_held(&vm->lock);
- xe_vm_lock(vm, false);
+ if (unlocked)
+ xe_vm_lock(vm, false);
+
vm->flags |= XE_VM_FLAG_BANNED;
trace_xe_vm_kill(vm);
list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
q->ops->kill(q);
- xe_vm_unlock(vm);
+
+ if (unlocked)
+ xe_vm_unlock(vm);
/* TODO: Inform user the VM is banned */
}
@@ -557,7 +561,7 @@ out_unlock_outer:
if (err) {
drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
- xe_vm_kill(vm);
+ xe_vm_kill(vm, true);
}
up_write(&vm->lock);
@@ -708,37 +712,116 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}
-static struct dma_fence *
-xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
- struct xe_sync_entry *syncs, u32 num_syncs,
- bool first_op, bool last_op);
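+/*
+ * Fill @op with a MAP operation that replays the existing binding of
+ * @vma on the tiles in @tile_mask.
+ */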
+static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
+ u8 tile_mask)
+{
+ INIT_LIST_HEAD(&op->link);
+ op->tile_mask = tile_mask;
+ op->base.op = DRM_GPUVA_OP_MAP;
+ op->base.map.va.addr = vma->gpuva.va.addr;
+ op->base.map.va.range = vma->gpuva.va.range;
+ op->base.map.gem.obj = vma->gpuva.gem.obj;
+ op->base.map.gem.offset = vma->gpuva.gem.offset;
+ op->map.vma = vma;
+ op->map.immediate = true;
+ op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
+ op->map.is_null = xe_vma_is_null(vma);
+}
+
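+/* Allocate a rebind op for @vma and append it to the @vops list. */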
+static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
+ u8 tile_mask)
+{
+ struct xe_vma_op *op;
+
+ op = kzalloc(sizeof(*op), GFP_KERNEL);
+ if (!op)
+ return -ENOMEM;
+
+ xe_vm_populate_rebind(op, vma, tile_mask);
+ list_add_tail(&op->link, &vops->list);
+
+ return 0;
+}
+
+static struct dma_fence *ops_execute(struct xe_vm *vm,
+ struct xe_vma_ops *vops);
+static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
+ struct xe_exec_queue *q,
+ struct xe_sync_entry *syncs, u32 num_syncs);
int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
{
struct dma_fence *fence;
struct xe_vma *vma, *next;
+ struct xe_vma_ops vops;
+ struct xe_vma_op *op, *next_op;
+ int err;
lockdep_assert_held(&vm->lock);
- if (xe_vm_in_lr_mode(vm) && !rebind_worker)
+ if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
+ list_empty(&vm->rebind_list))
return 0;
+ xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+
xe_vm_assert_held(vm);
- list_for_each_entry_safe(vma, next, &vm->rebind_list,
- combined_links.rebind) {
+ list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
xe_assert(vm->xe, vma->tile_present);
- list_del_init(&vma->combined_links.rebind);
if (rebind_worker)
trace_xe_vma_rebind_worker(vma);
else
trace_xe_vma_rebind_exec(vma);
- fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
- if (IS_ERR(fence))
- return PTR_ERR(fence);
+
+ err = xe_vm_ops_add_rebind(&vops, vma,
+ vma->tile_present);
+ if (err)
+ goto free_ops;
+ }
+
+ fence = ops_execute(vm, &vops);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ } else {
dma_fence_put(fence);
+ list_for_each_entry_safe(vma, next, &vm->rebind_list,
+ combined_links.rebind)
+ list_del_init(&vma->combined_links.rebind);
+ }
+free_ops:
+ list_for_each_entry_safe(op, next_op, &vops.list, link) {
+ list_del(&op->link);
+ kfree(op);
}
- return 0;
+ return err;
+}
+
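+/**
+ * xe_vma_rebind() - Rebind a single VMA in a fault-mode VM
+ * @vm: The VM.
+ * @vma: The VMA to rebind.
+ * @tile_mask: Mask of tiles to rebind the VMA on.
+ *
+ * Builds a one-op xe_vma_ops list and executes it. The caller must hold
+ * the VM lock and the VM's dma-resv lock.
+ *
+ * Return: the rebind fence on success, an ERR_PTR on failure.
+ */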
+struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma,
+ u8 tile_mask)
+{
+ struct dma_fence *fence = NULL;
+ struct xe_vma_ops vops;
+ struct xe_vma_op *op, *next_op;
+ int err;
+
+ lockdep_assert_held(&vm->lock);
+ xe_vm_assert_held(vm);
+ xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
+
+ xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+
+ err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
+ if (err)
+ return ERR_PTR(err);
+
+ fence = ops_execute(vm, &vops);
+
+ list_for_each_entry_safe(op, next_op, &vops.list, link) {
+ list_del(&op->link);
+ kfree(op);
+ }
+
+ return fence;
}
static void xe_vma_free(struct xe_vma *vma)
@@ -805,7 +888,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
for_each_tile(tile, vm->xe, id)
vma->tile_mask |= 0x1 << id;
- if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
+ if (vm->xe->info.has_atomic_enable_pte_bit)
vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
vma->pat_index = pat_index;
@@ -1173,6 +1256,8 @@ static const struct xe_pt_ops xelp_pt_ops = {
.pde_encode_bo = xelp_pde_encode_bo,
};
+static void vm_destroy_work_func(struct work_struct *w);
+
/**
* xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
* given tile and vm.
@@ -1252,6 +1337,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
init_rwsem(&vm->userptr.notifier_lock);
spin_lock_init(&vm->userptr.invalidated_lock);
+ INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
+
INIT_LIST_HEAD(&vm->preempt.exec_queues);
vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
@@ -1260,7 +1347,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
vm->pt_ops = &xelp_pt_ops;
- if (!(flags & XE_VM_FLAG_MIGRATION))
+ /*
+ * Long-running workloads are not protected by the scheduler references.
+ * By design, run_job for long-running workloads returns NULL and the
+ * scheduler then drops all of its references, so the VM must be
+ * protected explicitly in this case.
+ */
+ if (flags & XE_VM_FLAG_LR_MODE)
xe_pm_runtime_get_noresume(xe);
vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
@@ -1274,7 +1367,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
drm_gem_object_put(vm_resv_obj);
- err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
+ err = xe_vm_lock(vm, true);
if (err)
goto err_close;
@@ -1318,7 +1411,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
}
- dma_resv_unlock(xe_vm_resv(vm));
+ xe_vm_unlock(vm);
/* Kernel migration VM shouldn't have a circular loop.. */
if (!(flags & XE_VM_FLAG_MIGRATION)) {
@@ -1360,7 +1453,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
return vm;
err_unlock_close:
- dma_resv_unlock(xe_vm_resv(vm));
+ xe_vm_unlock(vm);
err_close:
xe_vm_close_and_put(vm);
return ERR_PTR(err);
@@ -1370,7 +1463,7 @@ err_no_resv:
for_each_tile(tile, xe, id)
xe_range_fence_tree_fini(&vm->rftree[id]);
kfree(vm);
- if (!(flags & XE_VM_FLAG_MIGRATION))
+ if (flags & XE_VM_FLAG_LR_MODE)
xe_pm_runtime_put(xe);
return ERR_PTR(err);
}
@@ -1489,9 +1582,10 @@ void xe_vm_close_and_put(struct xe_vm *vm)
xe_vm_put(vm);
}
-static void xe_vm_free(struct drm_gpuvm *gpuvm)
+static void vm_destroy_work_func(struct work_struct *w)
{
- struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
+ struct xe_vm *vm =
+ container_of(w, struct xe_vm, destroy_work);
struct xe_device *xe = vm->xe;
struct xe_tile *tile;
u8 id;
@@ -1504,7 +1598,7 @@ static void xe_vm_free(struct drm_gpuvm *gpuvm)
mutex_destroy(&vm->snap_mutex);
- if (!(vm->flags & XE_VM_FLAG_MIGRATION))
+ if (vm->flags & XE_VM_FLAG_LR_MODE)
xe_pm_runtime_put(xe);
for_each_tile(tile, xe, id)
@@ -1514,6 +1608,14 @@ static void xe_vm_free(struct drm_gpuvm *gpuvm)
kfree(vm);
}
+static void xe_vm_free(struct drm_gpuvm *gpuvm)
+{
+ struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
+
+ /* To destroy the VM we need to be able to sleep */
+ queue_work(system_unbound_wq, &vm->destroy_work);
+}
+
struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
{
struct xe_vm *vm;
@@ -1550,23 +1652,13 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
struct dma_fence *fence = NULL;
struct dma_fence **fences = NULL;
struct dma_fence_array *cf = NULL;
- int cur_fence = 0, i;
+ int cur_fence = 0;
int number_tiles = hweight8(vma->tile_present);
int err;
u8 id;
trace_xe_vma_unbind(vma);
- if (vma->ufence) {
- struct xe_user_fence * const f = vma->ufence;
-
- if (!xe_sync_ufence_get_status(f))
- return ERR_PTR(-EBUSY);
-
- vma->ufence = NULL;
- xe_sync_ufence_put(f);
- }
-
if (number_tiles > 1) {
fences = kmalloc_array(number_tiles, sizeof(*fences),
GFP_KERNEL);
@@ -1608,10 +1700,6 @@ next:
fence = cf ? &cf->base : !fence ?
xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
- if (last_op) {
- for (i = 0; i < num_syncs; i++)
- xe_sync_entry_signal(&syncs[i], fence);
- }
return fence;
@@ -1628,15 +1716,15 @@ err_fences:
static struct dma_fence *
xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
struct xe_sync_entry *syncs, u32 num_syncs,
- bool first_op, bool last_op)
+ u8 tile_mask, bool first_op, bool last_op)
{
struct xe_tile *tile;
struct dma_fence *fence;
struct dma_fence **fences = NULL;
struct dma_fence_array *cf = NULL;
struct xe_vm *vm = xe_vma_vm(vma);
- int cur_fence = 0, i;
- int number_tiles = hweight8(vma->tile_mask);
+ int cur_fence = 0;
+ int number_tiles = hweight8(tile_mask);
int err;
u8 id;
@@ -1650,7 +1738,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
}
for_each_tile(tile, vm->xe, id) {
- if (!(vma->tile_mask & BIT(id)))
+ if (!(tile_mask & BIT(id)))
goto next;
fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
@@ -1682,12 +1770,6 @@ next:
}
}
- if (last_op) {
- for (i = 0; i < num_syncs; i++)
- xe_sync_entry_signal(&syncs[i],
- cf ? &cf->base : fence);
- }
-
return cf ? &cf->base : fence;
err_fences:
@@ -1715,87 +1797,46 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
return NULL;
}
-static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_exec_queue *q, struct xe_sync_entry *syncs,
- u32 num_syncs, bool immediate, bool first_op,
- bool last_op)
+static struct dma_fence *
+xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
+ struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs,
+ u8 tile_mask, bool immediate, bool first_op, bool last_op)
{
struct dma_fence *fence;
struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
- struct xe_user_fence *ufence;
xe_vm_assert_held(vm);
-
- ufence = find_ufence_get(syncs, num_syncs);
- if (vma->ufence && ufence)
- xe_sync_ufence_put(vma->ufence);
-
- vma->ufence = ufence ?: vma->ufence;
+ xe_bo_assert_held(bo);
if (immediate) {
- fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
- last_op);
+ fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, tile_mask,
+ first_op, last_op);
if (IS_ERR(fence))
- return PTR_ERR(fence);
+ return fence;
} else {
- int i;
-
xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
- if (last_op) {
- for (i = 0; i < num_syncs; i++)
- xe_sync_entry_signal(&syncs[i], fence);
- }
}
- if (last_op)
- xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
- dma_fence_put(fence);
-
- return 0;
-}
-
-static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
- struct xe_bo *bo, struct xe_sync_entry *syncs,
- u32 num_syncs, bool immediate, bool first_op,
- bool last_op)
-{
- int err;
-
- xe_vm_assert_held(vm);
- xe_bo_assert_held(bo);
-
- if (bo && immediate) {
- err = xe_bo_validate(bo, vm, true);
- if (err)
- return err;
- }
-
- return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
- last_op);
+ return fence;
}
-static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_exec_queue *q, struct xe_sync_entry *syncs,
- u32 num_syncs, bool first_op, bool last_op)
+static struct dma_fence *
+xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_exec_queue *q, struct xe_sync_entry *syncs,
+ u32 num_syncs, bool first_op, bool last_op)
{
struct dma_fence *fence;
- struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
xe_vm_assert_held(vm);
xe_bo_assert_held(xe_vma_bo(vma));
fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
if (IS_ERR(fence))
- return PTR_ERR(fence);
+ return fence;
- xe_vma_destroy(vma, fence);
- if (last_op)
- xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
- dma_fence_put(fence);
-
- return 0;
+ return fence;
}
#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
@@ -1938,40 +1979,18 @@ static const u32 region_to_mem_type[] = {
XE_PL_VRAM1,
};
-static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_exec_queue *q, u32 region,
- struct xe_sync_entry *syncs, u32 num_syncs,
- bool first_op, bool last_op)
+static struct dma_fence *
+xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_exec_queue *q, struct xe_sync_entry *syncs,
+ u32 num_syncs, bool first_op, bool last_op)
{
struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
- int err;
-
- xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
-
- if (!xe_vma_has_no_bo(vma)) {
- err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
- if (err)
- return err;
- }
if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) {
return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
- true, first_op, last_op);
+ vma->tile_mask, true, first_op, last_op);
} else {
- int i;
-
- /* Nothing to do, signal fences now */
- if (last_op) {
- for (i = 0; i < num_syncs; i++) {
- struct dma_fence *fence =
- xe_exec_queue_last_fence_get(wait_exec_queue, vm);
-
- xe_sync_entry_signal(&syncs[i], fence);
- dma_fence_put(fence);
- }
- }
-
- return 0;
+ return xe_exec_queue_last_fence_get(wait_exec_queue, vm);
}
}
@@ -2267,23 +2286,28 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
struct drm_gpuva_ops *ops,
struct xe_sync_entry *syncs, u32 num_syncs,
- struct list_head *ops_list, bool last)
+ struct xe_vma_ops *vops, bool last)
{
struct xe_device *xe = vm->xe;
struct xe_vma_op *last_op = NULL;
struct drm_gpuva_op *__op;
+ struct xe_tile *tile;
+ u8 id, tile_mask = 0;
int err = 0;
lockdep_assert_held_write(&vm->lock);
+ for_each_tile(tile, vm->xe, id)
+ tile_mask |= 0x1 << id;
+
drm_gpuva_for_each_op(__op, ops) {
struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
struct xe_vma *vma;
- bool first = list_empty(ops_list);
+ bool first = list_empty(&vops->list);
unsigned int flags = 0;
INIT_LIST_HEAD(&op->link);
- list_add_tail(&op->link, ops_list);
+ list_add_tail(&op->link, &vops->list);
if (first) {
op->flags |= XE_VMA_OP_FIRST;
@@ -2292,6 +2316,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
}
op->q = q;
+ op->tile_mask = tile_mask;
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
@@ -2409,12 +2434,11 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
}
/* FIXME: Unhandled corner case */
- XE_WARN_ON(!last_op && last && !list_empty(ops_list));
+ XE_WARN_ON(!last_op && last && !list_empty(&vops->list));
if (!last_op)
return 0;
- last_op->ops = ops;
if (last) {
last_op->flags |= XE_VMA_OP_LAST;
last_op->num_syncs = num_syncs;
@@ -2424,27 +2448,24 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
return 0;
}
-static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
- struct xe_vma *vma, struct xe_vma_op *op)
+static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_vma_op *op)
{
- int err;
-
- lockdep_assert_held_write(&vm->lock);
+ struct dma_fence *fence = NULL;
- err = xe_vm_lock_vma(exec, vma);
- if (err)
- return err;
+ lockdep_assert_held(&vm->lock);
xe_vm_assert_held(vm);
xe_bo_assert_held(xe_vma_bo(vma));
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
- err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
- op->syncs, op->num_syncs,
- op->map.immediate || !xe_vm_in_fault_mode(vm),
- op->flags & XE_VMA_OP_FIRST,
- op->flags & XE_VMA_OP_LAST);
+ fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
+ op->syncs, op->num_syncs,
+ op->tile_mask,
+ op->map.immediate || !xe_vm_in_fault_mode(vm),
+ op->flags & XE_VMA_OP_FIRST,
+ op->flags & XE_VMA_OP_LAST);
break;
case DRM_GPUVA_OP_REMAP:
{
@@ -2454,37 +2475,41 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
if (!op->remap.unmap_done) {
if (prev || next)
vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
- err = xe_vm_unbind(vm, vma, op->q, op->syncs,
- op->num_syncs,
- op->flags & XE_VMA_OP_FIRST,
- op->flags & XE_VMA_OP_LAST &&
- !prev && !next);
- if (err)
+ fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
+ op->num_syncs,
+ op->flags & XE_VMA_OP_FIRST,
+ op->flags & XE_VMA_OP_LAST &&
+ !prev && !next);
+ if (IS_ERR(fence))
break;
op->remap.unmap_done = true;
}
if (prev) {
op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
- err = xe_vm_bind(vm, op->remap.prev, op->q,
- xe_vma_bo(op->remap.prev), op->syncs,
- op->num_syncs, true, false,
- op->flags & XE_VMA_OP_LAST && !next);
+ dma_fence_put(fence);
+ fence = xe_vm_bind(vm, op->remap.prev, op->q,
+ xe_vma_bo(op->remap.prev), op->syncs,
+ op->num_syncs,
+ op->remap.prev->tile_mask, true,
+ false,
+ op->flags & XE_VMA_OP_LAST && !next);
op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
- if (err)
+ if (IS_ERR(fence))
break;
op->remap.prev = NULL;
}
if (next) {
op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
- err = xe_vm_bind(vm, op->remap.next, op->q,
- xe_vma_bo(op->remap.next),
- op->syncs, op->num_syncs,
- true, false,
- op->flags & XE_VMA_OP_LAST);
+ dma_fence_put(fence);
+ fence = xe_vm_bind(vm, op->remap.next, op->q,
+ xe_vma_bo(op->remap.next),
+ op->syncs, op->num_syncs,
+ op->remap.next->tile_mask, true,
+ false, op->flags & XE_VMA_OP_LAST);
op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
- if (err)
+ if (IS_ERR(fence))
break;
op->remap.next = NULL;
}
@@ -2492,43 +2517,35 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
break;
}
case DRM_GPUVA_OP_UNMAP:
- err = xe_vm_unbind(vm, vma, op->q, op->syncs,
- op->num_syncs, op->flags & XE_VMA_OP_FIRST,
- op->flags & XE_VMA_OP_LAST);
+ fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
+ op->num_syncs, op->flags & XE_VMA_OP_FIRST,
+ op->flags & XE_VMA_OP_LAST);
break;
case DRM_GPUVA_OP_PREFETCH:
- err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
- op->syncs, op->num_syncs,
- op->flags & XE_VMA_OP_FIRST,
- op->flags & XE_VMA_OP_LAST);
+ fence = xe_vm_prefetch(vm, vma, op->q, op->syncs, op->num_syncs,
+ op->flags & XE_VMA_OP_FIRST,
+ op->flags & XE_VMA_OP_LAST);
break;
default:
drm_warn(&vm->xe->drm, "NOT POSSIBLE");
}
- if (err)
+ if (IS_ERR(fence))
trace_xe_vma_fail(vma);
- return err;
+ return fence;
}
-static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_vma_op *op)
+static struct dma_fence *
+__xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_vma_op *op)
{
- struct drm_exec exec;
+ struct dma_fence *fence;
int err;
retry_userptr:
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
- drm_exec_until_all_locked(&exec) {
- err = op_execute(&exec, vm, vma, op);
- drm_exec_retry_on_contention(&exec);
- if (err)
- break;
- }
- drm_exec_fini(&exec);
-
- if (err == -EAGAIN) {
+ fence = op_execute(vm, vma, op);
+ if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) {
lockdep_assert_held_write(&vm->lock);
if (op->base.op == DRM_GPUVA_OP_REMAP) {
@@ -2545,22 +2562,24 @@ retry_userptr:
if (!err)
goto retry_userptr;
+ fence = ERR_PTR(err);
trace_xe_vma_fail(vma);
}
}
- return err;
+ return fence;
}
-static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
+static struct dma_fence *
+xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
{
- int ret = 0;
+ struct dma_fence *fence = ERR_PTR(-ENOMEM);
- lockdep_assert_held_write(&vm->lock);
+ lockdep_assert_held(&vm->lock);
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
- ret = __xe_vma_op_execute(vm, op->map.vma, op);
+ fence = __xe_vma_op_execute(vm, op->map.vma, op);
break;
case DRM_GPUVA_OP_REMAP:
{
@@ -2573,42 +2592,23 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
else
vma = op->remap.next;
- ret = __xe_vma_op_execute(vm, vma, op);
+ fence = __xe_vma_op_execute(vm, vma, op);
break;
}
case DRM_GPUVA_OP_UNMAP:
- ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
- op);
+ fence = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
+ op);
break;
case DRM_GPUVA_OP_PREFETCH:
- ret = __xe_vma_op_execute(vm,
- gpuva_to_vma(op->base.prefetch.va),
- op);
+ fence = __xe_vma_op_execute(vm,
+ gpuva_to_vma(op->base.prefetch.va),
+ op);
break;
default:
drm_warn(&vm->xe->drm, "NOT POSSIBLE");
}
- return ret;
-}
-
-static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
-{
- bool last = op->flags & XE_VMA_OP_LAST;
-
- if (last) {
- while (op->num_syncs--)
- xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
- kfree(op->syncs);
- if (op->q)
- xe_exec_queue_put(op->q);
- }
- if (!list_empty(&op->link))
- list_del(&op->link);
- if (op->ops)
- drm_gpuva_ops_free(&vm->gpuvm, op->ops);
- if (last)
- xe_vm_put(vm);
+ return fence;
}
static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
@@ -2687,34 +2687,223 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
op->flags & XE_VMA_OP_PREV_COMMITTED,
op->flags & XE_VMA_OP_NEXT_COMMITTED);
}
+ }
+}
- drm_gpuva_ops_free(&vm->gpuvm, __ops);
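+/*
+ * Lock the VMA's backing BO if it is external (VM-private BOs share the
+ * VM's reservation object, which the caller has already locked), then
+ * optionally validate it into a valid placement.
+ */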
+static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
+ bool validate)
+{
+ struct xe_bo *bo = xe_vma_bo(vma);
+ int err = 0;
+
+ if (bo) {
+ if (!bo->vm)
+ err = drm_exec_lock_obj(exec, &bo->ttm.base);
+ if (!err && validate)
+ err = xe_bo_validate(bo, xe_vma_vm(vma), true);
}
+
+ return err;
}
-static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
- struct list_head *ops_list)
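+/*
+ * A VMA with an unsignalled user fence cannot be unmapped yet; return
+ * -EBUSY so the caller can retry. Otherwise drop the fence reference.
+ */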
+static int check_ufence(struct xe_vma *vma)
+{
+ if (vma->ufence) {
+ struct xe_user_fence * const f = vma->ufence;
+
+ if (!xe_sync_ufence_get_status(f))
+ return -EBUSY;
+
+ vma->ufence = NULL;
+ xe_sync_ufence_put(f);
+ }
+
+ return 0;
+}
+
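+/* Take the locks and do the validation/migration each op type requires. */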
+static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
+ struct xe_vma_op *op)
+{
+ int err = 0;
+
+ switch (op->base.op) {
+ case DRM_GPUVA_OP_MAP:
+ err = vma_lock_and_validate(exec, op->map.vma,
+ !xe_vm_in_fault_mode(vm) ||
+ op->map.immediate);
+ break;
+ case DRM_GPUVA_OP_REMAP:
+ err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
+ if (err)
+ break;
+
+ err = vma_lock_and_validate(exec,
+ gpuva_to_vma(op->base.remap.unmap->va),
+ false);
+ if (!err && op->remap.prev)
+ err = vma_lock_and_validate(exec, op->remap.prev, true);
+ if (!err && op->remap.next)
+ err = vma_lock_and_validate(exec, op->remap.next, true);
+ break;
+ case DRM_GPUVA_OP_UNMAP:
+ err = check_ufence(gpuva_to_vma(op->base.unmap.va));
+ if (err)
+ break;
+
+ err = vma_lock_and_validate(exec,
+ gpuva_to_vma(op->base.unmap.va),
+ false);
+ break;
+ case DRM_GPUVA_OP_PREFETCH:
+ {
+ struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+ u32 region = op->prefetch.region;
+
+ xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
+
+ err = vma_lock_and_validate(exec,
+ gpuva_to_vma(op->base.prefetch.va),
+ false);
+ if (!err && !xe_vma_has_no_bo(vma))
+ err = xe_bo_migrate(xe_vma_bo(vma),
+ region_to_mem_type[region]);
+ break;
+ }
+ default:
+ drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+ }
+
+ return err;
+}
+
+static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
+ struct xe_vm *vm,
+ struct xe_vma_ops *vops)
+{
+ struct xe_vma_op *op;
+ int err;
+
+ err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+ if (err)
+ return err;
+
+ list_for_each_entry(op, &vops->list, link) {
+ err = op_lock_and_prep(exec, vm, op);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
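+/*
+ * Execute the ops in list order, keeping only the final fence. Errors
+ * are flattened to -ENOSPC, preserving the previous IOCTL behaviour.
+ */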
+static struct dma_fence *ops_execute(struct xe_vm *vm,
+ struct xe_vma_ops *vops)
{
struct xe_vma_op *op, *next;
+ struct dma_fence *fence = NULL;
+
+ list_for_each_entry_safe(op, next, &vops->list, link) {
+ dma_fence_put(fence);
+ fence = xe_vma_op_execute(vm, op);
+ if (IS_ERR(fence)) {
+ drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld",
+ op->base.op, PTR_ERR(fence));
+ fence = ERR_PTR(-ENOSPC);
+ break;
+ }
+ }
+
+ return fence;
+}
+
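+/* Replace any user fence on @vma with a new reference to @ufence. */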
+static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
+{
+ if (vma->ufence)
+ xe_sync_ufence_put(vma->ufence);
+ vma->ufence = __xe_sync_ufence_get(ufence);
+}
+
+static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
+ struct xe_user_fence *ufence)
+{
+ switch (op->base.op) {
+ case DRM_GPUVA_OP_MAP:
+ vma_add_ufence(op->map.vma, ufence);
+ break;
+ case DRM_GPUVA_OP_REMAP:
+ if (op->remap.prev)
+ vma_add_ufence(op->remap.prev, ufence);
+ if (op->remap.next)
+ vma_add_ufence(op->remap.next, ufence);
+ break;
+ case DRM_GPUVA_OP_UNMAP:
+ break;
+ case DRM_GPUVA_OP_PREFETCH:
+ vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
+ break;
+ default:
+ drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+ }
+}
+
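+/*
+ * Common completion path: attach the user fence (if any) to each op's
+ * VMAs, destroy VMAs removed by UNMAP/REMAP ops, signal all syncs with
+ * @fence and record it as the queue's last fence, then drop the
+ * passed-in @fence reference.
+ */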
+static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
+ struct dma_fence *fence)
+{
+ struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
+ struct xe_user_fence *ufence;
+ struct xe_vma_op *op;
+ int i;
+
+ ufence = find_ufence_get(vops->syncs, vops->num_syncs);
+ list_for_each_entry(op, &vops->list, link) {
+ if (ufence)
+ op_add_ufence(vm, op, ufence);
+
+ if (op->base.op == DRM_GPUVA_OP_UNMAP)
+ xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
+ else if (op->base.op == DRM_GPUVA_OP_REMAP)
+ xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
+ fence);
+ }
+ if (ufence)
+ xe_sync_ufence_put(ufence);
+ for (i = 0; i < vops->num_syncs; i++)
+ xe_sync_entry_signal(vops->syncs + i, fence);
+ xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
+ dma_fence_put(fence);
+}
+
+static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
+ struct xe_vma_ops *vops)
+{
+ struct drm_exec exec;
+ struct dma_fence *fence;
int err;
lockdep_assert_held_write(&vm->lock);
- list_for_each_entry_safe(op, next, ops_list, link) {
- err = xe_vma_op_execute(vm, op);
- if (err) {
- drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
- op->base.op, err);
- /*
- * FIXME: Killing VM rather than proper error handling
- */
- xe_vm_kill(vm);
- return -ENOSPC;
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
+ drm_exec_retry_on_contention(&exec);
+ if (err)
+ goto unlock;
+
+ fence = ops_execute(vm, vops);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ /* FIXME: Killing VM rather than proper error handling */
+ xe_vm_kill(vm, false);
+ goto unlock;
+ } else {
+ vm_bind_ioctl_ops_fini(vm, vops, fence);
}
- xe_vma_op_cleanup(vm, op);
}
- return 0;
+unlock:
+ drm_exec_fini(&exec);
+ return err;
}
#define SUPPORTED_FLAGS \
@@ -2862,6 +3051,58 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
return err;
}
+static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
+ struct xe_exec_queue *q,
+ struct xe_sync_entry *syncs, u32 num_syncs)
+{
+ memset(vops, 0, sizeof(*vops));
+ INIT_LIST_HEAD(&vops->list);
+ vops->vm = vm;
+ vops->q = q;
+ vops->syncs = syncs;
+ vops->num_syncs = num_syncs;
+}
+
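+/*
+ * Validate one bind op's BO arguments: range and offset within the BO,
+ * 64K alignment for XE_BO_FLAG_INTERNAL_64K BOs, and a PAT coherency
+ * mode compatible with the BO's CPU caching.
+ */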
+static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
+ u64 addr, u64 range, u64 obj_offset,
+ u16 pat_index)
+{
+ u16 coh_mode;
+
+ if (XE_IOCTL_DBG(xe, range > bo->size) ||
+ XE_IOCTL_DBG(xe, obj_offset >
+ bo->size - range)) {
+ return -EINVAL;
+ }
+
+ if (bo->flags & XE_BO_FLAG_INTERNAL_64K) {
+ if (XE_IOCTL_DBG(xe, obj_offset &
+ XE_64K_PAGE_MASK) ||
+ XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
+ XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
+ return -EINVAL;
+ }
+ }
+
+ coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+ if (bo->cpu_caching) {
+ if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+ bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
+ return -EINVAL;
+ }
+ } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
+ /*
+ * Imported dma-buf from a different device should
+ * require 1way or 2way coherency since we don't know
+ * how it was mapped on the CPU. Just assume it is
+ * potentially cached on CPU side.
+ */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
@@ -2875,7 +3116,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
u32 num_syncs, num_ufence = 0;
struct xe_sync_entry *syncs = NULL;
struct drm_xe_vm_bind_op *bind_ops;
- LIST_HEAD(ops_list);
+ struct xe_vma_ops vops;
int err;
int i;
@@ -2945,7 +3186,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
u32 obj = bind_ops[i].obj;
u64 obj_offset = bind_ops[i].obj_offset;
u16 pat_index = bind_ops[i].pat_index;
- u16 coh_mode;
if (!obj)
continue;
@@ -2957,40 +3197,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
}
bos[i] = gem_to_xe_bo(gem_obj);
- if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
- XE_IOCTL_DBG(xe, obj_offset >
- bos[i]->size - range)) {
- err = -EINVAL;
- goto put_obj;
- }
-
- if (bos[i]->flags & XE_BO_FLAG_INTERNAL_64K) {
- if (XE_IOCTL_DBG(xe, obj_offset &
- XE_64K_PAGE_MASK) ||
- XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
- XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
- err = -EINVAL;
- goto put_obj;
- }
- }
-
- coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
- if (bos[i]->cpu_caching) {
- if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
- bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
- err = -EINVAL;
- goto put_obj;
- }
- } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
- /*
- * Imported dma-buf from a different device should
- * require 1way or 2way coherency since we don't know
- * how it was mapped on the CPU. Just assume it is
- * potentially cached on CPU side.
- */
- err = -EINVAL;
+ err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
+ obj_offset, pat_index);
+ if (err)
goto put_obj;
- }
}
if (args->num_syncs) {
@@ -3026,6 +3236,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto free_syncs;
}
+ xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
for (i = 0; i < args->num_binds; ++i) {
u64 range = bind_ops[i].range;
u64 addr = bind_ops[i].addr;
@@ -3045,42 +3256,25 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
}
err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
- &ops_list,
- i == args->num_binds - 1);
+ &vops, i == args->num_binds - 1);
if (err)
goto unwind_ops;
}
/* Nothing to do */
- if (list_empty(&ops_list)) {
+ if (list_empty(&vops.list)) {
err = -ENODATA;
goto unwind_ops;
}
- xe_vm_get(vm);
- if (q)
- xe_exec_queue_get(q);
-
- err = vm_bind_ioctl_ops_execute(vm, &ops_list);
-
- up_write(&vm->lock);
-
- if (q)
- xe_exec_queue_put(q);
- xe_vm_put(vm);
-
- for (i = 0; bos && i < args->num_binds; ++i)
- xe_bo_put(bos[i]);
-
- kvfree(bos);
- kvfree(ops);
- if (args->num_binds > 1)
- kvfree(bind_ops);
-
- return err;
+ err = vm_bind_ioctl_ops_execute(vm, &vops);
unwind_ops:
- vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
+ if (err && err != -ENODATA)
+ vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
+ for (i = args->num_binds - 1; i >= 0; --i)
+ if (ops[i])
+ drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
free_syncs:
if (err == -ENODATA)
err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
@@ -3201,55 +3395,6 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
return 0;
}
-int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
-{
- struct drm_gpuva *gpuva;
- bool is_vram;
- uint64_t addr;
-
- if (!down_read_trylock(&vm->lock)) {
- drm_printf(p, " Failed to acquire VM lock to dump capture");
- return 0;
- }
- if (vm->pt_root[gt_id]) {
- addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE);
- is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo);
- drm_printf(p, " VM root: A:0x%llx %s\n", addr,
- is_vram ? "VRAM" : "SYS");
- }
-
- drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
- struct xe_vma *vma = gpuva_to_vma(gpuva);
- bool is_userptr = xe_vma_is_userptr(vma);
- bool is_null = xe_vma_is_null(vma);
-
- if (is_null) {
- addr = 0;
- } else if (is_userptr) {
- struct sg_table *sg = to_userptr_vma(vma)->userptr.sg;
- struct xe_res_cursor cur;
-
- if (sg) {
- xe_res_first_sg(sg, 0, XE_PAGE_SIZE, &cur);
- addr = xe_res_dma(&cur);
- } else {
- addr = 0;
- }
- } else {
- addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE);
- is_vram = xe_bo_is_vram(xe_vma_bo(vma));
- }
- drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
- xe_vma_start(vma), xe_vma_end(vma) - 1,
- xe_vma_size(vma),
- addr, is_null ? "NULL" : is_userptr ? "USR" :
- is_vram ? "VRAM" : "SYS");
- }
- up_read(&vm->lock);
-
- return 0;
-}
-
struct xe_vm_snapshot {
unsigned long num_snaps;
struct {
@@ -3339,7 +3484,7 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
}
if (bo) {
- dma_resv_lock(bo->ttm.base.resv, NULL);
+ xe_bo_lock(bo, false);
err = ttm_bo_vmap(&bo->ttm, &src);
if (!err) {
xe_map_memcpy_from(xe_bo_device(bo),
@@ -3348,7 +3493,7 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
snap->snap[i].len);
ttm_bo_vunmap(&bo->ttm, &src);
}
- dma_resv_unlock(bo->ttm.base.resv);
+ xe_bo_unlock(bo);
} else {
void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;