Diffstat (limited to 'drivers/gpu/drm/xe/xe_vm.c')
-rw-r--r--   drivers/gpu/drm/xe/xe_vm.c   845
1 files changed, 495 insertions, 350 deletions
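
Note: the diff below reworks the VM bind path so that all GEM objects and the VM resv are locked up front in a single drm_exec transaction, each VMA operation is prepared while the locks are held, the operations are then executed to produce one dma_fence, and user fences / sync entries are attached and signalled once at the end. The following is a condensed sketch assembled from the vm_bind_ioctl_ops_execute(), vm_bind_ioctl_ops_lock_and_prep(), ops_execute() and vm_bind_ioctl_ops_fini() hunks further down — comments are added here for orientation, and error handling is abbreviated, so treat it as a reading aid rather than a verbatim copy of the patch:

static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, struct xe_vma_ops *vops)
{
	struct drm_exec exec;
	struct dma_fence *fence;
	int err;

	lockdep_assert_held_write(&vm->lock);

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
		      DRM_EXEC_IGNORE_DUPLICATES, 0);
	drm_exec_until_all_locked(&exec) {
		/* Lock the VM resv and every BO touched by an op; validate
		 * BOs (and migrate for prefetch) while the locks are held. */
		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
		drm_exec_retry_on_contention(&exec);
		if (err)
			goto unlock;

		/* Run every op in the list; ops_execute() hands back the
		 * fence of the last operation. */
		fence = ops_execute(vm, vops);
		if (IS_ERR(fence)) {
			err = PTR_ERR(fence);
			/* FIXME: Killing VM rather than proper error handling */
			xe_vm_kill(vm, false);
			goto unlock;
		}

		/* Attach user fences, destroy unmapped VMAs, signal syncs
		 * and record the last fence exactly once. */
		vm_bind_ioctl_ops_fini(vm, vops, fence);
	}

unlock:
	drm_exec_fini(&exec);
	return err;
}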
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 4aa3943e6f29..99bf7412475c 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -85,8 +85,8 @@ static bool preempt_fences_waiting(struct xe_vm *vm) list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { if (!q->compute.pfence || - (q->compute.pfence && test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &q->compute.pfence->flags))) { + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &q->compute.pfence->flags)) { return true; } } @@ -315,19 +315,23 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm) #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 -static void xe_vm_kill(struct xe_vm *vm) +static void xe_vm_kill(struct xe_vm *vm, bool unlocked) { struct xe_exec_queue *q; lockdep_assert_held(&vm->lock); - xe_vm_lock(vm, false); + if (unlocked) + xe_vm_lock(vm, false); + vm->flags |= XE_VM_FLAG_BANNED; trace_xe_vm_kill(vm); list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) q->ops->kill(q); - xe_vm_unlock(vm); + + if (unlocked) + xe_vm_unlock(vm); /* TODO: Inform user the VM is banned */ } @@ -557,7 +561,7 @@ out_unlock_outer: if (err) { drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); - xe_vm_kill(vm); + xe_vm_kill(vm, true); } up_write(&vm->lock); @@ -708,37 +712,116 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm) list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; } -static struct dma_fence * -xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op); +static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, + u8 tile_mask) +{ + INIT_LIST_HEAD(&op->link); + op->tile_mask = tile_mask; + op->base.op = DRM_GPUVA_OP_MAP; + op->base.map.va.addr = vma->gpuva.va.addr; + op->base.map.va.range = vma->gpuva.va.range; + op->base.map.gem.obj = vma->gpuva.gem.obj; + op->base.map.gem.offset = vma->gpuva.gem.offset; + op->map.vma = vma; + op->map.immediate = true; + op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; + op->map.is_null = xe_vma_is_null(vma); +} + +static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, + u8 tile_mask) +{ + struct xe_vma_op *op; + + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) + return -ENOMEM; + + xe_vm_populate_rebind(op, vma, tile_mask); + list_add_tail(&op->link, &vops->list); + + return 0; +} + +static struct dma_fence *ops_execute(struct xe_vm *vm, + struct xe_vma_ops *vops); +static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, + struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs); int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) { struct dma_fence *fence; struct xe_vma *vma, *next; + struct xe_vma_ops vops; + struct xe_vma_op *op, *next_op; + int err; lockdep_assert_held(&vm->lock); - if (xe_vm_in_lr_mode(vm) && !rebind_worker) + if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || + list_empty(&vm->rebind_list)) return 0; + xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + xe_vm_assert_held(vm); - list_for_each_entry_safe(vma, next, &vm->rebind_list, - combined_links.rebind) { + list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { xe_assert(vm->xe, vma->tile_present); - list_del_init(&vma->combined_links.rebind); if (rebind_worker) trace_xe_vma_rebind_worker(vma); else trace_xe_vma_rebind_exec(vma); - fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false); - if (IS_ERR(fence)) - return PTR_ERR(fence); + + err = xe_vm_ops_add_rebind(&vops, vma, + vma->tile_present); + if 
(err) + goto free_ops; + } + + fence = ops_execute(vm, &vops); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + } else { dma_fence_put(fence); + list_for_each_entry_safe(vma, next, &vm->rebind_list, + combined_links.rebind) + list_del_init(&vma->combined_links.rebind); + } +free_ops: + list_for_each_entry_safe(op, next_op, &vops.list, link) { + list_del(&op->link); + kfree(op); } - return 0; + return err; +} + +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) +{ + struct dma_fence *fence = NULL; + struct xe_vma_ops vops; + struct xe_vma_op *op, *next_op; + int err; + + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); + + xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + + err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); + if (err) + return ERR_PTR(err); + + fence = ops_execute(vm, &vops); + + list_for_each_entry_safe(op, next_op, &vops.list, link) { + list_del(&op->link); + kfree(op); + } + + return fence; } static void xe_vma_free(struct xe_vma *vma) @@ -805,7 +888,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, for_each_tile(tile, vm->xe, id) vma->tile_mask |= 0x1 << id; - if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC) + if (vm->xe->info.has_atomic_enable_pte_bit) vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; vma->pat_index = pat_index; @@ -1173,6 +1256,8 @@ static const struct xe_pt_ops xelp_pt_ops = { .pde_encode_bo = xelp_pde_encode_bo, }; +static void vm_destroy_work_func(struct work_struct *w); + /** * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the * given tile and vm. @@ -1252,6 +1337,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) init_rwsem(&vm->userptr.notifier_lock); spin_lock_init(&vm->userptr.invalidated_lock); + INIT_WORK(&vm->destroy_work, vm_destroy_work_func); + INIT_LIST_HEAD(&vm->preempt.exec_queues); vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ @@ -1260,7 +1347,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->pt_ops = &xelp_pt_ops; - if (!(flags & XE_VM_FLAG_MIGRATION)) + /* + * Long-running workloads are not protected by the scheduler references. + * By design, run_job for long-running workloads returns NULL and the + * scheduler drops all the references of it, hence protecting the VM + * for this case is necessary. + */ + if (flags & XE_VM_FLAG_LR_MODE) xe_pm_runtime_get_noresume(xe); vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); @@ -1274,7 +1367,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) drm_gem_object_put(vm_resv_obj); - err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); + err = xe_vm_lock(vm, true); if (err) goto err_close; @@ -1318,7 +1411,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) xe_pt_populate_empty(tile, vm, vm->pt_root[id]); } - dma_resv_unlock(xe_vm_resv(vm)); + xe_vm_unlock(vm); /* Kernel migration VM shouldn't have a circular loop.. 
*/ if (!(flags & XE_VM_FLAG_MIGRATION)) { @@ -1360,7 +1453,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) return vm; err_unlock_close: - dma_resv_unlock(xe_vm_resv(vm)); + xe_vm_unlock(vm); err_close: xe_vm_close_and_put(vm); return ERR_PTR(err); @@ -1370,7 +1463,7 @@ err_no_resv: for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); kfree(vm); - if (!(flags & XE_VM_FLAG_MIGRATION)) + if (flags & XE_VM_FLAG_LR_MODE) xe_pm_runtime_put(xe); return ERR_PTR(err); } @@ -1489,9 +1582,10 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_vm_put(vm); } -static void xe_vm_free(struct drm_gpuvm *gpuvm) +static void vm_destroy_work_func(struct work_struct *w) { - struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); + struct xe_vm *vm = + container_of(w, struct xe_vm, destroy_work); struct xe_device *xe = vm->xe; struct xe_tile *tile; u8 id; @@ -1504,7 +1598,7 @@ static void xe_vm_free(struct drm_gpuvm *gpuvm) mutex_destroy(&vm->snap_mutex); - if (!(vm->flags & XE_VM_FLAG_MIGRATION)) + if (vm->flags & XE_VM_FLAG_LR_MODE) xe_pm_runtime_put(xe); for_each_tile(tile, xe, id) @@ -1514,6 +1608,14 @@ static void xe_vm_free(struct drm_gpuvm *gpuvm) kfree(vm); } +static void xe_vm_free(struct drm_gpuvm *gpuvm) +{ + struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); + + /* To destroy the VM we need to be able to sleep */ + queue_work(system_unbound_wq, &vm->destroy_work); +} + struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) { struct xe_vm *vm; @@ -1550,23 +1652,13 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct dma_fence *fence = NULL; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; - int cur_fence = 0, i; + int cur_fence = 0; int number_tiles = hweight8(vma->tile_present); int err; u8 id; trace_xe_vma_unbind(vma); - if (vma->ufence) { - struct xe_user_fence * const f = vma->ufence; - - if (!xe_sync_ufence_get_status(f)) - return ERR_PTR(-EBUSY); - - vma->ufence = NULL; - xe_sync_ufence_put(f); - } - if (number_tiles > 1) { fences = kmalloc_array(number_tiles, sizeof(*fences), GFP_KERNEL); @@ -1608,10 +1700,6 @@ next: fence = cf ? &cf->base : !fence ? xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence; - if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); - } return fence; @@ -1628,15 +1716,15 @@ err_fences: static struct dma_fence * xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op) + u8 tile_mask, bool first_op, bool last_op) { struct xe_tile *tile; struct dma_fence *fence; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; struct xe_vm *vm = xe_vma_vm(vma); - int cur_fence = 0, i; - int number_tiles = hweight8(vma->tile_mask); + int cur_fence = 0; + int number_tiles = hweight8(tile_mask); int err; u8 id; @@ -1650,7 +1738,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, } for_each_tile(tile, vm->xe, id) { - if (!(vma->tile_mask & BIT(id))) + if (!(tile_mask & BIT(id))) goto next; fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id], @@ -1682,12 +1770,6 @@ next: } } - if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], - cf ? &cf->base : fence); - } - return cf ? 
&cf->base : fence; err_fences: @@ -1715,87 +1797,46 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) return NULL; } -static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, struct xe_sync_entry *syncs, - u32 num_syncs, bool immediate, bool first_op, - bool last_op) +static struct dma_fence * +xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, + struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs, + u8 tile_mask, bool immediate, bool first_op, bool last_op) { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); - struct xe_user_fence *ufence; xe_vm_assert_held(vm); - - ufence = find_ufence_get(syncs, num_syncs); - if (vma->ufence && ufence) - xe_sync_ufence_put(vma->ufence); - - vma->ufence = ufence ?: vma->ufence; + xe_bo_assert_held(bo); if (immediate) { - fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, - last_op); + fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, tile_mask, + first_op, last_op); if (IS_ERR(fence)) - return PTR_ERR(fence); + return fence; } else { - int i; - xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm); - if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); - } } - if (last_op) - xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - dma_fence_put(fence); - - return 0; -} - -static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_bo *bo, struct xe_sync_entry *syncs, - u32 num_syncs, bool immediate, bool first_op, - bool last_op) -{ - int err; - - xe_vm_assert_held(vm); - xe_bo_assert_held(bo); - - if (bo && immediate) { - err = xe_bo_validate(bo, vm, true); - if (err) - return err; - } - - return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op, - last_op); + return fence; } -static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, struct xe_sync_entry *syncs, - u32 num_syncs, bool first_op, bool last_op) +static struct dma_fence * +xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, + struct xe_exec_queue *q, struct xe_sync_entry *syncs, + u32 num_syncs, bool first_op, bool last_op) { struct dma_fence *fence; - struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); xe_vm_assert_held(vm); xe_bo_assert_held(xe_vma_bo(vma)); fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op); if (IS_ERR(fence)) - return PTR_ERR(fence); + return fence; - xe_vma_destroy(vma, fence); - if (last_op) - xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - dma_fence_put(fence); - - return 0; + return fence; } #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ @@ -1938,40 +1979,18 @@ static const u32 region_to_mem_type[] = { XE_PL_VRAM1, }; -static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, u32 region, - struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op) +static struct dma_fence * +xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, + struct xe_exec_queue *q, struct xe_sync_entry *syncs, + u32 num_syncs, bool first_op, bool last_op) { struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); - int err; - - xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type)); - - if (!xe_vma_has_no_bo(vma)) { - err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]); - if (err) - return err; - } if (vma->tile_mask != (vma->tile_present & 
~vma->tile_invalidated)) { return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, - true, first_op, last_op); + vma->tile_mask, true, first_op, last_op); } else { - int i; - - /* Nothing to do, signal fences now */ - if (last_op) { - for (i = 0; i < num_syncs; i++) { - struct dma_fence *fence = - xe_exec_queue_last_fence_get(wait_exec_queue, vm); - - xe_sync_entry_signal(&syncs[i], fence); - dma_fence_put(fence); - } - } - - return 0; + return xe_exec_queue_last_fence_get(wait_exec_queue, vm); } } @@ -2267,23 +2286,28 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, struct drm_gpuva_ops *ops, struct xe_sync_entry *syncs, u32 num_syncs, - struct list_head *ops_list, bool last) + struct xe_vma_ops *vops, bool last) { struct xe_device *xe = vm->xe; struct xe_vma_op *last_op = NULL; struct drm_gpuva_op *__op; + struct xe_tile *tile; + u8 id, tile_mask = 0; int err = 0; lockdep_assert_held_write(&vm->lock); + for_each_tile(tile, vm->xe, id) + tile_mask |= 0x1 << id; + drm_gpuva_for_each_op(__op, ops) { struct xe_vma_op *op = gpuva_op_to_vma_op(__op); struct xe_vma *vma; - bool first = list_empty(ops_list); + bool first = list_empty(&vops->list); unsigned int flags = 0; INIT_LIST_HEAD(&op->link); - list_add_tail(&op->link, ops_list); + list_add_tail(&op->link, &vops->list); if (first) { op->flags |= XE_VMA_OP_FIRST; @@ -2292,6 +2316,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, } op->q = q; + op->tile_mask = tile_mask; switch (op->base.op) { case DRM_GPUVA_OP_MAP: @@ -2409,12 +2434,11 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, } /* FIXME: Unhandled corner case */ - XE_WARN_ON(!last_op && last && !list_empty(ops_list)); + XE_WARN_ON(!last_op && last && !list_empty(&vops->list)); if (!last_op) return 0; - last_op->ops = ops; if (last) { last_op->flags |= XE_VMA_OP_LAST; last_op->num_syncs = num_syncs; @@ -2424,27 +2448,24 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, return 0; } -static int op_execute(struct drm_exec *exec, struct xe_vm *vm, - struct xe_vma *vma, struct xe_vma_op *op) +static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma, + struct xe_vma_op *op) { - int err; - - lockdep_assert_held_write(&vm->lock); + struct dma_fence *fence = NULL; - err = xe_vm_lock_vma(exec, vma); - if (err) - return err; + lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); xe_bo_assert_held(xe_vma_bo(vma)); switch (op->base.op) { case DRM_GPUVA_OP_MAP: - err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), - op->syncs, op->num_syncs, - op->map.immediate || !xe_vm_in_fault_mode(vm), - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); + fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), + op->syncs, op->num_syncs, + op->tile_mask, + op->map.immediate || !xe_vm_in_fault_mode(vm), + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); break; case DRM_GPUVA_OP_REMAP: { @@ -2454,37 +2475,41 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, if (!op->remap.unmap_done) { if (prev || next) vma->gpuva.flags |= XE_VMA_FIRST_REBIND; - err = xe_vm_unbind(vm, vma, op->q, op->syncs, - op->num_syncs, - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST && - !prev && !next); - if (err) + fence = xe_vm_unbind(vm, vma, op->q, op->syncs, + op->num_syncs, + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST && + !prev && !next); + if (IS_ERR(fence)) 
break; op->remap.unmap_done = true; } if (prev) { op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND; - err = xe_vm_bind(vm, op->remap.prev, op->q, - xe_vma_bo(op->remap.prev), op->syncs, - op->num_syncs, true, false, - op->flags & XE_VMA_OP_LAST && !next); + dma_fence_put(fence); + fence = xe_vm_bind(vm, op->remap.prev, op->q, + xe_vma_bo(op->remap.prev), op->syncs, + op->num_syncs, + op->remap.prev->tile_mask, true, + false, + op->flags & XE_VMA_OP_LAST && !next); op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND; - if (err) + if (IS_ERR(fence)) break; op->remap.prev = NULL; } if (next) { op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND; - err = xe_vm_bind(vm, op->remap.next, op->q, - xe_vma_bo(op->remap.next), - op->syncs, op->num_syncs, - true, false, - op->flags & XE_VMA_OP_LAST); + dma_fence_put(fence); + fence = xe_vm_bind(vm, op->remap.next, op->q, + xe_vma_bo(op->remap.next), + op->syncs, op->num_syncs, + op->remap.next->tile_mask, true, + false, op->flags & XE_VMA_OP_LAST); op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND; - if (err) + if (IS_ERR(fence)) break; op->remap.next = NULL; } @@ -2492,43 +2517,35 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, break; } case DRM_GPUVA_OP_UNMAP: - err = xe_vm_unbind(vm, vma, op->q, op->syncs, - op->num_syncs, op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); + fence = xe_vm_unbind(vm, vma, op->q, op->syncs, + op->num_syncs, op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); break; case DRM_GPUVA_OP_PREFETCH: - err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region, - op->syncs, op->num_syncs, - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); + fence = xe_vm_prefetch(vm, vma, op->q, op->syncs, op->num_syncs, + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } - if (err) + if (IS_ERR(fence)) trace_xe_vma_fail(vma); - return err; + return fence; } -static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vma_op *op) +static struct dma_fence * +__xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, + struct xe_vma_op *op) { - struct drm_exec exec; + struct dma_fence *fence; int err; retry_userptr: - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); - drm_exec_until_all_locked(&exec) { - err = op_execute(&exec, vm, vma, op); - drm_exec_retry_on_contention(&exec); - if (err) - break; - } - drm_exec_fini(&exec); - - if (err == -EAGAIN) { + fence = op_execute(vm, vma, op); + if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) { lockdep_assert_held_write(&vm->lock); if (op->base.op == DRM_GPUVA_OP_REMAP) { @@ -2545,22 +2562,24 @@ retry_userptr: if (!err) goto retry_userptr; + fence = ERR_PTR(err); trace_xe_vma_fail(vma); } } - return err; + return fence; } -static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) +static struct dma_fence * +xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) { - int ret = 0; + struct dma_fence *fence = ERR_PTR(-ENOMEM); - lockdep_assert_held_write(&vm->lock); + lockdep_assert_held(&vm->lock); switch (op->base.op) { case DRM_GPUVA_OP_MAP: - ret = __xe_vma_op_execute(vm, op->map.vma, op); + fence = __xe_vma_op_execute(vm, op->map.vma, op); break; case DRM_GPUVA_OP_REMAP: { @@ -2573,42 +2592,23 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) else vma = op->remap.next; - ret = __xe_vma_op_execute(vm, vma, op); + fence = __xe_vma_op_execute(vm, vma, op); break; } case DRM_GPUVA_OP_UNMAP: - ret = __xe_vma_op_execute(vm, 
gpuva_to_vma(op->base.unmap.va), - op); + fence = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va), + op); break; case DRM_GPUVA_OP_PREFETCH: - ret = __xe_vma_op_execute(vm, - gpuva_to_vma(op->base.prefetch.va), - op); + fence = __xe_vma_op_execute(vm, + gpuva_to_vma(op->base.prefetch.va), + op); break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } - return ret; -} - -static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op) -{ - bool last = op->flags & XE_VMA_OP_LAST; - - if (last) { - while (op->num_syncs--) - xe_sync_entry_cleanup(&op->syncs[op->num_syncs]); - kfree(op->syncs); - if (op->q) - xe_exec_queue_put(op->q); - } - if (!list_empty(&op->link)) - list_del(&op->link); - if (op->ops) - drm_gpuva_ops_free(&vm->gpuvm, op->ops); - if (last) - xe_vm_put(vm); + return fence; } static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, @@ -2687,34 +2687,223 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, op->flags & XE_VMA_OP_PREV_COMMITTED, op->flags & XE_VMA_OP_NEXT_COMMITTED); } + } +} - drm_gpuva_ops_free(&vm->gpuvm, __ops); +static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, + bool validate) +{ + struct xe_bo *bo = xe_vma_bo(vma); + int err = 0; + + if (bo) { + if (!bo->vm) + err = drm_exec_lock_obj(exec, &bo->ttm.base); + if (!err && validate) + err = xe_bo_validate(bo, xe_vma_vm(vma), true); } + + return err; } -static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, - struct list_head *ops_list) +static int check_ufence(struct xe_vma *vma) +{ + if (vma->ufence) { + struct xe_user_fence * const f = vma->ufence; + + if (!xe_sync_ufence_get_status(f)) + return -EBUSY; + + vma->ufence = NULL; + xe_sync_ufence_put(f); + } + + return 0; +} + +static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, + struct xe_vma_op *op) +{ + int err = 0; + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + err = vma_lock_and_validate(exec, op->map.vma, + !xe_vm_in_fault_mode(vm) || + op->map.immediate); + break; + case DRM_GPUVA_OP_REMAP: + err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); + if (err) + break; + + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.remap.unmap->va), + false); + if (!err && op->remap.prev) + err = vma_lock_and_validate(exec, op->remap.prev, true); + if (!err && op->remap.next) + err = vma_lock_and_validate(exec, op->remap.next, true); + break; + case DRM_GPUVA_OP_UNMAP: + err = check_ufence(gpuva_to_vma(op->base.unmap.va)); + if (err) + break; + + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.unmap.va), + false); + break; + case DRM_GPUVA_OP_PREFETCH: + { + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); + u32 region = op->prefetch.region; + + xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); + + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.prefetch.va), + false); + if (!err && !xe_vma_has_no_bo(vma)) + err = xe_bo_migrate(xe_vma_bo(vma), + region_to_mem_type[region]); + break; + } + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } + + return err; +} + +static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, + struct xe_vm *vm, + struct xe_vma_ops *vops) +{ + struct xe_vma_op *op; + int err; + + err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); + if (err) + return err; + + list_for_each_entry(op, &vops->list, link) { + err = op_lock_and_prep(exec, vm, op); + if (err) + return err; + } + + return 0; +} + +static struct dma_fence *ops_execute(struct xe_vm *vm, + struct xe_vma_ops *vops) { struct xe_vma_op *op, *next; + 
struct dma_fence *fence = NULL; + + list_for_each_entry_safe(op, next, &vops->list, link) { + dma_fence_put(fence); + fence = xe_vma_op_execute(vm, op); + if (IS_ERR(fence)) { + drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld", + op->base.op, PTR_ERR(fence)); + fence = ERR_PTR(-ENOSPC); + break; + } + } + + return fence; +} + +static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) +{ + if (vma->ufence) + xe_sync_ufence_put(vma->ufence); + vma->ufence = __xe_sync_ufence_get(ufence); +} + +static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, + struct xe_user_fence *ufence) +{ + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + vma_add_ufence(op->map.vma, ufence); + break; + case DRM_GPUVA_OP_REMAP: + if (op->remap.prev) + vma_add_ufence(op->remap.prev, ufence); + if (op->remap.next) + vma_add_ufence(op->remap.next, ufence); + break; + case DRM_GPUVA_OP_UNMAP: + break; + case DRM_GPUVA_OP_PREFETCH: + vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } +} + +static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, + struct dma_fence *fence) +{ + struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); + struct xe_user_fence *ufence; + struct xe_vma_op *op; + int i; + + ufence = find_ufence_get(vops->syncs, vops->num_syncs); + list_for_each_entry(op, &vops->list, link) { + if (ufence) + op_add_ufence(vm, op, ufence); + + if (op->base.op == DRM_GPUVA_OP_UNMAP) + xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); + else if (op->base.op == DRM_GPUVA_OP_REMAP) + xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), + fence); + } + if (ufence) + xe_sync_ufence_put(ufence); + for (i = 0; i < vops->num_syncs; i++) + xe_sync_entry_signal(vops->syncs + i, fence); + xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); + dma_fence_put(fence); +} + +static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, + struct xe_vma_ops *vops) +{ + struct drm_exec exec; + struct dma_fence *fence; int err; lockdep_assert_held_write(&vm->lock); - list_for_each_entry_safe(op, next, ops_list, link) { - err = xe_vma_op_execute(vm, op); - if (err) { - drm_warn(&vm->xe->drm, "VM op(%d) failed with %d", - op->base.op, err); - /* - * FIXME: Killing VM rather than proper error handling - */ - xe_vm_kill(vm); - return -ENOSPC; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); + drm_exec_retry_on_contention(&exec); + if (err) + goto unlock; + + fence = ops_execute(vm, vops); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + /* FIXME: Killing VM rather than proper error handling */ + xe_vm_kill(vm, false); + goto unlock; + } else { + vm_bind_ioctl_ops_fini(vm, vops, fence); } - xe_vma_op_cleanup(vm, op); } - return 0; +unlock: + drm_exec_fini(&exec); + return err; } #define SUPPORTED_FLAGS \ @@ -2862,6 +3051,58 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, return err; } +static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, + struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs) +{ + memset(vops, 0, sizeof(*vops)); + INIT_LIST_HEAD(&vops->list); + vops->vm = vm; + vops->q = q; + vops->syncs = syncs; + vops->num_syncs = num_syncs; +} + +static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, + u64 addr, u64 range, u64 obj_offset, + u16 pat_index) +{ + u16 coh_mode; + + if 
(XE_IOCTL_DBG(xe, range > bo->size) || + XE_IOCTL_DBG(xe, obj_offset > + bo->size - range)) { + return -EINVAL; + } + + if (bo->flags & XE_BO_FLAG_INTERNAL_64K) { + if (XE_IOCTL_DBG(xe, obj_offset & + XE_64K_PAGE_MASK) || + XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || + XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { + return -EINVAL; + } + } + + coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); + if (bo->cpu_caching) { + if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && + bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { + return -EINVAL; + } + } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { + /* + * Imported dma-buf from a different device should + * require 1way or 2way coherency since we don't know + * how it was mapped on the CPU. Just assume is it + * potentially cached on CPU side. + */ + return -EINVAL; + } + + return 0; +} + int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -2875,7 +3116,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u32 num_syncs, num_ufence = 0; struct xe_sync_entry *syncs = NULL; struct drm_xe_vm_bind_op *bind_ops; - LIST_HEAD(ops_list); + struct xe_vma_ops vops; int err; int i; @@ -2945,7 +3186,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u32 obj = bind_ops[i].obj; u64 obj_offset = bind_ops[i].obj_offset; u16 pat_index = bind_ops[i].pat_index; - u16 coh_mode; if (!obj) continue; @@ -2957,40 +3197,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } bos[i] = gem_to_xe_bo(gem_obj); - if (XE_IOCTL_DBG(xe, range > bos[i]->size) || - XE_IOCTL_DBG(xe, obj_offset > - bos[i]->size - range)) { - err = -EINVAL; - goto put_obj; - } - - if (bos[i]->flags & XE_BO_FLAG_INTERNAL_64K) { - if (XE_IOCTL_DBG(xe, obj_offset & - XE_64K_PAGE_MASK) || - XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || - XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { - err = -EINVAL; - goto put_obj; - } - } - - coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); - if (bos[i]->cpu_caching) { - if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && - bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { - err = -EINVAL; - goto put_obj; - } - } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { - /* - * Imported dma-buf from a different device should - * require 1way or 2way coherency since we don't know - * how it was mapped on the CPU. Just assume is it - * potentially cached on CPU side. 
- */ - err = -EINVAL; + err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, + obj_offset, pat_index); + if (err) goto put_obj; - } } if (args->num_syncs) { @@ -3026,6 +3236,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto free_syncs; } + xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; @@ -3045,42 +3256,25 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs, - &ops_list, - i == args->num_binds - 1); + &vops, i == args->num_binds - 1); if (err) goto unwind_ops; } /* Nothing to do */ - if (list_empty(&ops_list)) { + if (list_empty(&vops.list)) { err = -ENODATA; goto unwind_ops; } - xe_vm_get(vm); - if (q) - xe_exec_queue_get(q); - - err = vm_bind_ioctl_ops_execute(vm, &ops_list); - - up_write(&vm->lock); - - if (q) - xe_exec_queue_put(q); - xe_vm_put(vm); - - for (i = 0; bos && i < args->num_binds; ++i) - xe_bo_put(bos[i]); - - kvfree(bos); - kvfree(ops); - if (args->num_binds > 1) - kvfree(bind_ops); - - return err; + err = vm_bind_ioctl_ops_execute(vm, &vops); unwind_ops: - vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); + if (err && err != -ENODATA) + vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); + for (i = args->num_binds - 1; i >= 0; --i) + if (ops[i]) + drm_gpuva_ops_free(&vm->gpuvm, ops[i]); free_syncs: if (err == -ENODATA) err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); @@ -3201,55 +3395,6 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) return 0; } -int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) -{ - struct drm_gpuva *gpuva; - bool is_vram; - uint64_t addr; - - if (!down_read_trylock(&vm->lock)) { - drm_printf(p, " Failed to acquire VM lock to dump capture"); - return 0; - } - if (vm->pt_root[gt_id]) { - addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE); - is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo); - drm_printf(p, " VM root: A:0x%llx %s\n", addr, - is_vram ? "VRAM" : "SYS"); - } - - drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { - struct xe_vma *vma = gpuva_to_vma(gpuva); - bool is_userptr = xe_vma_is_userptr(vma); - bool is_null = xe_vma_is_null(vma); - - if (is_null) { - addr = 0; - } else if (is_userptr) { - struct sg_table *sg = to_userptr_vma(vma)->userptr.sg; - struct xe_res_cursor cur; - - if (sg) { - xe_res_first_sg(sg, 0, XE_PAGE_SIZE, &cur); - addr = xe_res_dma(&cur); - } else { - addr = 0; - } - } else { - addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE); - is_vram = xe_bo_is_vram(xe_vma_bo(vma)); - } - drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n", - xe_vma_start(vma), xe_vma_end(vma) - 1, - xe_vma_size(vma), - addr, is_null ? "NULL" : is_userptr ? "USR" : - is_vram ? "VRAM" : "SYS"); - } - up_read(&vm->lock); - - return 0; -} - struct xe_vm_snapshot { unsigned long num_snaps; struct { @@ -3339,7 +3484,7 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) } if (bo) { - dma_resv_lock(bo->ttm.base.resv, NULL); + xe_bo_lock(bo, false); err = ttm_bo_vmap(&bo->ttm, &src); if (!err) { xe_map_memcpy_from(xe_bo_device(bo), @@ -3348,7 +3493,7 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) snap->snap[i].len); ttm_bo_vunmap(&bo->ttm, &src); } - dma_resv_unlock(bo->ttm.base.resv); + xe_bo_unlock(bo); } else { void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; |
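
A structural change that is easy to miss in the hunks above: xe_vm_free() no longer tears the VM down inline. As the patch's own comment notes, destroying the VM needs to be able to sleep, so the old xe_vm_free() body becomes vm_destroy_work_func() and the drm_gpuvm free callback only queues that worker (the work item is initialized in xe_vm_create() with INIT_WORK(&vm->destroy_work, vm_destroy_work_func)). Condensed from the diff, with an explanatory comment added here:

static void xe_vm_free(struct drm_gpuvm *gpuvm)
{
	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);

	/* The final teardown must be able to sleep, so defer the real
	 * work (vm_destroy_work_func) to the unbound workqueue instead
	 * of freeing in the gpuvm free callback itself. */
	queue_work(system_unbound_wq, &vm->destroy_work);
}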
