-rw-r--r--  drivers/gpu/drm/xe/xe_engine.c     |  13
-rw-r--r--  drivers/gpu/drm/xe/xe_exec.c       |   6
-rw-r--r--  drivers/gpu/drm/xe/xe_trace.h      |   5
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.c         | 104
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.h         |  13
-rw-r--r--  drivers/gpu/drm/xe/xe_vm_madvise.c |   2
-rw-r--r--  drivers/gpu/drm/xe/xe_vm_types.h   |  10
7 files changed, 107 insertions(+), 46 deletions(-)
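This patch introduces an XE_VM_FLAG_BANNED state: when the compute-mode rebind worker hits a non-recoverable error it now kills the VM (setting the ban flag and killing its preempt engines) instead of merely warning, and the ioctl paths check the VM under vm->lock via the new xe_vm_is_closed_or_banned() helper, which is only valid while that lock is held. A minimal sketch of the lookup pattern this enforces, modeled on the xe_engine_create_ioctl() hunk below (the wrapper function name here is hypothetical, not part of the patch):

/*
 * Hypothetical ioctl-path sketch: the closed/banned check must be made
 * with vm->lock held (xe_vm_is_closed_or_banned() asserts this), and the
 * lock stays held across the operation so the state cannot change.
 */
static int xe_vm_ioctl_op_sketch(struct xe_vm *vm)
{
	int err;

	/* Interruptible: this runs in ioctl (user) context */
	err = down_read_interruptible(&vm->lock);
	if (err)
		return err;

	/* Only meaningful while vm->lock is held */
	if (xe_vm_is_closed_or_banned(vm)) {
		up_read(&vm->lock);
		return -ENOENT;
	}

	/* ... operate on the VM with the lock held ... */

	up_read(&vm->lock);
	return 0;
}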
diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index f1b8b22e0216..af75c9a0ea7b 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -597,10 +597,23 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_ERR(xe, !vm))
 		return -ENOENT;
 
+	err = down_read_interruptible(&vm->lock);
+	if (err) {
+		xe_vm_put(vm);
+		return err;
+	}
+
+	if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) {
+		up_read(&vm->lock);
+		xe_vm_put(vm);
+		return -ENOENT;
+	}
+
 	e = xe_engine_create(xe, vm, logical_mask,
 			     args->width, hwe,
 			     xe_vm_no_dma_fences(vm) ? 0 :
 			     ENGINE_FLAG_PERSISTENT);
+	up_read(&vm->lock);
 	xe_vm_put(vm);
 	if (IS_ERR(e))
 		return PTR_ERR(e);
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index c52edff9a358..bdf00e59e7a4 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -297,9 +297,9 @@ retry:
 	if (err)
 		goto err_unlock_list;
 
-	if (xe_vm_is_closed(engine->vm)) {
-		drm_warn(&xe->drm, "Trying to schedule after vm is closed\n");
-		err = -EIO;
+	if (xe_vm_is_closed_or_banned(engine->vm)) {
+		drm_warn(&xe->drm, "Trying to schedule after vm is closed or banned\n");
+		err = -ECANCELED;
 		goto err_engine_end;
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 8a5d35f15791..d5894570f196 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -478,6 +478,11 @@ DECLARE_EVENT_CLASS(xe_vm,
 		      __entry->asid)
 );
 
+DEFINE_EVENT(xe_vm, xe_vm_kill,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
 DEFINE_EVENT(xe_vm, xe_vm_create,
 	     TP_PROTO(struct xe_vm *vm),
 	     TP_ARGS(vm)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index a9cf62f7aac6..47e3d37b757c 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -514,6 +514,24 @@ void xe_vm_unlock_dma_resv(struct xe_vm *vm,
 
 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
 
+static void xe_vm_kill(struct xe_vm *vm)
+{
+	struct ww_acquire_ctx ww;
+	struct xe_engine *e;
+
+	lockdep_assert_held(&vm->lock);
+
+	xe_vm_lock(vm, &ww, 0, false);
+	vm->flags |= XE_VM_FLAG_BANNED;
+	trace_xe_vm_kill(vm);
+
+	list_for_each_entry(e, &vm->preempt.engines, compute.link)
+		e->ops->kill(e);
+	xe_vm_unlock(vm, &ww);
+
+	/* TODO: Inform user the VM is banned */
+}
+
 static void preempt_rebind_work_func(struct work_struct *w)
 {
 	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
@@ -533,13 +551,14 @@ static void preempt_rebind_work_func(struct work_struct *w)
 	XE_BUG_ON(!xe_vm_in_compute_mode(vm));
 	trace_xe_vm_rebind_worker_enter(vm);
 
-	if (xe_vm_is_closed(vm)) {
+	down_write(&vm->lock);
+
+	if (xe_vm_is_closed_or_banned(vm)) {
+		up_write(&vm->lock);
 		trace_xe_vm_rebind_worker_exit(vm);
 		return;
 	}
 
-	down_write(&vm->lock);
-
 retry:
 	if (vm->async_ops.error)
 		goto out_unlock_outer;
@@ -666,11 +685,14 @@ out_unlock_outer:
 			goto retry;
 		}
 	}
+	if (err) {
+		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
+		xe_vm_kill(vm);
+	}
 	up_write(&vm->lock);
 
 	free_preempt_fences(&preempt_fences);
 
-	XE_WARN_ON(err < 0);	/* TODO: Kill VM or put in error state */
 	trace_xe_vm_rebind_worker_exit(vm);
 }
 
@@ -1140,11 +1162,12 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma)
 {
 	struct rb_node *node;
 
-	if (xe_vm_is_closed(vm))
+	lockdep_assert_held(&vm->lock);
+
+	if (xe_vm_is_closed_or_banned(vm))
 		return NULL;
 
 	XE_BUG_ON(vma->end >= vm->size);
-	lockdep_assert_held(&vm->lock);
 
 	node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb);
 
@@ -1377,6 +1400,13 @@ mm_closed:
 	wake_up_all(&vm->async_ops.error_capture.wq);
 }
 
+static void xe_vm_close(struct xe_vm *vm)
+{
+	down_write(&vm->lock);
+	vm->size = 0;
+	up_write(&vm->lock);
+}
+
 void xe_vm_close_and_put(struct xe_vm *vm)
 {
 	struct rb_root contested = RB_ROOT;
@@ -1387,8 +1417,8 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 
 	XE_BUG_ON(vm->preempt.num_engines);
 
-	vm->size = 0;
-	smp_mb();
+	xe_vm_close(vm);
+
 	flush_async_ops(vm);
 	if (xe_vm_in_compute_mode(vm))
 		flush_work(&vm->preempt.rebind_work);
@@ -3072,30 +3102,34 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (err)
 		return err;
 
-	vm = xe_vm_lookup(xef, args->vm_id);
-	if (XE_IOCTL_ERR(xe, !vm)) {
-		err = -EINVAL;
-		goto free_objs;
-	}
-
-	if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) {
-		drm_err(dev, "VM closed while we began looking up?\n");
-		err = -ENOENT;
-		goto put_vm;
-	}
-
 	if (args->engine_id) {
 		e = xe_engine_lookup(xef, args->engine_id);
 		if (XE_IOCTL_ERR(xe, !e)) {
 			err = -ENOENT;
-			goto put_vm;
+			goto free_objs;
 		}
+
 		if (XE_IOCTL_ERR(xe, !(e->flags & ENGINE_FLAG_VM))) {
 			err = -EINVAL;
 			goto put_engine;
 		}
 	}
 
+	vm = xe_vm_lookup(xef, args->vm_id);
+	if (XE_IOCTL_ERR(xe, !vm)) {
+		err = -EINVAL;
+		goto put_engine;
+	}
+
+	err = down_write_killable(&vm->lock);
+	if (err)
+		goto put_vm;
+
+	if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) {
+		err = -ENOENT;
+		goto release_vm_lock;
+	}
+
 	if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) {
 		if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
 			err = -EOPNOTSUPP;
@@ -3105,10 +3139,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			err = -EPROTO;
 
 		if (!err) {
-			down_write(&vm->lock);
 			trace_xe_vm_restart(vm);
 			vm_set_async_error(vm, 0);
-			up_write(&vm->lock);
 
 			queue_work(system_unbound_wq, &vm->async_ops.work);
 
@@ -3117,13 +3149,13 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 				xe_vm_queue_rebind_worker(vm);
 		}
 
-		goto put_engine;
+		goto release_vm_lock;
 	}
 
 	if (XE_IOCTL_ERR(xe, !vm->async_ops.error &&
 			 async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) {
 		err = -EOPNOTSUPP;
-		goto put_engine;
+		goto release_vm_lock;
 	}
 
 	for (i = 0; i < args->num_binds; ++i) {
@@ -3133,7 +3165,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		if (XE_IOCTL_ERR(xe, range > vm->size) ||
 		    XE_IOCTL_ERR(xe, addr > vm->size - range)) {
 			err = -EINVAL;
-			goto put_engine;
+			goto release_vm_lock;
 		}
 
 		if (bind_ops[i].tile_mask) {
@@ -3142,7 +3174,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			if (XE_IOCTL_ERR(xe, bind_ops[i].tile_mask &
 					 ~valid_tiles)) {
 				err = -EINVAL;
-				goto put_engine;
+				goto release_vm_lock;
 			}
 		}
 	}
@@ -3150,13 +3182,13 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	bos = kzalloc(sizeof(*bos) * args->num_binds, GFP_KERNEL);
 	if (!bos) {
 		err = -ENOMEM;
-		goto put_engine;
+		goto release_vm_lock;
 	}
 
 	vmas = kzalloc(sizeof(*vmas) * args->num_binds, GFP_KERNEL);
 	if (!vmas) {
 		err = -ENOMEM;
-		goto put_engine;
+		goto release_vm_lock;
 	}
 
 	for (i = 0; i < args->num_binds; ++i) {
@@ -3211,10 +3243,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto free_syncs;
 	}
 
-	err = down_write_killable(&vm->lock);
-	if (err)
-		goto free_syncs;
-
 	/* Do some error checking first to make the unwind easier */
 	for (i = 0; i < args->num_binds; ++i) {
 		u64 range = bind_ops[i].range;
@@ -3223,7 +3251,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		err =
 			__vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op);
 		if (err)
-			goto release_vm_lock;
+			goto free_syncs;
 	}
 
 	for (i = 0; i < args->num_binds; ++i) {
@@ -3343,8 +3371,6 @@ destroy_vmas:
 			break;
 		}
 	}
-release_vm_lock:
-	up_write(&vm->lock);
 free_syncs:
 	while (num_syncs--) {
 		if (async && j &&
@@ -3357,11 +3383,13 @@ free_syncs:
 put_obj:
 	for (i = j; i < args->num_binds; ++i)
 		xe_bo_put(bos[i]);
+release_vm_lock:
+	up_write(&vm->lock);
+put_vm:
+	xe_vm_put(vm);
 put_engine:
 	if (e)
 		xe_engine_put(e);
-put_vm:
-	xe_vm_put(vm);
 free_objs:
 	kfree(bos);
 	kfree(vmas);
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 5edb7771629c..02b409dd77d5 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -45,10 +45,21 @@ void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww);
 
 static inline bool xe_vm_is_closed(struct xe_vm *vm)
 {
-	/* Only guaranteed not to change when vm->resv is held */
+	/* Only guaranteed not to change when vm->lock is held */
 	return !vm->size;
 }
 
+static inline bool xe_vm_is_banned(struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_BANNED;
+}
+
+static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm)
+{
+	lockdep_assert_held(&vm->lock);
+	return xe_vm_is_closed(vm) || xe_vm_is_banned(vm);
+}
+
 struct xe_vma *
 xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma);
 
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 6c196431a60e..670c80c1f0a3 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -313,7 +313,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_ERR(xe, !vm))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) {
+	if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) {
 		err = -ENOENT;
 		goto put_vm;
 	}
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index c148dd49a6ca..3c885211a8d1 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -176,15 +176,19 @@ struct xe_vm {
 	struct xe_bo *scratch_bo[XE_MAX_TILES_PER_DEVICE];
 	struct xe_pt *scratch_pt[XE_MAX_TILES_PER_DEVICE][XE_VM_MAX_LEVEL];
 
-	/** @flags: flags for this VM, statically setup a creation time */
+	/**
+	 * @flags: flags for this VM, statically setup a creation time aside
+	 * from XE_VM_FLAG_BANNED which requires vm->lock to set / read safely
+	 */
#define XE_VM_FLAGS_64K			BIT(0)
 #define XE_VM_FLAG_COMPUTE_MODE		BIT(1)
 #define XE_VM_FLAG_ASYNC_BIND_OPS	BIT(2)
 #define XE_VM_FLAG_MIGRATION		BIT(3)
 #define XE_VM_FLAG_SCRATCH_PAGE		BIT(4)
 #define XE_VM_FLAG_FAULT_MODE		BIT(5)
-#define XE_VM_FLAG_GT_ID(flags)		(((flags) >> 6) & 0x3)
-#define XE_VM_FLAG_SET_TILE_ID(tile)	((tile)->id << 6)
+#define XE_VM_FLAG_BANNED		BIT(6)
+#define XE_VM_FLAG_GT_ID(flags)		(((flags) >> 7) & 0x3)
+#define XE_VM_FLAG_SET_TILE_ID(tile)	((tile)->id << 7)
 	unsigned long flags;
 
 	/** @composite_fence_ctx: context composite fence */
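User-visible effect: a banned VM now fails new execs with -ECANCELED (a closed VM previously returned -EIO), while the bind, madvise, and engine-create ioctls return -ENOENT. A hedged user-space recovery sketch, assuming the xe uapi names DRM_IOCTL_XE_EXEC and struct drm_xe_exec from xe_drm.h (not part of this patch):

#include <errno.h>
#include <sys/ioctl.h>
#include "xe_drm.h"	/* assumed uapi header: DRM_IOCTL_XE_EXEC, struct drm_xe_exec */

/*
 * Sketch only: with this patch, ECANCELED from an exec means the VM was
 * banned after a rebind-worker error; a robust client treats that as
 * "tear down this VM, recreate it and its engines, then resubmit".
 */
static int submit_exec(int fd, struct drm_xe_exec *exec)
{
	int ret = ioctl(fd, DRM_IOCTL_XE_EXEC, exec);

	if (ret == -1 && errno == ECANCELED) {
		/* VM banned: caller should rebuild the VM before retrying */
		return -ECANCELED;
	}
	return ret;
}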