Diffstat (limited to 'drivers/gpu/drm/xe/xe_exec_queue.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_exec_queue.c | 495
1 file changed, 310 insertions, 185 deletions
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 49223026c89f..7e1abbbfba12 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -8,13 +8,16 @@
 #include <linux/nospec.h>
 
 #include <drm/drm_device.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_file.h>
-#include <drm/xe_drm.h>
+#include <uapi/drm/xe_drm.h>
 
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_hw_engine_class_sysfs.h"
+#include "xe_hw_engine_group.h"
 #include "xe_hw_fence.h"
+#include "xe_irq.h"
 #include "xe_lrc.h"
 #include "xe_macros.h"
 #include "xe_migrate.h"
@@ -30,21 +33,34 @@ enum xe_exec_queue_sched_prop {
 	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
 };
 
-static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
-						    struct xe_vm *vm,
-						    u32 logical_mask,
-						    u16 width, struct xe_hw_engine *hwe,
-						    u32 flags)
+static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
+				      u64 extensions, int ext_number);
+
+static void __xe_exec_queue_free(struct xe_exec_queue *q)
+{
+	if (q->vm)
+		xe_vm_put(q->vm);
+
+	if (q->xef)
+		xe_file_put(q->xef);
+
+	kfree(q);
+}
+
+static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
+						   struct xe_vm *vm,
+						   u32 logical_mask,
+						   u16 width, struct xe_hw_engine *hwe,
+						   u32 flags, u64 extensions)
 {
 	struct xe_exec_queue *q;
 	struct xe_gt *gt = hwe->gt;
 	int err;
-	int i;
 
 	/* only kernel queues can be permanent */
 	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));
 
-	q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
+	q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL);
 	if (!q)
 		return ERR_PTR(-ENOMEM);
 
@@ -52,87 +68,111 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
 	q->flags = flags;
 	q->hwe = hwe;
 	q->gt = gt;
-	if (vm)
-		q->vm = xe_vm_get(vm);
 	q->class = hwe->class;
 	q->width = width;
+	q->msix_vec = XE_IRQ_DEFAULT_MSIX;
 	q->logical_mask = logical_mask;
 	q->fence_irq = &gt->fence_irq[hwe->class];
 	q->ring_ops = gt->ring_ops[hwe->class];
 	q->ops = gt->exec_queue_ops;
-	INIT_LIST_HEAD(&q->compute.link);
+	INIT_LIST_HEAD(&q->lr.link);
 	INIT_LIST_HEAD(&q->multi_gt_link);
+	INIT_LIST_HEAD(&q->hw_engine_group_link);
 
 	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
 	q->sched_props.preempt_timeout_us =
 				hwe->eclass->sched_props.preempt_timeout_us;
+	q->sched_props.job_timeout_ms =
+				hwe->eclass->sched_props.job_timeout_ms;
 	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
 	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
 		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
 	else
 		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
 
-	if (xe_exec_queue_is_parallel(q)) {
-		q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
-		q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
-	}
-	if (q->flags & EXEC_QUEUE_FLAG_VM) {
-		q->bind.fence_ctx = dma_fence_context_alloc(1);
-		q->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO;
+	if (vm)
+		q->vm = xe_vm_get(vm);
+
+	if (extensions) {
+		/*
+		 * may set q->usm, must come before xe_lrc_create(),
+		 * may overwrite q->sched_props, must come before q->ops->init()
+		 */
+		err = exec_queue_user_extensions(xe, q, extensions, 0);
+		if (err) {
+			__xe_exec_queue_free(q);
+			return ERR_PTR(err);
+		}
 	}
 
-	for (i = 0; i < width; ++i) {
-		err = xe_lrc_init(q->lrc + i, hwe, q, vm, SZ_16K);
+	return q;
+}
+
+static int __xe_exec_queue_init(struct xe_exec_queue *q)
+{
+	struct xe_vm *vm = q->vm;
+	int i, err;
+
+	if (vm) {
+		err = xe_vm_lock(vm, true);
 		if (err)
-			goto err_lrc;
+			return err;
+	}
+
+	for (i = 0; i < q->width; ++i) {
+		q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec);
+		if (IS_ERR(q->lrc[i])) {
+			err = PTR_ERR(q->lrc[i]);
+			goto err_unlock;
+		}
 	}
 
+	if (vm)
+		xe_vm_unlock(vm);
+
 	err = q->ops->init(q);
 	if (err)
 		goto err_lrc;
 
-	/*
-	 * Normally the user vm holds an rpm ref to keep the device
-	 * awake, and the context holds a ref for the vm, however for
-	 * some engines we use the kernels migrate vm underneath which offers no
-	 * such rpm ref, or we lack a vm. Make sure we keep a ref here, so we
-	 * can perform GuC CT actions when needed. Caller is expected to have
-	 * already grabbed the rpm ref outside any sensitive locks.
-	 */
-	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !vm))
-		drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe));
-
-	return q;
+	return 0;
 
+err_unlock:
+	if (vm)
+		xe_vm_unlock(vm);
 err_lrc:
 	for (i = i - 1; i >= 0; --i)
-		xe_lrc_finish(q->lrc + i);
-	kfree(q);
-	return ERR_PTR(err);
+		xe_lrc_put(q->lrc[i]);
	return err;
 }
 
 struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
 					   u32 logical_mask, u16 width,
-					   struct xe_hw_engine *hwe, u32 flags)
+					   struct xe_hw_engine *hwe, u32 flags,
+					   u64 extensions)
 {
 	struct xe_exec_queue *q;
 	int err;
 
-	if (vm) {
-		err = xe_vm_lock(vm, true);
-		if (err)
-			return ERR_PTR(err);
-	}
-	q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags);
-	if (vm)
-		xe_vm_unlock(vm);
+	q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
+				  extensions);
+	if (IS_ERR(q))
+		return q;
+
+	err = __xe_exec_queue_init(q);
+	if (err)
+		goto err_post_alloc;
 
 	return q;
+
+err_post_alloc:
+	__xe_exec_queue_free(q);
+	return ERR_PTR(err);
 }
 
 struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
 						 struct xe_vm *vm,
-						 enum xe_engine_class class, u32 flags)
+						 enum xe_engine_class class,
+						 u32 flags, u64 extensions)
 {
 	struct xe_hw_engine *hwe, *hwe0 = NULL;
 	enum xe_hw_engine_id id;
@@ -152,9 +192,59 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe
 	if (!logical_mask)
 		return ERR_PTR(-ENODEV);
 
-	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags);
+	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions);
 }
 
+/**
+ * xe_exec_queue_create_bind() - Create bind exec queue.
+ * @xe: Xe device.
+ * @tile: tile which bind exec queue belongs to.
+ * @flags: exec queue creation flags
+ * @extensions: exec queue creation extensions
+ *
+ * Normalize bind exec queue creation. Bind exec queue is tied to migration VM
+ * for access to physical memory required for page table programming. On
+ * faulting devices the reserved copy engine instance must be used to avoid
+ * deadlocking (user binds cannot get stuck behind faults as kernel binds which
+ * resolve faults depend on user binds). On non-faulting devices any copy engine
+ * can be used.
+ *
+ * Returns exec queue on success, ERR_PTR on failure
+ */
+struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
+						struct xe_tile *tile,
+						u32 flags, u64 extensions)
+{
+	struct xe_gt *gt = tile->primary_gt;
+	struct xe_exec_queue *q;
+	struct xe_vm *migrate_vm;
+
+	migrate_vm = xe_migrate_get_vm(tile->migrate);
+	if (xe->info.has_usm) {
+		struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
+							   XE_ENGINE_CLASS_COPY,
+							   gt->usm.reserved_bcs_instance,
+							   false);
+
+		if (!hwe) {
+			xe_vm_put(migrate_vm);
+			return ERR_PTR(-EINVAL);
+		}
+
+		q = xe_exec_queue_create(xe, migrate_vm,
+					 BIT(hwe->logical_instance), 1, hwe,
+					 flags, extensions);
+	} else {
+		q = xe_exec_queue_create_class(xe, gt, migrate_vm,
+					       XE_ENGINE_CLASS_COPY, flags,
+					       extensions);
+	}
+	xe_vm_put(migrate_vm);
+
+	return q;
+}
+ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
+
 void xe_exec_queue_destroy(struct kref *ref)
 {
 	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
@@ -174,36 +264,40 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
 {
 	int i;
 
+	/*
+	 * Before releasing our ref to lrc and xef, accumulate our run ticks
+	 * and wakeup any waiters.
+	 */
+	xe_exec_queue_update_run_ticks(q);
+	if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
+		wake_up_var(&q->xef->exec_queue.pending_removal);
+
 	for (i = 0; i < q->width; ++i)
-		xe_lrc_finish(q->lrc + i);
-	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
-		xe_device_mem_access_put(gt_to_xe(q->gt));
-	if (q->vm)
-		xe_vm_put(q->vm);
+		xe_lrc_put(q->lrc[i]);
 
-	kfree(q);
+	__xe_exec_queue_free(q);
 }
 
 void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
 {
 	switch (q->class) {
 	case XE_ENGINE_CLASS_RENDER:
-		sprintf(q->name, "rcs%d", instance);
+		snprintf(q->name, sizeof(q->name), "rcs%d", instance);
 		break;
 	case XE_ENGINE_CLASS_VIDEO_DECODE:
-		sprintf(q->name, "vcs%d", instance);
+		snprintf(q->name, sizeof(q->name), "vcs%d", instance);
 		break;
 	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
-		sprintf(q->name, "vecs%d", instance);
+		snprintf(q->name, sizeof(q->name), "vecs%d", instance);
 		break;
 	case XE_ENGINE_CLASS_COPY:
-		sprintf(q->name, "bcs%d", instance);
+		snprintf(q->name, sizeof(q->name), "bcs%d", instance);
 		break;
 	case XE_ENGINE_CLASS_COMPUTE:
-		sprintf(q->name, "ccs%d", instance);
+		snprintf(q->name, sizeof(q->name), "ccs%d", instance);
 		break;
 	case XE_ENGINE_CLASS_OTHER:
-		sprintf(q->name, "gsccs%d", instance);
+		snprintf(q->name, sizeof(q->name), "gsccs%d", instance);
 		break;
 	default:
 		XE_WARN_ON(q->class);
@@ -231,7 +325,7 @@ xe_exec_queue_device_get_max_priority(struct xe_device *xe)
 }
 
 static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
-				   u64 value, bool create)
+				   u64 value)
 {
 	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
 		return -EINVAL;
@@ -239,7 +333,8 @@ static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q
 	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
 		return -EPERM;
 
-	return q->ops->set_priority(q, value);
+	q->sched_props.priority = value;
+	return 0;
 }
 
 static bool xe_exec_queue_enforce_schedule_limit(void)
@@ -295,7 +390,7 @@ xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
 }
 
 static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
-				    u64 value, bool create)
+				    u64 value)
 {
 	u32 min = 0, max = 0;
 
@@ -306,12 +401,13 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *
 	    !xe_hw_engine_timeout_in_range(value, min, max))
 		return -EINVAL;
 
-	return q->ops->set_timeslice(q, value);
+	q->sched_props.timeslice_us = value;
+	return 0;
 }
 
 typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
 					     struct xe_exec_queue *q,
-					     u64 value, bool create);
+					     u64 value);
 
 static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
@@ -320,8 +416,7 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
 
 static int exec_queue_user_ext_set_property(struct xe_device *xe,
 					    struct xe_exec_queue *q,
-					    u64 extension,
-					    bool create)
+					    u64 extension)
 {
 	u64 __user *address = u64_to_user_ptr(extension);
 	struct drm_xe_ext_set_property ext;
@@ -343,21 +438,20 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe,
 	if (!exec_queue_set_property_funcs[idx])
 		return -EINVAL;
 
-	return exec_queue_set_property_funcs[idx](xe, q, ext.value, create);
+	return exec_queue_set_property_funcs[idx](xe, q, ext.value);
 }
 
 typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
 					       struct xe_exec_queue *q,
-					       u64 extension,
-					       bool create);
+					       u64 extension);
 
-static const xe_exec_queue_set_property_fn exec_queue_user_extension_funcs[] = {
+static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
 	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
 };
 
 #define MAX_USER_EXTENSIONS	16
 static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
-				      u64 extensions, int ext_number, bool create)
+				      u64 extensions, int ext_number)
 {
 	u64 __user *address = u64_to_user_ptr(extensions);
 	struct drm_xe_user_extension ext;
@@ -378,74 +472,17 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue
 
 	idx = array_index_nospec(ext.name,
 				 ARRAY_SIZE(exec_queue_user_extension_funcs));
-	err = exec_queue_user_extension_funcs[idx](xe, q, extensions, create);
+	err = exec_queue_user_extension_funcs[idx](xe, q, extensions);
 	if (XE_IOCTL_DBG(xe, err))
 		return err;
 
 	if (ext.next_extension)
 		return exec_queue_user_extensions(xe, q, ext.next_extension,
-						  ++ext_number, create);
+						  ++ext_number);
 
 	return 0;
 }
 
-static const enum xe_engine_class user_to_xe_engine_class[] = {
-	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
-	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
-	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
-	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
-	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
-};
-
-static struct xe_hw_engine *
-find_hw_engine(struct xe_device *xe,
-	       struct drm_xe_engine_class_instance eci)
-{
-	u32 idx;
-
-	if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class))
-		return NULL;
-
-	if (eci.gt_id >= xe->info.gt_count)
-		return NULL;
-
-	idx = array_index_nospec(eci.engine_class,
-				 ARRAY_SIZE(user_to_xe_engine_class));
-
-	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
-			       user_to_xe_engine_class[idx],
-			       eci.engine_instance, true);
-}
-
-static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
-					struct drm_xe_engine_class_instance *eci,
-					u16 width, u16 num_placements)
-{
-	struct xe_hw_engine *hwe;
-	enum xe_hw_engine_id id;
-	u32 logical_mask = 0;
-
-	if (XE_IOCTL_DBG(xe, width != 1))
-		return 0;
-	if (XE_IOCTL_DBG(xe, num_placements != 1))
-		return 0;
-	if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
-		return 0;
-
-	eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY;
-
-	for_each_hw_engine(hwe, gt, id) {
-		if (xe_hw_engine_is_reserved(hwe))
-			continue;
-
-		if (hwe->class ==
-		    user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY])
-			logical_mask |= BIT(hwe->logical_instance);
-	}
-
-	return logical_mask;
-}
-
 static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
 				      struct drm_xe_engine_class_instance *eci,
 				      u16 width, u16 num_placements)
@@ -468,7 +505,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
 
 			n = j * width + i;
 
-			hwe = find_hw_engine(xe, eci[n]);
+			hwe = xe_hw_engine_lookup(xe, eci[n]);
 			if (XE_IOCTL_DBG(xe, !hwe))
 				return 0;
 
@@ -507,8 +544,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 	struct drm_xe_engine_class_instance __user *user_eci =
 		u64_to_user_ptr(args->instances);
 	struct xe_hw_engine *hwe;
-	struct xe_vm *vm, *migrate_vm;
+	struct xe_vm *vm;
 	struct xe_gt *gt;
+	struct xe_tile *tile;
 	struct xe_exec_queue *q = NULL;
 	u32 logical_mask;
 	u32 id;
@@ -533,38 +571,20 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 
 	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
-		for_each_gt(gt, xe, id) {
-			struct xe_exec_queue *new;
-
-			if (xe_gt_is_media_type(gt))
-				continue;
-
-			eci[0].gt_id = gt->info.id;
-			logical_mask = bind_exec_queue_logical_mask(xe, gt, eci,
-								    args->width,
-								    args->num_placements);
-			if (XE_IOCTL_DBG(xe, !logical_mask))
-				return -EINVAL;
-
-			hwe = find_hw_engine(xe, eci[0]);
-			if (XE_IOCTL_DBG(xe, !hwe))
-				return -EINVAL;
-
-			/* The migration vm doesn't hold rpm ref */
-			xe_device_mem_access_get(xe);
+		if (XE_IOCTL_DBG(xe, args->width != 1) ||
+		    XE_IOCTL_DBG(xe, args->num_placements != 1) ||
+		    XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
+			return -EINVAL;
 
-			migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
-			new = xe_exec_queue_create(xe, migrate_vm, logical_mask,
-						   args->width, hwe,
-						   EXEC_QUEUE_FLAG_PERSISTENT |
-						   EXEC_QUEUE_FLAG_VM |
-						   (id ?
-						    EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
-						    0));
+		for_each_tile(tile, xe, id) {
+			struct xe_exec_queue *new;
+			u32 flags = EXEC_QUEUE_FLAG_VM;
 
-			xe_device_mem_access_put(xe); /* now held by engine */
+			if (id)
+				flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;
 
-			xe_vm_put(migrate_vm);
+			new = xe_exec_queue_create_bind(xe, tile, flags,
+							args->extensions);
 			if (IS_ERR(new)) {
 				err = PTR_ERR(new);
 				if (q)
@@ -585,7 +605,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 		if (XE_IOCTL_DBG(xe, !logical_mask))
 			return -EINVAL;
 
-		hwe = find_hw_engine(xe, eci[0]);
+		hwe = xe_hw_engine_lookup(xe, eci[0]);
 		if (XE_IOCTL_DBG(xe, !hwe))
 			return -EINVAL;
 
@@ -606,31 +626,32 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 		}
 
 		q = xe_exec_queue_create(xe, vm, logical_mask,
-					 args->width, hwe, 0);
+					 args->width, hwe, 0,
+					 args->extensions);
 		up_read(&vm->lock);
 		xe_vm_put(vm);
 		if (IS_ERR(q))
 			return PTR_ERR(q);
 
 		if (xe_vm_in_preempt_fence_mode(vm)) {
-			q->compute.context = dma_fence_context_alloc(1);
-			spin_lock_init(&q->compute.lock);
+			q->lr.context = dma_fence_context_alloc(1);
 
 			err = xe_vm_add_compute_exec_queue(vm, q);
 			if (XE_IOCTL_DBG(xe, err))
 				goto put_exec_queue;
 		}
-	}
 
-	if (args->extensions) {
-		err = exec_queue_user_extensions(xe, q, args->extensions, 0, true);
-		if (XE_IOCTL_DBG(xe, err))
-			goto kill_exec_queue;
+		if (q->vm && q->hwe->hw_engine_group) {
+			err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
+			if (err)
+				goto put_exec_queue;
+		}
 	}
 
-	mutex_lock(&xef->exec_queue.lock);
+	q->xef = xe_file_get(xef);
+
+	/* user id alloc must always be last in ioctl to prevent UAF */
 	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
-	mutex_unlock(&xef->exec_queue.lock);
 	if (err)
 		goto kill_exec_queue;
 
@@ -663,7 +684,7 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
 
 	switch (args->property) {
 	case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN:
-		args->value = !!(q->flags & EXEC_QUEUE_FLAG_BANNED);
+		args->value = q->ops->reset_status(q);
 		ret = 0;
 		break;
 	default:
@@ -689,7 +710,7 @@ bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
 
 static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
 {
-	return q->lrc->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc) - 1;
+	return q->lrc[0]->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc[0]) - 1;
 }
 
 /**
@@ -700,7 +721,7 @@ static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
  */
 bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
 {
-	struct xe_lrc *lrc = q->lrc;
+	struct xe_lrc *lrc = q->lrc[0];
 	s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES;
 
 	return xe_exec_queue_num_job_inflight(q) >= max_job;
@@ -726,18 +747,67 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
 		int i;
 
 		for (i = 0; i < q->width; ++i) {
-			if (xe_lrc_seqno(&q->lrc[i]) !=
-			    q->lrc[i].fence_ctx.next_seqno - 1)
+			if (xe_lrc_seqno(q->lrc[i]) !=
+			    q->lrc[i]->fence_ctx.next_seqno - 1)
 				return false;
 		}
 
 		return true;
 	}
 
-	return xe_lrc_seqno(&q->lrc[0]) ==
-		q->lrc[0].fence_ctx.next_seqno - 1;
+	return xe_lrc_seqno(q->lrc[0]) ==
+		q->lrc[0]->fence_ctx.next_seqno - 1;
 }
 
+/**
+ * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec queue
+ * from hw
+ * @q: The exec queue
+ *
+ * Update the timestamp saved by HW for this exec queue and save run ticks
+ * calculated by using the delta from last update.
+ */
+void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
+{
+	struct xe_device *xe = gt_to_xe(q->gt);
+	struct xe_lrc *lrc;
+	u32 old_ts, new_ts;
+	int idx;
+
+	/*
+	 * Jobs that are executed by the kernel don't have a corresponding
+	 * xe_file and thus are not accounted.
+	 */
+	if (!q->xef)
+		return;
+
+	/* Synchronize with unbind while holding the xe file open */
+	if (!drm_dev_enter(&xe->drm, &idx))
+		return;
+	/*
+	 * Only sample the first LRC. For parallel submission, all of them are
+	 * scheduled together and we compensate that below by multiplying by
+	 * width - this may introduce errors if that premise is not true and
+	 * they don't exit 100% aligned. On the other hand, looping through
+	 * the LRCs and reading them at different times could also introduce
+	 * errors.
+	 */
+	lrc = q->lrc[0];
+	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
+	q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
+
+	drm_dev_exit(idx);
+}
+
+/**
+ * xe_exec_queue_kill - permanently stop all execution from an exec queue
+ * @q: The exec queue
+ *
+ * This function permanently stops all activity on an exec queue. If the queue
+ * is actively executing on the HW, it will be kicked off the engine; any
+ * pending jobs are discarded and all future submissions are rejected.
+ * This function is safe to call multiple times.
+ */
 void xe_exec_queue_kill(struct xe_exec_queue *q)
 {
 	struct xe_exec_queue *eq = q, *next;
@@ -766,10 +836,16 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
 
 	mutex_lock(&xef->exec_queue.lock);
 	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
+	if (q)
+		atomic_inc(&xef->exec_queue.pending_removal);
 	mutex_unlock(&xef->exec_queue.lock);
+
 	if (XE_IOCTL_DBG(xe, !q))
 		return -ENOENT;
 
+	if (q->vm && q->hwe->hw_engine_group)
+		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
+
 	xe_exec_queue_kill(q);
 
 	trace_xe_exec_queue_close(q);
@@ -781,10 +857,12 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
 static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
 						    struct xe_vm *vm)
 {
-	if (q->flags & EXEC_QUEUE_FLAG_VM)
+	if (q->flags & EXEC_QUEUE_FLAG_VM) {
 		lockdep_assert_held(&vm->lock);
-	else
+	} else {
 		xe_vm_assert_held(vm);
+		lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem);
+	}
 }
 
 /**
@@ -796,10 +874,7 @@ void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
 {
 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
 
-	if (q->last_fence) {
-		dma_fence_put(q->last_fence);
-		q->last_fence = NULL;
-	}
+	xe_exec_queue_last_fence_put_unlocked(q);
 }
 
 /**
@@ -842,6 +917,33 @@ struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
 }
 
 /**
+ * xe_exec_queue_last_fence_get_for_resume() - Get last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ *
+ * Get last fence, takes a ref. Only safe to be called in the context of
+ * resuming the hw engine group's long-running exec queue, when the group
+ * semaphore is held.
+ *
+ * Returns: last fence if not signaled, dma fence stub if signaled
+ */
+struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q,
+							  struct xe_vm *vm)
+{
+	struct dma_fence *fence;
+
+	lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem);
+
+	if (q->last_fence &&
+	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
+		xe_exec_queue_last_fence_put_unlocked(q);
+
+	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
+	dma_fence_get(fence);
+	return fence;
+}
+
+/**
  * xe_exec_queue_last_fence_set() - Set last fence
  * @q: The exec queue
  * @vm: The VM the engine does a bind or exec for
@@ -858,3 +960,26 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
 		xe_exec_queue_last_fence_put(q, vm);
 	q->last_fence = dma_fence_get(fence);
 }
+
+/**
+ * xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ *
+ * Returns:
+ * -ETIME if there exists an unsignalled last fence dependency, zero otherwise.
+ */
+int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm)
+{
+	struct dma_fence *fence;
+	int err = 0;
+
+	fence = xe_exec_queue_last_fence_get(q, vm);
+	if (fence) {
+		err = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) ?
+			0 : -ETIME;
+		dma_fence_put(fence);
+	}
+
+	return err;
+}