diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_exec.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_exec.c | 95 |
1 files changed, 59 insertions, 36 deletions
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index cc5e0f75de3c..44364c042ad7 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -8,12 +8,13 @@ #include <drm/drm_device.h> #include <drm/drm_exec.h> #include <drm/drm_file.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include <linux/delay.h> #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_hw_engine_group.h" #include "xe_macros.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" @@ -32,7 +33,7 @@ * * In XE we avoid all of this complication by not allowing a BO list to be * passed into an exec, using the dma-buf implicit sync uAPI, have binds as - * seperate operations, and using the DRM scheduler to flow control the ring. + * separate operations, and using the DRM scheduler to flow control the ring. * Let's deep dive on each of these. * * We can get away from a BO list by forcing the user to use in / out fences on @@ -40,11 +41,6 @@ * user knows an exec writes to a BO and reads from the BO in the next exec, it * is the user's responsibility to pass in / out fence between the two execs). * - * Implicit dependencies for external BOs are handled by using the dma-buf - * implicit dependency uAPI (TODO: add link). To make this works each exec must - * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external - * BO mapped in the VM. - * * We do not allow a user to trigger a bind at exec time rather we have a VM * bind IOCTL which uses the same in / out fence interface as exec. In that * sense, a VM bind is basically the same operation as an exec from the user @@ -58,8 +54,8 @@ * behind any pending kernel operations on any external BOs in VM or any BOs * private to the VM. This is accomplished by the rebinds waiting on BOs * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs - * slots (inflight execs are in the DMA_RESV_USAGE_BOOKING for private BOs and - * in DMA_RESV_USAGE_WRITE for external BOs). + * slots (inflight execs are in the DMA_RESV_USAGE_BOOKKEEP for private BOs and + * for external BOs). * * Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute * mode VMs we use preempt fences and a rebind worker (TODO: add link). @@ -118,12 +114,14 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; struct drm_exec *exec = &vm_exec.exec; - u32 i, num_syncs = 0, num_ufence = 0; + u32 i, num_syncs, num_ufence = 0; struct xe_sched_job *job; struct xe_vm *vm; bool write_locked, skip_retry = false; ktime_t end = 0; int err = 0; + struct xe_hw_engine_group *group; + enum xe_hw_engine_group_execution_mode mode, previous_mode; if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || @@ -134,14 +132,18 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; - if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) - return -EINVAL; + if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) { + err = -EINVAL; + goto err_exec_queue; + } if (XE_IOCTL_DBG(xe, args->num_batch_buffer && - q->width != args->num_batch_buffer)) - return -EINVAL; + q->width != args->num_batch_buffer)) { + err = -EINVAL; + goto err_exec_queue; + } - if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) { + if (XE_IOCTL_DBG(xe, q->ops->reset_status(q))) { err = -ECANCELED; goto err_exec_queue; } @@ -156,15 +158,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) vm = q->vm; - for (i = 0; i < args->num_syncs; i++) { - err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++], - &syncs_user[i], SYNC_PARSE_FLAG_EXEC | + for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], + &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC | (xe_vm_in_lr_mode(vm) ? SYNC_PARSE_FLAG_LR_MODE : 0)); if (err) goto err_syncs; - if (xe_sync_is_ufence(&syncs[i])) + if (xe_sync_is_ufence(&syncs[num_syncs])) num_ufence++; } @@ -174,14 +176,23 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } if (xe_exec_queue_is_parallel(q)) { - err = __copy_from_user(addresses, addresses_user, sizeof(u64) * - q->width); + err = copy_from_user(addresses, addresses_user, sizeof(u64) * + q->width); if (err) { err = -EFAULT; goto err_syncs; } } + group = q->hwe->hw_engine_group; + mode = xe_hw_engine_group_find_exec_mode(q); + + if (mode == EXEC_MODE_DMA_FENCE) { + err = xe_hw_engine_group_get_mode(group, mode, &previous_mode); + if (err) + goto err_syncs; + } + retry: if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) { err = down_write_killable(&vm->lock); @@ -192,7 +203,7 @@ retry: write_locked = false; } if (err) - goto err_syncs; + goto err_hw_exec_mode; if (write_locked) { err = xe_vm_userptr_pin(vm); @@ -213,10 +224,11 @@ retry: fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm); if (IS_ERR(fence)) { err = PTR_ERR(fence); + xe_vm_unlock(vm); goto err_unlock_list; } for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, fence); + xe_sync_entry_signal(&syncs[i], fence); xe_exec_queue_last_fence_set(q, vm, fence); dma_fence_put(fence); } @@ -250,6 +262,12 @@ retry: goto err_exec; } + if (xe_exec_queue_uses_pxp(q)) { + err = xe_vm_validate_protected(q->vm); + if (err) + goto err_exec; + } + job = xe_sched_job_create(q, xe_exec_queue_is_parallel(q) ? addresses : &args->address); if (IS_ERR(job)) { @@ -259,9 +277,9 @@ retry: /* Wait behind rebinds */ if (!xe_vm_in_lr_mode(vm)) { - err = drm_sched_job_add_resv_dependencies(&job->drm, - xe_vm_resv(vm), - DMA_RESV_USAGE_KERNEL); + err = xe_sched_job_add_deps(job, + xe_vm_resv(vm), + DMA_RESV_USAGE_KERNEL); if (err) goto err_put_job; } @@ -292,11 +310,13 @@ retry: xe_sched_job_arm(job); if (!xe_vm_in_lr_mode(vm)) drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished, - DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE); + DMA_RESV_USAGE_BOOKKEEP, + DMA_RESV_USAGE_BOOKKEEP); - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], job, - &job->drm.s_fence->finished); + for (i = 0; i < num_syncs; i++) { + xe_sync_entry_signal(&syncs[i], &job->drm.s_fence->finished); + xe_sched_job_init_user_fence(job, &syncs[i]); + } if (xe_exec_queue_is_lr(q)) q->ring_ops->emit_job(job); @@ -311,6 +331,9 @@ retry: spin_unlock(&xe->ttm.lru_lock); } + if (mode == EXEC_MODE_LR) + xe_hw_engine_group_resume_faulting_lr_jobs(group); + err_repin: if (!xe_vm_in_lr_mode(vm)) up_read(&vm->userptr.notifier_lock); @@ -320,15 +343,15 @@ err_put_job: err_exec: drm_exec_fini(exec); err_unlock_list: - if (write_locked) - up_write(&vm->lock); - else - up_read(&vm->lock); + up_read(&vm->lock); if (err == -EAGAIN && !skip_retry) goto retry; +err_hw_exec_mode: + if (mode == EXEC_MODE_DMA_FENCE) + xe_hw_engine_group_put(group); err_syncs: - for (i = 0; i < num_syncs; i++) - xe_sync_entry_cleanup(&syncs[i]); + while (num_syncs--) + xe_sync_entry_cleanup(&syncs[num_syncs]); kfree(syncs); err_exec_queue: xe_exec_queue_put(q); |