| Field | Value | Date |
|---|---|---|
| author | Matthew Brost <matthew.brost@intel.com> | 2023-09-14 13:40:51 -0700 |
| committer | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2023-12-21 11:43:18 -0500 |
| commit | e669f10cd3182943058fa84b1e81f3727f6e0520 (patch) | |
| tree | 057932b144e3569787060034c4f965440eb66143 /drivers/gpu/drm | |
| parent | f3e9b1f43458746e7e0211dbe4289412e5c0d16a (diff) | |
drm/xe: Fix VM bind out-sync signaling ordering
A case existed where an out-sync of a later VM bind operation could
signal before a previous one if the later operation results in a NOP
(e.g. an unbind or a prefetch to a VA range without any mappings). This
breaks the ordering rules; fix this. This patch also lays the groundwork
for users to pass in num_binds == 0 and out-syncs.
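
To make the ordering problem concrete, the sketch below is a minimal user-space C model of the idea behind the fix, not the xe code itself; the toy_fence/toy_queue names are invented for illustration. Each queue remembers the fence of its last operation, and a NOP bind signals its out-sync with that remembered fence rather than with an always-signaled stub, so the out-sync cannot complete ahead of earlier work on the same queue.

```c
/*
 * Toy user-space model of the fix (invented names, not the xe API):
 * a queue remembers the last fence it produced, and a NOP operation
 * signals its out-sync with that fence instead of an always-signaled
 * stub, preserving signaling order on the queue.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_fence {
	bool signaled;
};

/* Stand-in for dma_fence_get_stub(): a fence that is already signaled. */
static struct toy_fence stub_fence = { .signaled = true };

struct toy_queue {
	struct toy_fence *last_fence;	/* mirrors q->last_fence in the patch */
};

/* Fence a NOP operation should signal its out-syncs with. */
static struct toy_fence *queue_last_fence_get(struct toy_queue *q)
{
	return q->last_fence ? q->last_fence : &stub_fence;
}

/* Record the fence of the queue's most recent real operation. */
static void queue_last_fence_set(struct toy_queue *q, struct toy_fence *fence)
{
	q->last_fence = fence;
}

int main(void)
{
	struct toy_queue q = { 0 };
	struct toy_fence bind_a = { .signaled = false };	/* earlier bind, still running */
	struct toy_fence *out_sync_b;

	queue_last_fence_set(&q, &bind_a);

	/* Bind B turns out to be a NOP (nothing mapped in the VA range). */
	out_sync_b = queue_last_fence_get(&q);

	/* The old code used the stub here and signaled B's out-sync at once. */
	printf("out-sync of bind B signaled immediately? %s\n",
	       out_sync_b->signaled ? "yes (broken ordering)" : "no (chained after bind A)");
	return 0;
}
```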
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Diffstat (limited to 'drivers/gpu/drm')
| Mode | Path | Lines changed |
|---|---|---|
| -rw-r--r-- | drivers/gpu/drm/xe/xe_exec_queue.c | 75 |
| -rw-r--r-- | drivers/gpu/drm/xe/xe_exec_queue.h | 7 |
| -rw-r--r-- | drivers/gpu/drm/xe/xe_exec_queue_types.h | 6 |
| -rw-r--r-- | drivers/gpu/drm/xe/xe_vm.c | 45 |

4 files changed, 125 insertions(+), 8 deletions(-)
```diff
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 9b373b9ea472..8e0620cb89e5 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -156,6 +156,7 @@ void xe_exec_queue_destroy(struct kref *ref)
 	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
 	struct xe_exec_queue *eq, *next;
 
+	xe_exec_queue_last_fence_put_unlocked(q);
 	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
 		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
 					 multi_gt_link)
@@ -916,3 +917,77 @@ out:
 
 	return ret;
 }
+
+static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
+						    struct xe_vm *vm)
+{
+	lockdep_assert_held_write(&vm->lock);
+}
+
+/**
+ * xe_exec_queue_last_fence_put() - Drop ref to last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ */
+void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	if (q->last_fence) {
+		dma_fence_put(q->last_fence);
+		q->last_fence = NULL;
+	}
+}
+
+/**
+ * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
+ * @q: The exec queue
+ *
+ * Only safe to be called from xe_exec_queue_destroy().
+ */
+void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
+{
+	if (q->last_fence) {
+		dma_fence_put(q->last_fence);
+		q->last_fence = NULL;
+	}
+}
+
+/**
+ * xe_exec_queue_last_fence_get() - Get last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ *
+ * Get last fence, does not take a ref
+ *
+ * Returns: last fence if not signaled, dma fence stub if signaled
+ */
+struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
+					       struct xe_vm *vm)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	if (q->last_fence &&
+	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
+		xe_exec_queue_last_fence_put(q, vm);
+
+	return q->last_fence ? q->last_fence : dma_fence_get_stub();
+}
+
+/**
+ * xe_exec_queue_last_fence_set() - Set last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ * @fence: The fence
+ *
+ * Set the last fence for the engine. Increases reference count for fence, when
+ * closing engine xe_exec_queue_last_fence_put should be called.
+ */
+void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
+				  struct dma_fence *fence)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	xe_exec_queue_last_fence_put(q, vm);
+	q->last_fence = dma_fence_get(fence);
+}
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index 22499a2f522b..533da1b0c457 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -61,4 +61,11 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
 				     struct drm_file *file);
 enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe);
 
+void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm);
+void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e);
+struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e,
+					       struct xe_vm *vm);
+void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm,
+				  struct dma_fence *fence);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 4e382304010e..6826feb650f3 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -65,6 +65,12 @@ struct xe_exec_queue {
 	/** @fence_irq: fence IRQ used to signal job completion */
 	struct xe_hw_fence_irq *fence_irq;
 
+	/**
+	 * @last_fence: last fence on engine, protected by vm->lock in write
+	 * mode if bind engine
+	 */
+	struct dma_fence *last_fence;
+
 	/* queue no longer allowed to submit */
 #define EXEC_QUEUE_FLAG_BANNED		BIT(0)
 	/* queue used for kernel submission only */
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 89df50f49e11..4c8d77c4c7c0 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1524,6 +1524,13 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	if (xe_vm_in_compute_mode(vm))
 		flush_work(&vm->preempt.rebind_work);
 
+	down_write(&vm->lock);
+	for_each_tile(tile, xe, id) {
+		if (vm->q[id])
+			xe_exec_queue_last_fence_put(vm->q[id], vm);
+	}
+	up_write(&vm->lock);
+
 	for_each_tile(tile, xe, id) {
 		if (vm->q[id]) {
 			xe_exec_queue_kill(vm->q[id]);
@@ -1665,16 +1672,23 @@ u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
 			       tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
 }
 
+static struct xe_exec_queue *
+to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	return q ? q : vm->q[0];
+}
+
 static struct dma_fence *
 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 		 struct xe_sync_entry *syncs, u32 num_syncs,
 		 bool first_op, bool last_op)
 {
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 	struct xe_tile *tile;
 	struct dma_fence *fence = NULL;
 	struct dma_fence **fences = NULL;
 	struct dma_fence_array *cf = NULL;
-	struct xe_vm *vm = xe_vma_vm(vma);
 	int cur_fence = 0, i;
 	int number_tiles = hweight8(vma->tile_present);
 	int err;
@@ -1727,7 +1741,8 @@ next:
 					   cf ? &cf->base : fence);
 	}
 
-	return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;
+	return cf ? &cf->base : !fence ?
+		xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
 
 err_fences:
 	if (fences) {
@@ -1826,6 +1841,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 			bool last_op)
 {
 	struct dma_fence *fence;
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 
 	xe_vm_assert_held(vm);
 
@@ -1839,13 +1855,15 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 
 		xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
 
-		fence = dma_fence_get_stub();
+		fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
 		if (last_op) {
 			for (i = 0; i < num_syncs; i++)
 				xe_sync_entry_signal(&syncs[i], NULL, fence);
 		}
 	}
 
+	if (last_op)
+		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
 	if (last_op && xe_vm_sync_mode(vm, q))
 		dma_fence_wait(fence, true);
 	dma_fence_put(fence);
@@ -1878,6 +1896,7 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 			u32 num_syncs, bool first_op, bool last_op)
 {
 	struct dma_fence *fence;
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 
 	xe_vm_assert_held(vm);
 	xe_bo_assert_held(xe_vma_bo(vma));
@@ -1887,6 +1906,8 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 		return PTR_ERR(fence);
 
 	xe_vma_destroy(vma, fence);
+	if (last_op)
+		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
 	if (last_op && xe_vm_sync_mode(vm, q))
 		dma_fence_wait(fence, true);
 	dma_fence_put(fence);
@@ -2036,6 +2057,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 			  struct xe_sync_entry *syncs, u32 num_syncs,
 			  bool first_op, bool last_op)
 {
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 	int err;
 
 	xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
@@ -2054,9 +2076,12 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 
 		/* Nothing to do, signal fences now */
 		if (last_op) {
-			for (i = 0; i < num_syncs; i++)
-				xe_sync_entry_signal(&syncs[i], NULL,
-						     dma_fence_get_stub());
+			for (i = 0; i < num_syncs; i++) {
+				struct dma_fence *fence =
+					xe_exec_queue_last_fence_get(wait_exec_queue, vm);
+
+				xe_sync_entry_signal(&syncs[i], NULL, fence);
+			}
 		}
 
 		return 0;
@@ -3108,8 +3133,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 unwind_ops:
 	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
 free_syncs:
-	for (i = 0; err == -ENODATA && i < num_syncs; i++)
-		xe_sync_entry_signal(&syncs[i], NULL, dma_fence_get_stub());
+	for (i = 0; err == -ENODATA && i < num_syncs; i++) {
+		struct dma_fence *fence =
+			xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q), vm);
+
+		xe_sync_entry_signal(&syncs[i], NULL, fence);
+	}
 
 	while (num_syncs--)
 		xe_sync_entry_cleanup(&syncs[num_syncs]);
```
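
As a reading aid for the diff above, the following sketch condenses the call pattern the patch establishes in the NOP paths of xe_vm.c (xe_vm_prefetch and the free_syncs path of xe_vm_bind_ioctl); example_signal_nop_syncs is a hypothetical helper written only for illustration, is not part of the patch, and assumes vm->lock is held in write mode as the new lockdep assert requires.

```c
/*
 * Hypothetical caller (illustration only, not from the patch): mirrors how
 * the NOP paths in xe_vm.c now signal out-syncs with the queue's last fence
 * instead of dma_fence_get_stub(), so an out-sync cannot complete before
 * earlier operations on the same queue.  Assumes vm->lock held in write mode.
 */
static void example_signal_nop_syncs(struct xe_vm *vm, struct xe_exec_queue *q,
				     struct xe_sync_entry *syncs, u32 num_syncs)
{
	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
	u32 i;

	for (i = 0; i < num_syncs; i++) {
		struct dma_fence *fence =
			xe_exec_queue_last_fence_get(wait_exec_queue, vm);

		xe_sync_entry_signal(&syncs[i], NULL, fence);
	}
}
```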