diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_vma_resource.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_vma_resource.c | 354 |
1 files changed, 323 insertions, 31 deletions
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c index b50e67035d15..3e55a30b2da7 100644 --- a/drivers/gpu/drm/i915/i915_vma_resource.c +++ b/drivers/gpu/drm/i915/i915_vma_resource.c @@ -2,39 +2,44 @@ /* * Copyright © 2021 Intel Corporation */ + +#include <linux/interval_tree_generic.h> #include <linux/slab.h> +#include "i915_sw_fence.h" #include "i915_vma_resource.h" +#include "i915_drv.h" -/* Callbacks for the unbind dma-fence. */ -static const char *get_driver_name(struct dma_fence *fence) -{ - return "vma unbind fence"; -} +#include "gt/intel_gtt.h" -static const char *get_timeline_name(struct dma_fence *fence) -{ - return "unbound"; -} - -static struct dma_fence_ops unbind_fence_ops = { - .get_driver_name = get_driver_name, - .get_timeline_name = get_timeline_name, -}; +static struct kmem_cache *slab_vma_resources; /** - * __i915_vma_resource_init - Initialize a vma resource. - * @vma_res: The vma resource to initialize + * DOC: + * We use a per-vm interval tree to keep track of vma_resources + * scheduled for unbind but not yet unbound. The tree is protected by + * the vm mutex, and nodes are removed just after the unbind fence signals. + * The removal takes the vm mutex from a kernel thread which we need to + * keep in mind so that we don't grab the mutex and try to wait for all + * pending unbinds to complete, because that will temporarily block many + * of the workqueue threads, and people will get angry. * - * Initializes the private members of a vma resource. + * We should consider using a single ordered fence per VM instead but that + * requires ordering the unbinds and might introduce unnecessary waiting + * for unrelated unbinds. Amount of code will probably be roughly the same + * due to the simplicity of using the interval tree interface. 
+ * + * Another drawback of this interval tree is that the complexity of insertion + * and removal of fences increases as O(ln(pending_unbinds)) instead of + * O(1) for a single fence without interval tree. */ -void __i915_vma_resource_init(struct i915_vma_resource *vma_res) -{ - spin_lock_init(&vma_res->lock); - dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops, - &vma_res->lock, 0, 0); - refcount_set(&vma_res->hold_count, 1); -} +#define VMA_RES_START(_node) ((_node)->start) +#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size - 1) +INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb, + u64, __subtree_last, + VMA_RES_START, VMA_RES_LAST, static, vma_res_itree); + +/* Callbacks for the unbind dma-fence. */ /** * i915_vma_resource_alloc - Allocate a vma resource @@ -45,15 +50,73 @@ void __i915_vma_resource_init(struct i915_vma_resource *vma_res) struct i915_vma_resource *i915_vma_resource_alloc(void) { struct i915_vma_resource *vma_res = - kzalloc(sizeof(*vma_res), GFP_KERNEL); + kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL); return vma_res ? vma_res : ERR_PTR(-ENOMEM); } +/** + * i915_vma_resource_free - Free a vma resource + * @vma_res: The vma resource to free. 
+ */ +void i915_vma_resource_free(struct i915_vma_resource *vma_res) +{ + kmem_cache_free(slab_vma_resources, vma_res); +} + +static const char *get_driver_name(struct dma_fence *fence) +{ + return "vma unbind fence"; +} + +static const char *get_timeline_name(struct dma_fence *fence) +{ + return "unbound"; +} + +static void unbind_fence_free_rcu(struct rcu_head *head) +{ + struct i915_vma_resource *vma_res = + container_of(head, typeof(*vma_res), unbind_fence.rcu); + + i915_vma_resource_free(vma_res); +} + +static void unbind_fence_release(struct dma_fence *fence) +{ + struct i915_vma_resource *vma_res = + container_of(fence, typeof(*vma_res), unbind_fence); + + i915_sw_fence_fini(&vma_res->chain); + + call_rcu(&fence->rcu, unbind_fence_free_rcu); +} + +static struct dma_fence_ops unbind_fence_ops = { + .get_driver_name = get_driver_name, + .get_timeline_name = get_timeline_name, + .release = unbind_fence_release, +}; + static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res) { - if (refcount_dec_and_test(&vma_res->hold_count)) - dma_fence_signal(&vma_res->unbind_fence); + struct i915_address_space *vm; + + if (!refcount_dec_and_test(&vma_res->hold_count)) + return; + + dma_fence_signal(&vma_res->unbind_fence); + + vm = vma_res->vm; + if (vma_res->wakeref) + intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref); + + vma_res->vm = NULL; + if (!RB_EMPTY_NODE(&vma_res->rb)) { + mutex_lock(&vm->mutex); + vma_res_itree_remove(vma_res, &vm->pending_unbind); + mutex_unlock(&vm->mutex); + } } /** @@ -102,6 +165,49 @@ bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, return held; } +static void i915_vma_resource_unbind_work(struct work_struct *work) +{ + struct i915_vma_resource *vma_res = + container_of(work, typeof(*vma_res), work); + struct i915_address_space *vm = vma_res->vm; + bool lockdep_cookie; + + lockdep_cookie = dma_fence_begin_signalling(); + if (likely(atomic_read(&vm->open))) + vma_res->ops->unbind_vma(vm, vma_res); + + 
dma_fence_end_signalling(lockdep_cookie); + __i915_vma_resource_unhold(vma_res); + i915_vma_resource_put(vma_res); +} + +static int +i915_vma_resource_fence_notify(struct i915_sw_fence *fence, + enum i915_sw_fence_notify state) +{ + struct i915_vma_resource *vma_res = + container_of(fence, typeof(*vma_res), chain); + struct dma_fence *unbind_fence = + &vma_res->unbind_fence; + + switch (state) { + case FENCE_COMPLETE: + dma_fence_get(unbind_fence); + if (vma_res->immediate_unbind) { + i915_vma_resource_unbind_work(&vma_res->work); + } else { + INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work); + queue_work(system_unbound_wq, &vma_res->work); + } + break; + case FENCE_FREE: + i915_vma_resource_put(vma_res); + break; + } + + return NOTIFY_DONE; +} + /** * i915_vma_resource_unbind - Unbind a vma resource * @vma_res: The vma resource to unbind. @@ -112,10 +218,196 @@ bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, * Return: A refcounted pointer to a dma-fence that signals when unbinding is * complete. */ -struct dma_fence * -i915_vma_resource_unbind(struct i915_vma_resource *vma_res) +struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res) { - __i915_vma_resource_unhold(vma_res); - dma_fence_get(&vma_res->unbind_fence); + struct i915_address_space *vm = vma_res->vm; + + /* Reference for the sw fence */ + i915_vma_resource_get(vma_res); + + /* Caller must already have a wakeref in this case. */ + if (vma_res->needs_wakeref) + vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm); + + if (atomic_read(&vma_res->chain.pending) <= 1) { + RB_CLEAR_NODE(&vma_res->rb); + vma_res->immediate_unbind = 1; + } else { + vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind); + } + + i915_sw_fence_commit(&vma_res->chain); + return &vma_res->unbind_fence; } + +/** + * __i915_vma_resource_init - Initialize a vma resource. 
+ * @vma_res: The vma resource to initialize + * + * Initializes the private members of a vma resource. + */ +void __i915_vma_resource_init(struct i915_vma_resource *vma_res) +{ + spin_lock_init(&vma_res->lock); + dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops, + &vma_res->lock, 0, 0); + refcount_set(&vma_res->hold_count, 1); + i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify); +} + +static void +i915_vma_resource_color_adjust_range(struct i915_address_space *vm, + u64 *start, + u64 *end) +{ + if (i915_vm_has_cache_coloring(vm)) { + if (*start) + *start -= I915_GTT_PAGE_SIZE; + *end += I915_GTT_PAGE_SIZE; + } +} + +/** + * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a + * certain vm range. + * @vm: The vm to look at. + * @offset: The range start. + * @size: The range size. + * @intr: Whether to wait interruptibly. + * + * The function needs to be called with the vm lock held. + * + * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true + */ +int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm, + u64 offset, + u64 size, + bool intr) +{ + struct i915_vma_resource *node; + u64 last = offset + size - 1; + + lockdep_assert_held(&vm->mutex); + might_sleep(); + + i915_vma_resource_color_adjust_range(vm, &offset, &last); + node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last); + while (node) { + int ret = dma_fence_wait(&node->unbind_fence, intr); + + if (ret) + return ret; + + node = vma_res_itree_iter_next(node, offset, last); + } + + return 0; +} + +/** + * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm, + * releasing the vm lock while waiting. + * @vm: The vm to look at. + * + * The function must not be called with the vm lock held. + * Typically this is called at vm destruction to finish any pending + * unbind operations. The vm mutex is released while waiting to avoid + * stalling kernel workqueues trying to grab the mutex. 
+ */ +void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm) +{ + struct i915_vma_resource *node; + struct dma_fence *fence; + + do { + fence = NULL; + mutex_lock(&vm->mutex); + node = vma_res_itree_iter_first(&vm->pending_unbind, 0, + U64_MAX); + if (node) + fence = dma_fence_get_rcu(&node->unbind_fence); + mutex_unlock(&vm->mutex); + + if (fence) { + /* + * The wait makes sure the node eventually removes + * itself from the tree. + */ + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + } while (node); +} + +/** + * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all + * pending unbinds in a certain range of a vm. + * @vm: The vm to look at. + * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds. + * @offset: The range start. + * @size: The range size. + * @intr: Whether to wait interruptibly. + * @gfp: Allocation mode for memory allocations. + * + * The function makes @sw_fence await all pending unbinds in a certain + * vm range before calling the complete notifier. To be able to await + * each individual unbind, the function needs to allocate memory using + * the @gfp allocation mode. If that fails, the function will instead + * wait for the unbind fence to signal, using @intr to judge whether to + * wait interruptibly or not. Note that @gfp should ideally be selected so + * as to avoid any expensive memory allocation stalls and rather fail and + * synchronize itself. For now the vm mutex is required when calling this + * function, which means that @gfp can't call into direct reclaim. In reality + * this means that during heavy memory pressure, we will sync in this + * function. 
+ * + * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true + */ +int i915_vma_resource_bind_dep_await(struct i915_address_space *vm, + struct i915_sw_fence *sw_fence, + u64 offset, + u64 size, + bool intr, + gfp_t gfp) +{ + struct i915_vma_resource *node; + u64 last = offset + size - 1; + + lockdep_assert_held(&vm->mutex); + might_alloc(gfp); + might_sleep(); + + i915_vma_resource_color_adjust_range(vm, &offset, &last); + node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last); + while (node) { + int ret; + + ret = i915_sw_fence_await_dma_fence(sw_fence, + &node->unbind_fence, + 0, gfp); + if (ret < 0) { + ret = dma_fence_wait(&node->unbind_fence, intr); + if (ret) + return ret; + } + + node = vma_res_itree_iter_next(node, offset, last); + } + + return 0; +} + +void i915_vma_resource_module_exit(void) +{ + kmem_cache_destroy(slab_vma_resources); +} + +int __init i915_vma_resource_module_init(void) +{ + slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN); + if (!slab_vma_resources) + return -ENOMEM; + + return 0; +} |