diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2019-08-02 11:00:15 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2019-08-02 11:53:04 +0100 |
commit | d8af05ff38ae7a42819b285ffef314942414ef8b (patch) | |
tree | 1ec8990be56972746cada518e40e07f64696fd3a /drivers/gpu/drm/i915/i915_active.c | |
parent | 576f05865581f82ac988ffec70e4e2ebd31165db (diff) |
drm/i915: Allow sharing the idle-barrier from other kernel requests
By placing our idle-barriers in the i915_active fence tree, we expose
those for reuse by other components that are issuing requests along the
kernel_context. Reusing the proto-barrier active_node is perfectly fine
as the new request implies a context-switch, and so an opportune point
to run the idle-barrier. However, the proto-barrier is not equivalent
to a normal active_node and care must be taken to avoid dereferencing the
ERR_PTR used as its request marker.
v2: Comment the more egregious cheek
v3: A glossary!
Reported-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: ce476c80b8bf ("drm/i915: Keep contexts pinned until after the next kernel context switch")
Fixes: a9877da2d629 ("drm/i915/oa: Reconfigure contexts on the fly")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190802100015.1281-1-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_active.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_active.c | 288 |
1 files changed, 249 insertions, 39 deletions
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index d32db8a4db5c..1e09722b5317 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -33,6 +33,38 @@ struct active_node { u64 timeline; }; +static inline struct active_node * +node_from_active(struct i915_active_request *active) +{ + return container_of(active, struct active_node, base); +} + +#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers) + +static inline bool is_barrier(const struct i915_active_request *active) +{ + return IS_ERR(rcu_access_pointer(active->request)); +} + +static inline struct llist_node *barrier_to_ll(struct active_node *node) +{ + GEM_BUG_ON(!is_barrier(&node->base)); + return (struct llist_node *)&node->base.link; +} + +static inline struct intel_engine_cs * +barrier_to_engine(struct active_node *node) +{ + GEM_BUG_ON(!is_barrier(&node->base)); + return (struct intel_engine_cs *)node->base.link.prev; +} + +static inline struct active_node *barrier_from_ll(struct llist_node *x) +{ + return container_of((struct list_head *)x, + struct active_node, base.link); +} + #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS) static void *active_debug_hint(void *addr) @@ -127,7 +159,7 @@ active_retire(struct i915_active *ref) static void node_retire(struct i915_active_request *base, struct i915_request *rq) { - active_retire(container_of(base, struct active_node, base)->ref); + active_retire(node_from_active(base)->ref); } static struct i915_active_request * @@ -184,6 +216,7 @@ out: ref->cache = node; mutex_unlock(&ref->mutex); + BUILD_BUG_ON(offsetof(typeof(*node), base)); return &node->base; } @@ -201,11 +234,52 @@ void __i915_active_init(struct drm_i915_private *i915, ref->retire = retire; ref->tree = RB_ROOT; ref->cache = NULL; - init_llist_head(&ref->barriers); + init_llist_head(&ref->preallocated_barriers); atomic_set(&ref->count, 0); __mutex_init(&ref->mutex, "i915_active", key); } +static bool __active_del_barrier(struct i915_active *ref, + struct active_node *node) +{ + struct intel_engine_cs *engine = barrier_to_engine(node); + struct llist_node *head = NULL, *tail = NULL; + struct llist_node *pos, *next; + + GEM_BUG_ON(node->timeline != engine->kernel_context->ring->timeline->fence_context); + + /* + * Rebuild the llist excluding our node. We may perform this + * outside of the kernel_context timeline mutex and so someone + * else may be manipulating the engine->barrier_tasks, in + * which case either we or they will be upset :) + * + * A second __active_del_barrier() will report failure to claim + * the active_node and the caller will just shrug and know not to + * claim ownership of its node. + * + * A concurrent i915_request_add_active_barriers() will miss adding + * any of the tasks, but we will try again on the next -- and since + * we are actively using the barrier, we know that there will be + * at least another opportunity when we idle. + */ + llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) { + if (node == barrier_from_ll(pos)) { + node = NULL; + continue; + } + + pos->next = head; + head = pos; + if (!tail) + tail = pos; + } + if (head) + llist_add_batch(head, tail, &engine->barrier_tasks); + + return !node; +} + int i915_active_ref(struct i915_active *ref, u64 timeline, struct i915_request *rq) @@ -224,8 +298,20 @@ int i915_active_ref(struct i915_active *ref, goto out; } - if (!i915_active_request_isset(active)) - atomic_inc(&ref->count); + if (is_barrier(active)) { /* proto-node used by our idle barrier */ + /* + * This request is on the kernel_context timeline, and so + * we can use it to substitute for the pending idle-barrer + * request that we want to emit on the kernel_context. + */ + __active_del_barrier(ref, node_from_active(active)); + RCU_INIT_POINTER(active->request, NULL); + INIT_LIST_HEAD(&active->link); + } else { + if (!i915_active_request_isset(active)) + atomic_inc(&ref->count); + } + GEM_BUG_ON(!atomic_read(&ref->count)); __i915_active_request_set(active, rq); out: @@ -312,6 +398,11 @@ int i915_active_wait(struct i915_active *ref) } rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { + if (is_barrier(&it->base)) { /* unconnected idle-barrier */ + err = -EBUSY; + break; + } + err = i915_active_request_retire(&it->base, BKL(ref)); if (err) break; @@ -374,6 +465,92 @@ void i915_active_fini(struct i915_active *ref) } #endif +static inline bool is_idle_barrier(struct active_node *node, u64 idx) +{ + return node->timeline == idx && !i915_active_request_isset(&node->base); +} + +static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) +{ + struct rb_node *prev, *p; + + if (RB_EMPTY_ROOT(&ref->tree)) + return NULL; + + mutex_lock(&ref->mutex); + GEM_BUG_ON(i915_active_is_idle(ref)); + + /* + * Try to reuse any existing barrier nodes already allocated for this + * i915_active, due to overlapping active phases there is likely a + * node kept alive (as we reuse before parking). We prefer to reuse + * completely idle barriers (less hassle in manipulating the llists), + * but otherwise any will do. + */ + if (ref->cache && is_idle_barrier(ref->cache, idx)) { + p = &ref->cache->node; + goto match; + } + + prev = NULL; + p = ref->tree.rb_node; + while (p) { + struct active_node *node = + rb_entry(p, struct active_node, node); + + if (is_idle_barrier(node, idx)) + goto match; + + prev = p; + if (node->timeline < idx) + p = p->rb_right; + else + p = p->rb_left; + } + + /* + * No quick match, but we did find the leftmost rb_node for the + * kernel_context. Walk the rb_tree in-order to see if there were + * any idle-barriers on this timeline that we missed, or just use + * the first pending barrier. + */ + for (p = prev; p; p = rb_next(p)) { + struct active_node *node = + rb_entry(p, struct active_node, node); + + if (node->timeline > idx) + break; + + if (node->timeline < idx) + continue; + + if (is_idle_barrier(node, idx)) + goto match; + + /* + * The list of pending barriers is protected by the + * kernel_context timeline, which notably we do not hold + * here. i915_request_add_active_barriers() may consume + * the barrier before we claim it, so we have to check + * for success. + */ + if (is_barrier(&node->base) && __active_del_barrier(ref, node)) + goto match; + } + + mutex_unlock(&ref->mutex); + + return NULL; + +match: + rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */ + if (p == &ref->cache->node) + ref->cache = NULL; + mutex_unlock(&ref->mutex); + + return rb_entry(p, struct active_node, node); +} + int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct intel_engine_cs *engine) { @@ -382,39 +559,61 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct llist_node *pos, *next; int err; - GEM_BUG_ON(!mask); + GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers)); + + /* + * Preallocate a node for each physical engine supporting the target + * engine (remember virtual engines have more than one sibling). + * We can then use the preallocated nodes in + * i915_active_acquire_barrier() + */ for_each_engine_masked(engine, i915, mask, tmp) { - struct intel_context *kctx = engine->kernel_context; + u64 idx = engine->kernel_context->ring->timeline->fence_context; struct active_node *node; - node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); - if (unlikely(!node)) { - err = -ENOMEM; - goto unwind; + node = reuse_idle_barrier(ref, idx); + if (!node) { + node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); + if (!node) { + err = ENOMEM; + goto unwind; + } + + RCU_INIT_POINTER(node->base.request, NULL); + node->base.retire = node_retire; + node->timeline = idx; + node->ref = ref; } - i915_active_request_init(&node->base, - (void *)engine, node_retire); - node->timeline = kctx->ring->timeline->fence_context; - node->ref = ref; - atomic_inc(&ref->count); + if (!i915_active_request_isset(&node->base)) { + /* + * Mark this as being *our* unconnected proto-node. + * + * Since this node is not in any list, and we have + * decoupled it from the rbtree, we can reuse the + * request to indicate this is an idle-barrier node + * and then we can use the rb_node and list pointers + * for our tracking of the pending barrier. + */ + RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN)); + node->base.link.prev = (void *)engine; + atomic_inc(&ref->count); + } + GEM_BUG_ON(barrier_to_engine(node) != engine); + llist_add(barrier_to_ll(node), &ref->preallocated_barriers); intel_engine_pm_get(engine); - llist_add((struct llist_node *)&node->base.link, - &ref->barriers); } return 0; unwind: - llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) { - struct active_node *node; + llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { + struct active_node *node = barrier_from_ll(pos); - node = container_of((struct list_head *)pos, - typeof(*node), base.link); - engine = (void *)rcu_access_pointer(node->base.request); + atomic_dec(&ref->count); + intel_engine_pm_put(barrier_to_engine(node)); - intel_engine_pm_put(engine); kmem_cache_free(global.slab_cache, node); } return err; @@ -426,25 +625,27 @@ void i915_active_acquire_barrier(struct i915_active *ref) GEM_BUG_ON(i915_active_is_idle(ref)); + /* + * Transfer the list of preallocated barriers into the + * i915_active rbtree, but only as proto-nodes. They will be + * populated by i915_request_add_active_barriers() to point to the + * request that will eventually release them. + */ mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); - llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) { - struct intel_engine_cs *engine; - struct active_node *node; + llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { + struct active_node *node = barrier_from_ll(pos); + struct intel_engine_cs *engine = barrier_to_engine(node); struct rb_node **p, *parent; - node = container_of((struct list_head *)pos, - typeof(*node), base.link); - - engine = (void *)rcu_access_pointer(node->base.request); - RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN)); - parent = NULL; p = &ref->tree.rb_node; while (*p) { + struct active_node *it; + parent = *p; - if (rb_entry(parent, - struct active_node, - node)->timeline < node->timeline) + + it = rb_entry(parent, struct active_node, node); + if (it->timeline < node->timeline) p = &parent->rb_right; else p = &parent->rb_left; @@ -452,20 +653,29 @@ void i915_active_acquire_barrier(struct i915_active *ref) rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &ref->tree); - llist_add((struct llist_node *)&node->base.link, - &engine->barrier_tasks); + llist_add(barrier_to_ll(node), &engine->barrier_tasks); intel_engine_pm_put(engine); } mutex_unlock(&ref->mutex); } -void i915_request_add_barriers(struct i915_request *rq) +void i915_request_add_active_barriers(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct llist_node *node, *next; - llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) + GEM_BUG_ON(intel_engine_is_virtual(engine)); + GEM_BUG_ON(rq->timeline != engine->kernel_context->ring->timeline); + + /* + * Attach the list of proto-fences to the in-flight request such + * that the parent i915_active will be released when this request + * is retired. + */ + llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { + RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq); list_add_tail((struct list_head *)node, &rq->active_list); + } } int i915_active_request_set(struct i915_active_request *active, |