drm/i915: Allow sharing the idle-barrier from other kernel requests

By placing our idle-barriers in the i915_active fence tree, we expose those for reuse by other components that are issuing requests along the kernel_context. Reusing the proto-barrier active_node is perfectly fine as the new request implies a context-switch, and so an opportune point to run the idle-barrier. However, the proto-barrier is not equivalent to a normal active_node and care must be taken to avoid dereferencing the ERR_PTR used as its request marker. v2: Comment the more egregious cheek v3: A glossary! Reported-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Fixes: ce476c80b8bf ("drm/i915: Keep contexts pinned until after the next kernel context switch") Fixes: a9877da2d629 ("drm/i915/oa: Reconfigure contexts on the fly") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190802100015.1281-1-chris@chris-wilson.co.uk
author: Chris Wilson <chris@chris-wilson.co.uk> 2019-08-02 11:00:15 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2019-08-02 11:53:04 +0100
commit: d8af05ff38ae7a42819b285ffef314942414ef8b (patch)
tree: 1ec8990be56972746cada518e40e07f64696fd3a /drivers/gpu/drm/i915/i915_active.c
parent: 576f05865581f82ac988ffec70e4e2ebd31165db (diff)
1 files changed, 249 insertions, 39 deletions
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index d32db8a4db5c..1e09722b5317 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -33,6 +33,38 @@ struct active_node {
 	u64 timeline;
 };
 
+static inline struct active_node *
+node_from_active(struct i915_active_request *active)
+{
+	return container_of(active, struct active_node, base);
+}
+
+#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)
+
+static inline bool is_barrier(const struct i915_active_request *active)
+{
+	return IS_ERR(rcu_access_pointer(active->request));
+}
+
+static inline struct llist_node *barrier_to_ll(struct active_node *node)
+{
+	GEM_BUG_ON(!is_barrier(&node->base));
+	return (struct llist_node *)&node->base.link;
+}
+
+static inline struct intel_engine_cs *
+barrier_to_engine(struct active_node *node)
+{
+	GEM_BUG_ON(!is_barrier(&node->base));
+	return (struct intel_engine_cs *)node->base.link.prev;
+}
+
+static inline struct active_node *barrier_from_ll(struct llist_node *x)
+{
+	return container_of((struct list_head *)x,
+			    struct active_node, base.link);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)
 
 static void *active_debug_hint(void *addr)
@@ -127,7 +159,7 @@ active_retire(struct i915_active *ref)
 static void
 node_retire(struct i915_active_request *base, struct i915_request *rq)
 {
-	active_retire(container_of(base, struct active_node, base)->ref);
+	active_retire(node_from_active(base)->ref);
 }
 
 static struct i915_active_request *
@@ -184,6 +216,7 @@ out:
 	ref->cache = node;
 	mutex_unlock(&ref->mutex);
 
+	BUILD_BUG_ON(offsetof(typeof(*node), base));
 	return &node->base;
 }
 
@@ -201,11 +234,52 @@ void __i915_active_init(struct drm_i915_private *i915,
 	ref->retire = retire;
 	ref->tree = RB_ROOT;
 	ref->cache = NULL;
-	init_llist_head(&ref->barriers);
+	init_llist_head(&ref->preallocated_barriers);
 	atomic_set(&ref->count, 0);
 	__mutex_init(&ref->mutex, "i915_active", key);
 }
 
+static bool __active_del_barrier(struct i915_active *ref,
+				 struct active_node *node)
+{
+	struct intel_engine_cs *engine = barrier_to_engine(node);
+	struct llist_node *head = NULL, *tail = NULL;
+	struct llist_node *pos, *next;
+
+	GEM_BUG_ON(node->timeline != engine->kernel_context->ring->timeline->fence_context);
+
+	/*
+	 * Rebuild the llist excluding our node. We may perform this
+	 * outside of the kernel_context timeline mutex and so someone
+	 * else may be manipulating the engine->barrier_tasks, in
+	 * which case either we or they will be upset :)
+	 *
+	 * A second __active_del_barrier() will report failure to claim
+	 * the active_node and the caller will just shrug and know not to
+	 * claim ownership of its node.
+	 *
+	 * A concurrent i915_request_add_active_barriers() will miss adding
+	 * any of the tasks, but we will try again on the next -- and since
+	 * we are actively using the barrier, we know that there will be
+	 * at least another opportunity when we idle.
+	 */
+	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
+		if (node == barrier_from_ll(pos)) {
+			node = NULL;
+			continue;
+		}
+
+		pos->next = head;
+		head = pos;
+		if (!tail)
+			tail = pos;
+	}
+	if (head)
+		llist_add_batch(head, tail, &engine->barrier_tasks);
+
+	return !node;
+}
+
 int i915_active_ref(struct i915_active *ref,
 		    u64 timeline,
 		    struct i915_request *rq)
@@ -224,8 +298,20 @@ int i915_active_ref(struct i915_active *ref,
 		goto out;
 	}
 
-	if (!i915_active_request_isset(active))
-		atomic_inc(&ref->count);
+	if (is_barrier(active)) { /* proto-node used by our idle barrier */
+		/*
+		 * This request is on the kernel_context timeline, and so
+		 * we can use it to substitute for the pending idle-barrer
+		 * request that we want to emit on the kernel_context.
+		 */
+		__active_del_barrier(ref, node_from_active(active));
+		RCU_INIT_POINTER(active->request, NULL);
+		INIT_LIST_HEAD(&active->link);
+	} else {
+		if (!i915_active_request_isset(active))
+			atomic_inc(&ref->count);
+	}
+	GEM_BUG_ON(!atomic_read(&ref->count));
 	__i915_active_request_set(active, rq);
 
 out:
@@ -312,6 +398,11 @@ int i915_active_wait(struct i915_active *ref)
 	}
 
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+		if (is_barrier(&it->base)) { /* unconnected idle-barrier */
+			err = -EBUSY;
+			break;
+		}
+
 		err = i915_active_request_retire(&it->base, BKL(ref));
 		if (err)
 			break;
@@ -374,6 +465,92 @@ void i915_active_fini(struct i915_active *ref)
 }
 #endif
 
+static inline bool is_idle_barrier(struct active_node *node, u64 idx)
+{
+	return node->timeline == idx && !i915_active_request_isset(&node->base);
+}
+
+static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
+{
+	struct rb_node *prev, *p;
+
+	if (RB_EMPTY_ROOT(&ref->tree))
+		return NULL;
+
+	mutex_lock(&ref->mutex);
+	GEM_BUG_ON(i915_active_is_idle(ref));
+
+	/*
+	 * Try to reuse any existing barrier nodes already allocated for this
+	 * i915_active, due to overlapping active phases there is likely a
+	 * node kept alive (as we reuse before parking). We prefer to reuse
+	 * completely idle barriers (less hassle in manipulating the llists),
+	 * but otherwise any will do.
+	 */
+	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
+		p = &ref->cache->node;
+		goto match;
+	}
+
+	prev = NULL;
+	p = ref->tree.rb_node;
+	while (p) {
+		struct active_node *node =
+			rb_entry(p, struct active_node, node);
+
+		if (is_idle_barrier(node, idx))
+			goto match;
+
+		prev = p;
+		if (node->timeline < idx)
+			p = p->rb_right;
+		else
+			p = p->rb_left;
+	}
+
+	/*
+	 * No quick match, but we did find the leftmost rb_node for the
+	 * kernel_context. Walk the rb_tree in-order to see if there were
+	 * any idle-barriers on this timeline that we missed, or just use
+	 * the first pending barrier.
+	 */
+	for (p = prev; p; p = rb_next(p)) {
+		struct active_node *node =
+			rb_entry(p, struct active_node, node);
+
+		if (node->timeline > idx)
+			break;
+
+		if (node->timeline < idx)
+			continue;
+
+		if (is_idle_barrier(node, idx))
+			goto match;
+
+		/*
+		 * The list of pending barriers is protected by the
+		 * kernel_context timeline, which notably we do not hold
+		 * here. i915_request_add_active_barriers() may consume
+		 * the barrier before we claim it, so we have to check
+		 * for success.
+		 */
+		if (is_barrier(&node->base) && __active_del_barrier(ref, node))
+			goto match;
+	}
+
+	mutex_unlock(&ref->mutex);
+
+	return NULL;
+
+match:
+	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
+	if (p == &ref->cache->node)
+		ref->cache = NULL;
+	mutex_unlock(&ref->mutex);
+
+	return rb_entry(p, struct active_node, node);
+}
+
 int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 					    struct intel_engine_cs *engine)
 {
@@ -382,39 +559,61 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 	struct llist_node *pos, *next;
 	int err;
 
-	GEM_BUG_ON(!mask);
+	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
+
+	/*
+	 * Preallocate a node for each physical engine supporting the target
+	 * engine (remember virtual engines have more than one sibling).
+	 * We can then use the preallocated nodes in
+	 * i915_active_acquire_barrier()
+	 */
 	for_each_engine_masked(engine, i915, mask, tmp) {
-		struct intel_context *kctx = engine->kernel_context;
+		u64 idx = engine->kernel_context->ring->timeline->fence_context;
 		struct active_node *node;
 
-		node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
-		if (unlikely(!node)) {
-			err = -ENOMEM;
-			goto unwind;
+		node = reuse_idle_barrier(ref, idx);
+		if (!node) {
+			node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
+			if (!node) {
+				err = ENOMEM;
+				goto unwind;
+			}
+
+			RCU_INIT_POINTER(node->base.request, NULL);
+			node->base.retire = node_retire;
+			node->timeline = idx;
+			node->ref = ref;
 		}
 
-		i915_active_request_init(&node->base,
-					 (void *)engine, node_retire);
-		node->timeline = kctx->ring->timeline->fence_context;
-		node->ref = ref;
-		atomic_inc(&ref->count);
+		if (!i915_active_request_isset(&node->base)) {
+			/*
+			 * Mark this as being *our* unconnected proto-node.
+			 *
+			 * Since this node is not in any list, and we have
+			 * decoupled it from the rbtree, we can reuse the
+			 * request to indicate this is an idle-barrier node
+			 * and then we can use the rb_node and list pointers
+			 * for our tracking of the pending barrier.
+			 */
+			RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
+			node->base.link.prev = (void *)engine;
+			atomic_inc(&ref->count);
+		}
 
+		GEM_BUG_ON(barrier_to_engine(node) != engine);
+		llist_add(barrier_to_ll(node), &ref->preallocated_barriers);
 		intel_engine_pm_get(engine);
-		llist_add((struct llist_node *)&node->base.link,
-			  &ref->barriers);
 	}
 
 	return 0;
 
 unwind:
-	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
-		struct active_node *node;
+	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
+		struct active_node *node = barrier_from_ll(pos);
 
-		node = container_of((struct list_head *)pos,
-				    typeof(*node), base.link);
-		engine = (void *)rcu_access_pointer(node->base.request);
+		atomic_dec(&ref->count);
+		intel_engine_pm_put(barrier_to_engine(node));
 
-		intel_engine_pm_put(engine);
 		kmem_cache_free(global.slab_cache, node);
 	}
 	return err;
@@ -426,25 +625,27 @@ void i915_active_acquire_barrier(struct i915_active *ref)
 
 	GEM_BUG_ON(i915_active_is_idle(ref));
 
+	/*
+	 * Transfer the list of preallocated barriers into the
+	 * i915_active rbtree, but only as proto-nodes. They will be
+	 * populated by i915_request_add_active_barriers() to point to the
+	 * request that will eventually release them.
+	 */
 	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
-	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
-		struct intel_engine_cs *engine;
-		struct active_node *node;
+	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
+		struct active_node *node = barrier_from_ll(pos);
+		struct intel_engine_cs *engine = barrier_to_engine(node);
 		struct rb_node **p, *parent;
 
-		node = container_of((struct list_head *)pos,
-				    typeof(*node), base.link);
-
-		engine = (void *)rcu_access_pointer(node->base.request);
-		RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
-
 		parent = NULL;
 		p = &ref->tree.rb_node;
 		while (*p) {
+			struct active_node *it;
+
 			parent = *p;
-			if (rb_entry(parent,
-				     struct active_node,
-				     node)->timeline < node->timeline)
+
+			it = rb_entry(parent, struct active_node, node);
+			if (it->timeline < node->timeline)
 				p = &parent->rb_right;
 			else
 				p = &parent->rb_left;
@@ -452,20 +653,29 @@ void i915_active_acquire_barrier(struct i915_active *ref)
 		rb_link_node(&node->node, parent, p);
 		rb_insert_color(&node->node, &ref->tree);
 
-		llist_add((struct llist_node *)&node->base.link,
-			  &engine->barrier_tasks);
+		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
 		intel_engine_pm_put(engine);
 	}
 	mutex_unlock(&ref->mutex);
 }
 
-void i915_request_add_barriers(struct i915_request *rq)
+void i915_request_add_active_barriers(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
 	struct llist_node *node, *next;
 
-	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
+	GEM_BUG_ON(intel_engine_is_virtual(engine));
+	GEM_BUG_ON(rq->timeline != engine->kernel_context->ring->timeline);
+
+	/*
+	 * Attach the list of proto-fences to the in-flight request such
+	 * that the parent i915_active will be released when this request
+	 * is retired.
+	 */
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+		RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq);
 		list_add_tail((struct list_head *)node, &rq->active_list);
+	}
 }
 
 int i915_active_request_set(struct i915_active_request *active,
author	Chris Wilson <chris@chris-wilson.co.uk>	2019-08-02 11:00:15 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	2019-08-02 11:53:04 +0100
commit	d8af05ff38ae7a42819b285ffef314942414ef8b (patch)
tree	1ec8990be56972746cada518e40e07f64696fd3a /drivers/gpu/drm/i915/i915_active.c
parent	576f05865581f82ac988ffec70e4e2ebd31165db (diff)