1 files changed, 264 insertions, 151 deletions
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index d960d0be5bd2..6b0c1162505a 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -13,7 +13,6 @@
 
 #include "i915_drv.h"
 #include "i915_active.h"
-#include "i915_globals.h"
 
 /*
  * Active refs memory management
@@ -22,18 +21,17 @@
  * they idle (when we know the active requests are inactive) and allocate the
  * nodes from a local slab cache to hopefully reduce the fragmentation.
  */
-static struct i915_global_active {
-	struct i915_global base;
-	struct kmem_cache *slab_cache;
-} global;
+static struct kmem_cache *slab_cache;
 
 struct active_node {
+	struct rb_node node;
 	struct i915_active_fence base;
 	struct i915_active *ref;
-	struct rb_node node;
 	u64 timeline;
 };
 
+#define fetch_node(x) rb_entry(READ_ONCE(x), typeof(struct active_node), node)
+
 static inline struct active_node *
 node_from_active(struct i915_active_fence *active)
 {
@@ -81,7 +79,7 @@ static void *active_debug_hint(void *addr)
 	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
 }
 
-static struct debug_obj_descr active_debug_desc = {
+static const struct debug_obj_descr active_debug_desc = {
 	.name = "i915_active",
 	.debug_hint = active_debug_hint,
 };
@@ -94,8 +92,7 @@ static void debug_active_init(struct i915_active *ref)
 static void debug_active_activate(struct i915_active *ref)
 {
 	lockdep_assert_held(&ref->tree_lock);
-	if (!atomic_read(&ref->count)) /* before the first inc */
-		debug_object_activate(ref, &active_debug_desc);
+	debug_object_activate(ref, &active_debug_desc);
 }
 
 static void debug_active_deactivate(struct i915_active *ref)
@@ -128,8 +125,8 @@ static inline void debug_active_assert(struct i915_active *ref) { }
 static void
 __active_retire(struct i915_active *ref)
 {
+	struct rb_root root = RB_ROOT;
 	struct active_node *it, *n;
-	struct rb_root root;
 	unsigned long flags;
 
 	GEM_BUG_ON(i915_active_is_idle(ref));
@@ -141,9 +138,24 @@ __active_retire(struct i915_active *ref)
 	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
 	debug_active_deactivate(ref);
 
-	root = ref->tree;
-	ref->tree = RB_ROOT;
-	ref->cache = NULL;
+	/* Even if we have not used the cache, we may still have a barrier */
+	if (!ref->cache)
+		ref->cache = fetch_node(ref->tree.rb_node);
+
+	/* Keep the MRU cached node for reuse */
+	if (ref->cache) {
+		/* Discard all other nodes in the tree */
+		rb_erase(&ref->cache->node, &ref->tree);
+		root = ref->tree;
+
+		/* Rebuild the tree with only the cached node */
+		rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node);
+		rb_insert_color(&ref->cache->node, &ref->tree);
+		GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node);
+
+		/* Make the cached node available for reuse with any timeline */
+		ref->cache->timeline = 0; /* needs cmpxchg(u64) */
+	}
 
 	spin_unlock_irqrestore(&ref->tree_lock, flags);
 
@@ -154,9 +166,10 @@ __active_retire(struct i915_active *ref)
 	/* ... except if you wait on it, you must manage your own references! */
 	wake_up_var(ref);
 
+	/* Finally free the discarded timeline tree  */
 	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
 		GEM_BUG_ON(i915_active_fence_isset(&it->base));
-		kmem_cache_free(global.slab_cache, it);
+		kmem_cache_free(slab_cache, it);
 	}
 }
 
@@ -199,7 +212,7 @@ active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 	struct i915_active_fence *active =
 		container_of(cb, typeof(*active), cb);
 
-	return cmpxchg(__active_fence_slot(active), fence, NULL) == fence;
+	return try_cmpxchg(__active_fence_slot(active), &fence, NULL);
 }
 
 static void
@@ -216,12 +229,11 @@ excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
 		active_retire(container_of(cb, struct i915_active, excl.cb));
 }
 
-static struct i915_active_fence *
-active_instance(struct i915_active *ref, struct intel_timeline *tl)
+static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
 {
-	struct active_node *node, *prealloc;
-	struct rb_node **p, *parent;
-	u64 idx = tl->fence_context;
+	struct active_node *it;
+
+	GEM_BUG_ON(idx == 0); /* 0 is the unordered timeline, rsvd for cache */
 
 	/*
 	 * We track the most recently used timeline to skip a rbtree search
@@ -230,14 +242,57 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
 	 * after the previous activity has been retired, or if it matches the
 	 * current timeline.
 	 */
-	node = READ_ONCE(ref->cache);
-	if (node && node->timeline == idx)
-		return &node->base;
+	it = READ_ONCE(ref->cache);
+	if (it) {
+		u64 cached = READ_ONCE(it->timeline);
 
-	/* Preallocate a replacement, just in case */
-	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
-	if (!prealloc)
-		return NULL;
+		/* Once claimed, this slot will only belong to this idx */
+		if (cached == idx)
+			return it;
+
+		/*
+		 * An unclaimed cache [.timeline=0] can only be claimed once.
+		 *
+		 * If the value is already non-zero, some other thread has
+		 * claimed the cache and we know that is does not match our
+		 * idx. If, and only if, the timeline is currently zero is it
+		 * worth competing to claim it atomically for ourselves (for
+		 * only the winner of that race will cmpxchg succeed).
+		 */
+		if (!cached && try_cmpxchg64(&it->timeline, &cached, idx))
+			return it;
+	}
+
+	BUILD_BUG_ON(offsetof(typeof(*it), node));
+
+	/* While active, the tree can only be built; not destroyed */
+	GEM_BUG_ON(i915_active_is_idle(ref));
+
+	it = fetch_node(ref->tree.rb_node);
+	while (it) {
+		if (it->timeline < idx) {
+			it = fetch_node(it->node.rb_right);
+		} else if (it->timeline > idx) {
+			it = fetch_node(it->node.rb_left);
+		} else {
+			WRITE_ONCE(ref->cache, it);
+			break;
+		}
+	}
+
+	/* NB: If the tree rotated beneath us, we may miss our target. */
+	return it;
+}
+
+static struct i915_active_fence *
+active_instance(struct i915_active *ref, u64 idx)
+{
+	struct active_node *node;
+	struct rb_node **p, *parent;
+
+	node = __active_lookup(ref, idx);
+	if (likely(node))
+		return &node->base;
 
 	spin_lock_irq(&ref->tree_lock);
 	GEM_BUG_ON(i915_active_is_idle(ref));
@@ -248,10 +303,8 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
 		parent = *p;
 
 		node = rb_entry(parent, struct active_node, node);
-		if (node->timeline == idx) {
-			kmem_cache_free(global.slab_cache, prealloc);
+		if (node->timeline == idx)
 			goto out;
-		}
 
 		if (node->timeline < idx)
 			p = &parent->rb_right;
@@ -259,7 +312,14 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
 			p = &parent->rb_left;
 	}
 
-	node = prealloc;
+	/*
+	 * XXX: We should preallocate this before i915_active_ref() is ever
+	 *  called, but we cannot call into fs_reclaim() anyway, so use GFP_ATOMIC.
+	 */
+	node = kmem_cache_alloc(slab_cache, GFP_ATOMIC);
+	if (!node)
+		goto out;
+
 	__i915_active_fence_init(&node->base, NULL, node_retire);
 	node->ref = ref;
 	node->timeline = idx;
@@ -268,28 +328,24 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
 	rb_insert_color(&node->node, &ref->tree);
 
 out:
-	ref->cache = node;
+	WRITE_ONCE(ref->cache, node);
 	spin_unlock_irq(&ref->tree_lock);
 
-	BUILD_BUG_ON(offsetof(typeof(*node), base));
 	return &node->base;
 }
 
 void __i915_active_init(struct i915_active *ref,
 			int (*active)(struct i915_active *ref),
 			void (*retire)(struct i915_active *ref),
+			unsigned long flags,
 			struct lock_class_key *mkey,
 			struct lock_class_key *wkey)
 {
-	unsigned long bits;
-
 	debug_active_init(ref);
 
-	ref->flags = 0;
+	ref->flags = flags;
 	ref->active = active;
-	ref->retire = ptr_unpack_bits(retire, &bits, 2);
-	if (bits & I915_ACTIVE_MAY_SLEEP)
-		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
+	ref->retire = retire;
 
 	spin_lock_init(&ref->tree_lock);
 	ref->tree = RB_ROOT;
@@ -353,69 +409,96 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node)
 	return ____active_del_barrier(ref, node, barrier_to_engine(node));
 }
 
-int i915_active_ref(struct i915_active *ref,
-		    struct intel_timeline *tl,
-		    struct dma_fence *fence)
+static bool
+replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
 {
+	if (!is_barrier(active)) /* proto-node used by our idle barrier? */
+		return false;
+
+	/*
+	 * This request is on the kernel_context timeline, and so
+	 * we can use it to substitute for the pending idle-barrer
+	 * request that we want to emit on the kernel_context.
+	 */
+	return __active_del_barrier(ref, node_from_active(active));
+}
+
+int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
+{
+	u64 idx = i915_request_timeline(rq)->fence_context;
+	struct dma_fence *fence = &rq->fence;
 	struct i915_active_fence *active;
 	int err;
 
-	lockdep_assert_held(&tl->mutex);
-
 	/* Prevent reaping in case we malloc/wait while building the tree */
 	err = i915_active_acquire(ref);
 	if (err)
 		return err;
 
-	active = active_instance(ref, tl);
-	if (!active) {
-		err = -ENOMEM;
-		goto out;
-	}
+	do {
+		active = active_instance(ref, idx);
+		if (!active) {
+			err = -ENOMEM;
+			goto out;
+		}
 
-	if (is_barrier(active)) { /* proto-node used by our idle barrier */
-		/*
-		 * This request is on the kernel_context timeline, and so
-		 * we can use it to substitute for the pending idle-barrer
-		 * request that we want to emit on the kernel_context.
-		 */
-		__active_del_barrier(ref, node_from_active(active));
-		RCU_INIT_POINTER(active->fence, NULL);
-		atomic_dec(&ref->count);
-	}
-	if (!__i915_active_fence_set(active, fence))
-		atomic_inc(&ref->count);
+		if (replace_barrier(ref, active)) {
+			RCU_INIT_POINTER(active->fence, NULL);
+			atomic_dec(&ref->count);
+		}
+	} while (unlikely(is_barrier(active)));
+
+	fence = __i915_active_fence_set(active, fence);
+	if (!fence)
+		__i915_active_acquire(ref);
+	else
+		dma_fence_put(fence);
 
 out:
 	i915_active_release(ref);
 	return err;
 }
 
-struct dma_fence *
-i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+static struct dma_fence *
+__i915_active_set_fence(struct i915_active *ref,
+			struct i915_active_fence *active,
+			struct dma_fence *fence)
 {
 	struct dma_fence *prev;
 
-	/* We expect the caller to manage the exclusive timeline ordering */
-	GEM_BUG_ON(i915_active_is_idle(ref));
+	if (replace_barrier(ref, active)) {
+		RCU_INIT_POINTER(active->fence, fence);
+		return NULL;
+	}
 
-	rcu_read_lock();
-	prev = __i915_active_fence_set(&ref->excl, f);
-	if (prev)
-		prev = dma_fence_get_rcu(prev);
-	else
-		atomic_inc(&ref->count);
-	rcu_read_unlock();
+	prev = __i915_active_fence_set(active, fence);
+	if (!prev)
+		__i915_active_acquire(ref);
 
 	return prev;
 }
 
+struct dma_fence *
+i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+{
+	/* We expect the caller to manage the exclusive timeline ordering */
+	return __i915_active_set_fence(ref, &ref->excl, f);
+}
+
 bool i915_active_acquire_if_busy(struct i915_active *ref)
 {
 	debug_active_assert(ref);
 	return atomic_add_unless(&ref->count, 1, 0);
 }
 
+static void __i915_active_activate(struct i915_active *ref)
+{
+	spin_lock_irq(&ref->tree_lock); /* __active_retire() */
+	if (!atomic_fetch_inc(&ref->count))
+		debug_active_activate(ref);
+	spin_unlock_irq(&ref->tree_lock);
+}
+
 int i915_active_acquire(struct i915_active *ref)
 {
 	int err;
@@ -423,19 +506,19 @@ int i915_active_acquire(struct i915_active *ref)
 	if (i915_active_acquire_if_busy(ref))
 		return 0;
 
+	if (!ref->active) {
+		__i915_active_activate(ref);
+		return 0;
+	}
+
 	err = mutex_lock_interruptible(&ref->mutex);
 	if (err)
 		return err;
 
 	if (likely(!i915_active_acquire_if_busy(ref))) {
-		if (ref->active)
-			err = ref->active(ref);
-		if (!err) {
-			spin_lock_irq(&ref->tree_lock); /* __active_retire() */
-			debug_active_activate(ref);
-			atomic_inc(&ref->count);
-			spin_unlock_irq(&ref->tree_lock);
-		}
+		err = ref->active(ref);
+		if (!err)
+			__i915_active_activate(ref);
 	}
 
 	mutex_unlock(&ref->mutex);
@@ -498,24 +581,26 @@ static int flush_lazy_signals(struct i915_active *ref)
 
 int __i915_active_wait(struct i915_active *ref, int state)
 {
-	int err;
-
 	might_sleep();
 
-	if (!i915_active_acquire_if_busy(ref))
-		return 0;
-
 	/* Any fence added after the wait begins will not be auto-signaled */
-	err = flush_lazy_signals(ref);
-	i915_active_release(ref);
-	if (err)
-		return err;
+	if (i915_active_acquire_if_busy(ref)) {
+		int err;
 
-	if (!i915_active_is_idle(ref) &&
-	    ___wait_var_event(ref, i915_active_is_idle(ref),
-			      state, 0, 0, schedule()))
-		return -EINTR;
+		err = flush_lazy_signals(ref);
+		i915_active_release(ref);
+		if (err)
+			return err;
 
+		if (___wait_var_event(ref, i915_active_is_idle(ref),
+				      state, 0, 0, schedule()))
+			return -EINTR;
+	}
+
+	/*
+	 * After the wait is complete, the caller may free the active.
+	 * We have to flush any concurrent retirement before returning.
+	 */
 	flush_work(&ref->work);
 	return 0;
 }
@@ -651,16 +736,16 @@ int i915_sw_fence_await_active(struct i915_sw_fence *fence,
 	return await_active(ref, flags, sw_await_fence, fence, fence);
 }
 
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 void i915_active_fini(struct i915_active *ref)
 {
 	debug_active_fini(ref);
 	GEM_BUG_ON(atomic_read(&ref->count));
 	GEM_BUG_ON(work_pending(&ref->work));
-	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
 	mutex_destroy(&ref->mutex);
+
+	if (ref->cache)
+		kmem_cache_free(slab_cache, ref->cache);
 }
-#endif
 
 static inline bool is_idle_barrier(struct active_node *node, u64 idx)
 {
@@ -674,7 +759,6 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
 	if (RB_EMPTY_ROOT(&ref->tree))
 		return NULL;
 
-	spin_lock_irq(&ref->tree_lock);
 	GEM_BUG_ON(i915_active_is_idle(ref));
 
 	/*
@@ -700,9 +784,9 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
 
 		prev = p;
 		if (node->timeline < idx)
-			p = p->rb_right;
+			p = READ_ONCE(p->rb_right);
 		else
-			p = p->rb_left;
+			p = READ_ONCE(p->rb_left);
 	}
 
 	/*
@@ -739,14 +823,13 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
 			goto match;
 	}
 
-	spin_unlock_irq(&ref->tree_lock);
-
 	return NULL;
 
 match:
+	spin_lock_irq(&ref->tree_lock);
 	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
 	if (p == &ref->cache->node)
-		ref->cache = NULL;
+		WRITE_ONCE(ref->cache, NULL);
 	spin_unlock_irq(&ref->tree_lock);
 
 	return rb_entry(p, struct active_node, node);
@@ -758,7 +841,6 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 	intel_engine_mask_t tmp, mask = engine->mask;
 	struct llist_node *first = NULL, *last = NULL;
 	struct intel_gt *gt = engine->gt;
-	int err;
 
 	GEM_BUG_ON(i915_active_is_idle(ref));
 
@@ -778,13 +860,13 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 		struct llist_node *prev = first;
 		struct active_node *node;
 
+		rcu_read_lock();
 		node = reuse_idle_barrier(ref, idx);
+		rcu_read_unlock();
 		if (!node) {
-			node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
-			if (!node) {
-				err = ENOMEM;
+			node = kmem_cache_alloc(slab_cache, GFP_KERNEL);
+			if (!node)
 				goto unwind;
-			}
 
 			RCU_INIT_POINTER(node->base.fence, NULL);
 			node->base.cb.func = node_retire;
@@ -804,7 +886,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 			 */
 			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
 			node->base.cb.node.prev = (void *)engine;
-			atomic_inc(&ref->count);
+			__i915_active_acquire(ref);
 		}
 		GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));
 
@@ -830,9 +912,9 @@ unwind:
 		atomic_dec(&ref->count);
 		intel_engine_pm_put(barrier_to_engine(node));
 
-		kmem_cache_free(global.slab_cache, node);
+		kmem_cache_free(slab_cache, node);
 	}
-	return err;
+	return -ENOMEM;
 }
 
 void i915_active_acquire_barrier(struct i915_active *ref)
@@ -874,7 +956,7 @@ void i915_active_acquire_barrier(struct i915_active *ref)
 
 		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
 		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
-		intel_engine_pm_put_delay(engine, 1);
+		intel_engine_pm_put_delay(engine, 2);
 	}
 }
 
@@ -917,10 +999,11 @@ void i915_request_add_active_barriers(struct i915_request *rq)
  *
  * Records the new @fence as the last active fence along its timeline in
  * this active tracker, moving the tracking callbacks from the previous
- * fence onto this one. Returns the previous fence (if not already completed),
- * which the caller must ensure is executed before the new fence. To ensure
- * that the order of fences within the timeline of the i915_active_fence is
- * understood, it should be locked by the caller.
+ * fence onto this one. Gets and returns a reference to the previous fence
+ * (if not already completed), which the caller must put after making sure
+ * that it is executed before the new fence. To ensure that the order of
+ * fences within the timeline of the i915_active_fence is understood, it
+ * should be locked by the caller.
  */
 struct dma_fence *
 __i915_active_fence_set(struct i915_active_fence *active,
@@ -929,7 +1012,23 @@ __i915_active_fence_set(struct i915_active_fence *active,
 	struct dma_fence *prev;
 	unsigned long flags;
 
-	if (fence == rcu_access_pointer(active->fence))
+	/*
+	 * In case of fences embedded in i915_requests, their memory is
+	 * SLAB_FAILSAFE_BY_RCU, then it can be reused right after release
+	 * by new requests.  Then, there is a risk of passing back a pointer
+	 * to a new, completely unrelated fence that reuses the same memory
+	 * while tracked under a different active tracker.  Combined with i915
+	 * perf open/close operations that build await dependencies between
+	 * engine kernel context requests and user requests from different
+	 * timelines, this can lead to dependency loops and infinite waits.
+	 *
+	 * As a countermeasure, we try to get a reference to the active->fence
+	 * first, so if we succeed and pass it back to our user then it is not
+	 * released and potentially reused by an unrelated request before the
+	 * user has a chance to set up an await dependency on it.
+	 */
+	prev = i915_active_fence_get(active);
+	if (fence == prev)
 		return fence;
 
 	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
@@ -938,27 +1037,56 @@ __i915_active_fence_set(struct i915_active_fence *active,
 	 * Consider that we have two threads arriving (A and B), with
 	 * C already resident as the active->fence.
 	 *
-	 * A does the xchg first, and so it sees C or NULL depending
-	 * on the timing of the interrupt handler. If it is NULL, the
-	 * previous fence must have been signaled and we know that
-	 * we are first on the timeline. If it is still present,
-	 * we acquire the lock on that fence and serialise with the interrupt
-	 * handler, in the process removing it from any future interrupt
-	 * callback. A will then wait on C before executing (if present).
-	 *
-	 * As B is second, it sees A as the previous fence and so waits for
-	 * it to complete its transition and takes over the occupancy for
-	 * itself -- remembering that it needs to wait on A before executing.
+	 * Both A and B have got a reference to C or NULL, depending on the
+	 * timing of the interrupt handler.  Let's assume that if A has got C
+	 * then it has locked C first (before B).
 	 *
 	 * Note the strong ordering of the timeline also provides consistent
 	 * nesting rules for the fence->lock; the inner lock is always the
 	 * older lock.
 	 */
 	spin_lock_irqsave(fence->lock, flags);
-	prev = xchg(__active_fence_slot(active), fence);
-	if (prev) {
-		GEM_BUG_ON(prev == fence);
+	if (prev)
 		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+
+	/*
+	 * A does the cmpxchg first, and so it sees C or NULL, as before, or
+	 * something else, depending on the timing of other threads and/or
+	 * interrupt handler.  If not the same as before then A unlocks C if
+	 * applicable and retries, starting from an attempt to get a new
+	 * active->fence.  Meanwhile, B follows the same path as A.
+	 * Once A succeeds with cmpxch, B fails again, retires, gets A from
+	 * active->fence, locks it as soon as A completes, and possibly
+	 * succeeds with cmpxchg.
+	 */
+	while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) {
+		if (prev) {
+			spin_unlock(prev->lock);
+			dma_fence_put(prev);
+		}
+		spin_unlock_irqrestore(fence->lock, flags);
+
+		prev = i915_active_fence_get(active);
+		GEM_BUG_ON(prev == fence);
+
+		spin_lock_irqsave(fence->lock, flags);
+		if (prev)
+			spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+	}
+
+	/*
+	 * If prev is NULL then the previous fence must have been signaled
+	 * and we know that we are first on the timeline.  If it is still
+	 * present then, having the lock on that fence already acquired, we
+	 * serialise with the interrupt handler, in the process of removing it
+	 * from any future interrupt callback.  A will then wait on C before
+	 * executing (if present).
+	 *
+	 * As B is second, it sees A as the previous fence and so waits for
+	 * it to complete its transition and takes over the occupancy for
+	 * itself -- remembering that it needs to wait on A before executing.
+	 */
+	if (prev) {
 		__list_del_entry(&active->cb.node);
 		spin_unlock(prev->lock); /* serialise with prev->cb_list */
 	}
@@ -975,11 +1103,7 @@ int i915_active_fence_set(struct i915_active_fence *active,
 	int err = 0;
 
 	/* Must maintain timeline ordering wrt previous active requests */
-	rcu_read_lock();
 	fence = __i915_active_fence_set(active, &rq->fence);
-	if (fence) /* but the previous fence may not belong to that timeline! */
-		fence = dma_fence_get_rcu(fence);
-	rcu_read_unlock();
 	if (fence) {
 		err = i915_request_await_dma_fence(rq, fence);
 		dma_fence_put(fence);
@@ -1041,7 +1165,7 @@ struct i915_active *i915_active_create(void)
 		return NULL;
 
 	kref_init(&aa->ref);
-	i915_active_init(&aa->base, auto_active, auto_retire);
+	i915_active_init(&aa->base, auto_active, auto_retire, 0);
 
 	return &aa->base;
 }
@@ -1050,27 +1174,16 @@ struct i915_active *i915_active_create(void)
 #include "selftests/i915_active.c"
 #endif
 
-static void i915_global_active_shrink(void)
+void i915_active_module_exit(void)
 {
-	kmem_cache_shrink(global.slab_cache);
+	kmem_cache_destroy(slab_cache);
 }
 
-static void i915_global_active_exit(void)
-{
-	kmem_cache_destroy(global.slab_cache);
-}
-
-static struct i915_global_active global = { {
-	.shrink = i915_global_active_shrink,
-	.exit = i915_global_active_exit,
-} };
-
-int __init i915_global_active_init(void)
+int __init i915_active_module_init(void)
 {
-	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
-	if (!global.slab_cache)
+	slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
+	if (!slab_cache)
 		return -ENOMEM;
 
-	i915_global_register(&global.base);
 	return 0;
 }