summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_active.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_active.c')
-rw-r--r--drivers/gpu/drm/i915/i915_active.c152
1 files changed, 88 insertions, 64 deletions
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 7412abf166a8..6b0c1162505a 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -92,8 +92,7 @@ static void debug_active_init(struct i915_active *ref)
static void debug_active_activate(struct i915_active *ref)
{
lockdep_assert_held(&ref->tree_lock);
- if (!atomic_read(&ref->count)) /* before the first inc */
- debug_object_activate(ref, &active_debug_desc);
+ debug_object_activate(ref, &active_debug_desc);
}
static void debug_active_deactivate(struct i915_active *ref)
@@ -213,7 +212,7 @@ active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
struct i915_active_fence *active =
container_of(cb, typeof(*active), cb);
- return cmpxchg(__active_fence_slot(active), fence, NULL) == fence;
+ return try_cmpxchg(__active_fence_slot(active), &fence, NULL);
}
static void
@@ -258,10 +257,9 @@ static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
* claimed the cache and we know that is does not match our
* idx. If, and only if, the timeline is currently zero is it
* worth competing to claim it atomically for ourselves (for
- * only the winner of that race will cmpxchg return the old
- * value of 0).
+ * only the winner of that race will cmpxchg succeed).
*/
- if (!cached && !cmpxchg64(&it->timeline, 0, idx))
+ if (!cached && try_cmpxchg64(&it->timeline, &cached, idx))
return it;
}
@@ -422,12 +420,12 @@ replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
* we can use it to substitute for the pending idle-barrer
* request that we want to emit on the kernel_context.
*/
- __active_del_barrier(ref, node_from_active(active));
- return true;
+ return __active_del_barrier(ref, node_from_active(active));
}
int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
{
+ u64 idx = i915_request_timeline(rq)->fence_context;
struct dma_fence *fence = &rq->fence;
struct i915_active_fence *active;
int err;
@@ -437,18 +435,24 @@ int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
if (err)
return err;
- active = active_instance(ref, i915_request_timeline(rq)->fence_context);
- if (!active) {
- err = -ENOMEM;
- goto out;
- }
+ do {
+ active = active_instance(ref, idx);
+ if (!active) {
+ err = -ENOMEM;
+ goto out;
+ }
- if (replace_barrier(ref, active)) {
- RCU_INIT_POINTER(active->fence, NULL);
- atomic_dec(&ref->count);
- }
- if (!__i915_active_fence_set(active, fence))
+ if (replace_barrier(ref, active)) {
+ RCU_INIT_POINTER(active->fence, NULL);
+ atomic_dec(&ref->count);
+ }
+ } while (unlikely(is_barrier(active)));
+
+ fence = __i915_active_fence_set(active, fence);
+ if (!fence)
__i915_active_acquire(ref);
+ else
+ dma_fence_put(fence);
out:
i915_active_release(ref);
@@ -467,13 +471,9 @@ __i915_active_set_fence(struct i915_active *ref,
return NULL;
}
- rcu_read_lock();
prev = __i915_active_fence_set(active, fence);
- if (prev)
- prev = dma_fence_get_rcu(prev);
- else
+ if (!prev)
__i915_active_acquire(ref);
- rcu_read_unlock();
return prev;
}
@@ -526,24 +526,6 @@ int i915_active_acquire(struct i915_active *ref)
return err;
}
-int i915_active_acquire_for_context(struct i915_active *ref, u64 idx)
-{
- struct i915_active_fence *active;
- int err;
-
- err = i915_active_acquire(ref);
- if (err)
- return err;
-
- active = active_instance(ref, idx);
- if (!active) {
- i915_active_release(ref);
- return -ENOMEM;
- }
-
- return 0; /* return with active ref */
-}
-
void i915_active_release(struct i915_active *ref)
{
debug_active_assert(ref);
@@ -1017,10 +999,11 @@ void i915_request_add_active_barriers(struct i915_request *rq)
*
* Records the new @fence as the last active fence along its timeline in
* this active tracker, moving the tracking callbacks from the previous
- * fence onto this one. Returns the previous fence (if not already completed),
- * which the caller must ensure is executed before the new fence. To ensure
- * that the order of fences within the timeline of the i915_active_fence is
- * understood, it should be locked by the caller.
+ * fence onto this one. Gets and returns a reference to the previous fence
+ * (if not already completed), which the caller must put after making sure
+ * that it is executed before the new fence. To ensure that the order of
+ * fences within the timeline of the i915_active_fence is understood, it
+ * should be locked by the caller.
*/
struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
@@ -1029,7 +1012,23 @@ __i915_active_fence_set(struct i915_active_fence *active,
struct dma_fence *prev;
unsigned long flags;
- if (fence == rcu_access_pointer(active->fence))
+ /*
+ * In case of fences embedded in i915_requests, their memory is
+ * SLAB_FAILSAFE_BY_RCU, then it can be reused right after release
+ * by new requests. Then, there is a risk of passing back a pointer
+ * to a new, completely unrelated fence that reuses the same memory
+ * while tracked under a different active tracker. Combined with i915
+ * perf open/close operations that build await dependencies between
+ * engine kernel context requests and user requests from different
+ * timelines, this can lead to dependency loops and infinite waits.
+ *
+ * As a countermeasure, we try to get a reference to the active->fence
+ * first, so if we succeed and pass it back to our user then it is not
+ * released and potentially reused by an unrelated request before the
+ * user has a chance to set up an await dependency on it.
+ */
+ prev = i915_active_fence_get(active);
+ if (fence == prev)
return fence;
GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
@@ -1038,27 +1037,56 @@ __i915_active_fence_set(struct i915_active_fence *active,
* Consider that we have two threads arriving (A and B), with
* C already resident as the active->fence.
*
- * A does the xchg first, and so it sees C or NULL depending
- * on the timing of the interrupt handler. If it is NULL, the
- * previous fence must have been signaled and we know that
- * we are first on the timeline. If it is still present,
- * we acquire the lock on that fence and serialise with the interrupt
- * handler, in the process removing it from any future interrupt
- * callback. A will then wait on C before executing (if present).
- *
- * As B is second, it sees A as the previous fence and so waits for
- * it to complete its transition and takes over the occupancy for
- * itself -- remembering that it needs to wait on A before executing.
+ * Both A and B have got a reference to C or NULL, depending on the
+ * timing of the interrupt handler. Let's assume that if A has got C
+ * then it has locked C first (before B).
*
* Note the strong ordering of the timeline also provides consistent
* nesting rules for the fence->lock; the inner lock is always the
* older lock.
*/
spin_lock_irqsave(fence->lock, flags);
- prev = xchg(__active_fence_slot(active), fence);
- if (prev) {
- GEM_BUG_ON(prev == fence);
+ if (prev)
spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+
+ /*
+ * A does the cmpxchg first, and so it sees C or NULL, as before, or
+ * something else, depending on the timing of other threads and/or
+ * interrupt handler. If not the same as before then A unlocks C if
+ * applicable and retries, starting from an attempt to get a new
+ * active->fence. Meanwhile, B follows the same path as A.
+ * Once A succeeds with cmpxch, B fails again, retires, gets A from
+ * active->fence, locks it as soon as A completes, and possibly
+ * succeeds with cmpxchg.
+ */
+ while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) {
+ if (prev) {
+ spin_unlock(prev->lock);
+ dma_fence_put(prev);
+ }
+ spin_unlock_irqrestore(fence->lock, flags);
+
+ prev = i915_active_fence_get(active);
+ GEM_BUG_ON(prev == fence);
+
+ spin_lock_irqsave(fence->lock, flags);
+ if (prev)
+ spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+ }
+
+ /*
+ * If prev is NULL then the previous fence must have been signaled
+ * and we know that we are first on the timeline. If it is still
+ * present then, having the lock on that fence already acquired, we
+ * serialise with the interrupt handler, in the process of removing it
+ * from any future interrupt callback. A will then wait on C before
+ * executing (if present).
+ *
+ * As B is second, it sees A as the previous fence and so waits for
+ * it to complete its transition and takes over the occupancy for
+ * itself -- remembering that it needs to wait on A before executing.
+ */
+ if (prev) {
__list_del_entry(&active->cb.node);
spin_unlock(prev->lock); /* serialise with prev->cb_list */
}
@@ -1075,11 +1103,7 @@ int i915_active_fence_set(struct i915_active_fence *active,
int err = 0;
/* Must maintain timeline ordering wrt previous active requests */
- rcu_read_lock();
fence = __i915_active_fence_set(active, &rq->fence);
- if (fence) /* but the previous fence may not belong to that timeline! */
- fence = dma_fence_get_rcu(fence);
- rcu_read_unlock();
if (fence) {
err = i915_request_await_dma_fence(rq, fence);
dma_fence_put(fence);