author	Chris Wilson <chris@chris-wilson.co.uk>	2019-10-04 14:40:00 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	2019-10-04 15:39:12 +0100
commit	b1e3177bd1d8f41e2a9cc847e56a96cdc0eefe62 (patch)
tree	9af22565533f12868a015e18e51406d54773e08a /drivers/gpu/drm/i915/i915_gem.c
parent	274cbf20fd108fa26d0497282b102e00371210fd (diff)
drm/i915: Coordinate i915_active with its own mutex
Forgo the struct_mutex serialisation for i915_active, and interpose its own mutex handling for active/retire.

This is a multi-layered sleight-of-hand. First, we had to ensure that no active/retire callbacks accidentally inverted the mutex ordering rules, nor assumed that they were themselves serialised by struct_mutex. More challenging, though, is the rule over updating elements of the active rbtree. Instead of the whole i915_active being serialised by struct_mutex, allocations/rotations of the tree are serialised by the i915_active.mutex, and individual nodes are serialised by the caller using the i915_timeline.mutex (we need to use nested spinlocks to interact with the dma_fence callback lists).

The pain point here is that instead of a single mutex around execbuf, we now have to take a mutex for each active tracker (one for each vma, context, etc.) and a couple of spinlocks for each fence update. The improvement in fine-grained locking, allowing for multiple concurrent clients (eventually!), should be worth it in typical loads.

v2: Add some comments that barely elucidate anything :(

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-6-chris@chris-wilson.co.uk
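As an aside, a minimal userspace sketch of the locking split the message describes, using pthreads in place of the kernel primitives; struct active, active_add() and node_set_fence() are hypothetical stand-ins, not the driver's API, and a list stands in for the rbtree. The tracker's own mutex guards only the shape of its node collection, while each node carries a finer lock the caller takes around individual updates:

#include <pthread.h>
#include <stdlib.h>

/* Hypothetical node in an active tracker's tree (a list here for
 * brevity): each node has its own lock for updates to that node. */
struct node {
	pthread_spinlock_t lock;	/* per-node: guards 'fence' */
	const void *fence;
	struct node *next;
};

/* The tracker owns a mutex guarding allocations/rotations of the
 * collection as a whole, not every individual update. */
struct active {
	pthread_mutex_t mutex;
	struct node *head;
};

/* Changing the collection takes the tracker's own mutex... */
static struct node *active_add(struct active *ref)
{
	struct node *it = calloc(1, sizeof(*it));

	pthread_spin_init(&it->lock, PTHREAD_PROCESS_PRIVATE);
	pthread_mutex_lock(&ref->mutex);
	it->next = ref->head;
	ref->head = it;
	pthread_mutex_unlock(&ref->mutex);
	return it;
}

/* ...while updating one node contends only on that node's lock, so
 * unrelated trackers and nodes never serialise against each other. */
static void node_set_fence(struct node *it, const void *fence)
{
	pthread_spin_lock(&it->lock);
	it->fence = fence;
	pthread_spin_unlock(&it->lock);
}

int main(void)
{
	struct active ref = { .mutex = PTHREAD_MUTEX_INITIALIZER };

	node_set_fence(active_add(&ref), "fence");
	return 0;
}

The trade-off is exactly the one the message calls out: more lock operations per update, but no single global mutex on which every client contends.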
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--	drivers/gpu/drm/i915/i915_gem.c | 42
1 file changed, 26 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fc5c618f6c19..b0aa0a7c680f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -892,28 +892,38 @@ wait_for_timelines(struct intel_gt *gt, unsigned int wait, long timeout)
 
 	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct i915_request *rq;
+		struct dma_fence *fence;
 
-		rq = i915_active_request_get_unlocked(&tl->last_request);
-		if (!rq)
+		fence = i915_active_fence_get(&tl->last_request);
+		if (!fence)
 			continue;
 
 		spin_unlock_irqrestore(&timelines->lock, flags);
 
-		/*
-		 * "Race-to-idle".
-		 *
-		 * Switching to the kernel context is often used a synchronous
-		 * step prior to idling, e.g. in suspend for flushing all
-		 * current operations to memory before sleeping. These we
-		 * want to complete as quickly as possible to avoid prolonged
-		 * stalls, so allow the gpu to boost to maximum clocks.
-		 */
-		if (wait & I915_WAIT_FOR_IDLE_BOOST)
-			gen6_rps_boost(rq);
+		if (!dma_fence_is_i915(fence)) {
+			timeout = dma_fence_wait_timeout(fence,
+							 flags & I915_WAIT_INTERRUPTIBLE,
+							 timeout);
+		} else {
+			struct i915_request *rq = to_request(fence);
+
+			/*
+			 * "Race-to-idle".
+			 *
+			 * Switching to the kernel context is often used as
+			 * a synchronous step prior to idling, e.g. in suspend
+			 * for flushing all current operations to memory before
+			 * sleeping. These we want to complete as quickly as
+			 * possible to avoid prolonged stalls, so allow the gpu
+			 * to boost to maximum clocks.
+			 */
+			if (flags & I915_WAIT_FOR_IDLE_BOOST)
+				gen6_rps_boost(rq);
+
+			timeout = i915_request_wait(rq, flags, timeout);
+		}
 
-		timeout = i915_request_wait(rq, wait, timeout);
-		i915_request_put(rq);
+		dma_fence_put(fence);
 
 		if (timeout < 0)
 			return timeout;
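The hunk above follows a pattern worth calling out: take a reference to the fence while holding timelines->lock, drop the lock, sleep on the fence, and only then drop the reference. A rough userspace sketch of that shape, with hypothetical names and pthreads plus C11 atomics standing in for the kernel's spinlock and kref:

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

/* Hypothetical refcounted fence, standing in for struct dma_fence. */
struct fence {
	atomic_int refcount;
	atomic_bool signaled;
};

static struct fence *fence_get(struct fence *f)
{
	if (f)
		atomic_fetch_add(&f->refcount, 1);
	return f;
}

static void fence_put(struct fence *f)
{
	/* Free on the last reference, like dma_fence_put(). */
	if (f && atomic_fetch_sub(&f->refcount, 1) == 1)
		free(f);
}

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct fence *last_fence;	/* protected by list_lock */

static void wait_for_last(void)
{
	struct fence *f;

	/* Take a reference under the lock so the fence cannot vanish
	 * once the lock is dropped... */
	pthread_mutex_lock(&list_lock);
	f = fence_get(last_fence);
	pthread_mutex_unlock(&list_lock);
	if (!f)
		return;

	/* ...then sleep outside the lock, as the hunk does by dropping
	 * timelines->lock before waiting on the fence. */
	while (!atomic_load(&f->signaled))
		sched_yield();

	fence_put(f);
}

int main(void)
{
	last_fence = malloc(sizeof(*last_fence));
	atomic_init(&last_fence->refcount, 1);	/* the list's reference */
	atomic_init(&last_fence->signaled, true);

	wait_for_last();	/* returns at once: already signaled */
	fence_put(last_fence);	/* drop the list's reference: frees it */
	last_fence = NULL;
	return 0;
}

Holding only a reference, rather than the lock, across the wait is what lets other timelines advance while one fence is being waited on.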