From 10be98a77c558f8cfb823cd2777171fbb35040f6 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Tue, 28 May 2019 10:29:49 +0100
Subject: drm/i915: Move more GEM objects under gem/

Continuing the theme of separating out the GEM clutter.

Signed-off-by: Chris Wilson
Reviewed-by: Mika Kuoppala
Link: https://patchwork.freedesktop.org/patch/msgid/20190528092956.14910-8-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 18b34b0bf872..da1e6984a8cc 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -29,6 +29,9 @@
 #include
 #include
 
+#include "gem/i915_gem_context.h"
+#include "gt/intel_context.h"
+
 #include "i915_active.h"
 #include "i915_drv.h"
 #include "i915_globals.h"
-- cgit

From f4d57d838c48ebb123f9032cca0e5697c457868f Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Mon, 10 Jun 2019 11:36:10 +0100
Subject: drm/i915: Allow interrupts when taking the timeline->mutex

Before we commit ourselves to writing commands into the ringbuffer and
submitting the request, allow signals to interrupt acquisition of the
timeline mutex. We allow ourselves to be interrupted at any time later
if we need to block for space in the ring, anyway.

Signed-off-by: Chris Wilson
Reviewed-by: Tvrtko Ursulin
Link: https://patchwork.freedesktop.org/patch/msgid/20190610103610.19883-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index da1e6984a8cc..e9b59eea4f10 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -784,8 +784,11 @@ struct i915_request *
 i915_request_create(struct intel_context *ce)
 {
 	struct i915_request *rq;
+	int err;
 
-	intel_context_timeline_lock(ce);
+	err = intel_context_timeline_lock(ce);
+	if (err)
+		return ERR_PTR(err);
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
 	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-- cgit

From 33df8a7697a08ec96811a1e9bbce45c7cdbbc316 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Wed, 12 Jun 2019 09:52:46 +0100
Subject: drm/i915: Prevent lock-cycles between GPU waits and GPU resets

We cannot allow ourselves to wait on the GPU while holding any lock as
we may need to reset the GPU. While there is no explicit lock between
the two operations, lockdep cannot detect the dependency. So let's tell
lockdep about the wait/reset dependency with an explicit lockmap.

Signed-off-by: Chris Wilson
Cc: Mika Kuoppala
Cc: Tvrtko Ursulin
Reviewed-by: Mika Kuoppala
Link: https://patchwork.freedesktop.org/patch/msgid/20190612085246.16374-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e9b59eea4f10..1cbc3ef4fc27 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1444,6 +1444,7 @@ long i915_request_wait(struct i915_request *rq,
 		return -ETIME;
 
 	trace_i915_request_wait_begin(rq, flags);
+	lock_map_acquire(&rq->i915->gt.reset_lockmap);
 
 	/*
 	 * Optimistic spin before touching IRQs.
@@ -1517,6 +1518,7 @@ long i915_request_wait(struct i915_request *rq,
 	dma_fence_remove_callback(&rq->fence, &wait.cb);
 
 out:
+	lock_map_release(&rq->i915->gt.reset_lockmap);
 	trace_i915_request_wait_end(rq);
 	return timeout;
 }
-- cgit

From 6e4e9708614aaf743fd9eb29cc2738ab7b501c3f Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 14 Jun 2019 12:10:53 +0100
Subject: drm/i915: Execute signal callbacks from no-op i915_request_wait

If we enter i915_request_wait() with an already completed request but an
unsignaled dma-fence, signal the fence before returning. This allows us
to execute any of the signal callbacks at the earliest opportunity.

v2: Also signal after busyspin success

Signed-off-by: Chris Wilson
Reviewed-by: Tvrtko Ursulin
Link: https://patchwork.freedesktop.org/patch/msgid/20190614111053.25615-2-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 1cbc3ef4fc27..5ee1ef92a9d9 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1437,7 +1437,7 @@ long i915_request_wait(struct i915_request *rq,
 	might_sleep();
 	GEM_BUG_ON(timeout < 0);
 
-	if (i915_request_completed(rq))
+	if (dma_fence_is_signaled(&rq->fence))
 		return timeout;
 
 	if (!timeout)
@@ -1470,8 +1470,10 @@ long i915_request_wait(struct i915_request *rq,
 	 * duration, which we currently lack.
 	 */
 	if (CONFIG_DRM_I915_SPIN_REQUEST &&
-	    __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST))
+	    __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
+		dma_fence_signal(&rq->fence);
 		goto out;
+	}
 
 	/*
 	 * This client is about to stall waiting for the GPU. In many cases
-- cgit

From 84383d2e8d7c50dc344a1acf7b5b4d32cd1569fc Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 14 Jun 2019 08:09:46 +0100
Subject: drm/i915: Refine i915_reset.lock_map

We already use a mutex to serialise i915_reset() and wedging, so all we
need is to link that into i915_request_wait() and we have our lock-cycle
detection.

v2.5: Take error mutex for selftests

Signed-off-by: Chris Wilson
Cc: Mika Kuoppala
Reviewed-by: Mika Kuoppala
Link: https://patchwork.freedesktop.org/patch/msgid/20190614071023.17929-3-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 5ee1ef92a9d9..da76e4d1c7f1 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1444,7 +1444,15 @@ long i915_request_wait(struct i915_request *rq,
 		return -ETIME;
 
 	trace_i915_request_wait_begin(rq, flags);
-	lock_map_acquire(&rq->i915->gt.reset_lockmap);
+
+	/*
+	 * We must never wait on the GPU while holding a lock as we
+	 * may need to perform a GPU reset. So while we don't need to
+	 * serialise wait/reset with an explicit lock, we do want
+	 * lockdep to detect potential dependency cycles.
+	 */
+	mutex_acquire(&rq->i915->gpu_error.wedge_mutex.dep_map,
+		      0, 0, _THIS_IP_);
 
 	/*
 	 * Optimistic spin before touching IRQs.
@@ -1520,7 +1528,7 @@ long i915_request_wait(struct i915_request *rq,
 	dma_fence_remove_callback(&rq->fence, &wait.cb);
 
 out:
-	lock_map_release(&rq->i915->gt.reset_lockmap);
+	mutex_release(&rq->i915->gpu_error.wedge_mutex.dep_map, 0, _THIS_IP_);
 	trace_i915_request_wait_end(rq);
 	return timeout;
 }
-- cgit

From ce476c80b8bfa8a8e4c9182cdb686c5aea2431a6 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 14 Jun 2019 17:46:04 +0100
Subject: drm/i915: Keep contexts pinned until after the next kernel context switch

We need to keep the context image pinned in memory until after the GPU
has finished writing into it. Since it continues to write as we signal
the final breadcrumb, we need to keep it pinned until the request after
it is complete. Currently we know the order in which requests execute on
each engine, and so to remove that presumption we need to identify a
request/context-switch we know must occur after our completion. Any
request queued after the signal must imply a context switch; for
simplicity we use a fresh request from the kernel context.

The sequence of operations for keeping the context pinned until saved is:

 - On context activation, we preallocate a node for each physical engine
   the context may operate on. This is to avoid allocations during
   unpinning, which may be from inside FS_RECLAIM context (aka the
   shrinker)

 - On context deactivation on retirement of the last active request
   (which is before we know the context has been saved), we add the
   preallocated node onto a barrier list on each engine

 - On engine idling, we emit a switch to kernel context. When this switch
   completes, we know that all previous contexts must have been saved,
   and so on retiring this request we can finally unpin all the contexts
   that were marked as deactivated prior to the switch.

We can enhance this in future by flushing all the idle contexts on a
regular heartbeat pulse of a switch to kernel context, which will also
be used to check for hung engines.

v2: intel_context_active_acquire/_release

Signed-off-by: Chris Wilson
Cc: Mika Kuoppala
Reviewed-by: Mika Kuoppala
Link: https://patchwork.freedesktop.org/patch/msgid/20190614164606.15633-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index da76e4d1c7f1..38d112d8aba7 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -213,18 +213,6 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
 	spin_unlock(&rq->lock);
 
 	local_irq_enable();
-
-	/*
-	 * The backing object for the context is done after switching to the
-	 * *next* context. Therefore we cannot retire the previous context until
-	 * the next context has already started running. However, since we
-	 * cannot take the required locks at i915_request_submit() we
-	 * defer the unpinning of the active context to now, retirement of
-	 * the subsequent request.
-	 */
-	if (engine->last_retired_context)
-		intel_context_unpin(engine->last_retired_context);
-	engine->last_retired_context = rq->hw_context;
 }
 
 static void __retire_engine_upto(struct intel_engine_cs *engine,
@@ -759,9 +747,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 
 	rq->infix = rq->ring->emit; /* end of header; start of user payload */
 
-	/* Keep a second pin for the dual retirement along engine and ring */
-	__intel_context_pin(ce);
-
 	intel_context_mark_active(ce);
 	return rq;
-- cgit

From 9db0c5caa7471b463627e1a87c58de874be1c55f Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 14 Jun 2019 17:46:05 +0100
Subject: drm/i915: Stop retiring along engine

We no longer track the execution order along the engine and so no
longer need to enforce ordering of retire along the engine.

Signed-off-by: Chris Wilson
Reviewed-by: Mika Kuoppala
Link: https://patchwork.freedesktop.org/patch/msgid/20190614164606.15633-2-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 131 +++++++++++++++---------------------
 1 file changed, 53 insertions(+), 78 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 38d112d8aba7..c99136f78af9 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -183,72 +183,23 @@ static void free_capture_list(struct i915_request *request)
 	}
 }
 
-static void __retire_engine_request(struct intel_engine_cs *engine,
-				    struct i915_request *rq)
-{
-	GEM_TRACE("%s(%s) fence %llx:%lld, current %d\n",
-		  __func__, engine->name,
-		  rq->fence.context, rq->fence.seqno,
-		  hwsp_seqno(rq));
-
-	GEM_BUG_ON(!i915_request_completed(rq));
-
-	local_irq_disable();
-
-	spin_lock(&engine->timeline.lock);
-	GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests));
-	list_del_init(&rq->link);
-	spin_unlock(&engine->timeline.lock);
-
-	spin_lock(&rq->lock);
-	i915_request_mark_complete(rq);
-	if (!i915_request_signaled(rq))
-		dma_fence_signal_locked(&rq->fence);
-	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
-		i915_request_cancel_breadcrumb(rq);
-	if (rq->waitboost) {
-		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
-		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
-	}
-	spin_unlock(&rq->lock);
-
-	local_irq_enable();
-}
-
-static void __retire_engine_upto(struct intel_engine_cs *engine,
-				 struct i915_request *rq)
-{
-	struct i915_request *tmp;
-
-	if (list_empty(&rq->link))
-		return;
-
-	do {
-		tmp = list_first_entry(&engine->timeline.requests,
-				       typeof(*tmp), link);
-
-		GEM_BUG_ON(tmp->engine != engine);
-		__retire_engine_request(engine, tmp);
-	} while (tmp != rq);
-}
-
-static void i915_request_retire(struct i915_request *request)
+static bool i915_request_retire(struct i915_request *rq)
 {
 	struct i915_active_request *active, *next;
 
-	GEM_TRACE("%s fence %llx:%lld, current %d\n",
-		  request->engine->name,
-		  request->fence.context, request->fence.seqno,
-		  hwsp_seqno(request));
+	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	if (!i915_request_completed(rq))
+		return false;
 
-	lockdep_assert_held(&request->i915->drm.struct_mutex);
-	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
-	GEM_BUG_ON(!i915_request_completed(request));
+	GEM_TRACE("%s fence %llx:%lld, current %d\n",
+		  rq->engine->name,
+		  rq->fence.context, rq->fence.seqno,
+		  hwsp_seqno(rq));
 
-	trace_i915_request_retire(request);
+	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+	trace_i915_request_retire(rq);
 
-	advance_ring(request);
-	free_capture_list(request);
+	advance_ring(rq);
 
 	/*
 	 * Walk through the active list, calling retire on each. This allows
@@ -260,7 +211,7 @@ static void i915_request_retire(struct i915_request *request)
 	 * pass along the auxiliary information (to avoid dereferencing
 	 * the node after the callback).
 	 */
-	list_for_each_entry_safe(active, next, &request->active_list, link) {
+	list_for_each_entry_safe(active, next, &rq->active_list, link) {
 		/*
 		 * In microbenchmarks or focusing upon time inside the kernel,
 		 * we may spend an inordinate amount of time simply handling
@@ -276,18 +227,39 @@ static void i915_request_retire(struct i915_request *request)
 		INIT_LIST_HEAD(&active->link);
 		RCU_INIT_POINTER(active->request, NULL);
 
-		active->retire(active, request);
+		active->retire(active, rq);
+	}
+
+	local_irq_disable();
+
+	spin_lock(&rq->engine->timeline.lock);
+	list_del(&rq->link);
+	spin_unlock(&rq->engine->timeline.lock);
+
+	spin_lock(&rq->lock);
+	i915_request_mark_complete(rq);
+	if (!i915_request_signaled(rq))
+		dma_fence_signal_locked(&rq->fence);
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
+		i915_request_cancel_breadcrumb(rq);
+	if (rq->waitboost) {
+		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
+		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
 	}
+	spin_unlock(&rq->lock);
+
+	local_irq_enable();
 
-	i915_request_remove_from_client(request);
+	intel_context_exit(rq->hw_context);
+	intel_context_unpin(rq->hw_context);
 
-	__retire_engine_upto(request->engine, request);
+	i915_request_remove_from_client(rq);
 
-	intel_context_exit(request->hw_context);
-	intel_context_unpin(request->hw_context);
+	free_capture_list(rq);
+	i915_sched_node_fini(&rq->sched);
+	i915_request_put(rq);
 
-	i915_sched_node_fini(&request->sched);
-	i915_request_put(request);
+	return true;
 }
 
 void i915_request_retire_upto(struct i915_request *rq)
@@ -309,9 +281,7 @@ void i915_request_retire_upto(struct i915_request *rq)
 	do {
 		tmp = list_first_entry(&ring->request_list,
 				       typeof(*tmp), ring_link);
-
-		i915_request_retire(tmp);
-	} while (tmp != rq);
+	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
 static void irq_execute_cb(struct irq_work *wrk)
@@ -600,12 +570,9 @@ static void ring_retire_requests(struct intel_ring *ring)
 {
 	struct i915_request *rq, *rn;
 
-	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link) {
-		if (!i915_request_completed(rq))
+	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
+		if (!i915_request_retire(rq))
 			break;
-
-		i915_request_retire(rq);
-	}
 }
 
 static noinline struct i915_request *
@@ -620,6 +587,15 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp)
 	if (!gfpflags_allow_blocking(gfp))
 		goto out;
 
+	/* Move our oldest request to the slab-cache (if not in use!) */
+	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
+	i915_request_retire(rq);
+
+	rq = kmem_cache_alloc(global.slab_requests,
+			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+	if (rq)
+		return rq;
+
 	/* Ratelimit ourselves to prevent oom from malicious clients */
 	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
 	cond_synchronize_rcu(rq->rcustate);
@@ -777,8 +753,7 @@ i915_request_create(struct intel_context *ce)
 	/* Move our oldest request to the slab-cache (if not in use!) */
 	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
-	    i915_request_completed(rq))
+	if (!list_is_last(&rq->ring_link, &ce->ring->request_list))
 		i915_request_retire(rq);
 
 	intel_context_enter(ce);
-- cgit

From 422d7df4f090bbbc4d49e66d533a259ba63ec70d Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 14 Jun 2019 17:46:06 +0100
Subject: drm/i915: Replace engine->timeline with a plain list

To continue the onslaught of removing the assumption of a global
execution ordering, another casualty is the engine->timeline. Without an
actual timeline to track, it is overkill and we can replace it with a
much less grand plain list. We still need a list of requests in flight,
for the simple purpose of finding in-flight requests (for retiring,
resetting, preemption etc).

Signed-off-by: Chris Wilson
Reviewed-by: Mika Kuoppala
Link: https://patchwork.freedesktop.org/patch/msgid/20190614164606.15633-3-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 43 +++++++++++--------------------------
 1 file changed, 13 insertions(+), 30 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c99136f78af9..9819483d1b5d 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -232,9 +232,9 @@ static bool i915_request_retire(struct i915_request *rq)
 
 	local_irq_disable();
 
-	spin_lock(&rq->engine->timeline.lock);
-	list_del(&rq->link);
-	spin_unlock(&rq->engine->timeline.lock);
+	spin_lock(&rq->engine->active.lock);
+	list_del(&rq->sched.link);
+	spin_unlock(&rq->engine->active.lock);
 
 	spin_lock(&rq->lock);
 	i915_request_mark_complete(rq);
@@ -254,6 +254,7 @@ static bool i915_request_retire(struct i915_request *rq)
 	intel_context_unpin(rq->hw_context);
 
 	i915_request_remove_from_client(rq);
+	list_del(&rq->link);
 
 	free_capture_list(rq);
 	i915_sched_node_fini(&rq->sched);
@@ -373,28 +374,17 @@ __i915_request_await_execution(struct i915_request *rq,
 	return 0;
 }
 
-static void move_to_timeline(struct i915_request *request,
-			     struct i915_timeline *timeline)
-{
-	GEM_BUG_ON(request->timeline == &request->engine->timeline);
-	lockdep_assert_held(&request->engine->timeline.lock);
-
-	spin_lock(&request->timeline->lock);
-	list_move_tail(&request->link, &timeline->requests);
-	spin_unlock(&request->timeline->lock);
-}
-
 void __i915_request_submit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
 
-	GEM_TRACE("%s fence %llx:%lld -> current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, current %d\n",
 		  engine->name,
 		  request->fence.context, request->fence.seqno,
 		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
@@ -422,6 +412,8 @@ void __i915_request_submit(struct i915_request *request)
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
+	list_move_tail(&request->sched.link, &engine->active.requests);
+
 	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
 	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
 
@@ -437,9 +429,6 @@ void __i915_request_submit(struct i915_request *request)
 	engine->emit_fini_breadcrumb(request,
 				     request->ring->vaddr + request->postfix);
 
-	/* Transfer from per-context onto the global per-engine timeline */
-	move_to_timeline(request, &engine->timeline);
-
 	engine->serial++;
 
 	trace_i915_request_execute(request);
@@ -451,11 +440,11 @@ void i915_request_submit(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__i915_request_submit(request);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 void __i915_request_unsubmit(struct i915_request *request)
@@ -468,7 +457,7 @@ void __i915_request_unsubmit(struct i915_request *request)
 		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	/*
 	 * Only unwind in reverse order, required so that the per-context list
@@ -486,9 +475,6 @@ void __i915_request_unsubmit(struct i915_request *request)
 
 	spin_unlock(&request->lock);
 
-	/* Transfer back from the global per-engine timeline to per-context */
-	move_to_timeline(request, request->timeline);
-
 	/* We've already spun, don't charge on resubmitting. */
 	if (request->sched.semaphores && i915_request_started(request)) {
 		request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
@@ -510,11 +496,11 @@ void i915_request_unsubmit(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__i915_request_unsubmit(request);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static int __i915_sw_fence_call
@@ -669,7 +655,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->engine = ce->engine;
 	rq->ring = ce->ring;
 	rq->timeline = tl;
-	GEM_BUG_ON(rq->timeline == &ce->engine->timeline);
 	rq->hwsp_seqno = tl->hwsp_seqno;
 	rq->hwsp_cacheline = tl->hwsp_cacheline;
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
@@ -1136,9 +1121,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 					       0);
 	}
 
-	spin_lock_irq(&timeline->lock);
 	list_add_tail(&rq->link, &timeline->requests);
-	spin_unlock_irq(&timeline->lock);
 
 	/*
 	 * Make sure that no request gazumped us - if it was allocated after
-- cgit

From ef78f7b18726578fbabdeb8719f161f48a34d85d Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Tue, 18 Jun 2019 13:58:58 +0100
Subject: drm/i915: Use drm_gem_object.resv

Since commit 1ba627148ef5 ("drm: Add reservation_object to drm_gem_object"),
struct drm_gem_object grew its own builtin reservation_object, rendering our
own private one redundant. Remove our redundant reservation_object and point
into obj->base.resv instead.
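
The mechanical change is easier to follow as a single call pattern. The
sketch below is illustrative only: the helper name await_all_fences and the
flattened error handling are not from the patch, while the two
reservation_object_*_rcu() calls on obj->base.resv are exactly what the
hunks below switch i915_request_await_object() over to.

static int await_all_fences(struct i915_request *to,
			    struct drm_i915_gem_object *obj)
{
	struct dma_fence *excl;
	struct dma_fence **shared;
	unsigned int count, i;
	int ret;

	/* Snapshot every fence attached to the object's embedded resv. */
	ret = reservation_object_get_fences_rcu(obj->base.resv,
						&excl, &count, &shared);
	if (ret)
		return ret;

	for (i = 0; i < count; i++) {
		if (!ret)
			ret = i915_request_await_dma_fence(to, shared[i]);
		dma_fence_put(shared[i]);
	}
	kfree(shared);

	if (excl) {
		if (!ret)
			ret = i915_request_await_dma_fence(to, excl);
		dma_fence_put(excl);
	}

	return ret;
}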
References: 1ba627148ef5 ("drm: Add reservation_object to drm_gem_object")
Signed-off-by: Chris Wilson
Cc: Tvrtko Ursulin
Cc: Matthew Auld
Cc: Mika Kuoppala
Reviewed-by: Matthew Auld
Link: https://patchwork.freedesktop.org/patch/msgid/20190618125858.7295-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 9819483d1b5d..7418e8f1ed6a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1027,7 +1027,7 @@ i915_request_await_object(struct i915_request *to,
 		struct dma_fence **shared;
 		unsigned int count, i;
 
-		ret = reservation_object_get_fences_rcu(obj->resv,
+		ret = reservation_object_get_fences_rcu(obj->base.resv,
							&excl, &count, &shared);
 		if (ret)
 			return ret;
@@ -1044,7 +1044,7 @@ i915_request_await_object(struct i915_request *to,
 			dma_fence_put(shared[i]);
 		kfree(shared);
 	} else {
-		excl = reservation_object_get_excl_rcu(obj->resv);
+		excl = reservation_object_get_excl_rcu(obj->base.resv);
 	}
 
 	if (excl) {
-- cgit

From 44d89409a12eb8333735958509d7d591b461d13d Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Tue, 18 Jun 2019 08:41:35 +0100
Subject: drm/i915: Make the semaphore saturation mask global

The idea behind keeping the saturation mask local to a context backfired
spectacularly. The premise with the local mask was that we would be more
proactive in attempting to use semaphores after each time the context
idled, and that all new contexts would attempt to use semaphores ignoring
the current state of the system. This turns out to be horribly
optimistic. If the system state is still oversaturated and the existing
workloads have all stopped using semaphores, the new workloads would
attempt to use semaphores and be deprioritised behind real work. The new
contexts would not switch off using semaphores until their initial batch
of low-priority work had completed. Given a sufficient backlog of work at
equal user priority, this would completely starve the new work of any
GPU time.

To compensate, remove the local tracking in favour of keeping it as
global state on the engine -- once the system is saturated and semaphores
are disabled, everyone stops attempting to use semaphores until the
system is idle again.

One of the reasons for preferring local context tracking was that it
worked with virtual engines, so for switching to global state we could
either do a complete check of all the virtual siblings or simply disable
semaphores for those requests. This takes the simpler approach of
disabling semaphores on virtual engines.

The downside is that the decision that the engine is saturated is a local
measure -- we are only checking whether or not this context was scheduled
in a timely fashion; it may be legitimately delayed due to user
priorities. We still have the same dilemma though, that we do not want to
employ the semaphore poll unless it will be used.

v2: Explain why we need to assume the worst wrt virtual engines.
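
Condensed from the two hunks below, the resulting bookkeeping looks roughly
like the following sketch. The helper names are illustrative; only the
update and read of engine->saturated come from the patch, and clearing the
mask once the engine idles again is handled elsewhere in the driver.

/* Submit side: the semaphore already fired, so busywaiting bought nothing. */
static void mark_saturated(struct intel_engine_cs *engine,
			   struct i915_request *rq)
{
	if (rq->sched.semaphores && i915_sw_fence_signaled(&rq->semaphore))
		engine->saturated |= rq->sched.semaphores;
}

/* Emission side: skip semaphore busywaits on engines known to be saturated. */
static intel_engine_mask_t busywait_mask(const struct i915_request *rq)
{
	return rq->sched.semaphores | rq->engine->saturated;
}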
Fixes: ca6e56f654e7 ("drm/i915: Disable semaphore busywaits on saturated systems")
Signed-off-by: Chris Wilson
Cc: Tvrtko Ursulin
Cc: Dmitry Rogozhkin
Cc: Dmitry Ermilov
Reviewed-by: Tvrtko Ursulin
Link: https://patchwork.freedesktop.org/patch/msgid/20190618074153.16055-8-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 7418e8f1ed6a..0c2b53b8a3d1 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -407,7 +407,7 @@ void __i915_request_submit(struct i915_request *request)
 	 */
 	if (request->sched.semaphores &&
 	    i915_sw_fence_signaled(&request->semaphore))
-		request->hw_context->saturated |= request->sched.semaphores;
+		engine->saturated |= request->sched.semaphores;
 
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
@@ -787,7 +787,7 @@ already_busywaiting(struct i915_request *rq)
 	 *
 	 * See the are-we-too-late? check in __i915_request_submit().
 	 */
-	return rq->sched.semaphores | rq->hw_context->saturated;
+	return rq->sched.semaphores | rq->engine->saturated;
 }
 
 static int
-- cgit

From 2f5309452dc044a133c36c6e75170eb5f7450088 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Tue, 18 Jun 2019 08:41:30 +0100
Subject: drm/i915: Stop passing I915_WAIT_LOCKED to i915_request_wait()

Since commit eb8d0f5af4ec ("drm/i915: Remove GPU reset dependence on
struct_mutex"), the I915_WAIT_LOCKED flag passed to i915_request_wait()
has been defunct. Now go ahead and remove it from all callers.

References: eb8d0f5af4ec ("drm/i915: Remove GPU reset dependence on struct_mutex")
Signed-off-by: Chris Wilson
Reviewed-by: Mika Kuoppala
Link: https://patchwork.freedesktop.org/patch/msgid/20190618074153.16055-3-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 0c2b53b8a3d1..a195a92d0105 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1360,10 +1360,6 @@ static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
  * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
  * unbounded wait).
  *
- * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED
- * in via the flags, and vice versa if the struct_mutex is not held, the caller
- * must not specify that the wait is locked.
- *
 * Returns the remaining time (in jiffies) if the request completed, which may
 * be zero or -ETIME if the request is unfinished after the timeout expires.
 * May return -EINTR is called with I915_WAIT_INTERRUPTIBLE and a signal is
-- cgit
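
Taken together, the two wait/reset patches earlier in this log reduce to a
single lockdep idiom: the waiter never actually takes the reset (wedge)
mutex, it only tells lockdep that it conceptually holds it for the duration
of the wait. A hedged sketch of that idiom follows; do_the_wait() is a
placeholder for the real wait loop of i915_request_wait(), not a real API,
while the mutex_acquire()/mutex_release() calls match the hunks above.

long wait_with_reset_annotation(struct i915_request *rq, long timeout)
{
	/*
	 * Pretend to take wedge_mutex around the wait: if any caller waits
	 * while holding a lock that the reset path also needs, lockdep now
	 * sees the cycle and complains, even though no lock is actually
	 * acquired here.
	 */
	mutex_acquire(&rq->i915->gpu_error.wedge_mutex.dep_map,
		      0, 0, _THIS_IP_);

	timeout = do_the_wait(rq, timeout);	/* placeholder, not a real API */

	mutex_release(&rq->i915->gpu_error.wedge_mutex.dep_map, 0, _THIS_IP_);
	return timeout;
}

Reusing wedge_mutex's dep_map, rather than the standalone reset_lockmap of
the earlier version, ties the annotation to the mutex that already
serialises i915_reset() and wedging, which is the point of the "Refine
i915_reset.lock_map" patch.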