From 12ca695d2c1ed26b2dcbb528b42813bd0f216cfc Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst
Date: Tue, 23 Mar 2021 16:49:50 +0100
Subject: drm/i915: Do not share hwsp across contexts any more, v8.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of sharing pages with breadcrumbs, give each timeline a single
page. This allows unrelated timelines not to share locks any more during
command submission.

As an additional benefit, seqno wraparound no longer requires
i915_vma_pin, which means we no longer need to worry about a potential
-EDEADLK at a point where we are ready to submit.

Changes since v1:
- Fix erroneous i915_vma_acquire that should be a i915_vma_release (ickle).
- Extra check for completion in intel_read_hwsp().
Changes since v2:
- Fix inconsistent indent in hwsp_alloc() (kbuild)
- memset entire cacheline to 0.
Changes since v3:
- Do same in intel_timeline_reset_seqno(), and clflush for good measure.
Changes since v4:
- Use refcounting on timeline, instead of relying on i915_active.
- Fix waiting on kernel requests.
Changes since v5:
- Bump amount of slots to maximum (256), for best wraparounds.
- Add hwsp_offset to i915_request to fix potential wraparound hang.
- Ensure timeline wrap test works with the changes.
- Assign hwsp in intel_timeline_read_hwsp() within the rcu lock to fix
  a hang.
Changes since v6:
- Rename i915_request_active_offset to i915_request_active_seqno(), and
  elaborate the function. (tvrtko)
Changes since v7:
- Move hunk to where it belongs. (jekstrand)
- Replace CACHELINE_BYTES with TIMELINE_SEQNO_BYTES. (jekstrand)

Signed-off-by: Maarten Lankhorst
Reviewed-by: Thomas Hellström #v1
Reported-by: kernel test robot
Signed-off-by: Daniel Vetter
Link: https://patchwork.freedesktop.org/patch/msgid/20210323155059.628690-2-maarten.lankhorst@linux.intel.com
---
 drivers/gpu/drm/i915/i915_request.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 22e39d938f17..021535f2a718 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -863,7 +863,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
         rq->fence.seqno = seqno;

         RCU_INIT_POINTER(rq->timeline, tl);
-        RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
         rq->hwsp_seqno = tl->hwsp_seqno;
         GEM_BUG_ON(__i915_request_is_complete(rq));

@@ -1108,9 +1107,6 @@ emit_semaphore_wait(struct i915_request *to,
         if (i915_request_has_initial_breadcrumb(to))
                 goto await_fence;

-        if (!rcu_access_pointer(from->hwsp_cacheline))
-                goto await_fence;
-
         /*
          * If this or its dependents are waiting on an external fence
          * that may fail catastrophically, then we want to avoid using
--
cgit
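For context on the two hunks above: with a per-timeline status page, deciding
whether a request has completed reduces to a wrap-safe comparison between the
breadcrumb seqno the GPU wrote to that page (reached via the rq->hwsp_seqno
pointer the first hunk keeps) and the request's own fence seqno. A simplified
sketch of that test, not verbatim driver code:

    /*
     * Sketch only: rq->hwsp_seqno points into the timeline's status page.
     * The signed subtraction keeps the comparison correct across u32
     * seqno wraparound.
     */
    static inline bool request_completed_sketch(const struct i915_request *rq)
    {
            u32 breadcrumb = READ_ONCE(*rq->hwsp_seqno);

            return (s32)(breadcrumb - rq->fence.seqno) >= 0;
    }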
From c10e4a7960f3032b0313c4b684e0b4025b4c138d Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Mon, 1 Feb 2021 08:56:22 +0000
Subject: drm/i915: Protect against request freeing during cancellation on
 wedging

As soon as we mark a request as completed, it may be retired. So when
cancelling a request and marking it complete, make sure we first keep a
reference to the request.

Signed-off-by: Chris Wilson
Reviewed-by: Mika Kuoppala
Link: https://patchwork.freedesktop.org/patch/msgid/20210201085715.27435-4-chris@chris-wilson.co.uk
Signed-off-by: Daniel Vetter
---
 drivers/gpu/drm/i915/i915_request.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 22e39d938f17..8b4223325188 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -514,15 +514,20 @@ void i915_request_set_error_once(struct i915_request *rq, int error)
         } while (!try_cmpxchg(&rq->fence.error, &old, error));
 }

-void i915_request_mark_eio(struct i915_request *rq)
+struct i915_request *i915_request_mark_eio(struct i915_request *rq)
 {
         if (__i915_request_is_complete(rq))
-                return;
+                return NULL;

         GEM_BUG_ON(i915_request_signaled(rq));

+        /* As soon as the request is completed, it may be retired */
+        rq = i915_request_get(rq);
+
         i915_request_set_error_once(rq, -EIO);
         i915_request_mark_complete(rq);
+
+        return rq;
 }

 bool __i915_request_submit(struct i915_request *request)
--
cgit
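The point of the new return value: once i915_request_mark_complete() has run,
a parallel retirement can free the request, so the caller must already hold
the extra reference that i915_request_mark_eio() now takes. A hypothetical
call site, for illustration only (the real users sit in the wedging paths
outside this file):

    rq = i915_request_mark_eio(rq);
    if (rq) {
            /* rq is marked -EIO and complete, but cannot be freed yet */
            i915_request_put(rq);   /* now retirement may free it */
    }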
From 7dbc19da5daf45cb16f3bff900b69d44c512c333 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Wed, 24 Mar 2021 12:13:29 +0000
Subject: drm/i915: Extract active lookup engine to a helper

Move active engine lookup to exported i915_request_active_engine.

Signed-off-by: Tvrtko Ursulin
Reviewed-by: Matthew Auld
[danvet: Slight rebase, engine->sched.lock is still called
engine->active.lock.]
Signed-off-by: Daniel Vetter
Link: https://patchwork.freedesktop.org/patch/msgid/20210324121335.2307063-2-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/i915_request.c | 44 +++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 021535f2a718..d23186016fc6 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -244,6 +244,50 @@ static void __i915_request_fill(struct i915_request *rq, u8 val)
         memset(vaddr + head, val, rq->postfix - head);
 }

+/**
+ * i915_request_active_engine
+ * @rq: request to inspect
+ * @active: pointer in which to return the active engine
+ *
+ * Fills the currently active engine to the @active pointer if the request
+ * is active and still not completed.
+ *
+ * Returns true if request was active or false otherwise.
+ */
+bool
+i915_request_active_engine(struct i915_request *rq,
+                           struct intel_engine_cs **active)
+{
+        struct intel_engine_cs *engine, *locked;
+        bool ret = false;
+
+        /*
+         * Serialise with __i915_request_submit() so that it sees
+         * is-banned?, or we know the request is already inflight.
+         *
+         * Note that rq->engine is unstable, and so we double
+         * check that we have acquired the lock on the final engine.
+         */
+        locked = READ_ONCE(rq->engine);
+        spin_lock_irq(&locked->active.lock);
+        while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
+                spin_unlock(&locked->active.lock);
+                locked = engine;
+                spin_lock(&locked->active.lock);
+        }
+
+        if (i915_request_is_active(rq)) {
+                if (!__i915_request_is_complete(rq))
+                        *active = locked;
+                ret = true;
+        }
+
+        spin_unlock_irq(&locked->active.lock);
+
+        return ret;
+}
+
+
 static void remove_from_engine(struct i915_request *rq)
 {
         struct intel_engine_cs *engine, *locked;
--
cgit

From 38b237eab2bc7feac87a4c9d870368e935a0091b Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Wed, 24 Mar 2021 12:13:30 +0000
Subject: drm/i915: Individual request cancellation

Currently, we cancel outstanding requests within a context when the
context is closed. We may also want to cancel individual requests using
the same graceful preemption mechanism.

v2 (Tvrtko):
 * Cancel waiters carefully considering no timeline lock and RCU.
 * Fixed selftests.

v3 (Tvrtko):
 * Remove error propagation to waiters for now.

v4 (Tvrtko):
 * Rebase for extracted i915_request_active_engine. (Matt)

Signed-off-by: Chris Wilson
Signed-off-by: Tvrtko Ursulin
Reviewed-by: Matthew Auld
[danvet: Resolve conflict because intel_engine_flush_scheduler is
still called intel_engine_flush_submission]
Signed-off-by: Daniel Vetter
Link: https://patchwork.freedesktop.org/patch/msgid/20210324121335.2307063-3-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/i915_request.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d23186016fc6..a031b86f8508 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -33,7 +33,10 @@
 #include "gem/i915_gem_context.h"
 #include "gt/intel_breadcrumbs.h"
 #include "gt/intel_context.h"
+#include "gt/intel_engine.h"
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_gpu_commands.h"
+#include "gt/intel_reset.h"
 #include "gt/intel_ring.h"
 #include "gt/intel_rps.h"

@@ -542,20 +545,22 @@ void __i915_request_skip(struct i915_request *rq)
         rq->infix = rq->postfix;
 }

-void i915_request_set_error_once(struct i915_request *rq, int error)
+bool i915_request_set_error_once(struct i915_request *rq, int error)
 {
         int old;

         GEM_BUG_ON(!IS_ERR_VALUE((long)error));

         if (i915_request_signaled(rq))
-                return;
+                return false;

         old = READ_ONCE(rq->fence.error);
         do {
                 if (fatal_error(old))
-                        return;
+                        return false;
         } while (!try_cmpxchg(&rq->fence.error, &old, error));
+
+        return true;
 }

 void i915_request_mark_eio(struct i915_request *rq)
@@ -722,6 +727,28 @@ void i915_request_unsubmit(struct i915_request *request)
         spin_unlock_irqrestore(&engine->active.lock, flags);
 }

+static void __cancel_request(struct i915_request *rq)
+{
+        struct intel_engine_cs *engine = NULL;
+
+        i915_request_active_engine(rq, &engine);
+
+        if (engine && intel_engine_pulse(engine))
+                intel_gt_handle_error(engine->gt, engine->mask, 0,
+                                      "request cancellation by %s",
+                                      current->comm);
+}
+
+void i915_request_cancel(struct i915_request *rq, int error)
+{
+        if (!i915_request_set_error_once(rq, error))
+                return;
+
+        set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
+
+        __cancel_request(rq);
+}
+
 static int __i915_sw_fence_call
 submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 {
--
cgit
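Note how the pieces compose: i915_request_set_error_once() now reports whether
this caller was the one to set the error, so concurrent cancels collapse into
a single __cancel_request(); that in turn only disturbs the engine (heartbeat
pulse, or a full engine reset if the pulse fails) when
i915_request_active_engine() from the previous patch says the request is
actually executing. A hypothetical caller, for illustration (real users of
i915_request_cancel() arrive later in the series):

    /*
     * Illustrative only: the error must satisfy IS_ERR_VALUE(), per the
     * GEM_BUG_ON() in i915_request_set_error_once(); -EINTR is one
     * plausible choice.
     */
    i915_request_cancel(rq, -EINTR);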
From 9b4d0598ee940df33ea6cbbba8c80e951223131b Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Wed, 24 Mar 2021 12:13:33 +0000
Subject: drm/i915: Request watchdog infrastructure

Prepares the plumbing for setting request/fence expiration time. All code
is put in place but is never activated due to the still-missing ability to
actually configure the timer.

Outline of the basic operation:

A timer is started when the request is ready for execution. If the request
completes (retires) before the timer fires, the timer is cancelled and
nothing further happens.

If the timer fires, the request is added to a lockless list and a worker
is queued. The purpose of this is twofold: a) it allows request
cancellation from a more friendly context and b) it coalesces multiple
expirations into a single event of consuming the list.

The worker locklessly consumes the list of expired requests and cancels
them all using the previously added i915_request_cancel().

The associated timeout value is stored in rq->context->watchdog.timeout_us.

v2:
 * Log expiration.

v3:
 * Include more information about user timeline in the log message.

v4:
 * Remove obsolete comment and fix formatting. (Matt)

Signed-off-by: Tvrtko Ursulin
Cc: Daniel Vetter
Reviewed-by: Matthew Auld
Signed-off-by: Daniel Vetter
Link: https://patchwork.freedesktop.org/patch/msgid/20210324121335.2307063-6-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/i915_request.c | 52 +++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a031b86f8508..63968d163c14 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -321,6 +321,53 @@ static void remove_from_engine(struct i915_request *rq)
         __notify_execute_cb_imm(rq);
 }

+static void __rq_init_watchdog(struct i915_request *rq)
+{
+        rq->watchdog.timer.function = NULL;
+}
+
+static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
+{
+        struct i915_request *rq =
+                container_of(hrtimer, struct i915_request, watchdog.timer);
+        struct intel_gt *gt = rq->engine->gt;
+
+        if (!i915_request_completed(rq)) {
+                if (llist_add(&rq->watchdog.link, &gt->watchdog.list))
+                        schedule_work(&gt->watchdog.work);
+        } else {
+                i915_request_put(rq);
+        }
+
+        return HRTIMER_NORESTART;
+}
+
+static void __rq_arm_watchdog(struct i915_request *rq)
+{
+        struct i915_request_watchdog *wdg = &rq->watchdog;
+        struct intel_context *ce = rq->context;
+
+        if (!ce->watchdog.timeout_us)
+                return;
+
+        hrtimer_init(&wdg->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+        wdg->timer.function = __rq_watchdog_expired;
+        hrtimer_start_range_ns(&wdg->timer,
+                               ns_to_ktime(ce->watchdog.timeout_us *
                                            NSEC_PER_USEC),
+                               NSEC_PER_MSEC,
+                               HRTIMER_MODE_REL);
+        i915_request_get(rq);
+}
+
+static void __rq_cancel_watchdog(struct i915_request *rq)
+{
+        struct i915_request_watchdog *wdg = &rq->watchdog;
+
+        if (wdg->timer.function && hrtimer_try_to_cancel(&wdg->timer) > 0)
+                i915_request_put(rq);
+}
+
 bool i915_request_retire(struct i915_request *rq)
 {
         if (!__i915_request_is_complete(rq))
@@ -332,6 +379,8 @@ bool i915_request_retire(struct i915_request *rq)
         trace_i915_request_retire(rq);
         i915_request_mark_complete(rq);

+        __rq_cancel_watchdog(rq);
+
         /*
          * We know the GPU must have read the request to have
          * sent us the seqno + interrupt, so use the position
@@ -761,6 +810,8 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)

                 if (unlikely(fence->error))
                         i915_request_set_error_once(request, fence->error);
+                else
+                        __rq_arm_watchdog(request);

                 /*
                  * We need to serialize use of the submit_request() callback
@@ -947,6 +998,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)

         /* No zalloc, everything must be cleared after use */
         rq->batch = NULL;
+        __rq_init_watchdog(rq);
         GEM_BUG_ON(rq->capture_list);
         GEM_BUG_ON(!llist_empty(&rq->execute_cb));
--
cgit
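The worker described in the message above (the one that drains
gt->watchdog.list) lands outside i915_request.c and is not part of this diff.
A sketch of what it plausibly looks like, assuming only the gt->watchdog
members visible in the hunks; the function name and the -EINTR error code are
assumptions:

    static void watchdog_work_sketch(struct work_struct *w)
    {
            struct intel_gt *gt = container_of(w, struct intel_gt, watchdog.work);
            struct i915_request *rq, *rn;
            struct llist_node *first;

            /* Atomically take ownership of everything queued so far */
            first = llist_del_all(&gt->watchdog.list);

            llist_for_each_entry_safe(rq, rn, first, watchdog.link) {
                    i915_request_cancel(rq, -EINTR);
                    i915_request_put(rq); /* drop the reference taken when arming */
            }
    }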