summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/intel_breadcrumbs.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2016-08-09 17:47:51 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2016-08-10 10:37:35 +0100
commit83348ba84ee0d5d4d982e5382bfbc8b2a2d05e75 (patch)
tree19ef4651cda72ff05599d075b53ffc9213f6b503 /drivers/gpu/drm/i915/intel_breadcrumbs.c
parent70cb472c6d7a3575a9b3fc7e0188384b7ca3d705 (diff)
drm/i915: Move missed interrupt detection from hangcheck to breadcrumbs
In commit 2529d57050af ("drm/i915: Drop racy markup of missed-irqs from idle-worker") the racy detection of missed interrupts was removed when we went idle. This however opened up the issue that the stuck waiters were not being reported, causing a test case failure. If we move the stuck waiter detection out of hangcheck and into the breadcrumb mechanims (i.e. the waiter) itself, we can avoid this issue entirely. This leaves hangcheck looking for a stuck GPU (inspecting for request advancement and HEAD motion), and breadcrumbs looking for a stuck waiter - hopefully make both easier to understand by their segregation. v2: Reduce the error message as we now run independently of hangcheck, and the hanging batch used by igt also counts as a stuck waiter causing extra warnings in dmesg. v3: Move the breadcrumb's hangcheck kickstart to the first missed wait. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97104 Fixes: 2529d57050af (waiter"drm/i915: Drop racy markup of missed-irqs...") Testcase: igt/drv_missed_irq Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1470761272-1245-2-git-send-email-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/intel_breadcrumbs.c')
-rw-r--r--drivers/gpu/drm/i915/intel_breadcrumbs.c69
1 files changed, 47 insertions, 22 deletions
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 90867446f1a5..7be9af1d5424 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -26,6 +26,40 @@
#include "i915_drv.h"
+static void intel_breadcrumbs_hangcheck(unsigned long data)
+{
+ struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+ if (!b->irq_enabled)
+ return;
+
+ if (time_before(jiffies, b->timeout)) {
+ mod_timer(&b->hangcheck, b->timeout);
+ return;
+ }
+
+ DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
+ set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
+ mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
+
+ /* Ensure that even if the GPU hangs, we get woken up.
+ *
+ * However, note that if no one is waiting, we never notice
+ * a gpu hang. Eventually, we will have to wait for a resource
+ * held by the GPU and so trigger a hangcheck. In the most
+ * pathological case, this will be upon memory starvation! To
+ * prevent this, we also queue the hangcheck from the retire
+ * worker.
+ */
+ i915_queue_hangcheck(engine->i915);
+}
+
+static unsigned long wait_timeout(void)
+{
+ return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
+}
+
static void intel_breadcrumbs_fake_irq(unsigned long data)
{
struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
@@ -51,13 +85,6 @@ static void irq_enable(struct intel_engine_cs *engine)
*/
engine->breadcrumbs.irq_posted = true;
- /* Make sure the current hangcheck doesn't falsely accuse a just
- * started irq handler from missing an interrupt (because the
- * interrupt count still matches the stale value from when
- * the irq handler was disabled, many hangchecks ago).
- */
- engine->breadcrumbs.irq_wakeups++;
-
spin_lock_irq(&engine->i915->irq_lock);
engine->irq_enable(engine);
spin_unlock_irq(&engine->i915->irq_lock);
@@ -98,17 +125,13 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
}
if (!b->irq_enabled ||
- test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
+ test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
mod_timer(&b->fake_irq, jiffies + 1);
-
- /* Ensure that even if the GPU hangs, we get woken up.
- *
- * However, note that if no one is waiting, we never notice
- * a gpu hang. Eventually, we will have to wait for a resource
- * held by the GPU and so trigger a hangcheck. In the most
- * pathological case, this will be upon memory starvation!
- */
- i915_queue_hangcheck(i915);
+ } else {
+ /* Ensure we never sleep indefinitely */
+ GEM_BUG_ON(!time_after(b->timeout, jiffies));
+ mod_timer(&b->hangcheck, b->timeout);
+ }
}
static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
@@ -219,6 +242,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
GEM_BUG_ON(!next && !first);
if (next && next != &wait->node) {
GEM_BUG_ON(first);
+ b->timeout = wait_timeout();
b->first_wait = to_wait(next);
smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
/* As there is a delay between reading the current
@@ -245,6 +269,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
if (first) {
GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
+ b->timeout = wait_timeout();
b->first_wait = wait;
smp_store_mb(b->irq_seqno_bh, wait->tsk);
/* After assigning ourselves as the new bottom-half, we must
@@ -277,11 +302,6 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
return first;
}
-void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
-{
- mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
-}
-
static inline bool chain_wakeup(struct rb_node *rb, int priority)
{
return rb && to_wait(rb)->tsk->prio <= priority;
@@ -359,6 +379,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
* the interrupt, or if we have to handle an
* exception rather than a seqno completion.
*/
+ b->timeout = wait_timeout();
b->first_wait = to_wait(next);
smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
if (b->first_wait->seqno != wait->seqno)
@@ -536,6 +557,9 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
setup_timer(&b->fake_irq,
intel_breadcrumbs_fake_irq,
(unsigned long)engine);
+ setup_timer(&b->hangcheck,
+ intel_breadcrumbs_hangcheck,
+ (unsigned long)engine);
/* Spawn a thread to provide a common bottom-half for all signals.
* As this is an asynchronous interface we cannot steal the current
@@ -560,6 +584,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
if (!IS_ERR_OR_NULL(b->signaler))
kthread_stop(b->signaler);
+ del_timer_sync(&b->hangcheck);
del_timer_sync(&b->fake_irq);
}