summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/intel_hangcheck.c
diff options
context:
space:
mode:
authorZhenyu Wang <zhenyuw@linux.intel.com>2019-04-16 16:50:34 +0800
committerZhenyu Wang <zhenyuw@linux.intel.com>2019-04-16 16:50:34 +0800
commit95d002e0a34cb0f238abb39987f9980f325d8332 (patch)
treeb8ed70572a55b4e67410f906159f86e8aabb0f6a /drivers/gpu/drm/i915/intel_hangcheck.c
parentd57b39e3ee3cdb4b00452090e386d197980cefc9 (diff)
parent28d618e9ab86f26a31af0b235ced55beb3e343c8 (diff)
Merge tag 'drm-intel-next-2019-04-04' into gvt-next
Merge back drm-intel-next for engine name definition refinement and 54939ea0bd85 ("drm/i915: Switch to use HWS indices rather than addresses") that would need gvt fixes to depend on. Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_hangcheck.c')
-rw-r--r--drivers/gpu/drm/i915/intel_hangcheck.c233
1 files changed, 43 insertions, 190 deletions
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index e26d05a46451..3d51ed1428d4 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -23,144 +23,18 @@
*/
#include "i915_drv.h"
+#include "i915_reset.h"
-static bool
-ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr)
-{
- ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
- return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
- MI_SEMAPHORE_REGISTER);
-}
-
-static struct intel_engine_cs *
-semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr,
- u64 offset)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK;
- struct intel_engine_cs *signaller;
- enum intel_engine_id id;
-
- for_each_engine(signaller, dev_priv, id) {
- if (engine == signaller)
- continue;
-
- if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id])
- return signaller;
- }
-
- DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n",
- engine->name, ipehr);
-
- return ERR_PTR(-ENODEV);
-}
-
-static struct intel_engine_cs *
-semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- void __iomem *vaddr;
- u32 cmd, ipehr, head;
- u64 offset = 0;
- int i, backwards;
-
- /*
- * This function does not support execlist mode - any attempt to
- * proceed further into this function will result in a kernel panic
- * when dereferencing ring->buffer, which is not set up in execlist
- * mode.
- *
- * The correct way of doing it would be to derive the currently
- * executing ring buffer from the current context, which is derived
- * from the currently running request. Unfortunately, to get the
- * current request we would have to grab the struct_mutex before doing
- * anything else, which would be ill-advised since some other thread
- * might have grabbed it already and managed to hang itself, causing
- * the hang checker to deadlock.
- *
- * Therefore, this function does not support execlist mode in its
- * current form. Just return NULL and move on.
- */
- if (engine->buffer == NULL)
- return NULL;
-
- ipehr = I915_READ(RING_IPEHR(engine->mmio_base));
- if (!ipehr_is_semaphore_wait(engine, ipehr))
- return NULL;
-
- /*
- * HEAD is likely pointing to the dword after the actual command,
- * so scan backwards until we find the MBOX. But limit it to just 3
- * or 4 dwords depending on the semaphore wait command size.
- * Note that we don't care about ACTHD here since that might
- * point at at batch, and semaphores are always emitted into the
- * ringbuffer itself.
- */
- head = I915_READ_HEAD(engine) & HEAD_ADDR;
- backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4;
- vaddr = (void __iomem *)engine->buffer->vaddr;
-
- for (i = backwards; i; --i) {
- /*
- * Be paranoid and presume the hw has gone off into the wild -
- * our ring is smaller than what the hardware (and hence
- * HEAD_ADDR) allows. Also handles wrap-around.
- */
- head &= engine->buffer->size - 1;
-
- /* This here seems to blow up */
- cmd = ioread32(vaddr + head);
- if (cmd == ipehr)
- break;
-
- head -= 4;
- }
-
- if (!i)
- return NULL;
-
- *seqno = ioread32(vaddr + head + 4) + 1;
- return semaphore_wait_to_signaller_ring(engine, ipehr, offset);
-}
-
-static int semaphore_passed(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- struct intel_engine_cs *signaller;
+struct hangcheck {
+ u64 acthd;
u32 seqno;
-
- engine->hangcheck.deadlock++;
-
- signaller = semaphore_waits_for(engine, &seqno);
- if (signaller == NULL)
- return -1;
-
- if (IS_ERR(signaller))
- return 0;
-
- /* Prevent pathological recursion due to driver bugs */
- if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES)
- return -1;
-
- if (intel_engine_signaled(signaller, seqno))
- return 1;
-
- /* cursory check for an unkickable deadlock */
- if (I915_READ_CTL(signaller) & RING_WAIT_SEMAPHORE &&
- semaphore_passed(signaller) < 0)
- return -1;
-
- return 0;
-}
-
-static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, dev_priv, id)
- engine->hangcheck.deadlock = 0;
-}
+ enum intel_engine_hangcheck_action action;
+ unsigned long action_timestamp;
+ int deadlock;
+ struct intel_instdone instdone;
+ bool wedged:1;
+ bool stalled:1;
+};
static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
{
@@ -182,7 +56,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine)
int slice;
int subslice;
- if (engine->id != RCS)
+ if (engine->id != RCS0)
return true;
intel_engine_get_instdone(engine, &instdone);
@@ -236,7 +110,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
if (ha != ENGINE_DEAD)
return ha;
- if (IS_GEN2(dev_priv))
+ if (IS_GEN(dev_priv, 2))
return ENGINE_DEAD;
/* Is the chip hanging on a WAIT_FOR_EVENT?
@@ -244,64 +118,36 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
* and break the hang. This should work on
* all but the second generation chipsets.
*/
- tmp = I915_READ_CTL(engine);
+ tmp = ENGINE_READ(engine, RING_CTL);
if (tmp & RING_WAIT) {
- i915_handle_error(dev_priv, BIT(engine->id), 0,
+ i915_handle_error(dev_priv, engine->mask, 0,
"stuck wait on %s", engine->name);
- I915_WRITE_CTL(engine, tmp);
+ ENGINE_WRITE(engine, RING_CTL, tmp);
return ENGINE_WAIT_KICK;
}
- if (IS_GEN(dev_priv, 6, 7) && tmp & RING_WAIT_SEMAPHORE) {
- switch (semaphore_passed(engine)) {
- default:
- return ENGINE_DEAD;
- case 1:
- i915_handle_error(dev_priv, ALL_ENGINES, 0,
- "stuck semaphore on %s",
- engine->name);
- I915_WRITE_CTL(engine, tmp);
- return ENGINE_WAIT_KICK;
- case 0:
- return ENGINE_WAIT;
- }
- }
-
return ENGINE_DEAD;
}
static void hangcheck_load_sample(struct intel_engine_cs *engine,
- struct intel_engine_hangcheck *hc)
+ struct hangcheck *hc)
{
- /* We don't strictly need an irq-barrier here, as we are not
- * serving an interrupt request, be paranoid in case the
- * barrier has side-effects (such as preventing a broken
- * cacheline snoop) and so be sure that we can see the seqno
- * advance. If the seqno should stick, due to a stale
- * cacheline, we would erroneously declare the GPU hung.
- */
- if (engine->irq_seqno_barrier)
- engine->irq_seqno_barrier(engine);
-
hc->acthd = intel_engine_get_active_head(engine);
- hc->seqno = intel_engine_get_seqno(engine);
+ hc->seqno = intel_engine_get_hangcheck_seqno(engine);
}
static void hangcheck_store_sample(struct intel_engine_cs *engine,
- const struct intel_engine_hangcheck *hc)
+ const struct hangcheck *hc)
{
engine->hangcheck.acthd = hc->acthd;
- engine->hangcheck.seqno = hc->seqno;
- engine->hangcheck.action = hc->action;
- engine->hangcheck.stalled = hc->stalled;
- engine->hangcheck.wedged = hc->wedged;
+ engine->hangcheck.last_seqno = hc->seqno;
}
static enum intel_engine_hangcheck_action
hangcheck_get_action(struct intel_engine_cs *engine,
- const struct intel_engine_hangcheck *hc)
+ const struct hangcheck *hc)
{
- if (engine->hangcheck.seqno != hc->seqno)
+ if (engine->hangcheck.last_seqno != hc->seqno)
return ENGINE_ACTIVE_SEQNO;
if (intel_engine_is_idle(engine))
@@ -311,7 +157,7 @@ hangcheck_get_action(struct intel_engine_cs *engine,
}
static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
- struct intel_engine_hangcheck *hc)
+ struct hangcheck *hc)
{
unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT;
@@ -357,10 +203,6 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
break;
case ENGINE_DEAD:
- if (GEM_SHOW_DEBUG()) {
- struct drm_printer p = drm_debug_printer("hangcheck");
- intel_engine_dump(engine, &p, "%s\n", engine->name);
- }
break;
default:
@@ -379,8 +221,8 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915,
unsigned int stuck)
{
struct intel_engine_cs *engine;
+ intel_engine_mask_t tmp;
char msg[80];
- unsigned int tmp;
int len;
/* If some rings hung but others were still busy, only
@@ -421,32 +263,43 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
if (!READ_ONCE(dev_priv->gt.awake))
return;
- if (i915_terminally_wedged(&dev_priv->gpu_error))
+ if (i915_terminally_wedged(dev_priv))
return;
/* As enabling the GPU requires fairly extensive mmio access,
* periodically arm the mmio checker to see if we are triggering
* any invalid access.
*/
- intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
+ intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore);
for_each_engine(engine, dev_priv, id) {
- struct intel_engine_hangcheck hc;
+ struct hangcheck hc;
- semaphore_clear_deadlocks(dev_priv);
+ intel_engine_signal_breadcrumbs(engine);
hangcheck_load_sample(engine, &hc);
hangcheck_accumulate_sample(engine, &hc);
hangcheck_store_sample(engine, &hc);
- if (engine->hangcheck.stalled) {
- hung |= intel_engine_flag(engine);
+ if (hc.stalled) {
+ hung |= engine->mask;
if (hc.action != ENGINE_DEAD)
- stuck |= intel_engine_flag(engine);
+ stuck |= engine->mask;
}
- if (engine->hangcheck.wedged)
- wedged |= intel_engine_flag(engine);
+ if (hc.wedged)
+ wedged |= engine->mask;
+ }
+
+ if (GEM_SHOW_DEBUG() && (hung | stuck)) {
+ struct drm_printer p = drm_debug_printer("hangcheck");
+
+ for_each_engine(engine, dev_priv, id) {
+ if (intel_engine_is_idle(engine))
+ continue;
+
+ intel_engine_dump(engine, &p, "%s\n", engine->name);
+ }
}
if (wedged) {