diff options
author | Zhenyu Wang <zhenyuw@linux.intel.com> | 2019-04-16 16:50:34 +0800 |
---|---|---|
committer | Zhenyu Wang <zhenyuw@linux.intel.com> | 2019-04-16 16:50:34 +0800 |
commit | 95d002e0a34cb0f238abb39987f9980f325d8332 (patch) | |
tree | b8ed70572a55b4e67410f906159f86e8aabb0f6a /drivers/gpu/drm/i915/intel_hangcheck.c | |
parent | d57b39e3ee3cdb4b00452090e386d197980cefc9 (diff) | |
parent | 28d618e9ab86f26a31af0b235ced55beb3e343c8 (diff) |
Merge tag 'drm-intel-next-2019-04-04' into gvt-next
Merge back drm-intel-next for engine name definition refinement
and 54939ea0bd85 ("drm/i915: Switch to use HWS indices rather than addresses")
that would need gvt fixes to depend on.
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_hangcheck.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_hangcheck.c | 233 |
1 files changed, 43 insertions, 190 deletions
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index e26d05a46451..3d51ed1428d4 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -23,144 +23,18 @@ */ #include "i915_drv.h" +#include "i915_reset.h" -static bool -ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr) -{ - ipehr &= ~MI_SEMAPHORE_SYNC_MASK; - return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | - MI_SEMAPHORE_REGISTER); -} - -static struct intel_engine_cs * -semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr, - u64 offset) -{ - struct drm_i915_private *dev_priv = engine->i915; - u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK; - struct intel_engine_cs *signaller; - enum intel_engine_id id; - - for_each_engine(signaller, dev_priv, id) { - if (engine == signaller) - continue; - - if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id]) - return signaller; - } - - DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n", - engine->name, ipehr); - - return ERR_PTR(-ENODEV); -} - -static struct intel_engine_cs * -semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) -{ - struct drm_i915_private *dev_priv = engine->i915; - void __iomem *vaddr; - u32 cmd, ipehr, head; - u64 offset = 0; - int i, backwards; - - /* - * This function does not support execlist mode - any attempt to - * proceed further into this function will result in a kernel panic - * when dereferencing ring->buffer, which is not set up in execlist - * mode. - * - * The correct way of doing it would be to derive the currently - * executing ring buffer from the current context, which is derived - * from the currently running request. Unfortunately, to get the - * current request we would have to grab the struct_mutex before doing - * anything else, which would be ill-advised since some other thread - * might have grabbed it already and managed to hang itself, causing - * the hang checker to deadlock. - * - * Therefore, this function does not support execlist mode in its - * current form. Just return NULL and move on. - */ - if (engine->buffer == NULL) - return NULL; - - ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); - if (!ipehr_is_semaphore_wait(engine, ipehr)) - return NULL; - - /* - * HEAD is likely pointing to the dword after the actual command, - * so scan backwards until we find the MBOX. But limit it to just 3 - * or 4 dwords depending on the semaphore wait command size. - * Note that we don't care about ACTHD here since that might - * point at at batch, and semaphores are always emitted into the - * ringbuffer itself. - */ - head = I915_READ_HEAD(engine) & HEAD_ADDR; - backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4; - vaddr = (void __iomem *)engine->buffer->vaddr; - - for (i = backwards; i; --i) { - /* - * Be paranoid and presume the hw has gone off into the wild - - * our ring is smaller than what the hardware (and hence - * HEAD_ADDR) allows. Also handles wrap-around. - */ - head &= engine->buffer->size - 1; - - /* This here seems to blow up */ - cmd = ioread32(vaddr + head); - if (cmd == ipehr) - break; - - head -= 4; - } - - if (!i) - return NULL; - - *seqno = ioread32(vaddr + head + 4) + 1; - return semaphore_wait_to_signaller_ring(engine, ipehr, offset); -} - -static int semaphore_passed(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct intel_engine_cs *signaller; +struct hangcheck { + u64 acthd; u32 seqno; - - engine->hangcheck.deadlock++; - - signaller = semaphore_waits_for(engine, &seqno); - if (signaller == NULL) - return -1; - - if (IS_ERR(signaller)) - return 0; - - /* Prevent pathological recursion due to driver bugs */ - if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES) - return -1; - - if (intel_engine_signaled(signaller, seqno)) - return 1; - - /* cursory check for an unkickable deadlock */ - if (I915_READ_CTL(signaller) & RING_WAIT_SEMAPHORE && - semaphore_passed(signaller) < 0) - return -1; - - return 0; -} - -static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, dev_priv, id) - engine->hangcheck.deadlock = 0; -} + enum intel_engine_hangcheck_action action; + unsigned long action_timestamp; + int deadlock; + struct intel_instdone instdone; + bool wedged:1; + bool stalled:1; +}; static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) { @@ -182,7 +56,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine) int slice; int subslice; - if (engine->id != RCS) + if (engine->id != RCS0) return true; intel_engine_get_instdone(engine, &instdone); @@ -236,7 +110,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) if (ha != ENGINE_DEAD) return ha; - if (IS_GEN2(dev_priv)) + if (IS_GEN(dev_priv, 2)) return ENGINE_DEAD; /* Is the chip hanging on a WAIT_FOR_EVENT? @@ -244,64 +118,36 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) * and break the hang. This should work on * all but the second generation chipsets. */ - tmp = I915_READ_CTL(engine); + tmp = ENGINE_READ(engine, RING_CTL); if (tmp & RING_WAIT) { - i915_handle_error(dev_priv, BIT(engine->id), 0, + i915_handle_error(dev_priv, engine->mask, 0, "stuck wait on %s", engine->name); - I915_WRITE_CTL(engine, tmp); + ENGINE_WRITE(engine, RING_CTL, tmp); return ENGINE_WAIT_KICK; } - if (IS_GEN(dev_priv, 6, 7) && tmp & RING_WAIT_SEMAPHORE) { - switch (semaphore_passed(engine)) { - default: - return ENGINE_DEAD; - case 1: - i915_handle_error(dev_priv, ALL_ENGINES, 0, - "stuck semaphore on %s", - engine->name); - I915_WRITE_CTL(engine, tmp); - return ENGINE_WAIT_KICK; - case 0: - return ENGINE_WAIT; - } - } - return ENGINE_DEAD; } static void hangcheck_load_sample(struct intel_engine_cs *engine, - struct intel_engine_hangcheck *hc) + struct hangcheck *hc) { - /* We don't strictly need an irq-barrier here, as we are not - * serving an interrupt request, be paranoid in case the - * barrier has side-effects (such as preventing a broken - * cacheline snoop) and so be sure that we can see the seqno - * advance. If the seqno should stick, due to a stale - * cacheline, we would erroneously declare the GPU hung. - */ - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - hc->acthd = intel_engine_get_active_head(engine); - hc->seqno = intel_engine_get_seqno(engine); + hc->seqno = intel_engine_get_hangcheck_seqno(engine); } static void hangcheck_store_sample(struct intel_engine_cs *engine, - const struct intel_engine_hangcheck *hc) + const struct hangcheck *hc) { engine->hangcheck.acthd = hc->acthd; - engine->hangcheck.seqno = hc->seqno; - engine->hangcheck.action = hc->action; - engine->hangcheck.stalled = hc->stalled; - engine->hangcheck.wedged = hc->wedged; + engine->hangcheck.last_seqno = hc->seqno; } static enum intel_engine_hangcheck_action hangcheck_get_action(struct intel_engine_cs *engine, - const struct intel_engine_hangcheck *hc) + const struct hangcheck *hc) { - if (engine->hangcheck.seqno != hc->seqno) + if (engine->hangcheck.last_seqno != hc->seqno) return ENGINE_ACTIVE_SEQNO; if (intel_engine_is_idle(engine)) @@ -311,7 +157,7 @@ hangcheck_get_action(struct intel_engine_cs *engine, } static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, - struct intel_engine_hangcheck *hc) + struct hangcheck *hc) { unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT; @@ -357,10 +203,6 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, break; case ENGINE_DEAD: - if (GEM_SHOW_DEBUG()) { - struct drm_printer p = drm_debug_printer("hangcheck"); - intel_engine_dump(engine, &p, "%s\n", engine->name); - } break; default: @@ -379,8 +221,8 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915, unsigned int stuck) { struct intel_engine_cs *engine; + intel_engine_mask_t tmp; char msg[80]; - unsigned int tmp; int len; /* If some rings hung but others were still busy, only @@ -421,32 +263,43 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (!READ_ONCE(dev_priv->gt.awake)) return; - if (i915_terminally_wedged(&dev_priv->gpu_error)) + if (i915_terminally_wedged(dev_priv)) return; /* As enabling the GPU requires fairly extensive mmio access, * periodically arm the mmio checker to see if we are triggering * any invalid access. */ - intel_uncore_arm_unclaimed_mmio_detection(dev_priv); + intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore); for_each_engine(engine, dev_priv, id) { - struct intel_engine_hangcheck hc; + struct hangcheck hc; - semaphore_clear_deadlocks(dev_priv); + intel_engine_signal_breadcrumbs(engine); hangcheck_load_sample(engine, &hc); hangcheck_accumulate_sample(engine, &hc); hangcheck_store_sample(engine, &hc); - if (engine->hangcheck.stalled) { - hung |= intel_engine_flag(engine); + if (hc.stalled) { + hung |= engine->mask; if (hc.action != ENGINE_DEAD) - stuck |= intel_engine_flag(engine); + stuck |= engine->mask; } - if (engine->hangcheck.wedged) - wedged |= intel_engine_flag(engine); + if (hc.wedged) + wedged |= engine->mask; + } + + if (GEM_SHOW_DEBUG() && (hung | stuck)) { + struct drm_printer p = drm_debug_printer("hangcheck"); + + for_each_engine(engine, dev_priv, id) { + if (intel_engine_is_idle(engine)) + continue; + + intel_engine_dump(engine, &p, "%s\n", engine->name); + } } if (wedged) { |