drm/i915: Replace global_seqno with a hangcheck heartbeat seqno

To determine whether an engine has 'stuck', we simply check whether or not is still on the same seqno for several seconds. To keep this simple mechanism intact over the loss of a global seqno, we can simply add a new global heartbeat seqno instead. As we cannot know the sequence in which requests will then be completed, we use a primitive random number generator instead (with a cycle long enough to not matter over an interval of a few thousand requests between hangcheck samples). The alternative to using a dedicated seqno on every request is to issue a heartbeat request and query its progress through the system. Sadly this requires us to reduce struct_mutex so that we can issue requests without requiring that bkl. v2: And without the extra CS_STALL for the hangcheck seqno -- we don't need strict serialisation with what comes later, we just need to be sure we don't write the hangcheck seqno before our batch is flushed. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190226094922.31617-1-chris@chris-wilson.co.uk
author: Chris Wilson <chris@chris-wilson.co.uk> 2019-02-26 09:49:19 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk> 2019-02-26 09:55:31 +0000
commit: 89531e7d8ee8602b2723431a581250d5d0ec2913 (patch)
tree: fccf27612b7590e765eca61cdd8e13b38f57326d /drivers/gpu/drm/i915/intel_ringbuffer.h
parent: 37fc7845df7b6ab9b2885b42e8de8ebcf33bee7a (diff)
1 files changed, 18 insertions, 1 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 5d45ad4ecca9..2869aaa9d225 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -6,6 +6,7 @@
 
 #include <linux/hashtable.h>
 #include <linux/irq_work.h>
+#include <linux/random.h>
 #include <linux/seqlock.h>
 
 #include "i915_gem_batch_pool.h"
@@ -119,7 +120,8 @@ struct intel_instdone {
 
 struct intel_engine_hangcheck {
 	u64 acthd;
-	u32 seqno;
+	u32 last_seqno;
+	u32 next_seqno;
 	unsigned long action_timestamp;
 	struct intel_instdone instdone;
 };
@@ -726,6 +728,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 #define I915_GEM_HWS_INDEX_ADDR		(I915_GEM_HWS_INDEX * sizeof(u32))
 #define I915_GEM_HWS_PREEMPT		0x32
 #define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT * sizeof(u32))
+#define I915_GEM_HWS_HANGCHECK		0x34
+#define I915_GEM_HWS_HANGCHECK_ADDR	(I915_GEM_HWS_HANGCHECK * sizeof(u32))
 #define I915_GEM_HWS_SEQNO		0x40
 #define I915_GEM_HWS_SEQNO_ADDR		(I915_GEM_HWS_SEQNO * sizeof(u32))
 #define I915_GEM_HWS_SCRATCH		0x80
@@ -1086,4 +1090,17 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
 
 #endif
 
+static inline u32
+intel_engine_next_hangcheck_seqno(struct intel_engine_cs *engine)
+{
+	return engine->hangcheck.next_seqno =
+		next_pseudo_random32(engine->hangcheck.next_seqno);
+}
+
+static inline u32
+intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
+{
+	return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
+}
+
 #endif /* _INTEL_RINGBUFFER_H_ */
author	Chris Wilson <chris@chris-wilson.co.uk>	2019-02-26 09:49:19 +0000
committer	Chris Wilson <chris@chris-wilson.co.uk>	2019-02-26 09:55:31 +0000
commit	89531e7d8ee8602b2723431a581250d5d0ec2913 (patch)
tree	fccf27612b7590e765eca61cdd8e13b38f57326d /drivers/gpu/drm/i915/intel_ringbuffer.h
parent	37fc7845df7b6ab9b2885b42e8de8ebcf33bee7a (diff)