1 files changed, 131 insertions, 83 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ec0b4a0c605d..8cb2078c5bfc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -4,6 +4,7 @@
 #include <linux/hashtable.h>
 #include "i915_gem_batch_pool.h"
 #include "i915_gem_request.h"
+#include "i915_gem_timeline.h"
 
 #define I915_CMD_HASH_ORDER 9
 
@@ -64,22 +65,73 @@ struct intel_hw_status_page {
 	 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
 
 enum intel_engine_hangcheck_action {
-	HANGCHECK_IDLE = 0,
-	HANGCHECK_WAIT,
-	HANGCHECK_ACTIVE,
-	HANGCHECK_KICK,
-	HANGCHECK_HUNG,
+	ENGINE_IDLE = 0,
+	ENGINE_WAIT,
+	ENGINE_ACTIVE_SEQNO,
+	ENGINE_ACTIVE_HEAD,
+	ENGINE_ACTIVE_SUBUNITS,
+	ENGINE_WAIT_KICK,
+	ENGINE_DEAD,
 };
 
-#define HANGCHECK_SCORE_RING_HUNG 31
+static inline const char *
+hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
+{
+	switch (a) {
+	case ENGINE_IDLE:
+		return "idle";
+	case ENGINE_WAIT:
+		return "wait";
+	case ENGINE_ACTIVE_SEQNO:
+		return "active seqno";
+	case ENGINE_ACTIVE_HEAD:
+		return "active head";
+	case ENGINE_ACTIVE_SUBUNITS:
+		return "active subunits";
+	case ENGINE_WAIT_KICK:
+		return "wait kick";
+	case ENGINE_DEAD:
+		return "dead";
+	}
+
+	return "unknown";
+}
+
+#define I915_MAX_SLICES	3
+#define I915_MAX_SUBSLICES 3
+
+#define instdone_slice_mask(dev_priv__) \
+	(INTEL_GEN(dev_priv__) == 7 ? \
+	 1 : INTEL_INFO(dev_priv__)->sseu.slice_mask)
+
+#define instdone_subslice_mask(dev_priv__) \
+	(INTEL_GEN(dev_priv__) == 7 ? \
+	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
+
+#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
+	for ((slice__) = 0, (subslice__) = 0; \
+	     (slice__) < I915_MAX_SLICES; \
+	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
+	       (slice__) += ((subslice__) == 0)) \
+		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
+			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
+
+struct intel_instdone {
+	u32 instdone;
+	/* The following exist only in the RCS engine */
+	u32 slice_common;
+	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
+	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
+};
 
 struct intel_engine_hangcheck {
 	u64 acthd;
 	u32 seqno;
-	int score;
 	enum intel_engine_hangcheck_action action;
+	unsigned long action_timestamp;
 	int deadlock;
-	u32 instdone[I915_NUM_INSTDONE_REG];
+	struct intel_instdone instdone;
+	bool stalled;
 };
 
 struct intel_ring {
@@ -130,6 +182,7 @@ struct i915_ctx_workarounds {
 };
 
 struct drm_i915_gem_request;
+struct intel_render_state;
 
 struct intel_engine_cs {
 	struct drm_i915_private *i915;
@@ -141,7 +194,6 @@ struct intel_engine_cs {
 		VCS2,	/* Keep instances of the same type engine together. */
 		VECS
 	} id;
-#define I915_NUM_ENGINES 5
 #define _VCS(n) (VCS + (n))
 	unsigned int exec_id;
 	enum intel_engine_hw_id {
@@ -152,10 +204,12 @@ struct intel_engine_cs {
 		VCS2_HW
 	} hw_id;
 	enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */
-	u64 fence_context;
 	u32		mmio_base;
 	unsigned int irq_shift;
 	struct intel_ring *buffer;
+	struct intel_timeline *timeline;
+
+	struct intel_render_state *render_state;
 
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
@@ -177,7 +231,7 @@ struct intel_engine_cs {
 		struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */
 		bool irq_posted;
 
-		spinlock_t lock; /* protects the lists of requests */
+		spinlock_t lock; /* protects the lists of requests; irqsafe */
 		struct rb_root waiters; /* sorted by retirement, priority */
 		struct rb_root signals; /* sorted by retirement */
 		struct intel_wait *first_wait; /* oldest waiter by retirement */
@@ -212,6 +266,11 @@ struct intel_engine_cs {
 	void		(*reset_hw)(struct intel_engine_cs *engine,
 				    struct drm_i915_gem_request *req);
 
+	int		(*context_pin)(struct intel_engine_cs *engine,
+				       struct i915_gem_context *ctx);
+	void		(*context_unpin)(struct intel_engine_cs *engine,
+					 struct i915_gem_context *ctx);
+	int		(*request_alloc)(struct drm_i915_gem_request *req);
 	int		(*init_context)(struct drm_i915_gem_request *req);
 
 	int		(*emit_flush)(struct drm_i915_gem_request *request,
@@ -225,7 +284,9 @@ struct intel_engine_cs {
 #define I915_DISPATCH_SECURE BIT(0)
 #define I915_DISPATCH_PINNED BIT(1)
 #define I915_DISPATCH_RS     BIT(2)
-	int		(*emit_request)(struct drm_i915_gem_request *req);
+	void		(*emit_breadcrumb)(struct drm_i915_gem_request *req,
+					   u32 *out);
+	int		emit_breadcrumb_sz;
 
 	/* Pass the request to the hardware queue (e.g. directly into
 	 * the legacy ringbuffer or to the end of an execlist).
@@ -235,6 +296,15 @@ struct intel_engine_cs {
 	 */
 	void		(*submit_request)(struct drm_i915_gem_request *req);
 
+	/* Call when the priority on a request has changed and it and its
+	 * dependencies may need rescheduling. Note the request itself may
+	 * not be ready to run!
+	 *
+	 * Called under the struct_mutex.
+	 */
+	void		(*schedule)(struct drm_i915_gem_request *request,
+				    int priority);
+
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
 	 * However, the up-to-date seqno is not always required and the last
@@ -282,8 +352,6 @@ struct intel_engine_cs {
 	 *  ie. transpose of f(x, y)
 	 */
 	struct {
-		u32	sync_seqno[I915_NUM_ENGINES-1];
-
 		union {
 #define GEN6_SEMAPHORE_LAST	VECS_HW
 #define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
@@ -300,44 +368,43 @@ struct intel_engine_cs {
 		/* AKA wait() */
 		int	(*sync_to)(struct drm_i915_gem_request *req,
 				   struct drm_i915_gem_request *signal);
-		int	(*signal)(struct drm_i915_gem_request *req);
+		u32	*(*signal)(struct drm_i915_gem_request *req, u32 *out);
 	} semaphore;
 
 	/* Execlists */
 	struct tasklet_struct irq_tasklet;
-	spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */
 	struct execlist_port {
 		struct drm_i915_gem_request *request;
 		unsigned int count;
 	} execlist_port[2];
-	struct list_head execlist_queue;
+	struct rb_root execlist_queue;
+	struct rb_node *execlist_first;
 	unsigned int fw_domains;
 	bool disable_lite_restore_wa;
 	bool preempt_wa;
 	u32 ctx_desc_template;
 
-	/**
-	 * List of breadcrumbs associated with GPU requests currently
-	 * outstanding.
+	/* Contexts are pinned whilst they are active on the GPU. The last
+	 * context executed remains active whilst the GPU is idle - the
+	 * switch away and write to the context object only occurs on the
+	 * next execution.  Contexts are only unpinned on retirement of the
+	 * following request ensuring that we can always write to the object
+	 * on the context switch even after idling. Across suspend, we switch
+	 * to the kernel context and trash it as the save may not happen
+	 * before the hardware is powered down.
 	 */
-	struct list_head request_list;
-
-	/**
-	 * Seqno of request most recently submitted to request_list.
-	 * Used exclusively by hang checker to avoid grabbing lock while
-	 * inspecting request list.
-	 */
-	u32 last_submitted_seqno;
-	u32 last_pending_seqno;
+	struct i915_gem_context *last_retired_context;
 
-	/* An RCU guarded pointer to the last request. No reference is
-	 * held to the request, users must carefully acquire a reference to
-	 * the request using i915_gem_active_get_rcu(), or hold the
-	 * struct_mutex.
+	/* We track the current MI_SET_CONTEXT in order to eliminate
+	 * redudant context switches. This presumes that requests are not
+	 * reordered! Or when they are the tracking is updated along with
+	 * the emission of individual requests into the legacy command
+	 * stream (ring).
 	 */
-	struct i915_gem_active last_request;
+	struct i915_gem_context *legacy_active_context;
 
-	struct i915_gem_context *last_context;
+	/* status_notifier: list of callbacks for context-switch changes */
+	struct atomic_notifier_head context_status_notifier;
 
 	struct intel_engine_hangcheck hangcheck;
 
@@ -368,39 +435,12 @@ struct intel_engine_cs {
 	u32 (*get_cmd_length_mask)(u32 cmd_header);
 };
 
-static inline bool
-intel_engine_initialized(const struct intel_engine_cs *engine)
-{
-	return engine->i915 != NULL;
-}
-
 static inline unsigned
 intel_engine_flag(const struct intel_engine_cs *engine)
 {
 	return 1 << engine->id;
 }
 
-static inline u32
-intel_engine_sync_index(struct intel_engine_cs *engine,
-			struct intel_engine_cs *other)
-{
-	int idx;
-
-	/*
-	 * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2;
-	 * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs;
-	 * bcs -> 0 = vecs, 1 = vcs2. 2 = rcs, 3 = vcs;
-	 * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs;
-	 * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs;
-	 */
-
-	idx = (other - engine) - 1;
-	if (idx < 0)
-		idx += I915_NUM_ENGINES;
-
-	return idx;
-}
-
 static inline void
 intel_flush_status_page(struct intel_engine_cs *engine, int reg)
 {
@@ -446,7 +486,7 @@ intel_write_status_page(struct intel_engine_cs *engine,
 
 struct intel_ring *
 intel_engine_create_ring(struct intel_engine_cs *engine, int size);
-int intel_ring_pin(struct intel_ring *ring);
+int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias);
 void intel_ring_unpin(struct intel_ring *ring);
 void intel_ring_free(struct intel_ring *ring);
 
@@ -455,8 +495,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine);
 
 void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
 
-int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request);
-
 int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
 int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
 
@@ -483,30 +521,29 @@ static inline void intel_ring_advance(struct intel_ring *ring)
 	 */
 }
 
-static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value)
+static inline u32
+intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+{
+	return pos & (ring->size - 1);
+}
+
+static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr)
 {
 	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
-	return value & (ring->size - 1);
+	u32 offset = addr - ring->vaddr;
+	return intel_ring_wrap(ring, offset);
 }
 
 int __intel_ring_space(int head, int tail, int size);
 void intel_ring_update_space(struct intel_ring *ring);
 
-void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
+void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
 
 void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
 int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
-static inline int intel_engine_idle(struct intel_engine_cs *engine,
-				    unsigned int flags)
-{
-	/* Wait upon the last request to be completed */
-	return i915_gem_active_wait_unlocked(&engine->last_request,
-					     flags, NULL, NULL);
-}
-
 int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine);
@@ -514,13 +551,30 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
 
 u64 intel_engine_get_active_head(struct intel_engine_cs *engine);
+u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine);
+
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
 {
 	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
 }
 
+static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
+{
+	/* We are only peeking at the tail of the submit queue (and not the
+	 * queue itself) in order to gain a hint as to the current active
+	 * state of the engine. Callers are not expected to be taking
+	 * engine->timeline->lock, nor are they expected to be concerned
+	 * wtih serialising this hint with anything, so document it as
+	 * a hint and nothing more.
+	 */
+	return READ_ONCE(engine->timeline->last_submitted_seqno);
+}
+
 int init_workarounds_ring(struct intel_engine_cs *engine);
 
+void intel_engine_get_instdone(struct intel_engine_cs *engine,
+			       struct intel_instdone *instdone);
+
 /*
  * Arbitrary size for largest possible 'add request' sequence. The code paths
  * are complex and variable. Empirical measurement shows that the worst case
@@ -586,12 +640,6 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
 
 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-unsigned int intel_kick_waiters(struct drm_i915_private *i915);
-unsigned int intel_kick_signalers(struct drm_i915_private *i915);
-
-static inline bool intel_engine_is_active(struct intel_engine_cs *engine)
-{
-	return i915_gem_active_isset(&engine->last_request);
-}
+unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915);
 
 #endif /* _INTEL_RINGBUFFER_H_ */