summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/intel_ringbuffer.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h')
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.h214
1 files changed, 131 insertions, 83 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ec0b4a0c605d..8cb2078c5bfc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -4,6 +4,7 @@
#include <linux/hashtable.h>
#include "i915_gem_batch_pool.h"
#include "i915_gem_request.h"
+#include "i915_gem_timeline.h"
#define I915_CMD_HASH_ORDER 9
@@ -64,22 +65,73 @@ struct intel_hw_status_page {
GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
enum intel_engine_hangcheck_action {
- HANGCHECK_IDLE = 0,
- HANGCHECK_WAIT,
- HANGCHECK_ACTIVE,
- HANGCHECK_KICK,
- HANGCHECK_HUNG,
+ ENGINE_IDLE = 0,
+ ENGINE_WAIT,
+ ENGINE_ACTIVE_SEQNO,
+ ENGINE_ACTIVE_HEAD,
+ ENGINE_ACTIVE_SUBUNITS,
+ ENGINE_WAIT_KICK,
+ ENGINE_DEAD,
};
-#define HANGCHECK_SCORE_RING_HUNG 31
+static inline const char *
+hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
+{
+ switch (a) {
+ case ENGINE_IDLE:
+ return "idle";
+ case ENGINE_WAIT:
+ return "wait";
+ case ENGINE_ACTIVE_SEQNO:
+ return "active seqno";
+ case ENGINE_ACTIVE_HEAD:
+ return "active head";
+ case ENGINE_ACTIVE_SUBUNITS:
+ return "active subunits";
+ case ENGINE_WAIT_KICK:
+ return "wait kick";
+ case ENGINE_DEAD:
+ return "dead";
+ }
+
+ return "unknown";
+}
+
+#define I915_MAX_SLICES 3
+#define I915_MAX_SUBSLICES 3
+
+#define instdone_slice_mask(dev_priv__) \
+ (INTEL_GEN(dev_priv__) == 7 ? \
+ 1 : INTEL_INFO(dev_priv__)->sseu.slice_mask)
+
+#define instdone_subslice_mask(dev_priv__) \
+ (INTEL_GEN(dev_priv__) == 7 ? \
+ 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
+
+#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
+ for ((slice__) = 0, (subslice__) = 0; \
+ (slice__) < I915_MAX_SLICES; \
+ (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
+ (slice__) += ((subslice__) == 0)) \
+ for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
+ (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
+
+struct intel_instdone {
+ u32 instdone;
+ /* The following exist only in the RCS engine */
+ u32 slice_common;
+ u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
+ u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
+};
struct intel_engine_hangcheck {
u64 acthd;
u32 seqno;
- int score;
enum intel_engine_hangcheck_action action;
+ unsigned long action_timestamp;
int deadlock;
- u32 instdone[I915_NUM_INSTDONE_REG];
+ struct intel_instdone instdone;
+ bool stalled;
};
struct intel_ring {
@@ -130,6 +182,7 @@ struct i915_ctx_workarounds {
};
struct drm_i915_gem_request;
+struct intel_render_state;
struct intel_engine_cs {
struct drm_i915_private *i915;
@@ -141,7 +194,6 @@ struct intel_engine_cs {
VCS2, /* Keep instances of the same type engine together. */
VECS
} id;
-#define I915_NUM_ENGINES 5
#define _VCS(n) (VCS + (n))
unsigned int exec_id;
enum intel_engine_hw_id {
@@ -152,10 +204,12 @@ struct intel_engine_cs {
VCS2_HW
} hw_id;
enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */
- u64 fence_context;
u32 mmio_base;
unsigned int irq_shift;
struct intel_ring *buffer;
+ struct intel_timeline *timeline;
+
+ struct intel_render_state *render_state;
/* Rather than have every client wait upon all user interrupts,
* with the herd waking after every interrupt and each doing the
@@ -177,7 +231,7 @@ struct intel_engine_cs {
struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */
bool irq_posted;
- spinlock_t lock; /* protects the lists of requests */
+ spinlock_t lock; /* protects the lists of requests; irqsafe */
struct rb_root waiters; /* sorted by retirement, priority */
struct rb_root signals; /* sorted by retirement */
struct intel_wait *first_wait; /* oldest waiter by retirement */
@@ -212,6 +266,11 @@ struct intel_engine_cs {
void (*reset_hw)(struct intel_engine_cs *engine,
struct drm_i915_gem_request *req);
+ int (*context_pin)(struct intel_engine_cs *engine,
+ struct i915_gem_context *ctx);
+ void (*context_unpin)(struct intel_engine_cs *engine,
+ struct i915_gem_context *ctx);
+ int (*request_alloc)(struct drm_i915_gem_request *req);
int (*init_context)(struct drm_i915_gem_request *req);
int (*emit_flush)(struct drm_i915_gem_request *request,
@@ -225,7 +284,9 @@ struct intel_engine_cs {
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
#define I915_DISPATCH_RS BIT(2)
- int (*emit_request)(struct drm_i915_gem_request *req);
+ void (*emit_breadcrumb)(struct drm_i915_gem_request *req,
+ u32 *out);
+ int emit_breadcrumb_sz;
/* Pass the request to the hardware queue (e.g. directly into
* the legacy ringbuffer or to the end of an execlist).
@@ -235,6 +296,15 @@ struct intel_engine_cs {
*/
void (*submit_request)(struct drm_i915_gem_request *req);
+ /* Call when the priority on a request has changed and it and its
+ * dependencies may need rescheduling. Note the request itself may
+ * not be ready to run!
+ *
+ * Called under the struct_mutex.
+ */
+ void (*schedule)(struct drm_i915_gem_request *request,
+ int priority);
+
/* Some chipsets are not quite as coherent as advertised and need
* an expensive kick to force a true read of the up-to-date seqno.
* However, the up-to-date seqno is not always required and the last
@@ -282,8 +352,6 @@ struct intel_engine_cs {
* ie. transpose of f(x, y)
*/
struct {
- u32 sync_seqno[I915_NUM_ENGINES-1];
-
union {
#define GEN6_SEMAPHORE_LAST VECS_HW
#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1)
@@ -300,44 +368,43 @@ struct intel_engine_cs {
/* AKA wait() */
int (*sync_to)(struct drm_i915_gem_request *req,
struct drm_i915_gem_request *signal);
- int (*signal)(struct drm_i915_gem_request *req);
+ u32 *(*signal)(struct drm_i915_gem_request *req, u32 *out);
} semaphore;
/* Execlists */
struct tasklet_struct irq_tasklet;
- spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */
struct execlist_port {
struct drm_i915_gem_request *request;
unsigned int count;
} execlist_port[2];
- struct list_head execlist_queue;
+ struct rb_root execlist_queue;
+ struct rb_node *execlist_first;
unsigned int fw_domains;
bool disable_lite_restore_wa;
bool preempt_wa;
u32 ctx_desc_template;
- /**
- * List of breadcrumbs associated with GPU requests currently
- * outstanding.
+ /* Contexts are pinned whilst they are active on the GPU. The last
+ * context executed remains active whilst the GPU is idle - the
+ * switch away and write to the context object only occurs on the
+ * next execution. Contexts are only unpinned on retirement of the
+ * following request ensuring that we can always write to the object
+ * on the context switch even after idling. Across suspend, we switch
+ * to the kernel context and trash it as the save may not happen
+ * before the hardware is powered down.
*/
- struct list_head request_list;
-
- /**
- * Seqno of request most recently submitted to request_list.
- * Used exclusively by hang checker to avoid grabbing lock while
- * inspecting request list.
- */
- u32 last_submitted_seqno;
- u32 last_pending_seqno;
+ struct i915_gem_context *last_retired_context;
- /* An RCU guarded pointer to the last request. No reference is
- * held to the request, users must carefully acquire a reference to
- * the request using i915_gem_active_get_rcu(), or hold the
- * struct_mutex.
+ /* We track the current MI_SET_CONTEXT in order to eliminate
+ * redudant context switches. This presumes that requests are not
+ * reordered! Or when they are the tracking is updated along with
+ * the emission of individual requests into the legacy command
+ * stream (ring).
*/
- struct i915_gem_active last_request;
+ struct i915_gem_context *legacy_active_context;
- struct i915_gem_context *last_context;
+ /* status_notifier: list of callbacks for context-switch changes */
+ struct atomic_notifier_head context_status_notifier;
struct intel_engine_hangcheck hangcheck;
@@ -368,39 +435,12 @@ struct intel_engine_cs {
u32 (*get_cmd_length_mask)(u32 cmd_header);
};
-static inline bool
-intel_engine_initialized(const struct intel_engine_cs *engine)
-{
- return engine->i915 != NULL;
-}
-
static inline unsigned
intel_engine_flag(const struct intel_engine_cs *engine)
{
return 1 << engine->id;
}
-static inline u32
-intel_engine_sync_index(struct intel_engine_cs *engine,
- struct intel_engine_cs *other)
-{
- int idx;
-
- /*
- * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2;
- * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs;
- * bcs -> 0 = vecs, 1 = vcs2. 2 = rcs, 3 = vcs;
- * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs;
- * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs;
- */
-
- idx = (other - engine) - 1;
- if (idx < 0)
- idx += I915_NUM_ENGINES;
-
- return idx;
-}
-
static inline void
intel_flush_status_page(struct intel_engine_cs *engine, int reg)
{
@@ -446,7 +486,7 @@ intel_write_status_page(struct intel_engine_cs *engine,
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
-int intel_ring_pin(struct intel_ring *ring);
+int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);
@@ -455,8 +495,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine);
void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
-int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request);
-
int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
@@ -483,30 +521,29 @@ static inline void intel_ring_advance(struct intel_ring *ring)
*/
}
-static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value)
+static inline u32
+intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+{
+ return pos & (ring->size - 1);
+}
+
+static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr)
{
/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
- return value & (ring->size - 1);
+ u32 offset = addr - ring->vaddr;
+ return intel_ring_wrap(ring, offset);
}
int __intel_ring_space(int head, int tail, int size);
void intel_ring_update_space(struct intel_ring *ring);
-void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
+void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
void intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);
-static inline int intel_engine_idle(struct intel_engine_cs *engine,
- unsigned int flags)
-{
- /* Wait upon the last request to be completed */
- return i915_gem_active_wait_unlocked(&engine->last_request,
- flags, NULL, NULL);
-}
-
int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine);
@@ -514,13 +551,30 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
u64 intel_engine_get_active_head(struct intel_engine_cs *engine);
+u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine);
+
static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}
+static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
+{
+ /* We are only peeking at the tail of the submit queue (and not the
+ * queue itself) in order to gain a hint as to the current active
+ * state of the engine. Callers are not expected to be taking
+ * engine->timeline->lock, nor are they expected to be concerned
+ * wtih serialising this hint with anything, so document it as
+ * a hint and nothing more.
+ */
+ return READ_ONCE(engine->timeline->last_submitted_seqno);
+}
+
int init_workarounds_ring(struct intel_engine_cs *engine);
+void intel_engine_get_instdone(struct intel_engine_cs *engine,
+ struct intel_instdone *instdone);
+
/*
* Arbitrary size for largest possible 'add request' sequence. The code paths
* are complex and variable. Empirical measurement shows that the worst case
@@ -586,12 +640,6 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-unsigned int intel_kick_waiters(struct drm_i915_private *i915);
-unsigned int intel_kick_signalers(struct drm_i915_private *i915);
-
-static inline bool intel_engine_is_active(struct intel_engine_cs *engine)
-{
- return i915_gem_active_isset(&engine->last_request);
-}
+unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915);
#endif /* _INTEL_RINGBUFFER_H_ */