diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h')
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.h | 214 |
1 files changed, 131 insertions, 83 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index ec0b4a0c605d..8cb2078c5bfc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -4,6 +4,7 @@ #include <linux/hashtable.h> #include "i915_gem_batch_pool.h" #include "i915_gem_request.h" +#include "i915_gem_timeline.h" #define I915_CMD_HASH_ORDER 9 @@ -64,22 +65,73 @@ struct intel_hw_status_page { GEN8_SEMAPHORE_OFFSET(from, (__ring)->id)) enum intel_engine_hangcheck_action { - HANGCHECK_IDLE = 0, - HANGCHECK_WAIT, - HANGCHECK_ACTIVE, - HANGCHECK_KICK, - HANGCHECK_HUNG, + ENGINE_IDLE = 0, + ENGINE_WAIT, + ENGINE_ACTIVE_SEQNO, + ENGINE_ACTIVE_HEAD, + ENGINE_ACTIVE_SUBUNITS, + ENGINE_WAIT_KICK, + ENGINE_DEAD, }; -#define HANGCHECK_SCORE_RING_HUNG 31 +static inline const char * +hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) +{ + switch (a) { + case ENGINE_IDLE: + return "idle"; + case ENGINE_WAIT: + return "wait"; + case ENGINE_ACTIVE_SEQNO: + return "active seqno"; + case ENGINE_ACTIVE_HEAD: + return "active head"; + case ENGINE_ACTIVE_SUBUNITS: + return "active subunits"; + case ENGINE_WAIT_KICK: + return "wait kick"; + case ENGINE_DEAD: + return "dead"; + } + + return "unknown"; +} + +#define I915_MAX_SLICES 3 +#define I915_MAX_SUBSLICES 3 + +#define instdone_slice_mask(dev_priv__) \ + (INTEL_GEN(dev_priv__) == 7 ? \ + 1 : INTEL_INFO(dev_priv__)->sseu.slice_mask) + +#define instdone_subslice_mask(dev_priv__) \ + (INTEL_GEN(dev_priv__) == 7 ? \ + 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask) + +#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ + for ((slice__) = 0, (subslice__) = 0; \ + (slice__) < I915_MAX_SLICES; \ + (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \ + (slice__) += ((subslice__) == 0)) \ + for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \ + (BIT(subslice__) & instdone_subslice_mask(dev_priv__))) + +struct intel_instdone { + u32 instdone; + /* The following exist only in the RCS engine */ + u32 slice_common; + u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; + u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; +}; struct intel_engine_hangcheck { u64 acthd; u32 seqno; - int score; enum intel_engine_hangcheck_action action; + unsigned long action_timestamp; int deadlock; - u32 instdone[I915_NUM_INSTDONE_REG]; + struct intel_instdone instdone; + bool stalled; }; struct intel_ring { @@ -130,6 +182,7 @@ struct i915_ctx_workarounds { }; struct drm_i915_gem_request; +struct intel_render_state; struct intel_engine_cs { struct drm_i915_private *i915; @@ -141,7 +194,6 @@ struct intel_engine_cs { VCS2, /* Keep instances of the same type engine together. */ VECS } id; -#define I915_NUM_ENGINES 5 #define _VCS(n) (VCS + (n)) unsigned int exec_id; enum intel_engine_hw_id { @@ -152,10 +204,12 @@ struct intel_engine_cs { VCS2_HW } hw_id; enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */ - u64 fence_context; u32 mmio_base; unsigned int irq_shift; struct intel_ring *buffer; + struct intel_timeline *timeline; + + struct intel_render_state *render_state; /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the @@ -177,7 +231,7 @@ struct intel_engine_cs { struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */ bool irq_posted; - spinlock_t lock; /* protects the lists of requests */ + spinlock_t lock; /* protects the lists of requests; irqsafe */ struct rb_root waiters; /* sorted by retirement, priority */ struct rb_root signals; /* sorted by retirement */ struct intel_wait *first_wait; /* oldest waiter by retirement */ @@ -212,6 +266,11 @@ struct intel_engine_cs { void (*reset_hw)(struct intel_engine_cs *engine, struct drm_i915_gem_request *req); + int (*context_pin)(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); + void (*context_unpin)(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); + int (*request_alloc)(struct drm_i915_gem_request *req); int (*init_context)(struct drm_i915_gem_request *req); int (*emit_flush)(struct drm_i915_gem_request *request, @@ -225,7 +284,9 @@ struct intel_engine_cs { #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_RS BIT(2) - int (*emit_request)(struct drm_i915_gem_request *req); + void (*emit_breadcrumb)(struct drm_i915_gem_request *req, + u32 *out); + int emit_breadcrumb_sz; /* Pass the request to the hardware queue (e.g. directly into * the legacy ringbuffer or to the end of an execlist). @@ -235,6 +296,15 @@ struct intel_engine_cs { */ void (*submit_request)(struct drm_i915_gem_request *req); + /* Call when the priority on a request has changed and it and its + * dependencies may need rescheduling. Note the request itself may + * not be ready to run! + * + * Called under the struct_mutex. + */ + void (*schedule)(struct drm_i915_gem_request *request, + int priority); + /* Some chipsets are not quite as coherent as advertised and need * an expensive kick to force a true read of the up-to-date seqno. * However, the up-to-date seqno is not always required and the last @@ -282,8 +352,6 @@ struct intel_engine_cs { * ie. transpose of f(x, y) */ struct { - u32 sync_seqno[I915_NUM_ENGINES-1]; - union { #define GEN6_SEMAPHORE_LAST VECS_HW #define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1) @@ -300,44 +368,43 @@ struct intel_engine_cs { /* AKA wait() */ int (*sync_to)(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal); - int (*signal)(struct drm_i915_gem_request *req); + u32 *(*signal)(struct drm_i915_gem_request *req, u32 *out); } semaphore; /* Execlists */ struct tasklet_struct irq_tasklet; - spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */ struct execlist_port { struct drm_i915_gem_request *request; unsigned int count; } execlist_port[2]; - struct list_head execlist_queue; + struct rb_root execlist_queue; + struct rb_node *execlist_first; unsigned int fw_domains; bool disable_lite_restore_wa; bool preempt_wa; u32 ctx_desc_template; - /** - * List of breadcrumbs associated with GPU requests currently - * outstanding. + /* Contexts are pinned whilst they are active on the GPU. The last + * context executed remains active whilst the GPU is idle - the + * switch away and write to the context object only occurs on the + * next execution. Contexts are only unpinned on retirement of the + * following request ensuring that we can always write to the object + * on the context switch even after idling. Across suspend, we switch + * to the kernel context and trash it as the save may not happen + * before the hardware is powered down. */ - struct list_head request_list; - - /** - * Seqno of request most recently submitted to request_list. - * Used exclusively by hang checker to avoid grabbing lock while - * inspecting request list. - */ - u32 last_submitted_seqno; - u32 last_pending_seqno; + struct i915_gem_context *last_retired_context; - /* An RCU guarded pointer to the last request. No reference is - * held to the request, users must carefully acquire a reference to - * the request using i915_gem_active_get_rcu(), or hold the - * struct_mutex. + /* We track the current MI_SET_CONTEXT in order to eliminate + * redudant context switches. This presumes that requests are not + * reordered! Or when they are the tracking is updated along with + * the emission of individual requests into the legacy command + * stream (ring). */ - struct i915_gem_active last_request; + struct i915_gem_context *legacy_active_context; - struct i915_gem_context *last_context; + /* status_notifier: list of callbacks for context-switch changes */ + struct atomic_notifier_head context_status_notifier; struct intel_engine_hangcheck hangcheck; @@ -368,39 +435,12 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; -static inline bool -intel_engine_initialized(const struct intel_engine_cs *engine) -{ - return engine->i915 != NULL; -} - static inline unsigned intel_engine_flag(const struct intel_engine_cs *engine) { return 1 << engine->id; } -static inline u32 -intel_engine_sync_index(struct intel_engine_cs *engine, - struct intel_engine_cs *other) -{ - int idx; - - /* - * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2; - * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs; - * bcs -> 0 = vecs, 1 = vcs2. 2 = rcs, 3 = vcs; - * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs; - * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs; - */ - - idx = (other - engine) - 1; - if (idx < 0) - idx += I915_NUM_ENGINES; - - return idx; -} - static inline void intel_flush_status_page(struct intel_engine_cs *engine, int reg) { @@ -446,7 +486,7 @@ intel_write_status_page(struct intel_engine_cs *engine, struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size); -int intel_ring_pin(struct intel_ring *ring); +int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias); void intel_ring_unpin(struct intel_ring *ring); void intel_ring_free(struct intel_ring *ring); @@ -455,8 +495,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine); void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); -int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request); - int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); @@ -483,30 +521,29 @@ static inline void intel_ring_advance(struct intel_ring *ring) */ } -static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value) +static inline u32 +intel_ring_wrap(const struct intel_ring *ring, u32 pos) +{ + return pos & (ring->size - 1); +} + +static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ - return value & (ring->size - 1); + u32 offset = addr - ring->vaddr; + return intel_ring_wrap(ring, offset); } int __intel_ring_space(int head, int tail, int size); void intel_ring_update_space(struct intel_ring *ring); -void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno); +void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); void intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); int intel_engine_create_scratch(struct intel_engine_cs *engine, int size); void intel_engine_cleanup_common(struct intel_engine_cs *engine); -static inline int intel_engine_idle(struct intel_engine_cs *engine, - unsigned int flags) -{ - /* Wait upon the last request to be completed */ - return i915_gem_active_wait_unlocked(&engine->last_request, - flags, NULL, NULL); -} - int intel_init_render_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine); @@ -514,13 +551,30 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); u64 intel_engine_get_active_head(struct intel_engine_cs *engine); +u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine); + static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) { return intel_read_status_page(engine, I915_GEM_HWS_INDEX); } +static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) +{ + /* We are only peeking at the tail of the submit queue (and not the + * queue itself) in order to gain a hint as to the current active + * state of the engine. Callers are not expected to be taking + * engine->timeline->lock, nor are they expected to be concerned + * wtih serialising this hint with anything, so document it as + * a hint and nothing more. + */ + return READ_ONCE(engine->timeline->last_submitted_seqno); +} + int init_workarounds_ring(struct intel_engine_cs *engine); +void intel_engine_get_instdone(struct intel_engine_cs *engine, + struct intel_instdone *instdone); + /* * Arbitrary size for largest possible 'add request' sequence. The code paths * are complex and variable. Empirical measurement shows that the worst case @@ -586,12 +640,6 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine) void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -unsigned int intel_kick_waiters(struct drm_i915_private *i915); -unsigned int intel_kick_signalers(struct drm_i915_private *i915); - -static inline bool intel_engine_is_active(struct intel_engine_cs *engine) -{ - return i915_gem_active_isset(&engine->last_request); -} +unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915); #endif /* _INTEL_RINGBUFFER_H_ */ |