summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
diff options
context:
space:
mode:
authorRodrigo Vivi <rodrigo.vivi@intel.com>2022-02-23 14:19:43 -0500
committerRodrigo Vivi <rodrigo.vivi@intel.com>2022-02-23 15:03:51 -0500
commit30424ebae8df0f786835e7a31ad790fa00764f35 (patch)
treeef8d835947f4bb8bb608ae2841c34f0d6cb93030 /drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
parent6d8ebef53c2ccea482ed77769b7b5cbb453b7715 (diff)
parent154cfae6158141b18d65abb0db679bb51a8294e7 (diff)
Merge tag 'drm-intel-gt-next-2022-02-17' of git://anongit.freedesktop.org/drm/drm-intel into drm-intel-next
UAPI Changes: - Weak parallel submission support for execlists Minimal implementation of the parallel submission support for execlists backend that was previously only implemented for GuC. Support one sibling non-virtual engine. Core Changes: - Two backmerges of drm/drm-next for header file renames/changes and i915_regs reorganization Driver Changes: - Add new DG2 subplatform: DG2-G12 (Matt R) - Add new DG2 workarounds (Matt R, Ram, Bruce) - Handle pre-programmed WOPCM registers for DG2+ (Daniele) - Update guc shim control programming on XeHP SDV+ (Daniele) - Add RPL-S C0/D0 stepping information (Anusha) - Improve GuC ADS initialization to work on ARM64 on dGFX (Lucas) - Fix KMD and GuC race on accessing PMU busyness (Umesh) - Use PM timestamp instead of RING TIMESTAMP for reference in PMU with GuC (Umesh) - Report error on invalid reset notification from GuC (John) - Avoid WARN splat by holding RPM wakelock during PXP unbind (Juston) - Fixes to parallel submission implementation (Matt B.) - Improve GuC loading status check/error reports (John) - Tweak TTM LRU priority hint selection (Matt A.) - Align the plane_vma to min_page_size of stolen mem (Ram) - Introduce vma resources and implement async unbinding (Thomas) - Use struct vma_resource instead of struct vma_snapshot (Thomas) - Return some TTM accel move errors instead of trying memcpy move (Thomas) - Fix a race between vma / object destruction and unbinding (Thomas) - Remove short-term pins from execbuf (Maarten) - Update to GuC version 69.0.3 (John, Michal Wa.) - Improvements to GT reset paths in GuC backend (Matt B.) - Use shrinker_release_pages instead of writeback in shmem object hooks (Matt A., Tvrtko) - Use trylock instead of blocking lock when freeing GEM objects (Maarten) - Allocate intel_engine_coredump_alloc with ALLOW_FAIL (Matt B.) - Fixes to object unmapping and purging (Matt A) - Check for wedged device in GuC backend (John) - Avoid lockdep splat by locking dpt_obj around set_cache_level (Maarten) - Allow dead vm to unbind vma's without lock (Maarten) - s/engine->i915/i915/ for DG2 engine workarounds (Matt R) - Use to_gt() helper for GGTT accesses (Michal Wi.) - Selftest improvements (Matt B., Thomas, Ram) - Coding style and compiler warning fixes (Matt B., Jasmine, Andi, Colin, Gustavo, Dan) From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/Yg4i2aCZvvee5Eai@jlahtine-mobl.ger.corp.intel.com Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> [Fixed conflicts while applying, using the fixups/drm-intel-gt-next.patch from drm-rerere's 1f2b1742abdd ("2022y-02m-23d-16h-07m-57s UTC: drm-tip rerere cache update")]
Diffstat (limited to 'drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c')
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c262
1 files changed, 186 insertions, 76 deletions
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 0cb51c5bb765..b3a429a92c0d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1115,6 +1115,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
if (new_start == lower_32_bits(*prev_start))
return;
+ /*
+ * When gt is unparked, we update the gt timestamp and start the ping
+ * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
+ * is unparked, all switched in contexts will have a start time that is
+ * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
+ *
+ * If neither gt_stamp nor new_start has rolled over, then the
+ * gt_stamp_hi does not need to be adjusted, however if one of them has
+ * rolled over, we need to adjust gt_stamp_hi accordingly.
+ *
+ * The below conditions address the cases of new_start rollover and
+ * gt_stamp_last rollover respectively.
+ */
if (new_start < gt_stamp_last &&
(new_start - gt_stamp_last) <= POLL_TIME_CLKS)
gt_stamp_hi++;
@@ -1126,17 +1139,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}
-static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+/*
+ * GuC updates shared memory and KMD reads it. Since this is not synchronized,
+ * we run into a race where the value read is inconsistent. Sometimes the
+ * inconsistency is in reading the upper MSB bytes of the last_in value when
+ * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
+ * 24 bits are zero. Since these are non-zero values, it is non-trivial to
+ * determine validity of these values. Instead we read the values multiple times
+ * until they are consistent. In test runs, 3 attempts results in consistent
+ * values. The upper bound is set to 6 attempts and may need to be tuned as per
+ * any new occurences.
+ */
+static void __get_engine_usage_record(struct intel_engine_cs *engine,
+ u32 *last_in, u32 *id, u32 *total)
{
struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
+ int i = 0;
+
+ do {
+ *last_in = READ_ONCE(rec->last_switch_in_stamp);
+ *id = READ_ONCE(rec->current_context_index);
+ *total = READ_ONCE(rec->total_runtime);
+
+ if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
+ READ_ONCE(rec->current_context_index) == *id &&
+ READ_ONCE(rec->total_runtime) == *total)
+ break;
+ } while (++i < 6);
+}
+
+static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+{
struct intel_engine_guc_stats *stats = &engine->stats.guc;
struct intel_guc *guc = &engine->gt->uc.guc;
- u32 last_switch = rec->last_switch_in_stamp;
- u32 ctx_id = rec->current_context_index;
- u32 total = rec->total_runtime;
+ u32 last_switch, ctx_id, total;
lockdep_assert_held(&guc->timestamp.lock);
+ __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
+
stats->running = ctx_id != ~0U && last_switch;
if (stats->running)
__extend_last_switch(guc, &stats->start_gt_clk, last_switch);
@@ -1151,23 +1192,51 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
}
}
-static void guc_update_pm_timestamp(struct intel_guc *guc,
- struct intel_engine_cs *engine,
- ktime_t *now)
+static u32 gpm_timestamp_shift(struct intel_gt *gt)
+{
+ intel_wakeref_t wakeref;
+ u32 reg, shift;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
+
+ shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
+ GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
+
+ return 3 - shift;
+}
+
+static u64 gpm_timestamp(struct intel_gt *gt)
{
- u32 gt_stamp_now, gt_stamp_hi;
+ u32 lo, hi, old_hi, loop = 0;
+
+ hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
+ do {
+ lo = intel_uncore_read(gt->uncore, MISC_STATUS0);
+ old_hi = hi;
+ hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
+ } while (old_hi != hi && loop++ < 2);
+
+ return ((u64)hi << 32) | lo;
+}
+
+static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ u32 gt_stamp_lo, gt_stamp_hi;
+ u64 gpm_ts;
lockdep_assert_held(&guc->timestamp.lock);
gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
- gt_stamp_now = intel_uncore_read(engine->uncore,
- RING_TIMESTAMP(engine->mmio_base));
+ gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift;
+ gt_stamp_lo = lower_32_bits(gpm_ts);
*now = ktime_get();
- if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp))
+ if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
gt_stamp_hi++;
- guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now;
+ guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
}
/*
@@ -1210,8 +1279,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
stats_saved = *stats;
gt_stamp_saved = guc->timestamp.gt_stamp;
+ /*
+ * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
+ * start_gt_clk' calculation below for active engines.
+ */
guc_update_engine_gt_clks(engine);
- guc_update_pm_timestamp(guc, engine, now);
+ guc_update_pm_timestamp(guc, now);
intel_gt_pm_put_async(gt);
if (i915_reset_count(gpu_error) != reset_count) {
*stats = stats_saved;
@@ -1243,8 +1316,8 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
spin_lock_irqsave(&guc->timestamp.lock, flags);
+ guc_update_pm_timestamp(guc, &unused);
for_each_engine(engine, gt, id) {
- guc_update_pm_timestamp(guc, engine, &unused);
guc_update_engine_gt_clks(engine);
engine->stats.guc.prev_total = 0;
}
@@ -1261,10 +1334,11 @@ static void __update_guc_busyness_stats(struct intel_guc *guc)
ktime_t unused;
spin_lock_irqsave(&guc->timestamp.lock, flags);
- for_each_engine(engine, gt, id) {
- guc_update_pm_timestamp(guc, engine, &unused);
+
+ guc_update_pm_timestamp(guc, &unused);
+ for_each_engine(engine, gt, id)
guc_update_engine_gt_clks(engine);
- }
+
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}
@@ -1337,10 +1411,15 @@ void intel_guc_busyness_park(struct intel_gt *gt)
void intel_guc_busyness_unpark(struct intel_gt *gt)
{
struct intel_guc *guc = &gt->uc.guc;
+ unsigned long flags;
+ ktime_t unused;
if (!guc_submission_initialized(guc))
return;
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+ guc_update_pm_timestamp(guc, &unused);
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
guc->timestamp.ping_delay);
}
@@ -1351,7 +1430,8 @@ submission_disabled(struct intel_guc *guc)
struct i915_sched_engine * const sched_engine = guc->sched_engine;
return unlikely(!sched_engine ||
- !__tasklet_is_enabled(&sched_engine->tasklet));
+ !__tasklet_is_enabled(&sched_engine->tasklet) ||
+ intel_gt_is_wedged(guc_to_gt(guc)));
}
static void disable_submission(struct intel_guc *guc)
@@ -1396,8 +1476,6 @@ static void guc_flush_destroyed_contexts(struct intel_guc *guc);
void intel_guc_submission_reset_prepare(struct intel_guc *guc)
{
- int i;
-
if (unlikely(!guc_submission_initialized(guc))) {
/* Reset called during driver load? GuC not yet initialised! */
return;
@@ -1414,21 +1492,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
guc_flush_submissions(guc);
guc_flush_destroyed_contexts(guc);
-
- /*
- * Handle any outstanding G2Hs before reset. Call IRQ handler directly
- * each pass as interrupt have been disabled. We always scrub for
- * outstanding G2H as it is possible for outstanding_submission_g2h to
- * be incremented after the context state update.
- */
- for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) {
- intel_guc_to_host_event_handler(guc);
-#define wait_for_reset(guc, wait_var) \
- intel_guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20))
- do {
- wait_for_reset(guc, &guc->outstanding_submission_g2h);
- } while (!list_empty(&guc->ct.requests.incoming));
- }
+ flush_work(&guc->ct.requests.worker);
scrub_guc_desc_for_outstanding_g2h(guc);
}
@@ -1533,7 +1597,6 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
unsigned long flags;
u32 head;
int i, number_children = ce->parallel.number_children;
- bool skip = false;
struct intel_context *parent = ce;
GEM_BUG_ON(intel_context_is_child(ce));
@@ -1544,23 +1607,10 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
* GuC will implicitly mark the context as non-schedulable when it sends
* the reset notification. Make sure our state reflects this change. The
* context will be marked enabled on resubmission.
- *
- * XXX: If the context is reset as a result of the request cancellation
- * this G2H is received after the schedule disable complete G2H which is
- * wrong as this creates a race between the request cancellation code
- * re-submitting the context and this G2H handler. This is a bug in the
- * GuC but can be worked around in the meantime but converting this to a
- * NOP if a pending enable is in flight as this indicates that a request
- * cancellation has occurred.
*/
spin_lock_irqsave(&ce->guc_state.lock, flags);
- if (likely(!context_pending_enable(ce)))
- clr_context_enabled(ce);
- else
- skip = true;
+ clr_context_enabled(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- if (unlikely(skip))
- goto out_put;
/*
* For each context in the relationship find the hanging request
@@ -1592,7 +1642,6 @@ next_context:
}
__unwind_incomplete_requests(parent);
-out_put:
intel_context_put(parent);
}
@@ -1727,7 +1776,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
{
/* Reset called during driver load or during wedge? */
if (unlikely(!guc_submission_initialized(guc) ||
- test_bit(I915_WEDGED, &guc_to_gt(guc)->reset.flags))) {
+ intel_gt_is_wedged(guc_to_gt(guc)))) {
return;
}
@@ -1746,6 +1795,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
}
static void destroyed_worker_func(struct work_struct *w);
+static void reset_fail_worker_func(struct work_struct *w);
/*
* Set up the memory resources to be shared with the GuC (via the GGTT)
@@ -1776,6 +1826,8 @@ int intel_guc_submission_init(struct intel_guc *guc)
INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
INIT_WORK(&guc->submission_state.destroyed_worker,
destroyed_worker_func);
+ INIT_WORK(&guc->submission_state.reset_fail_worker,
+ reset_fail_worker_func);
guc->submission_state.guc_ids_bitmap =
bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
@@ -1785,6 +1837,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
spin_lock_init(&guc->timestamp.lock);
INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
+ guc->timestamp.shift = gpm_timestamp_shift(gt);
return 0;
}
@@ -2531,12 +2584,6 @@ static void guc_context_cancel_request(struct intel_context *ce,
true);
}
- /*
- * XXX: Racey if context is reset, see comment in
- * __guc_reset_context().
- */
- flush_work(&ce_to_guc(ce)->ct.requests.worker);
-
guc_context_unblock(block_context);
intel_context_put(ce);
}
@@ -3250,8 +3297,6 @@ static void guc_parent_context_unpin(struct intel_context *ce)
GEM_BUG_ON(!intel_context_is_parent(ce));
GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
- if (ce->parallel.last_rq)
- i915_request_put(ce->parallel.last_rq);
unpin_guc_id(guc, ce);
lrc_unpin(ce);
}
@@ -3973,14 +4018,14 @@ static void guc_handle_context_reset(struct intel_guc *guc,
{
trace_intel_context_reset(ce);
- /*
- * XXX: Racey if request cancellation has occurred, see comment in
- * __guc_reset_context().
- */
- if (likely(!intel_context_is_banned(ce) &&
- !context_blocked(ce))) {
+ if (likely(!intel_context_is_banned(ce))) {
capture_error_state(guc, ce);
guc_context_replay(ce);
+ } else {
+ drm_err(&guc_to_gt(guc)->i915->drm,
+ "Invalid GuC engine reset notificaion for 0x%04X on %s: banned = %d, blocked = %d",
+ ce->guc_id.id, ce->engine->name, intel_context_is_banned(ce),
+ context_blocked(ce));
}
}
@@ -4019,6 +4064,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
return 0;
}
+int intel_guc_error_capture_process_msg(struct intel_guc *guc,
+ const u32 *msg, u32 len)
+{
+ int status;
+
+ if (unlikely(len != 1)) {
+ drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
+ return -EPROTO;
+ }
+
+ status = msg[0];
+ drm_info(&guc_to_gt(guc)->i915->drm, "Got error capture: status = %d", status);
+
+ /* FIXME: Do something with the capture */
+
+ return 0;
+}
+
static struct intel_engine_cs *
guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
{
@@ -4031,6 +4094,26 @@ guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
return gt->engine_class[engine_class][instance];
}
+static void reset_fail_worker_func(struct work_struct *w)
+{
+ struct intel_guc *guc = container_of(w, struct intel_guc,
+ submission_state.reset_fail_worker);
+ struct intel_gt *gt = guc_to_gt(guc);
+ intel_engine_mask_t reset_fail_mask;
+ unsigned long flags;
+
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ reset_fail_mask = guc->submission_state.reset_fail_mask;
+ guc->submission_state.reset_fail_mask = 0;
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+
+ if (likely(reset_fail_mask))
+ intel_gt_handle_error(gt, reset_fail_mask,
+ I915_ERROR_CAPTURE,
+ "GuC failed to reset engine mask=0x%x\n",
+ reset_fail_mask);
+}
+
int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len)
{
@@ -4038,6 +4121,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
struct intel_gt *gt = guc_to_gt(guc);
u8 guc_class, instance;
u32 reason;
+ unsigned long flags;
if (unlikely(len != 3)) {
drm_err(&gt->i915->drm, "Invalid length %u", len);
@@ -4062,10 +4146,15 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
guc_class, instance, engine->name, reason);
- intel_gt_handle_error(gt, engine->mask,
- I915_ERROR_CAPTURE,
- "GuC failed to reset %s (reason=0x%08x)\n",
- engine->name, reason);
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ guc->submission_state.reset_fail_mask |= engine->mask;
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+
+ /*
+ * A GT reset flushes this worker queue (G2H handler) so we must use
+ * another worker to trigger a GT reset.
+ */
+ queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker);
return 0;
}
@@ -4434,27 +4523,31 @@ static inline bool skip_handshake(struct i915_request *rq)
return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
}
+#define NON_SKIP_LEN 6
static u32 *
emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
u32 *cs)
{
struct intel_context *ce = rq->context;
+ __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
+ __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
GEM_BUG_ON(!intel_context_is_parent(ce));
if (unlikely(skip_handshake(rq))) {
/*
* NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
- * the -6 comes from the length of the emits below.
+ * the NON_SKIP_LEN comes from the length of the emits below.
*/
memset(cs, 0, sizeof(u32) *
- (ce->engine->emit_fini_breadcrumb_dw - 6));
- cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+ (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
+ cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
} else {
cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
}
/* Emit fini breadcrumb */
+ before_fini_breadcrumb_user_interrupt_cs = cs;
cs = gen8_emit_ggtt_write(cs,
rq->fence.seqno,
i915_request_active_timeline(rq)->hwsp_offset,
@@ -4464,6 +4557,12 @@ emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
+ /* Ensure our math for skip + emit is correct */
+ GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
+ cs);
+ GEM_BUG_ON(start_fini_breadcrumb_cs +
+ ce->engine->emit_fini_breadcrumb_dw != cs);
+
rq->tail = intel_ring_offset(rq, cs);
return cs;
@@ -4506,22 +4605,25 @@ emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
u32 *cs)
{
struct intel_context *ce = rq->context;
+ __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
+ __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
GEM_BUG_ON(!intel_context_is_child(ce));
if (unlikely(skip_handshake(rq))) {
/*
* NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
- * the -6 comes from the length of the emits below.
+ * the NON_SKIP_LEN comes from the length of the emits below.
*/
memset(cs, 0, sizeof(u32) *
- (ce->engine->emit_fini_breadcrumb_dw - 6));
- cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+ (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
+ cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
} else {
cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
}
/* Emit fini breadcrumb */
+ before_fini_breadcrumb_user_interrupt_cs = cs;
cs = gen8_emit_ggtt_write(cs,
rq->fence.seqno,
i915_request_active_timeline(rq)->hwsp_offset,
@@ -4531,11 +4633,19 @@ emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
+ /* Ensure our math for skip + emit is correct */
+ GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
+ cs);
+ GEM_BUG_ON(start_fini_breadcrumb_cs +
+ ce->engine->emit_fini_breadcrumb_dw != cs);
+
rq->tail = intel_ring_offset(rq, cs);
return cs;
}
+#undef NON_SKIP_LEN
+
static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
unsigned long flags)