1 files changed, 129 insertions, 83 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 468438fb47af..d0088d020220 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -247,8 +247,12 @@ static void __context_pin_release(struct intel_context *ce)
 
 static void mark_eio(struct i915_request *rq)
 {
-	if (!i915_request_signaled(rq))
-		dma_fence_set_error(&rq->fence, -EIO);
+	if (i915_request_completed(rq))
+		return;
+
+	GEM_BUG_ON(i915_request_signaled(rq));
+
+	dma_fence_set_error(&rq->fence, -EIO);
 	i915_request_mark_complete(rq);
 }
 
@@ -348,10 +352,15 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	 * However, the priority hint is a mere hint that we may need to
 	 * preempt. If that hint is stale or we may be trying to preempt
 	 * ourselves, ignore the request.
+	 *
+	 * More naturally we would write
+	 *      prio >= max(0, last);
+	 * except that we wish to prevent triggering preemption at the same
+	 * priority level: the task that is running should remain running
+	 * to preserve FIFO ordering of dependencies.
 	 */
-	last_prio = effective_prio(rq);
-	if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
-					 last_prio))
+	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
+	if (engine->execlists.queue_priority_hint <= last_prio)
 		return false;
 
 	/*
@@ -669,64 +678,6 @@ static const u8 gen12_xcs_offsets[] = {
 	REG16(0x274),
 	REG16(0x270),
 
-	NOP(13),
-	LRI(2, POSTED),
-	REG16(0x200),
-	REG16(0x204),
-
-	NOP(11),
-	LRI(50, POSTED),
-	REG16(0x588),
-	REG16(0x588),
-	REG16(0x588),
-	REG16(0x588),
-	REG16(0x588),
-	REG16(0x588),
-	REG(0x028),
-	REG(0x09c),
-	REG(0x0c0),
-	REG(0x178),
-	REG(0x17c),
-	REG16(0x358),
-	REG(0x170),
-	REG(0x150),
-	REG(0x154),
-	REG(0x158),
-	REG16(0x41c),
-	REG16(0x600),
-	REG16(0x604),
-	REG16(0x608),
-	REG16(0x60c),
-	REG16(0x610),
-	REG16(0x614),
-	REG16(0x618),
-	REG16(0x61c),
-	REG16(0x620),
-	REG16(0x624),
-	REG16(0x628),
-	REG16(0x62c),
-	REG16(0x630),
-	REG16(0x634),
-	REG16(0x638),
-	REG16(0x63c),
-	REG16(0x640),
-	REG16(0x644),
-	REG16(0x648),
-	REG16(0x64c),
-	REG16(0x650),
-	REG16(0x654),
-	REG16(0x658),
-	REG16(0x65c),
-	REG16(0x660),
-	REG16(0x664),
-	REG16(0x668),
-	REG16(0x66c),
-	REG16(0x670),
-	REG16(0x674),
-	REG16(0x678),
-	REG16(0x67c),
-	REG(0x068),
-
 	END(),
 };
 
@@ -857,6 +808,15 @@ static const u8 gen12_rcs_offsets[] = {
 
 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
 {
+	/*
+	 * The gen12+ lists only have the registers we program in the basic
+	 * default state. We rely on the context image using relative
+	 * addressing to automatic fixup the register state between the
+	 * physical engines for virtual engine.
+	 */
+	GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
+		   !intel_engine_has_relative_mmio(engine));
+
 	if (engine->class == RENDER_CLASS) {
 		if (INTEL_GEN(engine->i915) >= 12)
 			return gen12_rcs_offsets;
@@ -892,7 +852,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	list_for_each_entry_safe_reverse(rq, rn,
 					 &engine->active.requests,
 					 sched.link) {
-		struct intel_engine_cs *owner;
 
 		if (i915_request_completed(rq))
 			continue; /* XXX */
@@ -907,8 +866,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		 * engine so that it can be moved across onto another physical
 		 * engine as load dictates.
 		 */
-		owner = rq->hw_context->engine;
-		if (likely(owner == engine)) {
+		if (likely(rq->execution_mask == engine->mask)) {
 			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 			if (rq_prio(rq) != prio) {
 				prio = rq_prio(rq);
@@ -919,6 +877,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 			list_move(&rq->sched.link, pl);
 			active = rq;
 		} else {
+			struct intel_engine_cs *owner = rq->hw_context->engine;
+
 			/*
 			 * Decouple the virtual breadcrumb before moving it
 			 * back to the virtual engine -- we don't want the
@@ -928,7 +888,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 			 */
 			if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
 				     &rq->fence.flags)) {
-				spin_lock(&rq->lock);
+				spin_lock_nested(&rq->lock,
+						 SINGLE_DEPTH_NESTING);
 				i915_request_cancel_breadcrumb(rq);
 				spin_unlock(&rq->lock);
 			}
@@ -1092,6 +1053,10 @@ static u64 execlists_update_context(const struct i915_request *rq)
 	desc = ce->lrc_desc;
 	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
 
+	/* Wa_1607138340:tgl */
+	if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0))
+		desc |= CTX_DESC_FORCE_RESTORE;
+
 	return desc;
 }
 
@@ -1137,25 +1102,45 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
 
 	trace_ports(execlists, msg, execlists->pending);
 
-	if (!execlists->pending[0])
+	if (!execlists->pending[0]) {
+		GEM_TRACE_ERR("Nothing pending for promotion!\n");
 		return false;
+	}
 
-	if (execlists->pending[execlists_num_ports(execlists)])
+	if (execlists->pending[execlists_num_ports(execlists)]) {
+		GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
+			      execlists_num_ports(execlists));
 		return false;
+	}
 
 	for (port = execlists->pending; (rq = *port); port++) {
-		if (ce == rq->hw_context)
+		if (ce == rq->hw_context) {
+			GEM_TRACE_ERR("Duplicate context in pending[%zd]\n",
+				      port - execlists->pending);
 			return false;
+		}
 
 		ce = rq->hw_context;
 		if (i915_request_completed(rq))
 			continue;
 
-		if (i915_active_is_idle(&ce->active))
+		if (i915_active_is_idle(&ce->active)) {
+			GEM_TRACE_ERR("Inactive context in pending[%zd]\n",
+				      port - execlists->pending);
 			return false;
+		}
+
+		if (!i915_vma_is_pinned(ce->state)) {
+			GEM_TRACE_ERR("Unpinned context in pending[%zd]\n",
+				      port - execlists->pending);
+			return false;
+		}
 
-		if (!i915_vma_is_pinned(ce->state))
+		if (!i915_vma_is_pinned(ce->ring->vma)) {
+			GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n",
+				      port - execlists->pending);
 			return false;
+		}
 	}
 
 	return ce;
@@ -1232,6 +1217,10 @@ static bool can_merge_rq(const struct i915_request *prev,
 	if (i915_request_completed(next))
 		return true;
 
+	if (unlikely((prev->flags ^ next->flags) &
+		     (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL)))
+		return false;
+
 	if (!can_merge_ctx(prev->hw_context, next->hw_context))
 		return false;
 
@@ -1288,7 +1277,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
 static struct i915_request *
 last_active(const struct intel_engine_execlists *execlists)
 {
-	struct i915_request * const *last = execlists->active;
+	struct i915_request * const *last = READ_ONCE(execlists->active);
 
 	while (*last && i915_request_completed(*last))
 		last++;
@@ -1489,7 +1478,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 			last = NULL;
 		} else if (need_timeslice(engine, last) &&
-			   !timer_pending(&engine->execlists.timer)) {
+			   timer_expired(&engine->execlists.timer)) {
 			GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
 				  engine->name,
 				  last->fence.context,
@@ -1525,8 +1514,17 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * submission.
 			 */
 			if (!list_is_last(&last->sched.link,
-					  &engine->active.requests))
+					  &engine->active.requests)) {
+				/*
+				 * Even if ELSP[1] is occupied and not worthy
+				 * of timeslices, our queue might be.
+				 */
+				if (!execlists->timer.expires &&
+				    need_timeslice(engine, last))
+					mod_timer(&execlists->timer,
+						  jiffies + 1);
 				return;
+			}
 
 			/*
 			 * WaIdleLiteRestore:bdw,skl
@@ -1680,6 +1678,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				if (last->hw_context == rq->hw_context)
 					goto done;
 
+				if (i915_request_has_sentinel(last))
+					goto done;
+
 				/*
 				 * If GVT overrides us we only ever submit
 				 * port[0], leaving port[1] empty. Note that we
@@ -1859,6 +1860,13 @@ static void process_csb(struct intel_engine_cs *engine)
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
+	/*
+	 * As we modify our execlists state tracking we require exclusive
+	 * access. Either we are inside the tasklet, or the tasklet is disabled
+	 * and we assume that is only inside the reset paths and so serialised.
+	 */
+	GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
+		   !reset_in_progress(execlists));
 	GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
 
 	/*
@@ -1920,6 +1928,9 @@ static void process_csb(struct intel_engine_cs *engine)
 		else
 			promote = gen8_csb_parse(execlists, buf + 2 * head);
 		if (promote) {
+			if (!inject_preempt_hang(execlists))
+				ring_set_paused(engine, 0);
+
 			/* cancel old inflight, prepare for switch */
 			trace_ports(execlists, "preempted", execlists->active);
 			while (*execlists->active)
@@ -1935,9 +1946,8 @@ static void process_csb(struct intel_engine_cs *engine)
 
 			if (enable_timeslice(execlists))
 				mod_timer(&execlists->timer, jiffies + 1);
-
-			if (!inject_preempt_hang(execlists))
-				ring_set_paused(engine, 0);
+			else
+				cancel_timer(&execlists->timer);
 
 			WRITE_ONCE(execlists->pending[0], NULL);
 		} else {
@@ -1980,8 +1990,11 @@ static void process_csb(struct intel_engine_cs *engine)
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
 	lockdep_assert_held(&engine->active.lock);
-	if (!engine->execlists.pending[0])
+	if (!engine->execlists.pending[0]) {
+		rcu_read_lock(); /* protect peeking at execlists->active */
 		execlists_dequeue(engine);
+		rcu_read_unlock();
+	}
 }
 
 /*
@@ -2773,8 +2786,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	if (!rq)
 		goto unwind;
 
+	/* We still have requests in-flight; the engine should be active */
+	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
 	ce = rq->hw_context;
-	GEM_BUG_ON(i915_active_is_idle(&ce->active));
 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
 
 	/* Proclaim we have exclusive access to the context image! */
@@ -2782,10 +2797,13 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 
 	rq = active_request(rq);
 	if (!rq) {
+		/* Idle context; tidy up the ring so we can restart afresh */
 		ce->ring->head = ce->ring->tail;
 		goto out_replay;
 	}
 
+	/* Context has requests still in-flight; it should not be idle! */
+	GEM_BUG_ON(i915_active_is_idle(&ce->active));
 	ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
 
 	/*
@@ -3205,8 +3223,11 @@ static int gen12_emit_flush_render(struct i915_request *request,
 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		/* Wa_1409600907:tgl */
+		flags |= PIPE_CONTROL_DEPTH_STALL;
 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
 
 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
 		flags |= PIPE_CONTROL_QW_WRITE;
@@ -3232,6 +3253,7 @@ static int gen12_emit_flush_render(struct i915_request *request,
 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
 
 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
 		flags |= PIPE_CONTROL_QW_WRITE;
@@ -3253,6 +3275,26 @@ static int gen12_emit_flush_render(struct i915_request *request,
 
 		*cs++ = preparser_disable(false);
 		intel_ring_advance(request, cs);
+
+		/*
+		 * Wa_1604544889:tgl
+		 */
+		if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
+			flags = 0;
+			flags |= PIPE_CONTROL_CS_STALL;
+			flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
+
+			flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+			flags |= PIPE_CONTROL_QW_WRITE;
+
+			cs = intel_ring_begin(request, 6);
+			if (IS_ERR(cs))
+				return PTR_ERR(cs);
+
+			cs = gen8_emit_pipe_control(cs, flags,
+						    LRC_PPHWSP_SCRATCH_ADDR);
+			intel_ring_advance(request, cs);
+		}
 	}
 
 	return 0;
@@ -3415,15 +3457,18 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
 				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+				      /* Wa_1409600907:tgl */
+				      PIPE_CONTROL_DEPTH_STALL |
 				      PIPE_CONTROL_DC_FLUSH_ENABLE |
-				      PIPE_CONTROL_FLUSH_ENABLE);
+				      PIPE_CONTROL_FLUSH_ENABLE |
+				      PIPE_CONTROL_HDC_PIPELINE_FLUSH);
 
 	return gen12_emit_fini_breadcrumb_footer(request, cs);
 }
 
 static void execlists_park(struct intel_engine_cs *engine)
 {
-	del_timer(&engine->execlists.timer);
+	cancel_timer(&engine->execlists.timer);
 }
 
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
@@ -3447,7 +3492,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 	}
 
-	if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12)
+	if (INTEL_GEN(engine->i915) >= 12)
 		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
 }
 
@@ -4169,6 +4214,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 
 	ve->base.i915 = ctx->i915;
 	ve->base.gt = siblings[0]->gt;
+	ve->base.uncore = siblings[0]->uncore;
 	ve->base.id = -1;
 	ve->base.class = OTHER_CLASS;
 	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;