From 36deeddcd3699c2755ed21c8d2595b1728d844e5 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 5 Dec 2019 14:59:34 +0000
Subject: drm/i915/gt: Save irqstate around virtual_context_destroy

As virtual_context_destroy() may be called from a request signal, it may
be called from inside an irq-off section, and so we need to do a full
save/restore of the irq state rather than blindly re-enable irqs upon
unlocking.

<4> [110.024262] WARNING: inconsistent lock state
<4> [110.024277] 5.4.0-rc8-CI-CI_DRM_7489+ #1 Tainted: G     U
<4> [110.024292] --------------------------------
<4> [110.024305] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage.
<4> [110.024323] kworker/0:0/5 [HC0[0]:SC0[0]:HE1:SE1] takes:
<4> [110.024338] ffff88826a0c7a18 (&(&rq->lock)->rlock){?.-.}, at: i915_request_retire+0x221/0x930 [i915]
<4> [110.024592] {IN-HARDIRQ-W} state was registered at:
<4> [110.024612]   lock_acquire+0xa7/0x1c0
<4> [110.024627]   _raw_spin_lock_irqsave+0x33/0x50
<4> [110.024788]   intel_engine_breadcrumbs_irq+0x38c/0x600 [i915]
<4> [110.024808]   irq_work_run_list+0x49/0x70
<4> [110.024824]   irq_work_run+0x26/0x50
<4> [110.024839]   smp_irq_work_interrupt+0x44/0x1e0
<4> [110.024855]   irq_work_interrupt+0xf/0x20
<4> [110.024871]   __do_softirq+0xb7/0x47f
<4> [110.024885]   irq_exit+0xba/0xc0
<4> [110.024898]   do_IRQ+0x83/0x160
<4> [110.024910]   ret_from_intr+0x0/0x1d
<4> [110.024922] irq event stamp: 172864
<4> [110.024938] hardirqs last  enabled at (172863): [<ffffffff819ea214>] _raw_spin_unlock_irq+0x24/0x50
<4> [110.024963] hardirqs last disabled at (172864): [<ffffffff819e9fba>] _raw_spin_lock_irq+0xa/0x40
<4> [110.024988] softirqs last  enabled at (172812): [<ffffffff81c00385>] __do_softirq+0x385/0x47f
<4> [110.025012] softirqs last disabled at (172797): [<ffffffff810b829a>] irq_exit+0xba/0xc0
<4> [110.025031]
other info that might help us debug this:
<4> [110.025049]  Possible unsafe locking scenario:

<4> [110.025065]        CPU0
<4> [110.025075]        ----
<4> [110.025084]   lock(&(&rq->lock)->rlock);
<4> [110.025099]   <Interrupt>
<4> [110.025109]     lock(&(&rq->lock)->rlock);
<4> [110.025124]
 *** DEADLOCK ***

<4> [110.025144] 4 locks held by kworker/0:0/5:
<4> [110.025156]  #0: ffff88827588f528 ((wq_completion)events){+.+.}, at: process_one_work+0x1de/0x620
<4> [110.025187]  #1: ffffc9000006fe78 ((work_completion)(&engine->retire_work)){+.+.}, at: process_one_work+0x1de/0x620
<4> [110.025219]  #2: ffff88825605e270 (&kernel#2){+.+.}, at: engine_retire+0x57/0xe0 [i915]
<4> [110.025405]  #3: ffff88826a0c7a18 (&(&rq->lock)->rlock){?.-.}, at: i915_request_retire+0x221/0x930 [i915]
<4> [110.025634]
stack backtrace:
<4> [110.025653] CPU: 0 PID: 5 Comm: kworker/0:0 Tainted: G     U            5.4.0-rc8-CI-CI_DRM_7489+ #1
<4> [110.025675] Hardware name:  /NUC7i5BNB, BIOS BNKBL357.86A.0054.2017.1025.1822 10/25/2017
<4> [110.025856] Workqueue: events engine_retire [i915]
<4> [110.025872] Call Trace:
<4> [110.025891]  dump_stack+0x71/0x9b
<4> [110.025907]  mark_lock+0x49a/0x500
<4> [110.025926]  ? print_shortest_lock_dependencies+0x200/0x200
<4> [110.025946]  mark_held_locks+0x49/0x70
<4> [110.025962]  ? _raw_spin_unlock_irq+0x24/0x50
<4> [110.025978]  lockdep_hardirqs_on+0xa2/0x1c0
<4> [110.025995]  _raw_spin_unlock_irq+0x24/0x50
<4> [110.026171]  virtual_context_destroy+0xc5/0x2e0 [i915]
<4> [110.026376]  __active_retire+0xb4/0x290 [i915]
<4> [110.026396]  dma_fence_signal_locked+0x9e/0x1b0
<4> [110.026613]  i915_request_retire+0x451/0x930 [i915]
<4> [110.026766]  retire_requests+0x4d/0x60 [i915]
<4> [110.026919]  engine_retire+0x63/0xe0 [i915]

Fixes: b1e3177bd1d8 ("drm/i915: Coordinate i915_active with its own mutex")
Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191205145934.663183-1-chris@chris-wilson.co.uk
(cherry picked from commit 6f7ac8285371fb0df58aba861eaab387f79ed04d)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu')
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9fdefbdc3546..17bb52aeff19 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -4120,17 +4120,18 @@ static void virtual_context_destroy(struct kref *kref)
 	for (n = 0; n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
 		struct rb_node *node = &ve->nodes[sibling->id].rb;
+		unsigned long flags;
 
 		if (RB_EMPTY_NODE(node))
 			continue;
 
-		spin_lock_irq(&sibling->active.lock);
+		spin_lock_irqsave(&sibling->active.lock, flags);
 
 		/* Detachment is lazily performed in the execlists tasklet */
 		if (!RB_EMPTY_NODE(node))
 			rb_erase_cached(node, &sibling->execlists.virtual);
 
-		spin_unlock_irq(&sibling->active.lock);
+		spin_unlock_irqrestore(&sibling->active.lock, flags);
 	}
 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
 
-- 
cgit 


From 2a264a0fd4dc9aa7900fd181c3a6464027cb4df1 Mon Sep 17 00:00:00 2001
From: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Date: Fri, 6 Dec 2019 11:43:38 -0800
Subject: drm/i915/perf: Allow non-privileged access when OA buffer is not
 sampled

SAMPLE_OA_REPORT enables sampling of OA reports from the OA buffer.
Since reports from OA buffer had system wide visibility, collecting
samples from the OA buffer was a privileged operation on previous
platforms. Prior to TGL, it was also necessary to sample the OA buffer
to normalize reports from MI REPORT PERF COUNT.

TGL has a dedicated OAR unit to sample perf reports for a specific
render context. This removes the necessity to sample OA buffer.

- If not sampling the OA buffer, allow non-privileged access. An earlier
  patch allows the non-privilege access:
  https://patchwork.freedesktop.org/patch/337716/?series=68582&rev=1
- Clear up the path for non-privileged access in this patch

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Fixes: 00a7f0d7155c ("drm/i915/tgl: Add perf support on TGL")
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191206194339.31356-1-umesh.nerlige.ramappa@intel.com
(cherry picked from commit 322d56aa3145a28445907ecc638a2c3aa3295c6b)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 65d7c2e599de..0f6e1d741d11 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2713,7 +2713,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		return -EINVAL;
 	}
 
-	if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
+	if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
+	    (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) {
 		DRM_DEBUG("Only OA report sampling supported\n");
 		return -EINVAL;
 	}
@@ -2745,7 +2746,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 
 	format_size = perf->oa_formats[props->oa_format].size;
 
-	stream->sample_flags |= SAMPLE_OA_REPORT;
+	stream->sample_flags = props->sample_flags;
 	stream->sample_size += format_size;
 
 	stream->oa_buffer.format_size = format_size;
-- 
cgit 


From 177e876af8d291d0351db3bae84b483e4722c4a7 Mon Sep 17 00:00:00 2001
From: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Date: Fri, 6 Dec 2019 11:43:39 -0800
Subject: drm/i915/perf: Configure OAR for specific context

Gen12 supports saving/restoring render counters per context. Apply OAR
configuration only for the context that is passed in to perf.

v2:
- Fix OACTXCONTROL value to only stop/resume counters.
- Remove gen12_update_reg_state_unlocked as power state is already
  applied by the caller.

v3: (Lionel)
- Move register initialization into the array
- Assume a valid oa_config in enable_metric_set

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Fixes: 00a7f0d7155c ("drm/i915/tgl: Add perf support on TGL")
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191206194339.31356-2-umesh.nerlige.ramappa@intel.com
(cherry picked from commit ccdeed497042676e13fc1625e2a341880eff5da5)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 199 ++++++++++++++++++++++-----------------
 1 file changed, 112 insertions(+), 87 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 0f6e1d741d11..2ae14bc14931 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2078,20 +2078,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
 	u32 *reg_state = ce->lrc_reg_state;
 	int i;
 
-	if (IS_GEN(stream->perf->i915, 12)) {
-		u32 format = stream->oa_buffer.format;
+	reg_state[ctx_oactxctrl + 1] =
+		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+		GEN8_OA_COUNTER_RESUME;
 
-		reg_state[ctx_oactxctrl + 1] =
-			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
-			(stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
-	} else {
-		reg_state[ctx_oactxctrl + 1] =
-			(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
-			(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
-			GEN8_OA_COUNTER_RESUME;
-	}
-
-	for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
+	for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
 		reg_state[ctx_flexeu0 + i * 2 + 1] =
 			oa_config_flex_reg(stream->oa_config, flex_regs[i]);
 
@@ -2224,34 +2216,51 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
 	return err;
 }
 
-static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
+static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
 {
-	struct i915_request *rq;
-	u32 *cs;
-	int err = 0;
-
-	rq = i915_request_create(ce);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs)) {
-		err = PTR_ERR(cs);
-		goto out;
-	}
-
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
-	*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
-			      enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
-	*cs++ = MI_NOOP;
+	int err;
+	struct intel_context *ce = stream->pinned_ctx;
+	u32 format = stream->oa_buffer.format;
+	struct flex regs_context[] = {
+		{
+			GEN8_OACTXCONTROL,
+			stream->perf->ctx_oactxctrl_offset + 1,
+			enable ? GEN8_OA_COUNTER_RESUME : 0,
+		},
+	};
+	/* Offsets in regs_lri are not used since this configuration is only
+	 * applied using LRI. Initialize the correct offsets for posterity.
+	 */
+#define GEN12_OAR_OACONTROL_OFFSET 0x5B0
+	struct flex regs_lri[] = {
+		{
+			GEN12_OAR_OACONTROL,
+			GEN12_OAR_OACONTROL_OFFSET + 1,
+			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
+			(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
+		},
+		{
+			RING_CONTEXT_CONTROL(ce->engine->mmio_base),
+			CTX_CONTEXT_CONTROL,
+			_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
+				      enable ?
+				      GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
+				      0)
+		},
+	};
 
-	intel_ring_advance(rq, cs);
+	/* Modify the context image of pinned context with regs_context*/
+	err = intel_context_lock_pinned(ce);
+	if (err)
+		return err;
 
-out:
-	i915_request_add(rq);
+	err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
+	intel_context_unlock_pinned(ce);
+	if (err)
+		return err;
 
-	return err;
+	/* Apply regs_lri using LRI with pinned context */
+	return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
 }
 
 /*
@@ -2277,53 +2286,16 @@ out:
  *   per-context OA state.
  *
  * Note: it's only the RCS/Render context that has any OA state.
+ * Note: the first flex register passed must always be R_PWR_CLK_STATE
  */
-static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
-				      const struct i915_oa_config *oa_config)
+static int oa_configure_all_contexts(struct i915_perf_stream *stream,
+				     struct flex *regs,
+				     size_t num_regs)
 {
 	struct drm_i915_private *i915 = stream->perf->i915;
-	/* The MMIO offsets for Flex EU registers aren't contiguous */
-	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
-#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
-	struct flex regs[] = {
-		{
-			GEN8_R_PWR_CLK_STATE,
-			CTX_R_PWR_CLK_STATE,
-		},
-		{
-			IS_GEN(i915, 12) ?
-			GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL,
-			stream->perf->ctx_oactxctrl_offset + 1,
-		},
-		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
-		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
-		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
-		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
-		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
-		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
-		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
-	};
-#undef ctx_flexeuN
 	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx, *cn;
-	size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs);
-	int i, err;
-
-	if (IS_GEN(i915, 12)) {
-		u32 format = stream->oa_buffer.format;
-
-		regs[1].value =
-			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
-			(oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
-	} else {
-		regs[1].value =
-			(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
-			(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
-			GEN8_OA_COUNTER_RESUME;
-	}
-
-	for (i = 2; !!ctx_flexeu0 && i < array_size; i++)
-		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
+	int err;
 
 	lockdep_assert_held(&stream->perf->lock);
 
@@ -2353,7 +2325,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
 
 		spin_unlock(&i915->gem.contexts.lock);
 
-		err = gen8_configure_context(ctx, regs, array_size);
+		err = gen8_configure_context(ctx, regs, num_regs);
 		if (err) {
 			i915_gem_context_put(ctx);
 			return err;
@@ -2378,7 +2350,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
 
 		regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
 
-		err = gen8_modify_self(ce, regs, array_size);
+		err = gen8_modify_self(ce, regs, num_regs);
 		if (err)
 			return err;
 	}
@@ -2386,6 +2358,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
 	return 0;
 }
 
+static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
+					const struct i915_oa_config *oa_config)
+{
+	struct flex regs[] = {
+		{
+			GEN8_R_PWR_CLK_STATE,
+			CTX_R_PWR_CLK_STATE,
+		},
+	};
+
+	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+}
+
+static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
+				      const struct i915_oa_config *oa_config)
+{
+	/* The MMIO offsets for Flex EU registers aren't contiguous */
+	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
+	struct flex regs[] = {
+		{
+			GEN8_R_PWR_CLK_STATE,
+			CTX_R_PWR_CLK_STATE,
+		},
+		{
+			GEN8_OACTXCONTROL,
+			stream->perf->ctx_oactxctrl_offset + 1,
+		},
+		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
+		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
+		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
+		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
+		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
+		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
+		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
+	};
+#undef ctx_flexeuN
+	int i;
+
+	regs[1].value =
+		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+		GEN8_OA_COUNTER_RESUME;
+
+	for (i = 2; i < ARRAY_SIZE(regs); i++)
+		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
+
+	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+}
+
 static int gen8_enable_metric_set(struct i915_perf_stream *stream)
 {
 	struct intel_uncore *uncore = stream->uncore;
@@ -2464,7 +2486,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
 	 * to make sure all slices/subslices are ON before writing to NOA
 	 * registers.
 	 */
-	ret = lrc_configure_all_contexts(stream, oa_config);
+	ret = gen12_configure_all_contexts(stream, oa_config);
 	if (ret)
 		return ret;
 
@@ -2474,8 +2496,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
 	 * requested this.
 	 */
 	if (stream->ctx) {
-		ret = gen12_emit_oar_config(stream->pinned_ctx,
-					    oa_config != NULL);
+		ret = gen12_configure_oar_context(stream, true);
 		if (ret)
 			return ret;
 	}
@@ -2509,11 +2530,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
 	struct intel_uncore *uncore = stream->uncore;
 
 	/* Reset all contexts' slices/subslices configurations. */
-	lrc_configure_all_contexts(stream, NULL);
+	gen12_configure_all_contexts(stream, NULL);
 
 	/* disable the context save/restore or OAR counters */
 	if (stream->ctx)
-		gen12_emit_oar_config(stream->pinned_ctx, false);
+		gen12_configure_oar_context(stream, false);
 
 	/* Make sure we disable noa to save power. */
 	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
@@ -2855,7 +2876,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
 		return;
 
 	stream = engine->i915->perf.exclusive_stream;
-	if (stream)
+	/*
+	 * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
+	 * is already doing that, so nothing to be done for gen12 here.
+	 */
+	if (stream && INTEL_GEN(stream->perf->i915) < 12)
 		gen8_update_reg_state_unlocked(ce, stream);
 }
 
-- 
cgit 


From 0eb8e74f7202a4a98bbc0c1adeed3986cf50b66a Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Wed, 27 Nov 2019 22:12:09 +0200
Subject: drm/i915/fbc: Disable fbc by default on all glk+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We're missing a workaround in the fbc code for all glk+ platforms
which can cause corruption around the top of the screen. So
enabling fbc by default is a bad idea. I'm not keen to backport
the w/a so let's start by disabling fbc by default on all glk+.
We'll lift the restriction once the w/a is in place.

Cc: stable@vger.kernel.org
Cc: Daniel Drake <drake@endlessm.com>
Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
Cc: Jian-Hong Pan <jian-hong@endlessm.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191127201222.16669-2-ville.syrjala@linux.intel.com
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
(cherry picked from commit cd8c021b36a66833cefe2c90a79a9e312a2a5690)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/display/intel_fbc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 3111ecaeabd0..20616639b8ab 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1284,7 +1284,7 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv)
 		return 0;
 
 	/* https://bugs.freedesktop.org/show_bug.cgi?id=108085 */
-	if (IS_GEMINILAKE(dev_priv))
+	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
 		return 0;
 
 	if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9)
-- 
cgit 


From 3ce8209735e120db6f0ef739480e3dc7e2cb6763 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Wed, 4 Dec 2019 20:05:41 +0200
Subject: drm/i915/hdcp: Nuke intel_hdcp_transcoder_config()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

intel_hdcp_transcoder_config() is clobbering some globally visible
state in .compute_config(). That is a big no no as .compute_config()
is supposed to have no visible side effects when either the commit
fails or it's just a TEST_ONLY commit.

Inline this stuff into intel_hdcp_enable() so that the state only
gets modified when we actually commit the state to the hardware.

Cc: Ramalingam C <ramalingam.c@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Uma Shankar <uma.shankar@intel.com>
Fixes: 39e2df090c3c ("drm/i915/hdcp: update current transcoder into intel_hdcp")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191204180549.1267-2-ville.syrjala@linux.intel.com
Reviewed-by: Ramalingam C <ramalingam.c@intel.com>
(cherry picked from commit 67e1d5ed85a83e232a9e0b995f5778a86722b96e)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/display/intel_ddi.c  |  5 ++++-
 drivers/gpu/drm/i915/display/intel_dp.c   |  3 ---
 drivers/gpu/drm/i915/display/intel_hdcp.c | 26 ++++++++------------------
 drivers/gpu/drm/i915/display/intel_hdcp.h |  5 ++---
 drivers/gpu/drm/i915/display/intel_hdmi.c |  3 ---
 5 files changed, 14 insertions(+), 28 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index c7c2b349858d..2a27fb5d7dc6 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3986,6 +3986,7 @@ static void intel_enable_ddi(struct intel_encoder *encoder,
 	if (conn_state->content_protection ==
 	    DRM_MODE_CONTENT_PROTECTION_DESIRED)
 		intel_hdcp_enable(to_intel_connector(conn_state->connector),
+				  crtc_state->cpu_transcoder,
 				  (u8)conn_state->hdcp_content_type);
 }
 
@@ -4089,7 +4090,9 @@ static void intel_ddi_update_pipe(struct intel_encoder *encoder,
 	if (conn_state->content_protection ==
 	    DRM_MODE_CONTENT_PROTECTION_DESIRED ||
 	    content_protection_type_changed)
-		intel_hdcp_enable(connector, (u8)conn_state->hdcp_content_type);
+		intel_hdcp_enable(connector,
+				  crtc_state->cpu_transcoder,
+				  (u8)conn_state->hdcp_content_type);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 050655a1a3d8..b05b2191b919 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -2414,9 +2414,6 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 
 	intel_psr_compute_config(intel_dp, pipe_config);
 
-	intel_hdcp_transcoder_config(intel_connector,
-				     pipe_config->cpu_transcoder);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c
index f1f41ca8402b..a448815d8fc2 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -1821,23 +1821,6 @@ enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder)
 	}
 }
 
-void intel_hdcp_transcoder_config(struct intel_connector *connector,
-				  enum transcoder cpu_transcoder)
-{
-	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-	struct intel_hdcp *hdcp = &connector->hdcp;
-
-	if (!hdcp->shim)
-		return;
-
-	if (INTEL_GEN(dev_priv) >= 12) {
-		mutex_lock(&hdcp->mutex);
-		hdcp->cpu_transcoder = cpu_transcoder;
-		hdcp->port_data.fw_tc = intel_get_mei_fw_tc(cpu_transcoder);
-		mutex_unlock(&hdcp->mutex);
-	}
-}
-
 static inline int initialize_hdcp_port_data(struct intel_connector *connector,
 					    const struct intel_hdcp_shim *shim)
 {
@@ -1959,8 +1942,10 @@ int intel_hdcp_init(struct intel_connector *connector,
 	return 0;
 }
 
-int intel_hdcp_enable(struct intel_connector *connector, u8 content_type)
+int intel_hdcp_enable(struct intel_connector *connector,
+		      enum transcoder cpu_transcoder, u8 content_type)
 {
+	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct intel_hdcp *hdcp = &connector->hdcp;
 	unsigned long check_link_interval = DRM_HDCP_CHECK_PERIOD_MS;
 	int ret = -EINVAL;
@@ -1972,6 +1957,11 @@ int intel_hdcp_enable(struct intel_connector *connector, u8 content_type)
 	WARN_ON(hdcp->value == DRM_MODE_CONTENT_PROTECTION_ENABLED);
 	hdcp->content_type = content_type;
 
+	if (INTEL_GEN(dev_priv) >= 12) {
+		hdcp->cpu_transcoder = cpu_transcoder;
+		hdcp->port_data.fw_tc = intel_get_mei_fw_tc(cpu_transcoder);
+	}
+
 	/*
 	 * Considering that HDCP2.2 is more secure than HDCP1.4, If the setup
 	 * is capable of HDCP2.2, it is preferred to use HDCP2.2.
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.h b/drivers/gpu/drm/i915/display/intel_hdcp.h
index 41c1053d9e38..f3c3272e712a 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.h
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.h
@@ -21,11 +21,10 @@ enum transcoder;
 void intel_hdcp_atomic_check(struct drm_connector *connector,
 			     struct drm_connector_state *old_state,
 			     struct drm_connector_state *new_state);
-void intel_hdcp_transcoder_config(struct intel_connector *connector,
-				  enum transcoder cpu_transcoder);
 int intel_hdcp_init(struct intel_connector *connector,
 		    const struct intel_hdcp_shim *hdcp_shim);
-int intel_hdcp_enable(struct intel_connector *connector, u8 content_type);
+int intel_hdcp_enable(struct intel_connector *connector,
+		      enum transcoder cpu_transcoder, u8 content_type);
 int intel_hdcp_disable(struct intel_connector *connector);
 bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port);
 bool intel_hdcp_capable(struct intel_connector *connector);
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index f6f5312205c4..f56fffc474fa 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -2489,9 +2489,6 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder,
 		return -EINVAL;
 	}
 
-	intel_hdcp_transcoder_config(intel_hdmi->attached_connector,
-				     pipe_config->cpu_transcoder);
-
 	return 0;
 }
 
-- 
cgit 


From f26a9e959a7b1588c59f7a919b41b67175b211d8 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 9 Dec 2019 02:32:15 +0000
Subject: drm/i915/gt: Detect if we miss WaIdleLiteRestore

In order to avoid confusing the HW, we must never submit an empty ring
during lite-restore, that is we should always advance the RING_TAIL
before submitting to stay ahead of the RING_HEAD.

Normally this is prevented by keeping a couple of spare NOPs in the
request->wa_tail so that on resubmission we can advance the tail. This
relies on the request only being resubmitted once, which is the normal
condition as it is seen once for ELSP[1] and then later in ELSP[0]. On
preemption, the requests are unwound and the tail reset back to the
normal end point (as we know the request is incomplete and therefore its
RING_HEAD is even earlier).

However, if this w/a should fail we would try and resubmit the request
with the RING_TAIL already set to the location of this request's wa_tail
potentially causing a GPU hang. We can spot when we do try and
incorrectly resubmit without advancing the RING_TAIL and spare any
embarrassment by forcing the context restore.

In the case of preempt-to-busy, we leave the requests running on the HW
while we unwind. As the ring is still live, we cannot rewind our
rq->tail without forcing a reload so leave it set to rq->wa_tail and
only force a reload if we resubmit after a lite-restore. (Normally, the
forced reload will be a part of the preemption event.)

Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy")
Closes: https://gitlab.freedesktop.org/drm/intel/issues/673
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: stable@kernel.vger.org
Link: https://patchwork.freedesktop.org/patch/msgid/20191209023215.3519970-1-chris@chris-wilson.co.uk
(cherry picked from commit 82c69bf58650e644c61aa2bf5100b63a1070fd2f)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 46 +++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 25 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 17bb52aeff19..75dd0e0367b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -845,12 +845,6 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
 	}
 }
 
-static void unwind_wa_tail(struct i915_request *rq)
-{
-	rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
-	assert_ring_tail_valid(rq->ring, rq->tail);
-}
-
 static struct i915_request *
 __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
@@ -863,12 +857,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	list_for_each_entry_safe_reverse(rq, rn,
 					 &engine->active.requests,
 					 sched.link) {
-
 		if (i915_request_completed(rq))
 			continue; /* XXX */
 
 		__i915_request_unsubmit(rq);
-		unwind_wa_tail(rq);
 
 		/*
 		 * Push the request back into the queue for later resubmission.
@@ -1161,13 +1153,29 @@ execlists_schedule_out(struct i915_request *rq)
 	i915_request_put(rq);
 }
 
-static u64 execlists_update_context(const struct i915_request *rq)
+static u64 execlists_update_context(struct i915_request *rq)
 {
 	struct intel_context *ce = rq->hw_context;
-	u64 desc;
+	u64 desc = ce->lrc_desc;
+	u32 tail;
 
-	ce->lrc_reg_state[CTX_RING_TAIL] =
-		intel_ring_set_tail(rq->ring, rq->tail);
+	/*
+	 * WaIdleLiteRestore:bdw,skl
+	 *
+	 * We should never submit the context with the same RING_TAIL twice
+	 * just in case we submit an empty ring, which confuses the HW.
+	 *
+	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
+	 * the normal request to be able to always advance the RING_TAIL on
+	 * subsequent resubmissions (for lite restore). Should that fail us,
+	 * and we try and submit the same tail again, force the context
+	 * reload.
+	 */
+	tail = intel_ring_set_tail(rq->ring, rq->tail);
+	if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+		desc |= CTX_DESC_FORCE_RESTORE;
+	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
+	rq->tail = rq->wa_tail;
 
 	/*
 	 * Make sure the context image is complete before we submit it to HW.
@@ -1186,13 +1194,11 @@ static u64 execlists_update_context(const struct i915_request *rq)
 	 */
 	mb();
 
-	desc = ce->lrc_desc;
-	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
-
 	/* Wa_1607138340:tgl */
 	if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0))
 		desc |= CTX_DESC_FORCE_RESTORE;
 
+	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
 	return desc;
 }
 
@@ -1703,16 +1709,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 				return;
 			}
-
-			/*
-			 * WaIdleLiteRestore:bdw,skl
-			 * Apply the wa NOOPs to prevent
-			 * ring:HEAD == rq:TAIL as we resubmit the
-			 * request. See gen8_emit_fini_breadcrumb() for
-			 * where we prepare the padding after the
-			 * end of the request.
-			 */
-			last->tail = last->wa_tail;
 		}
 	}
 
-- 
cgit 


From 750bde2fd4ffacc297473c36d6fdb29c395b06aa Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 21 Nov 2019 07:10:41 +0000
Subject: drm/i915: Serialise with remote retirement

Since retirement may be running in a worker on another CPU, it may be
skipped in the local intel_gt_wait_for_idle(). To ensure the state is
consistent for our sanity checks upon load, serialise with the remote
retirer by waiting on the timeline->mutex.

Outside of this use case, e.g. on suspend or module unload, we expect the
slack to be picked up by intel_gt_pm_wait_for_idle() and so prefer to
put the special case serialisation with retirement in its single user,
for now at least.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191121071044.97798-2-chris@chris-wilson.co.uk
(cherry picked from commit 2d0fb251360ab7eccbffd99f6933a2a4de678d52)
Fixes: 093b92287363 ("drm/i915: Split i915_active.mutex into an irq-safe spinlock for the rbtree")
Closes: https://gitlab.freedesktop.org/drm/intel/issues/754
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b9eb6b3149b7..d034fa413164 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -45,6 +45,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_context.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -1053,6 +1054,18 @@ out:
 	return err;
 }
 
+static int __intel_context_flush_retire(struct intel_context *ce)
+{
+	struct intel_timeline *tl;
+
+	tl = intel_context_timeline_lock(ce);
+	if (IS_ERR(tl))
+		return PTR_ERR(tl);
+
+	intel_context_timeline_unlock(tl);
+	return 0;
+}
+
 static int __intel_engines_record_defaults(struct intel_gt *gt)
 {
 	struct i915_request *requests[I915_NUM_ENGINES] = {};
@@ -1121,13 +1134,20 @@ err_rq:
 		if (!rq)
 			continue;
 
-		/* We want to be able to unbind the state from the GGTT */
-		GEM_BUG_ON(intel_context_is_pinned(rq->hw_context));
-
+		GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT,
+				     &rq->hw_context->flags));
 		state = rq->hw_context->state;
 		if (!state)
 			continue;
 
+		/* Serialise with retirement on another CPU */
+		err = __intel_context_flush_retire(rq->hw_context);
+		if (err)
+			goto out;
+
+		/* We want to be able to unbind the state from the GGTT */
+		GEM_BUG_ON(intel_context_is_pinned(rq->hw_context));
+
 		/*
 		 * As we will hold a reference to the logical state, it will
 		 * not be torn down with the context, and importantly the
-- 
cgit