author:    Chris Wilson <chris@chris-wilson.co.uk>  2019-12-11 23:08:56 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk>  2019-12-12 10:42:57 +0000
commit:    32d94048b988469f8bd62cdc6d934f9f58c2b7c5 (patch)
tree:      cc294c97e5f55a64e0e8ba71de494adc2c5ab692 /drivers/gpu/drm/i915/i915_cmd_parser.c
parent:    7d929989bb7c476b3ef01dc5cd111622e42e78ea (diff)
drm/i915/gem: Prepare gen7 cmdparser for async execution
The gen7 cmdparser is primarily a promotion-based system to allow access to
additional registers beyond the HW validation, and allows fallback to normal
execution of the user batch buffer if valid and requires chaining. In the next
patch, we will do the cmdparser validation in the pipeline asynchronously and
so at the point of request construction we will not know if we want to execute
the privileged and validated batch, or the original user batch.

The solution employed here is to execute both batches, one with raised
privileges and one as normal. This is because the gen7 MI_BATCH_BUFFER_START
command cannot change privilege level within a batch and must strictly use the
current privilege level (or undefined behaviour kills the GPU). So in order to
execute the original batch, we need a second non-privileged batch buffer
chained from the ring, i.e. we need to emit two batches for each user batch.
Inside the two batches we determine which one should actually execute: we
provide a conditional trampoline to call the original batch.

Implementation-wise, we create a single buffer and write the shadow and the
trampoline inside it at different offsets; and bind the buffer into both the
kernel GGTT, for the privileged execution of the shadow, and into the user
ppGTT, for the non-privileged execution of the trampoline and original batch.
One buffer, two batches and two vma.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191211230858.599030-1-chris@chris-wilson.co.uk
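To make the "two batches per user batch" idea concrete, here is a minimal
sketch of the submission side implied by the message above. This is not part
of this patch: the helper name, the two-dword gen7 MI_BATCH_BUFFER_START
layout, and the non-secure bit value are assumptions for illustration (the
diff below confirms Haswell uses a different bit, MI_BATCH_NON_SECURE_HSW).

    #include <linux/types.h>

    #define MI_BATCH_BUFFER_START   (0x31 << 23)
    #define MI_BATCH_NON_SECURE_I965 (1 << 8)   /* gen6/7 render, pre-HSW */

    /*
     * Emit both launches from the ring: a privileged pass through the
     * validated shadow (via its GGTT address) and a normal pass through
     * the conditional trampoline at the tail of the same buffer (via the
     * user's ppGTT mapping).
     */
    static u32 *emit_shadow_and_trampoline(u32 *cs,
                                           u32 shadow_ggtt,     /* offset 0 */
                                           u32 trampoline_ppgtt) /* offset batch_len */
    {
            /* pass 1: privileged, runs the validated shadow copy */
            *cs++ = MI_BATCH_BUFFER_START;
            *cs++ = shadow_ggtt;

            /* pass 2: non-privileged, runs the conditional trampoline */
            *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965;
            *cs++ = trampoline_ppgtt;

            return cs;
    }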
Diffstat (limited to 'drivers/gpu/drm/i915/i915_cmd_parser.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_cmd_parser.c | 57
1 file changed, 49 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index ddc8bf5175d9..34b0ea403a96 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1362,8 +1362,7 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length,
return 0;
}
-static unsigned long *
-alloc_whitelist(struct drm_i915_private *i915, u32 batch_length)
+static unsigned long *alloc_whitelist(u32 batch_length)
{
unsigned long *jmp;
@@ -1373,9 +1372,6 @@ alloc_whitelist(struct drm_i915_private *i915, u32 batch_length)
* reasonably cheap due to kmalloc caches.
*/
- if (CMDPARSER_USES_GGTT(i915))
- return NULL;
-
/* Prefer to report transient allocation failure rather than hit oom */
jmp = bitmap_zalloc(DIV_ROUND_UP(batch_length, sizeof(u32)),
GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
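The whitelist is sized at one bit per batch dword. For context, a sketch of
how the parser uses such a bitmap when validating jumps (the helper names
here are illustrative, not from this file; the real checks live in
check_bbstart()):

    /* mark each successfully validated command as a legal jump target */
    static void mark_cmd_sketch(unsigned long *jump_whitelist, u32 cmd_index)
    {
            if (jump_whitelist)
                    set_bit(cmd_index, jump_whitelist);
    }

    /* reject any MI_BATCH_BUFFER_START into unvalidated bytes */
    static int check_jump_sketch(const unsigned long *jump_whitelist,
                                 u32 target_index)
    {
            if (jump_whitelist && !test_bit(target_index, jump_whitelist))
                    return -EINVAL;
            return 0;
    }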
@@ -1394,6 +1390,7 @@ alloc_whitelist(struct drm_i915_private *i915, u32 batch_length)
* @batch_offset: byte offset in the batch at which execution starts
* @batch_length: length of the commands in batch_obj
* @shadow: validated copy of the batch buffer in question
+ * @trampoline: whether to emit a conditional trampoline at the end of the batch
*
* Parses the specified batch buffer looking for privilege violations as
* described in the overview.
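For reference, a hypothetical caller of the extended interface, showing where
the new trampoline argument threads through from request construction (the
i915_execbuffer fields below are assumptions for illustration):

    static int eb_parse_sketch(struct i915_execbuffer *eb,
                               struct i915_vma *shadow,
                               bool trampoline)
    {
            return intel_engine_cmd_parser(eb->engine,
                                           eb->batch,
                                           eb->batch_start_offset,
                                           eb->batch_len,
                                           shadow,
                                           trampoline);
    }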
@@ -1406,7 +1403,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
struct i915_vma *batch,
u32 batch_offset,
u32 batch_length,
- struct i915_vma *shadow)
+ struct i915_vma *shadow,
+ bool trampoline)
{
u32 *cmd, *batch_end, offset = 0;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
@@ -1430,8 +1428,10 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
return PTR_ERR(cmd);
}
- /* Defer failure until attempted use */
- jump_whitelist = alloc_whitelist(engine->i915, batch_length);
+ jump_whitelist = NULL;
+ if (!trampoline)
+ /* Defer failure until attempted use */
+ jump_whitelist = alloc_whitelist(batch_length);
shadow_addr = gen8_canonical_addr(shadow->node.start);
batch_addr = gen8_canonical_addr(batch->node.start + batch_offset);
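Both addresses are canonicalized before being compared against jump targets;
a sketch of what gen8_canonical_addr() computes (equivalent to the kernel's
sign_extend64(address, 47)):

    static inline u64 canonical_addr_sketch(u64 address)
    {
            /* replicate bit 47 of the 48-bit GPU address into bits 63:48 */
            return (u64)((s64)(address << 16) >> 16);
    }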
@@ -1493,6 +1493,47 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
}
} while (1);
+ if (trampoline) {
+ /*
+ * With the trampoline, the shadow is executed twice.
+ *
+ * 1 - starting at offset 0, in privileged mode
+ * 2 - starting at offset batch_len, as non-privileged
+ *
+ * Only if the batch is valid and safe to execute, do we
+ * allow the first privileged execution to proceed. If not,
+ * we terminate the first batch and use the second batchbuffer
+ * entry to chain to the original unsafe non-privileged batch,
+ * leaving it to the HW to validate.
+ */
+ *batch_end = MI_BATCH_BUFFER_END;
+
+ if (ret) {
+ /* Batch unsafe to execute with privileges, cancel! */
+ cmd = page_mask_bits(shadow->obj->mm.mapping);
+ *cmd = MI_BATCH_BUFFER_END;
+
+ /* If batch is unsafe but valid, jump to the original */
+ if (ret == -EACCES) {
+ unsigned int flags;
+
+ flags = MI_BATCH_NON_SECURE_I965;
+ if (IS_HASWELL(engine->i915))
+ flags = MI_BATCH_NON_SECURE_HSW;
+
+ GEM_BUG_ON(!IS_GEN_RANGE(engine->i915, 6, 7));
+ __gen6_emit_bb_start(batch_end,
+ batch_addr,
+ flags);
+
+ ret = 0; /* allow execution */
+ }
+ }
+
+ if (needs_clflush_after)
+ drm_clflush_virt_range(batch_end, 8);
+ }
+
if (needs_clflush_after) {
void *ptr = page_mask_bits(shadow->obj->mm.mapping);
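Putting the added hunk together, the trampoline decision reduces to the
following self-contained sketch. The opcode values follow the MI_INSTR
encoding, and the two-dword helper mirrors __gen6_emit_bb_start(), whose
exact definition lives outside this file:

    #define MI_BATCH_BUFFER_END     (0x0a << 23)
    #define MI_BATCH_BUFFER_START   (0x31 << 23)

    /* two-dword gen7 batchbuffer start into the original user batch */
    static void bb_start_sketch(u32 *cs, u32 addr, unsigned int flags)
    {
            cs[0] = MI_BATCH_BUFFER_START | flags;
            cs[1] = addr;
    }

    static int finish_trampoline_sketch(u32 *shadow_start, u32 *batch_end,
                                        u32 batch_addr, unsigned int flags,
                                        int ret)
    {
            *batch_end = MI_BATCH_BUFFER_END; /* default: 2nd pass is a no-op */

            if (ret) {
                    /* cancel the privileged 1st pass through the shadow */
                    *shadow_start = MI_BATCH_BUFFER_END;

                    if (ret == -EACCES) {
                            /* valid but unprivileged: chain to the original */
                            bb_start_sketch(batch_end, batch_addr, flags);
                            ret = 0;
                    }
            }
            return ret;
    }

If the batch validates, the privileged pass runs the shadow and the second,
non-privileged pass hits an immediate MI_BATCH_BUFFER_END. If it fails with
-EACCES, the roles flip: the privileged pass ends immediately and the
trampoline chains to the original user batch, leaving the HW to validate it.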