drm/i915: Unify active context tracking between legacy/execlists/guc

The requests conversion introduced a nasty bug where we could generate a new request in the middle of constructing a request if we needed to idle the system in order to evict space for a context. The request to idle would be executed (and waited upon) before the current one, creating a minor havoc in the seqno accounting, as we will consider the current request to already be completed (prior to deferred seqno assignment) but ring->last_retired_head would have been updated and still could allow us to overwrite the current request before execution. We also employed two different mechanisms to track the active context until it was switched out. The legacy method allowed for waiting upon an active context (it could forcibly evict any vma, including context's), but the execlists method took a step backwards by pinning the vma for the entire active lifespan of the context (the only way to evict was to idle the entire GPU, not individual contexts). However, to circumvent the tricky issue of locking (i.e. we cannot take struct_mutex at the time of i915_gem_request_submit(), where we would want to move the previous context onto the active tracker and unpin it), we take the execlists approach and keep the contexts pinned until retirement. The benefit of the execlists approach, more important for execlists than legacy, was the reduction in work in pinning the context for each request - as the context was kept pinned until idle, it could short circuit the pinning for all active contexts. We introduce new engine vfuncs to pin and unpin the context respectively. The context is pinned at the start of the request, and only unpinned when the following request is retired (this ensures that the context is idle and coherent in main memory before we unpin it). We move the engine->last_context tracking into the retirement itself (rather than during request submission) in order to allow the submission to be reordered or unwound without undue difficultly. And finally an ulterior motive for unifying context handling was to prepare for mock requests. v2: Rename to last_retired_context, split out legacy_context tracking for MI_SET_CONTEXT. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20161218153724.8439-3-chris@chris-wilson.co.uk
author: Chris Wilson <chris@chris-wilson.co.uk> 2016-12-18 15:37:20 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk> 2016-12-18 16:18:50 +0000
commit: e8a9c58fcd9a5081f71f57f370af1347ed6a310b (patch)
tree: 61b8fe4eb3219a120f8e941ac4b0aa4a8f10fa69 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent: ef11c01db405b49d96a0ab2689807ea33ae85dd6 (diff)
1 files changed, 36 insertions, 22 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0b7d13b6e228..00ff572541b6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1939,8 +1939,26 @@ intel_ring_free(struct intel_ring *ring)
 	kfree(ring);
 }
 
-static int intel_ring_context_pin(struct i915_gem_context *ctx,
-				  struct intel_engine_cs *engine)
+static int context_pin(struct i915_gem_context *ctx, unsigned int flags)
+{
+	struct i915_vma *vma = ctx->engine[RCS].state;
+	int ret;
+
+	/* Clear this page out of any CPU caches for coherent swap-in/out.
+	 * We only want to do this on the first bind so that we do not stall
+	 * on an active context (which by nature is already on the GPU).
+	 */
+	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
+		ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
+		if (ret)
+			return ret;
+	}
+
+	return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags);
+}
+
+static int intel_ring_context_pin(struct intel_engine_cs *engine,
+				  struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = &ctx->engine[engine->id];
 	int ret;
@@ -1951,13 +1969,15 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
 		return 0;
 
 	if (ce->state) {
-		struct i915_vma *vma;
+		unsigned int flags;
+
+		flags = 0;
+		if (ctx == ctx->i915->kernel_context)
+			flags = PIN_HIGH;
 
-		vma = i915_gem_context_pin_legacy(ctx, PIN_HIGH);
-		if (IS_ERR(vma)) {
-			ret = PTR_ERR(vma);
+		ret = context_pin(ctx, flags);
+		if (ret)
 			goto error;
-		}
 	}
 
 	/* The kernel context is only used as a placeholder for flushing the
@@ -1978,12 +1998,13 @@ error:
 	return ret;
 }
 
-static void intel_ring_context_unpin(struct i915_gem_context *ctx,
-				     struct intel_engine_cs *engine)
+static void intel_ring_context_unpin(struct intel_engine_cs *engine,
+				     struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = &ctx->engine[engine->id];
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
+	GEM_BUG_ON(ce->pin_count == 0);
 
 	if (--ce->pin_count)
 		return;
@@ -2008,17 +2029,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 	if (ret)
 		goto error;
 
-	/* We may need to do things with the shrinker which
-	 * require us to immediately switch back to the default
-	 * context. This can cause a problem as pinning the
-	 * default context also requires GTT space which may not
-	 * be available. To avoid this we always pin the default
-	 * context.
-	 */
-	ret = intel_ring_context_pin(dev_priv->kernel_context, engine);
-	if (ret)
-		goto error;
-
 	ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
 	if (IS_ERR(ring)) {
 		ret = PTR_ERR(ring);
@@ -2077,8 +2087,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
 
 	intel_engine_cleanup_common(engine);
 
-	intel_ring_context_unpin(dev_priv->kernel_context, engine);
-
 	engine->i915 = NULL;
 	dev_priv->engine[engine->id] = NULL;
 	kfree(engine);
@@ -2099,12 +2107,15 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
 {
 	int ret;
 
+	GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count);
+
 	/* Flush enough space to reduce the likelihood of waiting after
 	 * we start building the request - in which case we will just
 	 * have to repeat work.
 	 */
 	request->reserved_space += LEGACY_REQUEST_SIZE;
 
+	GEM_BUG_ON(!request->engine->buffer);
 	request->ring = request->engine->buffer;
 
 	ret = intel_ring_begin(request, 0);
@@ -2584,6 +2595,9 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	engine->init_hw = init_ring_common;
 	engine->reset_hw = reset_ring_common;
 
+	engine->context_pin = intel_ring_context_pin;
+	engine->context_unpin = intel_ring_context_unpin;
+
 	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
 	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
 	if (i915.semaphores) {
author	Chris Wilson <chris@chris-wilson.co.uk>	2016-12-18 15:37:20 +0000
committer	Chris Wilson <chris@chris-wilson.co.uk>	2016-12-18 16:18:50 +0000
commit	e8a9c58fcd9a5081f71f57f370af1347ed6a310b (patch)
tree	61b8fe4eb3219a120f8e941ac4b0aa4a8f10fa69 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent	ef11c01db405b49d96a0ab2689807ea33ae85dd6 (diff)