drm/i915: Pass intel_context to i915_request_create()

Start acquiring the logical intel_context and using that as our primary means for request allocation. This is the initial step to allow us to avoid requiring struct_mutex for request allocation along the perma-pinned kernel context, but it also provides a foundation for breaking up the complex request allocation to handle different scenarios inside execbuf.

For the purpose of emitting a request from inside retirement (see the next patch for engine power management), we also need to lift control over the timeline mutex to the caller.

v2: Note that the request carries the active reference upon construction.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190424200717.1686-4-chris@chris-wilson.co.uk
author: Chris Wilson <chris@chris-wilson.co.uk> 2019-04-24 21:07:16 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2019-04-24 22:25:35 +0100
commit: 2ccdf6a1c3f7ff51d721ee7a5bed96e03da77205 (patch)
tree: 255fa933a62ed7f1e7b5fd06bb9ab03fb1ec14fa /drivers/gpu/drm/i915/i915_request.c
parent: 6eee33e87f6d1f6263162ce0874c1ef503eff041 (diff)
1 files changed, 142 insertions, 105 deletions
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 9a2665ee012a..705c125bafc6 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -581,7 +581,7 @@ static void ring_retire_requests(struct intel_ring *ring)
 }
 
 static noinline struct i915_request *
-i915_request_alloc_slow(struct intel_context *ce)
+request_alloc_slow(struct intel_context *ce, gfp_t gfp)
 {
 	struct intel_ring *ring = ce->ring;
 	struct i915_request *rq;
@@ -589,6 +589,9 @@ i915_request_alloc_slow(struct intel_context *ce)
 	if (list_empty(&ring->request_list))
 		goto out;
 
+	if (!gfpflags_allow_blocking(gfp))
+		goto out;
+
 	/* Ratelimit ourselves to prevent oom from malicious clients */
 	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
 	cond_synchronize_rcu(rq->rcustate);
@@ -597,62 +600,21 @@ i915_request_alloc_slow(struct intel_context *ce)
 	ring_retire_requests(ring);
 
 out:
-	return kmem_cache_alloc(global.slab_requests, GFP_KERNEL);
+	return kmem_cache_alloc(global.slab_requests, gfp);
 }
 
-/**
- * i915_request_alloc - allocate a request structure
- *
- * @engine: engine that we wish to issue the request on.
- * @ctx: context that the request will be associated with.
- *
- * Returns a pointer to the allocated request if successful,
- * or an error code if not.
- */
 struct i915_request *
-i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
+__i915_request_create(struct intel_context *ce, gfp_t gfp)
 {
-	struct drm_i915_private *i915 = engine->i915;
-	struct intel_context *ce;
-	struct i915_timeline *tl;
+	struct i915_timeline *tl = ce->ring->timeline;
 	struct i915_request *rq;
 	u32 seqno;
 	int ret;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	/*
-	 * Preempt contexts are reserved for exclusive use to inject a
-	 * preemption context switch. They are never to be used for any trivial
-	 * request!
-	 */
-	GEM_BUG_ON(ctx == i915->preempt_context);
-
-	/*
-	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
-	 * EIO if the GPU is already wedged.
-	 */
-	ret = i915_terminally_wedged(i915);
-	if (ret)
-		return ERR_PTR(ret);
+	might_sleep_if(gfpflags_allow_blocking(gfp));
 
-	/*
-	 * Pinning the contexts may generate requests in order to acquire
-	 * GGTT space, so do this first before we reserve a seqno for
-	 * ourselves.
-	 */
-	ce = intel_context_pin(ctx, engine);
-	if (IS_ERR(ce))
-		return ERR_CAST(ce);
-
-	mutex_lock(&ce->ring->timeline->mutex);
-	intel_context_enter(ce);
-
-	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
-	    i915_request_completed(rq))
-		i915_request_retire(rq);
+	/* Check that the caller provided an already pinned context */
+	__intel_context_pin(ce);
 
 	/*
 	 * Beware: Dragons be flying overhead.
@@ -684,30 +646,26 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * Do not use kmem_cache_zalloc() here!
 	 */
 	rq = kmem_cache_alloc(global.slab_requests,
-			      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (unlikely(!rq)) {
-		rq = i915_request_alloc_slow(ce);
+		rq = request_alloc_slow(ce, gfp);
 		if (!rq) {
 			ret = -ENOMEM;
 			goto err_unreserve;
 		}
 	}
 
-	INIT_LIST_HEAD(&rq->active_list);
-	INIT_LIST_HEAD(&rq->execute_cb);
-
-	tl = ce->ring->timeline;
 	ret = i915_timeline_get_seqno(tl, rq, &seqno);
 	if (ret)
 		goto err_free;
 
-	rq->i915 = i915;
-	rq->engine = engine;
-	rq->gem_context = ctx;
+	rq->i915 = ce->engine->i915;
 	rq->hw_context = ce;
+	rq->gem_context = ce->gem_context;
+	rq->engine = ce->engine;
 	rq->ring = ce->ring;
 	rq->timeline = tl;
-	GEM_BUG_ON(rq->timeline == &engine->timeline);
+	GEM_BUG_ON(rq->timeline == &ce->engine->timeline);
 	rq->hwsp_seqno = tl->hwsp_seqno;
 	rq->hwsp_cacheline = tl->hwsp_cacheline;
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
@@ -728,6 +686,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->capture_list = NULL;
 	rq->waitboost = false;
 
+	INIT_LIST_HEAD(&rq->active_list);
+	INIT_LIST_HEAD(&rq->execute_cb);
+
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
 	 * eventually emit this request. This is to guarantee that the
@@ -740,7 +701,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * around inside i915_request_add() there is sufficient space at
 	 * the beginning of the ring as well.
 	 */
-	rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32);
+	rq->reserved_space =
+		2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);
 
 	/*
 	 * Record the position of the start of the request so that
@@ -750,20 +712,16 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
-	ret = engine->request_alloc(rq);
+	ret = rq->engine->request_alloc(rq);
 	if (ret)
 		goto err_unwind;
 
-	/* Keep a second pin for the dual retirement along engine and ring */
-	__intel_context_pin(ce);
-
 	rq->infix = rq->ring->emit; /* end of header; start of user payload */
 
-	/* Check that we didn't interrupt ourselves with a new request */
-	lockdep_assert_held(&rq->timeline->mutex);
-	GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
-	rq->cookie = lockdep_pin_lock(&rq->timeline->mutex);
+	/* Keep a second pin for the dual retirement along engine and ring */
+	__intel_context_pin(ce);
 
+	intel_context_mark_active(ce);
 	return rq;
 
 err_unwind:
@@ -777,12 +735,86 @@ err_unwind:
 err_free:
 	kmem_cache_free(global.slab_requests, rq);
 err_unreserve:
-	intel_context_exit(ce);
-	mutex_unlock(&ce->ring->timeline->mutex);
 	intel_context_unpin(ce);
 	return ERR_PTR(ret);
 }
 
+struct i915_request *
+i915_request_create(struct intel_context *ce)
+{
+	struct i915_request *rq;
+
+	intel_context_timeline_lock(ce);
+
+	/* Move our oldest request to the slab-cache (if not in use!) */
+	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
+	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
+	    i915_request_completed(rq))
+		i915_request_retire(rq);
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_KERNEL);
+	intel_context_exit(ce); /* active reference transferred to request */
+	if (IS_ERR(rq))
+		goto err_unlock;
+
+	/* Check that we do not interrupt ourselves with a new request */
+	rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex);
+
+	return rq;
+
+err_unlock:
+	intel_context_timeline_unlock(ce);
+	return rq;
+}
+
+/**
+ * i915_request_alloc - allocate a request structure
+ *
+ * @engine: engine that we wish to issue the request on.
+ * @ctx: context that the request will be associated with.
+ *
+ * Returns a pointer to the allocated request if successful,
+ * or an error code if not.
+ */
+struct i915_request *
+i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_context *ce;
+	struct i915_request *rq;
+	int ret;
+
+	/*
+	 * Preempt contexts are reserved for exclusive use to inject a
+	 * preemption context switch. They are never to be used for any trivial
+	 * request!
+	 */
+	GEM_BUG_ON(ctx == i915->preempt_context);
+
+	/*
+	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+	 * EIO if the GPU is already wedged.
+	 */
+	ret = i915_terminally_wedged(i915);
+	if (ret)
+		return ERR_PTR(ret);
+
+	/*
+	 * Pinning the contexts may generate requests in order to acquire
+	 * GGTT space, so do this first before we reserve a seqno for
+	 * ourselves.
+	 */
+	ce = intel_context_pin(ctx, engine);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
+
+	rq = i915_request_create(ce);
+	intel_context_unpin(ce);
+
+	return rq;
+}
+
 static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
@@ -1043,8 +1075,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * precludes optimising to use semaphores serialisation of a single
 	 * timeline across engines.
 	 */
-	prev = i915_active_request_raw(&timeline->last_request,
-				       &rq->i915->drm.struct_mutex);
+	prev = rcu_dereference_protected(timeline->last_request.request, 1);
 	if (prev && !i915_request_completed(prev)) {
 		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
 			i915_sw_fence_await_sw_fence(&rq->submit,
@@ -1065,6 +1096,11 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	list_add_tail(&rq->link, &timeline->requests);
 	spin_unlock_irq(&timeline->lock);
 
+	/*
+	 * Make sure that no request gazumped us - if it was allocated after
+	 * our i915_request_alloc() and called __i915_request_add() before
+	 * us, the timeline will hold its seqno which is later than ours.
+	 */
 	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
 	__i915_active_request_set(&timeline->last_request, rq);
 
@@ -1076,36 +1112,23 @@ __i915_request_add_to_timeline(struct i915_request *rq)
  * request is not being tracked for completion but the work itself is
  * going to happen on the hardware. This would be a Bad Thing(tm).
  */
-void i915_request_add(struct i915_request *request)
+struct i915_request *__i915_request_commit(struct i915_request *rq)
 {
-	struct intel_engine_cs *engine = request->engine;
-	struct i915_timeline *timeline = request->timeline;
-	struct intel_ring *ring = request->ring;
+	struct intel_engine_cs *engine = rq->engine;
+	struct intel_ring *ring = rq->ring;
 	struct i915_request *prev;
 	u32 *cs;
 
 	GEM_TRACE("%s fence %llx:%lld\n",
-		  engine->name, request->fence.context, request->fence.seqno);
-
-	lockdep_assert_held(&request->timeline->mutex);
-	lockdep_unpin_lock(&request->timeline->mutex, request->cookie);
-
-	trace_i915_request_add(request);
-
-	/*
-	 * Make sure that no request gazumped us - if it was allocated after
-	 * our i915_request_alloc() and called __i915_request_add() before
-	 * us, the timeline will hold its seqno which is later than ours.
-	 */
-	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
+		  engine->name, rq->fence.context, rq->fence.seqno);
 
 	/*
 	 * To ensure that this call will not fail, space for its emissions
 	 * should already have been reserved in the ring buffer. Let the ring
 	 * know that it is time to use that space up.
 	 */
-	GEM_BUG_ON(request->reserved_space > request->ring->space);
-	request->reserved_space = 0;
+	GEM_BUG_ON(rq->reserved_space > ring->space);
+	rq->reserved_space = 0;
 
 	/*
 	 * Record the position of the start of the breadcrumb so that
@@ -1113,17 +1136,17 @@ void i915_request_add(struct i915_request *request)
 	 * GPU processing the request, we never over-estimate the
 	 * position of the ring's HEAD.
 	 */
-	cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw);
+	cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw);
 	GEM_BUG_ON(IS_ERR(cs));
-	request->postfix = intel_ring_offset(request, cs);
+	rq->postfix = intel_ring_offset(rq, cs);
 
-	prev = __i915_request_add_to_timeline(request);
+	prev = __i915_request_add_to_timeline(rq);
 
-	list_add_tail(&request->ring_link, &ring->request_list);
-	if (list_is_first(&request->ring_link, &ring->request_list))
-		list_add(&ring->active_link, &request->i915->gt.active_rings);
-	request->i915->gt.active_engines |= request->engine->mask;
-	request->emitted_jiffies = jiffies;
+	list_add_tail(&rq->ring_link, &ring->request_list);
+	if (list_is_first(&rq->ring_link, &ring->request_list))
+		list_add(&ring->active_link, &rq->i915->gt.active_rings);
+	rq->i915->gt.active_engines |= rq->engine->mask;
+	rq->emitted_jiffies = jiffies;
 
 	/*
 	 * Let the backend know a new request has arrived that may need
@@ -1137,10 +1160,10 @@ void i915_request_add(struct i915_request *request)
 	 * run at the earliest possible convenience.
 	 */
 	local_bh_disable();
-	i915_sw_fence_commit(&request->semaphore);
+	i915_sw_fence_commit(&rq->semaphore);
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule) {
-		struct i915_sched_attr attr = request->gem_context->sched;
+		struct i915_sched_attr attr = rq->gem_context->sched;
 
 		/*
 		 * Boost actual workloads past semaphores!
@@ -1154,7 +1177,7 @@ void i915_request_add(struct i915_request *request)
 		 * far in the distance past over useful work, we keep a history
 		 * of any semaphore use along our dependency chain.
 		 */
-		if (!(request->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
+		if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
 			attr.priority |= I915_PRIORITY_NOSEMAPHORE;
 
 		/*
@@ -1163,15 +1186,29 @@ void i915_request_add(struct i915_request *request)
 		 * Allow interactive/synchronous clients to jump ahead of
 		 * the bulk clients. (FQ_CODEL)
 		 */
-		if (list_empty(&request->sched.signalers_list))
+		if (list_empty(&rq->sched.signalers_list))
 			attr.priority |= I915_PRIORITY_NEWCLIENT;
 
-		engine->schedule(request, &attr);
+		engine->schedule(rq, &attr);
 	}
 	rcu_read_unlock();
-	i915_sw_fence_commit(&request->submit);
+	i915_sw_fence_commit(&rq->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
 
+	return prev;
+}
+
+void i915_request_add(struct i915_request *rq)
+{
+	struct i915_request *prev;
+
+	lockdep_assert_held(&rq->timeline->mutex);
+	lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie);
+
+	trace_i915_request_add(rq);
+
+	prev = __i915_request_commit(rq);
+
 	/*
 	 * In typical scenarios, we do not expect the previous request on
 	 * the timeline to be still tracked by timeline->last_request if it
@@ -1192,7 +1229,7 @@ void i915_request_add(struct i915_request *request)
 	if (prev && i915_request_completed(prev))
 		i915_request_retire_upto(prev);
 
-	mutex_unlock(&request->timeline->mutex);
+	mutex_unlock(&rq->timeline->mutex);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
author	Chris Wilson <chris@chris-wilson.co.uk>	2019-04-24 21:07:16 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	2019-04-24 22:25:35 +0100
commit	2ccdf6a1c3f7ff51d721ee7a5bed96e03da77205 (patch)
tree	255fa933a62ed7f1e7b5fd06bb9ab03fb1ec14fa /drivers/gpu/drm/i915/i915_request.c
parent	6eee33e87f6d1f6263162ce0874c1ef503eff041 (diff)