summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_request.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_request.c')
-rw-r--r--drivers/gpu/drm/i915/i915_request.c148
1 files changed, 101 insertions, 47 deletions
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 76cf5ac91e94..4399941236cb 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -31,26 +31,29 @@
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
+#include <drm/drm_print.h>
+
#include "gem/i915_gem_context.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine.h"
#include "gt/intel_engine_heartbeat.h"
+#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_reset.h"
#include "gt/intel_ring.h"
#include "gt/intel_rps.h"
#include "i915_active.h"
+#include "i915_config.h"
#include "i915_deps.h"
+#include "i915_driver.h"
#include "i915_drv.h"
#include "i915_trace.h"
-#include "intel_pm.h"
struct execute_cb {
struct irq_work work;
struct i915_sw_fence *fence;
- struct i915_request *signal;
};
static struct kmem_cache *slab_requests;
@@ -58,7 +61,7 @@ static struct kmem_cache *slab_execute_cbs;
static const char *i915_fence_get_driver_name(struct dma_fence *fence)
{
- return dev_name(to_request(fence)->engine->i915->drm.dev);
+ return dev_name(to_request(fence)->i915->drm.dev);
}
static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
@@ -116,8 +119,10 @@ static void i915_fence_release(struct dma_fence *fence)
rq->guc_prio != GUC_PRIO_FINI);
i915_request_free_capture_list(fetch_and_zero(&rq->capture_list));
- if (i915_vma_snapshot_present(&rq->batch_snapshot))
- i915_vma_snapshot_put_onstack(&rq->batch_snapshot);
+ if (rq->batch_res) {
+ i915_vma_resource_put(rq->batch_res);
+ rq->batch_res = NULL;
+ }
/*
* The request is put onto a RCU freelist (i.e. the address
@@ -130,17 +135,39 @@ static void i915_fence_release(struct dma_fence *fence)
i915_sw_fence_fini(&rq->semaphore);
/*
- * Keep one request on each engine for reserved use under mempressure,
- * do not use with virtual engines as this really is only needed for
- * kernel contexts.
+ * Keep one request on each engine for reserved use under mempressure.
+ *
+ * We do not hold a reference to the engine here and so have to be
+ * very careful in what rq->engine we poke. The virtual engine is
+ * referenced via the rq->context and we released that ref during
+ * i915_request_retire(), ergo we must not dereference a virtual
+ * engine here. Not that we would want to, as the only consumer of
+ * the reserved engine->request_pool is the power management parking,
+ * which must-not-fail, and that is only run on the physical engines.
+ *
+ * Since the request must have been executed to be have completed,
+ * we know that it will have been processed by the HW and will
+ * not be unsubmitted again, so rq->engine and rq->execution_mask
+ * at this point is stable. rq->execution_mask will be a single
+ * bit if the last and _only_ engine it could execution on was a
+ * physical engine, if it's multiple bits then it started on and
+ * could still be on a virtual engine. Thus if the mask is not a
+ * power-of-two we assume that rq->engine may still be a virtual
+ * engine and so a dangling invalid pointer that we cannot dereference
+ *
+ * For example, consider the flow of a bonded request through a virtual
+ * engine. The request is created with a wide engine mask (all engines
+ * that we might execute on). On processing the bond, the request mask
+ * is reduced to one or more engines. If the request is subsequently
+ * bound to a single engine, it will then be constrained to only
+ * execute on that engine and never returned to the virtual engine
+ * after timeslicing away, see __unwind_incomplete_requests(). Thus we
+ * know that if the rq->execution_mask is a single bit, rq->engine
+ * can be a physical engine with the exact corresponding mask.
*/
- if (!intel_engine_is_virtual(rq->engine) &&
- !cmpxchg(&rq->engine->request_pool, NULL, rq)) {
- intel_context_put(rq->context);
+ if (is_power_of_2(rq->execution_mask) &&
+ !cmpxchg(&rq->engine->request_pool, NULL, rq))
return;
- }
-
- intel_context_put(rq->context);
kmem_cache_free(slab_requests, rq);
}
@@ -248,11 +275,6 @@ i915_request_active_engine(struct i915_request *rq,
return ret;
}
-static void __rq_init_watchdog(struct i915_request *rq)
-{
- rq->watchdog.timer.function = NULL;
-}
-
static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
{
struct i915_request *rq =
@@ -261,7 +283,7 @@ static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
if (!i915_request_completed(rq)) {
if (llist_add(&rq->watchdog.link, &gt->watchdog.list))
- schedule_work(&gt->watchdog.work);
+ queue_work(gt->i915->unordered_wq, &gt->watchdog.work);
} else {
i915_request_put(rq);
}
@@ -269,6 +291,13 @@ static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
return HRTIMER_NORESTART;
}
+static void __rq_init_watchdog(struct i915_request *rq)
+{
+ struct i915_request_watchdog *wdg = &rq->watchdog;
+
+ hrtimer_setup(&wdg->timer, __rq_watchdog_expired, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+}
+
static void __rq_arm_watchdog(struct i915_request *rq)
{
struct i915_request_watchdog *wdg = &rq->watchdog;
@@ -279,8 +308,6 @@ static void __rq_arm_watchdog(struct i915_request *rq)
i915_request_get(rq);
- hrtimer_init(&wdg->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- wdg->timer.function = __rq_watchdog_expired;
hrtimer_start_range_ns(&wdg->timer,
ns_to_ktime(ce->watchdog.timeout_us *
NSEC_PER_USEC),
@@ -292,7 +319,7 @@ static void __rq_cancel_watchdog(struct i915_request *rq)
{
struct i915_request_watchdog *wdg = &rq->watchdog;
- if (wdg->timer.function && hrtimer_try_to_cancel(&wdg->timer) > 0)
+ if (hrtimer_try_to_cancel(&wdg->timer) > 0)
i915_request_put(rq);
}
@@ -308,7 +335,7 @@ void i915_request_free_capture_list(struct i915_capture_list *capture)
while (capture) {
struct i915_capture_list *next = capture->next;
- i915_vma_snapshot_put(capture->vma_snapshot);
+ i915_vma_resource_put(capture->vma_res);
kfree(capture);
capture = next;
}
@@ -447,7 +474,7 @@ static bool __request_in_flight(const struct i915_request *signal)
* to avoid tearing.]
*
* Note that the read of *execlists->active may race with the promotion
- * of execlists->pending[] to execlists->inflight[], overwritting
+ * of execlists->pending[] to execlists->inflight[], overwriting
* the value at *execlists->active. This is fine. The promotion implies
* that we received an ACK from the HW, and so the context is not
* stuck -- if we do not see ourselves in *active, the inflight status
@@ -607,7 +634,7 @@ bool __i915_request_submit(struct i915_request *request)
goto active;
}
- if (unlikely(intel_context_is_banned(request->context)))
+ if (unlikely(!intel_context_is_schedulable(request->context)))
i915_request_set_error_once(request, -EIO);
if (unlikely(fatal_error(request->fence.error)))
@@ -854,7 +881,7 @@ static void __i915_request_ctor(void *arg)
i915_sw_fence_init(&rq->semaphore, semaphore_notify);
clear_capture_list(rq);
- rq->batch_snapshot.present = false;
+ rq->batch_res = NULL;
init_llist_head(&rq->execute_cb);
}
@@ -917,22 +944,11 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
}
}
- /*
- * Hold a reference to the intel_context over life of an i915_request.
- * Without this an i915_request can exist after the context has been
- * destroyed (e.g. request retired, context closed, but user space holds
- * a reference to the request from an out fence). In the case of GuC
- * submission + virtual engine, the engine that the request references
- * is also destroyed which can trigger bad pointer dref in fence ops
- * (e.g. i915_fence_get_driver_name). We could likely change these
- * functions to avoid touching the engine but let's just be safe and
- * hold the intel_context reference. In execlist mode the request always
- * eventually points to a physical engine so this isn't an issue.
- */
- rq->context = intel_context_get(ce);
+ rq->context = ce;
rq->engine = ce->engine;
rq->ring = ce->ring;
rq->execution_mask = ce->engine->mask;
+ rq->i915 = ce->engine->i915;
ret = intel_timeline_get_seqno(tl, rq, &seqno);
if (ret)
@@ -960,7 +976,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
__rq_init_watchdog(rq);
assert_capture_list_is_null(rq);
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
- GEM_BUG_ON(i915_vma_snapshot_present(&rq->batch_snapshot));
+ GEM_BUG_ON(rq->batch_res);
/*
* Reserve space in the ring buffer for all the commands required to
@@ -1004,7 +1020,6 @@ err_unwind:
GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
err_free:
- intel_context_put(ce);
kmem_cache_free(slab_requests, rq);
err_unreserve:
intel_context_unpin(ce);
@@ -1203,7 +1218,7 @@ emit_semaphore_wait(struct i915_request *to,
/*
* If this or its dependents are waiting on an external fence
* that may fail catastrophically, then we want to avoid using
- * sempahores as they bypass the fence signaling metadata, and we
+ * semaphores as they bypass the fence signaling metadata, and we
* lose the fence->error propagation.
*/
if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN)
@@ -1336,7 +1351,7 @@ __i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
{
mark_external(rq);
return i915_sw_fence_await_dma_fence(&rq->submit, fence,
- i915_fence_context_timeout(rq->engine->i915,
+ i915_fence_context_timeout(rq->i915,
fence->context),
I915_FENCE_GFP);
}
@@ -1594,7 +1609,8 @@ i915_request_await_object(struct i915_request *to,
struct dma_fence *fence;
int ret = 0;
- dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) {
+ dma_resv_for_each_fence(&cursor, obj->base.resv,
+ dma_resv_usage_rw(write), fence) {
ret = i915_request_await_dma_fence(to, fence);
if (ret)
break;
@@ -1603,6 +1619,20 @@ i915_request_await_object(struct i915_request *to,
return ret;
}
+static void i915_request_await_huc(struct i915_request *rq)
+{
+ struct intel_huc *huc = &rq->context->engine->gt->uc.huc;
+
+ /* don't stall kernel submissions! */
+ if (!rcu_access_pointer(rq->context->gem_context))
+ return;
+
+ if (intel_huc_wait_required(huc))
+ i915_sw_fence_await_sw_fence(&rq->submit,
+ &huc->delayed_load.fence,
+ &rq->hucq);
+}
+
static struct i915_request *
__i915_request_ensure_parallel_ordering(struct i915_request *rq,
struct intel_timeline *timeline)
@@ -1629,6 +1659,11 @@ __i915_request_ensure_parallel_ordering(struct i915_request *rq,
request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);
+ /*
+ * Users have to put a reference potentially got by
+ * __i915_active_fence_set() to the returned request
+ * when no longer needed
+ */
return to_request(__i915_active_fence_set(&timeline->last_request,
&rq->fence));
}
@@ -1675,6 +1710,10 @@ __i915_request_ensure_ordering(struct i915_request *rq,
0);
}
+ /*
+ * Users have to put the reference to prev potentially got
+ * by __i915_active_fence_set() when no longer needed
+ */
return prev;
}
@@ -1685,6 +1724,16 @@ __i915_request_add_to_timeline(struct i915_request *rq)
struct i915_request *prev;
/*
+ * Media workloads may require HuC, so stall them until HuC loading is
+ * complete. Note that HuC not being loaded when a user submission
+ * arrives can only happen when HuC is loaded via GSC and in that case
+ * we still expect the window between us starting to accept submissions
+ * and HuC loading completion to be small (a few hundred ms).
+ */
+ if (rq->engine->class == VIDEO_DECODE_CLASS)
+ i915_request_await_huc(rq);
+
+ /*
* Dependency tracking and request ordering along the timeline
* is special cased so that we can eliminate redundant ordering
* operations while building the request (we know that the timeline
@@ -1718,6 +1767,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
prev = __i915_request_ensure_ordering(rq, timeline);
else
prev = __i915_request_ensure_parallel_ordering(rq, timeline);
+ if (prev)
+ i915_request_put(prev);
/*
* Make sure that no request gazumped us - if it was allocated after
@@ -2135,7 +2186,7 @@ void i915_request_show(struct drm_printer *m,
const char *prefix,
int indent)
{
- const char *name = rq->fence.ops->get_timeline_name((struct dma_fence *)&rq->fence);
+ const char __rcu *timeline;
char buf[80] = "";
int x = 0;
@@ -2171,6 +2222,8 @@ void i915_request_show(struct drm_printer *m,
x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf));
+ rcu_read_lock();
+ timeline = dma_fence_timeline_name((struct dma_fence *)&rq->fence);
drm_printf(m, "%s%.*s%c %llx:%lld%s%s %s @ %dms: %s\n",
prefix, indent, " ",
queue_status(rq),
@@ -2179,7 +2232,8 @@ void i915_request_show(struct drm_printer *m,
fence_status(rq),
buf,
jiffies_to_msecs(jiffies - rq->emitted_jiffies),
- name);
+ rcu_dereference(timeline));
+ rcu_read_unlock();
}
static bool engine_match_ring(struct intel_engine_cs *engine, struct i915_request *rq)