summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm/i915/i915_request.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_request.c')
-rw-r--r-- drivers/gpu/drm/i915/i915_request.c 78
1 files changed, 57 insertions, 21 deletions
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 62fad16a55e8..4399941236cb 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -31,6 +31,8 @@
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
+#include <drm/drm_print.h>
+
#include "gem/i915_gem_context.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
@@ -43,16 +45,15 @@
#include "gt/intel_rps.h"
#include "i915_active.h"
+#include "i915_config.h"
#include "i915_deps.h"
#include "i915_driver.h"
#include "i915_drv.h"
#include "i915_trace.h"
-#include "intel_pm.h"
struct execute_cb {
struct irq_work work;
struct i915_sw_fence *fence;
- struct i915_request *signal;
};
static struct kmem_cache *slab_requests;
@@ -134,9 +135,7 @@ static void i915_fence_release(struct dma_fence *fence)
i915_sw_fence_fini(&rq->semaphore);
/*
- * Keep one request on each engine for reserved use under mempressure
- * do not use with virtual engines as this really is only needed for
- * kernel contexts.
+ * Keep one request on each engine for reserved use under mempressure.
*
* We do not hold a reference to the engine here and so have to be
* very careful in what rq->engine we poke. The virtual engine is
@@ -166,8 +165,7 @@ static void i915_fence_release(struct dma_fence *fence)
* know that if the rq->execution_mask is a single bit, rq->engine
* can be a physical engine with the exact corresponding mask.
*/
- if (!intel_engine_is_virtual(rq->engine) &&
- is_power_of_2(rq->execution_mask) &&
+ if (is_power_of_2(rq->execution_mask) &&
!cmpxchg(&rq->engine->request_pool, NULL, rq))
return;
@@ -277,11 +275,6 @@ i915_request_active_engine(struct i915_request *rq,
return ret;
}
-static void __rq_init_watchdog(struct i915_request *rq)
-{
- rq->watchdog.timer.function = NULL;
-}
-
static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
{
struct i915_request *rq =
@@ -290,7 +283,7 @@ static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
if (!i915_request_completed(rq)) {
if (llist_add(&rq->watchdog.link, &gt->watchdog.list))
- schedule_work(&gt->watchdog.work);
+ queue_work(gt->i915->unordered_wq, &gt->watchdog.work);
} else {
i915_request_put(rq);
}
@@ -298,6 +291,13 @@ static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
return HRTIMER_NORESTART;
}
+static void __rq_init_watchdog(struct i915_request *rq)
+{
+ struct i915_request_watchdog *wdg = &rq->watchdog;
+
+ hrtimer_setup(&wdg->timer, __rq_watchdog_expired, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+}
+
static void __rq_arm_watchdog(struct i915_request *rq)
{
struct i915_request_watchdog *wdg = &rq->watchdog;
@@ -308,8 +308,6 @@ static void __rq_arm_watchdog(struct i915_request *rq)
i915_request_get(rq);
- hrtimer_init(&wdg->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- wdg->timer.function = __rq_watchdog_expired;
hrtimer_start_range_ns(&wdg->timer,
ns_to_ktime(ce->watchdog.timeout_us *
NSEC_PER_USEC),
@@ -321,7 +319,7 @@ static void __rq_cancel_watchdog(struct i915_request *rq)
{
struct i915_request_watchdog *wdg = &rq->watchdog;
- if (wdg->timer.function && hrtimer_try_to_cancel(&wdg->timer) > 0)
+ if (hrtimer_try_to_cancel(&wdg->timer) > 0)
i915_request_put(rq);
}
@@ -476,7 +474,7 @@ static bool __request_in_flight(const struct i915_request *signal)
* to avoid tearing.]
*
* Note that the read of *execlists->active may race with the promotion
- * of execlists->pending[] to execlists->inflight[], overwritting
+ * of execlists->pending[] to execlists->inflight[], overwriting
* the value at *execlists->active. This is fine. The promotion implies
* that we received an ACK from the HW, and so the context is not
* stuck -- if we do not see ourselves in *active, the inflight status
@@ -1220,7 +1218,7 @@ emit_semaphore_wait(struct i915_request *to,
/*
* If this or its dependents are waiting on an external fence
* that may fail catastrophically, then we want to avoid using
- * sempahores as they bypass the fence signaling metadata, and we
+ * semaphores as they bypass the fence signaling metadata, and we
* lose the fence->error propagation.
*/
if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN)
@@ -1353,7 +1351,7 @@ __i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
{
mark_external(rq);
return i915_sw_fence_await_dma_fence(&rq->submit, fence,
- i915_fence_context_timeout(rq->engine->i915,
+ i915_fence_context_timeout(rq->i915,
fence->context),
I915_FENCE_GFP);
}
@@ -1621,6 +1619,20 @@ i915_request_await_object(struct i915_request *to,
return ret;
}
+static void i915_request_await_huc(struct i915_request *rq)
+{
+ struct intel_huc *huc = &rq->context->engine->gt->uc.huc;
+
+ /* don't stall kernel submissions! */
+ if (!rcu_access_pointer(rq->context->gem_context))
+ return;
+
+ if (intel_huc_wait_required(huc))
+ i915_sw_fence_await_sw_fence(&rq->submit,
+ &huc->delayed_load.fence,
+ &rq->hucq);
+}
+
static struct i915_request *
__i915_request_ensure_parallel_ordering(struct i915_request *rq,
struct intel_timeline *timeline)
@@ -1647,6 +1659,11 @@ __i915_request_ensure_parallel_ordering(struct i915_request *rq,
request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);
+ /*
+ * Users have to put a reference potentially got by
+ * __i915_active_fence_set() to the returned request
+ * when no longer needed
+ */
return to_request(__i915_active_fence_set(&timeline->last_request,
&rq->fence));
}
@@ -1693,6 +1710,10 @@ __i915_request_ensure_ordering(struct i915_request *rq,
0);
}
+ /*
+ * Users have to put the reference to prev potentially got
+ * by __i915_active_fence_set() when no longer needed
+ */
return prev;
}
@@ -1703,6 +1724,16 @@ __i915_request_add_to_timeline(struct i915_request *rq)
struct i915_request *prev;
/*
+ * Media workloads may require HuC, so stall them until HuC loading is
+ * complete. Note that HuC not being loaded when a user submission
+ * arrives can only happen when HuC is loaded via GSC and in that case
+ * we still expect the window between us starting to accept submissions
+ * and HuC loading completion to be small (a few hundred ms).
+ */
+ if (rq->engine->class == VIDEO_DECODE_CLASS)
+ i915_request_await_huc(rq);
+
+ /*
* Dependency tracking and request ordering along the timeline
* is special cased so that we can eliminate redundant ordering
* operations while building the request (we know that the timeline
@@ -1736,6 +1767,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
prev = __i915_request_ensure_ordering(rq, timeline);
else
prev = __i915_request_ensure_parallel_ordering(rq, timeline);
+ if (prev)
+ i915_request_put(prev);
/*
* Make sure that no request gazumped us - if it was allocated after
@@ -2153,7 +2186,7 @@ void i915_request_show(struct drm_printer *m,
const char *prefix,
int indent)
{
- const char *name = rq->fence.ops->get_timeline_name((struct dma_fence *)&rq->fence);
+ const char __rcu *timeline;
char buf[80] = "";
int x = 0;
@@ -2189,6 +2222,8 @@ void i915_request_show(struct drm_printer *m,
x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf));
+ rcu_read_lock();
+ timeline = dma_fence_timeline_name((struct dma_fence *)&rq->fence);
drm_printf(m, "%s%.*s%c %llx:%lld%s%s %s @ %dms: %s\n",
prefix, indent, " ",
queue_status(rq),
@@ -2197,7 +2232,8 @@ void i915_request_show(struct drm_printer *m,
fence_status(rq),
buf,
jiffies_to_msecs(jiffies - rq->emitted_jiffies),
- name);
+ rcu_dereference(timeline));
+ rcu_read_unlock();
}
static bool engine_match_ring(struct intel_engine_cs *engine, struct i915_request *rq)