Diffstat (limited to 'drivers/gpu/drm/i915/i915_request.c')
 -rw-r--r--  drivers/gpu/drm/i915/i915_request.c | 148
 1 file changed, 101 insertions, 47 deletions
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 76cf5ac91e94..4399941236cb 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -31,26 +31,29 @@
 #include <linux/sched/signal.h>
 #include <linux/sched/mm.h>
 
+#include <drm/drm_print.h>
+
 #include "gem/i915_gem_context.h"
 #include "gt/intel_breadcrumbs.h"
 #include "gt/intel_context.h"
 #include "gt/intel_engine.h"
 #include "gt/intel_engine_heartbeat.h"
+#include "gt/intel_engine_regs.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_reset.h"
 #include "gt/intel_ring.h"
 #include "gt/intel_rps.h"
 
 #include "i915_active.h"
+#include "i915_config.h"
 #include "i915_deps.h"
+#include "i915_driver.h"
 #include "i915_drv.h"
 #include "i915_trace.h"
-#include "intel_pm.h"
 
 struct execute_cb {
 	struct irq_work work;
 	struct i915_sw_fence *fence;
-	struct i915_request *signal;
 };
 
 static struct kmem_cache *slab_requests;
@@ -58,7 +61,7 @@ static struct kmem_cache *slab_execute_cbs;
 
 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
 {
-	return dev_name(to_request(fence)->engine->i915->drm.dev);
+	return dev_name(to_request(fence)->i915->drm.dev);
 }
 
 static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
@@ -116,8 +119,10 @@ static void i915_fence_release(struct dma_fence *fence)
 		   rq->guc_prio != GUC_PRIO_FINI);
 
 	i915_request_free_capture_list(fetch_and_zero(&rq->capture_list));
-	if (i915_vma_snapshot_present(&rq->batch_snapshot))
-		i915_vma_snapshot_put_onstack(&rq->batch_snapshot);
+	if (rq->batch_res) {
+		i915_vma_resource_put(rq->batch_res);
+		rq->batch_res = NULL;
+	}
 
 	/*
 	 * The request is put onto a RCU freelist (i.e. the address
@@ -130,17 +135,39 @@ static void i915_fence_release(struct dma_fence *fence)
 	i915_sw_fence_fini(&rq->semaphore);
 
 	/*
-	 * Keep one request on each engine for reserved use under mempressure,
-	 * do not use with virtual engines as this really is only needed for
-	 * kernel contexts.
+	 * Keep one request on each engine for reserved use under mempressure.
+	 *
+	 * We do not hold a reference to the engine here and so have to be
+	 * very careful in what rq->engine we poke. The virtual engine is
+	 * referenced via the rq->context and we released that ref during
+	 * i915_request_retire(), ergo we must not dereference a virtual
+	 * engine here. Not that we would want to, as the only consumer of
+	 * the reserved engine->request_pool is the power management parking,
+	 * which must-not-fail, and that is only run on the physical engines.
+	 *
+	 * Since the request must have been executed to be have completed,
+	 * we know that it will have been processed by the HW and will
+	 * not be unsubmitted again, so rq->engine and rq->execution_mask
+	 * at this point is stable. rq->execution_mask will be a single
+	 * bit if the last and _only_ engine it could execution on was a
+	 * physical engine, if it's multiple bits then it started on and
+	 * could still be on a virtual engine. Thus if the mask is not a
+	 * power-of-two we assume that rq->engine may still be a virtual
+	 * engine and so a dangling invalid pointer that we cannot dereference
+	 *
+	 * For example, consider the flow of a bonded request through a virtual
+	 * engine. The request is created with a wide engine mask (all engines
+	 * that we might execute on). On processing the bond, the request mask
+	 * is reduced to one or more engines. If the request is subsequently
+	 * bound to a single engine, it will then be constrained to only
+	 * execute on that engine and never returned to the virtual engine
+	 * after timeslicing away, see __unwind_incomplete_requests(). Thus we
+	 * know that if the rq->execution_mask is a single bit, rq->engine
+	 * can be a physical engine with the exact corresponding mask.
 	 */
-	if (!intel_engine_is_virtual(rq->engine) &&
-	    !cmpxchg(&rq->engine->request_pool, NULL, rq)) {
-		intel_context_put(rq->context);
+	if (is_power_of_2(rq->execution_mask) &&
+	    !cmpxchg(&rq->engine->request_pool, NULL, rq))
 		return;
-	}
-
-	intel_context_put(rq->context);
 
 	kmem_cache_free(slab_requests, rq);
 }
@@ -248,11 +275,6 @@ i915_request_active_engine(struct i915_request *rq,
 	return ret;
 }
 
-static void __rq_init_watchdog(struct i915_request *rq)
-{
-	rq->watchdog.timer.function = NULL;
-}
-
 static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
 {
 	struct i915_request *rq =
@@ -261,7 +283,7 @@ static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
 
 	if (!i915_request_completed(rq)) {
 		if (llist_add(&rq->watchdog.link, &gt->watchdog.list))
-			schedule_work(&gt->watchdog.work);
+			queue_work(gt->i915->unordered_wq, &gt->watchdog.work);
 	} else {
 		i915_request_put(rq);
 	}
@@ -269,6 +291,13 @@ static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
 	return HRTIMER_NORESTART;
 }
 
+static void __rq_init_watchdog(struct i915_request *rq)
+{
+	struct i915_request_watchdog *wdg = &rq->watchdog;
+
+	hrtimer_setup(&wdg->timer, __rq_watchdog_expired, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+}
+
 static void __rq_arm_watchdog(struct i915_request *rq)
 {
 	struct i915_request_watchdog *wdg = &rq->watchdog;
@@ -279,8 +308,6 @@ static void __rq_arm_watchdog(struct i915_request *rq)
 
 	i915_request_get(rq);
 
-	hrtimer_init(&wdg->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	wdg->timer.function = __rq_watchdog_expired;
 	hrtimer_start_range_ns(&wdg->timer,
 			       ns_to_ktime(ce->watchdog.timeout_us *
 					   NSEC_PER_USEC),
@@ -292,7 +319,7 @@ static void __rq_cancel_watchdog(struct i915_request *rq)
 {
 	struct i915_request_watchdog *wdg = &rq->watchdog;
 
-	if (wdg->timer.function && hrtimer_try_to_cancel(&wdg->timer) > 0)
+	if (hrtimer_try_to_cancel(&wdg->timer) > 0)
 		i915_request_put(rq);
 }
 
@@ -308,7 +335,7 @@ void i915_request_free_capture_list(struct i915_capture_list *capture)
 	while (capture) {
 		struct i915_capture_list *next = capture->next;
 
-		i915_vma_snapshot_put(capture->vma_snapshot);
+		i915_vma_resource_put(capture->vma_res);
 		kfree(capture);
 		capture = next;
 	}
@@ -447,7 +474,7 @@ static bool __request_in_flight(const struct i915_request *signal)
 	 * to avoid tearing.]
 	 *
 	 * Note that the read of *execlists->active may race with the promotion
-	 * of execlists->pending[] to execlists->inflight[], overwritting
+	 * of execlists->pending[] to execlists->inflight[], overwriting
	 * the value at *execlists->active. This is fine. The promotion implies
 	 * that we received an ACK from the HW, and so the context is not
 	 * stuck -- if we do not see ourselves in *active, the inflight status
@@ -607,7 +634,7 @@ bool __i915_request_submit(struct i915_request *request)
 		goto active;
 	}
 
-	if (unlikely(intel_context_is_banned(request->context)))
+	if (unlikely(!intel_context_is_schedulable(request->context)))
 		i915_request_set_error_once(request, -EIO);
 
 	if (unlikely(fatal_error(request->fence.error)))
@@ -854,7 +881,7 @@ static void __i915_request_ctor(void *arg)
 	i915_sw_fence_init(&rq->semaphore, semaphore_notify);
 
 	clear_capture_list(rq);
-	rq->batch_snapshot.present = false;
+	rq->batch_res = NULL;
 
 	init_llist_head(&rq->execute_cb);
 }
@@ -917,22 +944,11 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 		}
 	}
 
-	/*
-	 * Hold a reference to the intel_context over life of an i915_request.
-	 * Without this an i915_request can exist after the context has been
-	 * destroyed (e.g. request retired, context closed, but user space holds
-	 * a reference to the request from an out fence). In the case of GuC
-	 * submission + virtual engine, the engine that the request references
-	 * is also destroyed which can trigger bad pointer dref in fence ops
-	 * (e.g. i915_fence_get_driver_name). We could likely change these
-	 * functions to avoid touching the engine but let's just be safe and
-	 * hold the intel_context reference. In execlist mode the request always
-	 * eventually points to a physical engine so this isn't an issue.
-	 */
-	rq->context = intel_context_get(ce);
+	rq->context = ce;
 	rq->engine = ce->engine;
 	rq->ring = ce->ring;
 	rq->execution_mask = ce->engine->mask;
+	rq->i915 = ce->engine->i915;
 
 	ret = intel_timeline_get_seqno(tl, rq, &seqno);
 	if (ret)
@@ -960,7 +976,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	__rq_init_watchdog(rq);
 	assert_capture_list_is_null(rq);
 	GEM_BUG_ON(!llist_empty(&rq->execute_cb));
-	GEM_BUG_ON(i915_vma_snapshot_present(&rq->batch_snapshot));
+	GEM_BUG_ON(rq->batch_res);
 
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
@@ -1004,7 +1020,6 @@ err_unwind:
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
 err_free:
-	intel_context_put(ce);
 	kmem_cache_free(slab_requests, rq);
 err_unreserve:
 	intel_context_unpin(ce);
@@ -1203,7 +1218,7 @@ emit_semaphore_wait(struct i915_request *to,
 	/*
 	 * If this or its dependents are waiting on an external fence
 	 * that may fail catastrophically, then we want to avoid using
-	 * sempahores as they bypass the fence signaling metadata, and we
+	 * semaphores as they bypass the fence signaling metadata, and we
 	 * lose the fence->error propagation.
 	 */
 	if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN)
@@ -1336,7 +1351,7 @@ __i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
 {
 	mark_external(rq);
 	return i915_sw_fence_await_dma_fence(&rq->submit, fence,
-					     i915_fence_context_timeout(rq->engine->i915,
+					     i915_fence_context_timeout(rq->i915,
 									 fence->context),
 					     I915_FENCE_GFP);
 }
@@ -1594,7 +1609,8 @@ i915_request_await_object(struct i915_request *to,
 	struct dma_fence *fence;
 	int ret = 0;
 
-	dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) {
+	dma_resv_for_each_fence(&cursor, obj->base.resv,
+				dma_resv_usage_rw(write), fence) {
 		ret = i915_request_await_dma_fence(to, fence);
 		if (ret)
 			break;
@@ -1603,6 +1619,20 @@ i915_request_await_object(struct i915_request *to,
 	return ret;
 }
 
+static void i915_request_await_huc(struct i915_request *rq)
+{
+	struct intel_huc *huc = &rq->context->engine->gt->uc.huc;
+
+	/* don't stall kernel submissions! */
+	if (!rcu_access_pointer(rq->context->gem_context))
+		return;
+
+	if (intel_huc_wait_required(huc))
+		i915_sw_fence_await_sw_fence(&rq->submit,
+					     &huc->delayed_load.fence,
+					     &rq->hucq);
+}
+
 static struct i915_request *
 __i915_request_ensure_parallel_ordering(struct i915_request *rq,
 					struct intel_timeline *timeline)
@@ -1629,6 +1659,11 @@ __i915_request_ensure_parallel_ordering(struct i915_request *rq,
 
 	request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);
 
+	/*
+	 * Users have to put a reference potentially got by
+	 * __i915_active_fence_set() to the returned request
+	 * when no longer needed
+	 */
 	return to_request(__i915_active_fence_set(&timeline->last_request,
 						  &rq->fence));
 }
@@ -1675,6 +1710,10 @@ __i915_request_ensure_ordering(struct i915_request *rq,
 								 0);
 	}
 
+	/*
+	 * Users have to put the reference to prev potentially got
+	 * by __i915_active_fence_set() when no longer needed
+	 */
 	return prev;
 }
 
@@ -1685,6 +1724,16 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	struct i915_request *prev;
 
 	/*
+	 * Media workloads may require HuC, so stall them until HuC loading is
+	 * complete. Note that HuC not being loaded when a user submission
+	 * arrives can only happen when HuC is loaded via GSC and in that case
+	 * we still expect the window between us starting to accept submissions
+	 * and HuC loading completion to be small (a few hundred ms).
+	 */
+	if (rq->engine->class == VIDEO_DECODE_CLASS)
+		i915_request_await_huc(rq);
+
+	/*
 	 * Dependency tracking and request ordering along the timeline
 	 * is special cased so that we can eliminate redundant ordering
 	 * operations while building the request (we know that the timeline
@@ -1718,6 +1767,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 		prev = __i915_request_ensure_ordering(rq, timeline);
 	else
 		prev = __i915_request_ensure_parallel_ordering(rq, timeline);
+	if (prev)
+		i915_request_put(prev);
 
 	/*
 	 * Make sure that no request gazumped us - if it was allocated after
@@ -2135,7 +2186,7 @@ void i915_request_show(struct drm_printer *m,
 		       const char *prefix,
 		       int indent)
 {
-	const char *name = rq->fence.ops->get_timeline_name((struct dma_fence *)&rq->fence);
+	const char __rcu *timeline;
 	char buf[80] = "";
 	int x = 0;
 
@@ -2171,6 +2222,8 @@ void i915_request_show(struct drm_printer *m,
 
 	x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf));
 
+	rcu_read_lock();
+	timeline = dma_fence_timeline_name((struct dma_fence *)&rq->fence);
 	drm_printf(m, "%s%.*s%c %llx:%lld%s%s %s @ %dms: %s\n",
 		   prefix, indent, " ",
 		   queue_status(rq),
@@ -2179,7 +2232,8 @@ void i915_request_show(struct drm_printer *m,
 		   fence_status(rq),
 		   buf,
 		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
-		   name);
+		   rcu_dereference(timeline));
+	rcu_read_unlock();
 }
 
 static bool engine_match_ring(struct intel_engine_cs *engine, struct i915_request *rq)
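
A note on the i915_fence_release() hunk above: recycling a request into engine->request_pool is now gated on is_power_of_2(rq->execution_mask) rather than on intel_engine_is_virtual(rq->engine), precisely because rq->engine may already be a dangling virtual-engine pointer by the time the fence is released. The standalone sketch below is illustration only, not driver code: the engine-mask values are hypothetical and is_power_of_2() is re-implemented locally so the snippet builds in userspace, mirroring the kernel helper's semantics.

#include <stdbool.h>
#include <stdio.h>

/* Local stand-in for the kernel's is_power_of_2() helper. */
static bool is_power_of_2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

int main(void)
{
	/* Hypothetical engine masks, one bit per physical engine. */
	unsigned long pinned_to_one = 1UL << 2;                  /* exactly one engine */
	unsigned long balanced_pair = (1UL << 2) | (1UL << 3);   /* still balanced across two engines */

	/* Single bit: the request ended up on a known physical engine, so
	 * rq->engine is safe to poke and the request may be parked in
	 * engine->request_pool. */
	printf("pinned:   %s\n", is_power_of_2(pinned_to_one) ? "recycle" : "free");

	/* Multiple bits: the request may still belong to a virtual engine
	 * whose reference was already dropped, so it is simply freed. */
	printf("balanced: %s\n", is_power_of_2(balanced_pair) ? "recycle" : "free");

	return 0;
}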
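
The watchdog rework is spread over several hunks, so here is the post-patch timer handling collected in one place as a reading aid. It is condensed from the hunks above plus their surrounding context; the timeout_us early return and the gt local in the expiry callback come from context lines the diff only partially shows, so treat those details as reconstructed rather than quoted.

/* Expiry: defer to the driver's unordered workqueue (was schedule_work()). */
static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
{
	struct i915_request *rq =
		container_of(hrtimer, struct i915_request, watchdog.timer);
	struct intel_gt *gt = rq->engine->gt;

	if (!i915_request_completed(rq)) {
		if (llist_add(&rq->watchdog.link, &gt->watchdog.list))
			queue_work(gt->i915->unordered_wq, &gt->watchdog.work);
	} else {
		i915_request_put(rq);
	}

	return HRTIMER_NORESTART;
}

/* Init: the callback is bound once here via hrtimer_setup(), so the old
 * "timer.function == NULL means never armed" sentinel goes away. */
static void __rq_init_watchdog(struct i915_request *rq)
{
	struct i915_request_watchdog *wdg = &rq->watchdog;

	hrtimer_setup(&wdg->timer, __rq_watchdog_expired,
		      CLOCK_MONOTONIC, HRTIMER_MODE_REL);
}

/* Arm: only takes a request reference and starts the already-initialised
 * timer; no more hrtimer_init() plus function assignment on every arm. */
static void __rq_arm_watchdog(struct i915_request *rq)
{
	struct i915_request_watchdog *wdg = &rq->watchdog;
	struct intel_context *ce = rq->context;

	if (!ce->watchdog.timeout_us)
		return;

	i915_request_get(rq);

	hrtimer_start_range_ns(&wdg->timer,
			       ns_to_ktime(ce->watchdog.timeout_us *
					   NSEC_PER_USEC),
			       NSEC_PER_MSEC,
			       HRTIMER_MODE_REL);
}

/* Cancel: hrtimer_try_to_cancel() alone decides whether we still own the
 * reference taken at arm time. */
static void __rq_cancel_watchdog(struct i915_request *rq)
{
	struct i915_request_watchdog *wdg = &rq->watchdog;

	if (hrtimer_try_to_cancel(&wdg->timer) > 0)
		i915_request_put(rq);
}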
