Diffstat (limited to 'drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c')
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c  195
1 file changed, 139 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 0f83c6d4376f..f8cb7c630d5b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -398,7 +398,7 @@ static inline void set_context_guc_id_invalid(struct intel_context *ce)
static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
{
- return &ce->engine->gt->uc.guc;
+ return gt_to_guc(ce->engine->gt);
}
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
@@ -1223,7 +1223,7 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
* determine validity of these values. Instead we read the values multiple times
* until they are consistent. In test runs, 3 attempts results in consistent
* values. The upper bound is set to 6 attempts and may need to be tuned as per
- * any new occurences.
+ * any new occurrences.
*/
static void __get_engine_usage_record(struct intel_engine_cs *engine,
u32 *last_in, u32 *id, u32 *total)
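
Side note on the hunk above: the read-until-consistent idiom the comment
describes reduces to re-reading the record until two consecutive snapshots
agree, with a bounded number of attempts. A standalone C sketch (the real
code reads through an iosys_map; the struct and names here are illustrative):

#include <stdint.h>

struct usage_snapshot {
	uint32_t last_switch_in_stamp;
	uint32_t current_context_index;
	uint32_t total_runtime;
};

/* Re-read a record the firmware may be updating concurrently until two
 * consecutive snapshots match, giving up after 6 attempts as the driver
 * does. */
static void read_stable(volatile struct usage_snapshot *rec,
			uint32_t *last_in, uint32_t *id, uint32_t *total)
{
	int i = 0;

	do {
		uint32_t l = rec->last_switch_in_stamp;
		uint32_t c = rec->current_context_index;
		uint32_t t = rec->total_runtime;

		/* Stable once the previous pass read the same values. */
		if (l == *last_in && c == *id && t == *total)
			break;

		*last_in = l;
		*id = c;
		*total = t;
	} while (++i < 6);
}
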
@@ -1243,10 +1243,25 @@ static void __get_engine_usage_record(struct intel_engine_cs *engine,
} while (++i < 6);
}
+static void __set_engine_usage_record(struct intel_engine_cs *engine,
+ u32 last_in, u32 id, u32 total)
+{
+ struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
+
+#define record_write(map_, field_, val_) \
+ iosys_map_wr_field(map_, 0, struct guc_engine_usage_record, field_, val_)
+
+ record_write(&rec_map, last_switch_in_stamp, last_in);
+ record_write(&rec_map, current_context_index, id);
+ record_write(&rec_map, total_runtime, total);
+
+#undef record_write
+}
+
static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
struct intel_engine_guc_stats *stats = &engine->stats.guc;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
u32 last_switch, ctx_id, total;
lockdep_assert_held(&guc->timestamp.lock);
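
For reference, iosys_map_wr_field() (from <linux/iosys-map.h>) writes a
single struct field through a mapping that may live in system or I/O
memory, picking the right accessor; the record_write() macro added above is
a thin wrapper over it. A minimal sketch with an illustrative struct:

#include <linux/iosys-map.h>
#include <linux/types.h>

struct usage_rec {
	u32 last_switch_in_stamp;
	u32 current_context_index;
	u32 total_runtime;
};

/* Write one field of a struct usage_rec living at offset 0 of the map. */
static void write_total(struct iosys_map *map, u32 total)
{
	iosys_map_wr_field(map, 0, struct usage_rec, total_runtime, total);
}
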
@@ -1270,15 +1285,12 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
static u32 gpm_timestamp_shift(struct intel_gt *gt)
{
intel_wakeref_t wakeref;
- u32 reg, shift;
+ u32 reg;
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
- shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
- GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
-
- return 3 - shift;
+ return 3 - REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
}
static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
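
The gpm_timestamp_shift() change swaps an open-coded mask-and-shift for
REG_FIELD_GET(), i915's wrapper around FIELD_GET() from <linux/bitfield.h>,
which derives the shift amount from the mask itself, so the separate
*_SHIFT constant can be dropped. An illustration with a made-up field:

#include <linux/bitfield.h>
#include <linux/types.h>

#define EXAMPLE_FIELD_MASK	GENMASK(10, 8)	/* illustrative bits 10:8 */

static u32 extract_field(u32 reg)
{
	/* Equivalent to (reg & EXAMPLE_FIELD_MASK) >> 8, but the shift
	 * is computed from the mask, so it cannot go stale. */
	return FIELD_GET(EXAMPLE_FIELD_MASK, reg);
}
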
@@ -1311,7 +1323,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
struct intel_gt *gt = engine->gt;
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
u64 total, gt_stamp_saved;
unsigned long flags;
u32 reset_count;
@@ -1339,7 +1351,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
* start_gt_clk is derived from GuC state. To get a consistent
* view of activity, we query the GuC state only if gt is awake.
*/
- wakeref = in_reset ? 0 : intel_gt_pm_get_if_awake(gt);
+ wakeref = in_reset ? NULL : intel_gt_pm_get_if_awake(gt);
if (wakeref) {
stats_saved = *stats;
gt_stamp_saved = guc->timestamp.gt_stamp;
@@ -1363,9 +1375,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
total += intel_gt_clock_interval_to_ns(gt, clk);
}
+ if (total > stats->total)
+ stats->total = total;
+
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
- return ns_to_ktime(total);
+ return ns_to_ktime(stats->total);
}
static void guc_enable_busyness_worker(struct intel_guc *guc)
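
The guc_engine_busyness() hunk latches the largest total observed in
stats->total and returns that, so a caller can never see busyness move
backwards when a racy GuC snapshot transiently under-reports. The latch in
isolation (a sketch; counter consumers generally assume monotonicity and
compute deltas between samples):

#include <linux/types.h>

/* Return a monotonically non-decreasing view of a sampled counter. */
static u64 latch_monotonic(u64 *latched, u64 sample)
{
	if (sample > *latched)
		*latched = sample;
	return *latched;
}
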
@@ -1431,13 +1446,39 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
guc_update_pm_timestamp(guc, &unused);
for_each_engine(engine, gt, id) {
+ struct intel_engine_guc_stats *stats = &engine->stats.guc;
+
guc_update_engine_gt_clks(engine);
- engine->stats.guc.prev_total = 0;
+
+ /*
+ * If resetting a running context, accumulate the active
+ * time as well since there will be no context switch.
+ */
+ if (stats->running) {
+ u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
+
+ stats->total_gt_clks += clk;
+ }
+ stats->prev_total = 0;
+ stats->running = 0;
}
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}
+static void __update_guc_busyness_running_state(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned long flags;
+
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+ for_each_engine(engine, gt, id)
+ engine->stats.guc.running = false;
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+
static void __update_guc_busyness_stats(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -1543,6 +1584,9 @@ err_trylock:
static int guc_action_enable_usage_stats(struct intel_guc *guc)
{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
u32 offset = intel_guc_engine_usage_offset(guc);
u32 action[] = {
INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
@@ -1550,6 +1594,9 @@ static int guc_action_enable_usage_stats(struct intel_guc *guc)
0,
};
+ for_each_engine(engine, gt, id)
+ __set_engine_usage_record(engine, 0, 0xffffffff, 0);
+
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
@@ -1577,11 +1624,14 @@ static void guc_fini_engine_stats(struct intel_guc *guc)
void intel_guc_busyness_park(struct intel_gt *gt)
{
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
if (!guc_submission_initialized(guc))
return;
+ /* Assume no engines are running and set running state to false */
+ __update_guc_busyness_running_state(guc);
+
/*
* There is a race with suspend flow where the worker runs after suspend
* and causes an unclaimed register access warning. Cancel the worker
@@ -1604,7 +1654,7 @@ void intel_guc_busyness_park(struct intel_gt *gt)
void intel_guc_busyness_unpark(struct intel_gt *gt)
{
- struct intel_guc *guc = &gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(gt);
unsigned long flags;
ktime_t unused;
@@ -1688,6 +1738,10 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
spin_lock_irq(guc_to_gt(guc)->irq_lock);
spin_unlock_irq(guc_to_gt(guc)->irq_lock);
+ /* Flush tasklet */
+ tasklet_disable(&guc->ct.receive_tasklet);
+ tasklet_enable(&guc->ct.receive_tasklet);
+
guc_flush_submissions(guc);
guc_flush_destroyed_contexts(guc);
flush_work(&guc->ct.requests.worker);
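
The tasklet_disable()/tasklet_enable() pair added above is a flush idiom:
tasklet_disable() does not return until any currently-running instance of
the callback has finished, and tasklet_enable() then re-arms it. As a
generic helper it would look like:

#include <linux/interrupt.h>

/* Wait out a concurrently-running tasklet, then allow it to run again. */
static void flush_tasklet(struct tasklet_struct *t)
{
	tasklet_disable(t);	/* spins until the callback completes */
	tasklet_enable(t);
}
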
@@ -2005,6 +2059,8 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
void intel_guc_submission_reset_finish(struct intel_guc *guc)
{
+ int outstanding;
+
/* Reset called during driver load or during wedge? */
if (unlikely(!guc_submission_initialized(guc) ||
!intel_guc_is_fw_running(guc) ||
@@ -2014,11 +2070,14 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
/*
* Technically possible for either of these values to be non-zero here,
- * but very unlikely + harmless. Regardless let's add a warn so we can
+ * but very unlikely + harmless. Regardless let's add an error so we can
* see in CI if this happens frequently / a precursor to taking down the
* machine.
*/
- GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
+ outstanding = atomic_read(&guc->outstanding_submission_g2h);
+ if (outstanding)
+ guc_err(guc, "Unexpected outstanding GuC to Host response(s) in reset finish: %d\n",
+ outstanding);
atomic_set(&guc->outstanding_submission_g2h, 0);
intel_guc_global_policies_update(guc);
@@ -2189,7 +2248,7 @@ static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
static void guc_submit_request(struct i915_request *rq)
{
struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
- struct intel_guc *guc = &rq->engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(rq->engine->gt);
unsigned long flags;
/* Will be called from irq-context when using foreign fences. */
@@ -2215,11 +2274,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
order_base_2(ce->parallel.number_children
+ 1));
else
- ret = ida_simple_get(&guc->submission_state.guc_ids,
- NUMBER_MULTI_LRC_GUC_ID(guc),
- guc->submission_state.num_guc_ids,
- GFP_KERNEL | __GFP_RETRY_MAYFAIL |
- __GFP_NOWARN);
+ ret = ida_alloc_range(&guc->submission_state.guc_ids,
+ NUMBER_MULTI_LRC_GUC_ID(guc),
+ guc->submission_state.num_guc_ids - 1,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (unlikely(ret < 0))
return ret;
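
Note the off-by-one in the migration above: ida_simple_get() treats its
'end' argument as exclusive, while ida_alloc_range() takes an inclusive
'max', hence the new 'num_guc_ids - 1'. A sketch covering the same id
range both ways (bounds illustrative):

#include <linux/idr.h>

static DEFINE_IDA(example_ida);

static int alloc_example_id(void)
{
	/* old: ida_simple_get(&example_ida, 16, 64, GFP_KERNEL);
	 * new: the same range [16, 63] expressed with an inclusive max. */
	return ida_alloc_range(&example_ida, 16, 63, GFP_KERNEL);
}

static void free_example_id(int id)
{
	ida_free(&example_ida, id);	/* replaces ida_simple_remove() */
}
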
@@ -2242,8 +2300,8 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
+ 1));
} else {
--guc->submission_state.guc_ids_in_use;
- ida_simple_remove(&guc->submission_state.guc_ids,
- ce->guc_id.id);
+ ida_free(&guc->submission_state.guc_ids,
+ ce->guc_id.id);
}
clr_ctx_id_mapping(guc, ce->guc_id.id);
set_context_guc_id_invalid(ce);
@@ -2640,6 +2698,7 @@ MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
+MAKE_CONTEXT_POLICY_ADD(slpc_ctx_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_CONTEXT_POLICY_ADD
@@ -2655,10 +2714,11 @@ static int __guc_context_set_context_policies(struct intel_guc *guc,
static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
struct context_policy policy;
u32 execution_quantum;
u32 preemption_timeout;
+ u32 slpc_ctx_freq_req = 0;
unsigned long flags;
int ret;
@@ -2670,11 +2730,15 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
execution_quantum = engine->props.timeslice_duration_ms * 1000;
preemption_timeout = engine->props.preempt_timeout_ms * 1000;
+ if (ce->flags & BIT(CONTEXT_LOW_LATENCY))
+ slpc_ctx_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
+
__guc_context_policy_start_klv(&policy, ce->guc_id.id);
__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+ __guc_context_policy_add_slpc_ctx_freq_req(&policy, slpc_ctx_freq_req);
if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
__guc_context_policy_add_preempt_to_idle(&policy, 1);
@@ -2731,7 +2795,7 @@ static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
static void prepare_context_registration_info_v69(struct intel_context *ce)
{
struct intel_engine_cs *engine = ce->engine;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
u32 ctx_id = ce->guc_id.id;
struct guc_lrc_desc_v69 *desc;
struct intel_context *child;
@@ -2800,7 +2864,7 @@ static void prepare_context_registration_info_v70(struct intel_context *ce,
struct guc_ctxt_registration_info *info)
{
struct intel_engine_cs *engine = ce->engine;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
u32 ctx_id = ce->guc_id.id;
GEM_BUG_ON(!engine->mask);
@@ -2837,9 +2901,9 @@ static void prepare_context_registration_info_v70(struct intel_context *ce,
ce->parallel.guc.wqi_tail = 0;
ce->parallel.guc.wqi_head = 0;
- wq_desc_offset = i915_ggtt_offset(ce->state) +
+ wq_desc_offset = (u64)i915_ggtt_offset(ce->state) +
__get_parent_scratch_offset(ce);
- wq_base_offset = i915_ggtt_offset(ce->state) +
+ wq_base_offset = (u64)i915_ggtt_offset(ce->state) +
__get_wq_offset(ce);
info->wq_desc_lo = lower_32_bits(wq_desc_offset);
info->wq_desc_hi = upper_32_bits(wq_desc_offset);
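
The (u64) casts above matter because i915_ggtt_offset() returns a u32: the
addition would otherwise be performed in 32 bits and could wrap before
being widened, leaving upper_32_bits() always zero. In isolation:

#include <linux/kernel.h>

static void split_offset(u32 base, u32 offset, u32 *lo, u32 *hi)
{
	/* Promote before adding so the carry survives into bit 32. */
	u64 addr = (u64)base + offset;

	*lo = lower_32_bits(addr);
	*hi = upper_32_bits(addr);
}
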
@@ -2863,7 +2927,7 @@ static int try_context_registration(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
intel_wakeref_t wakeref;
u32 ctx_id = ce->guc_id.id;
bool context_registered;
@@ -2944,7 +3008,7 @@ static int __guc_context_pin(struct intel_context *ce,
/*
* GuC context gets pinned in guc_request_alloc. See that function for
- * explaination of why.
+ * explanation of why.
*/
return lrc_pin(ce, engine, vaddr);
@@ -3382,10 +3446,10 @@ static inline int guc_lrc_desc_unpin(struct intel_context *ce)
*/
ret = deregister_context(ce, ce->guc_id.id);
if (ret) {
- spin_lock(&ce->guc_state.lock);
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
set_context_registered(ce);
clr_context_destroyed(ce);
- spin_unlock(&ce->guc_state.lock);
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
/*
* As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements
* the wakeref immediately but per function spec usage call this after unlock.
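
The irqsave conversion above follows the standard rule: if a lock is ever
taken from interrupt context, every acquisition must disable local IRQs,
otherwise an interrupt arriving while the lock is held on the same CPU can
deadlock trying to take it again. Generic shape (struct illustrative):

#include <linux/spinlock.h>
#include <linux/types.h>

struct guarded_state {
	spinlock_t lock;	/* also taken from IRQ context */
	u32 state;
};

static void set_state_bit(struct guarded_state *s, u32 bit)
{
	unsigned long flags;

	spin_lock_irqsave(&s->lock, flags);	/* IRQs off, prior state saved */
	s->state |= bit;
	spin_unlock_irqrestore(&s->lock, flags);
}
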
@@ -4262,20 +4326,25 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq,
u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
/* Short circuit function */
- if (prio < I915_PRIORITY_NORMAL ||
- rq->guc_prio == GUC_PRIO_FINI ||
- (rq->guc_prio != GUC_PRIO_INIT &&
- !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
+ if (prio < I915_PRIORITY_NORMAL)
return;
spin_lock(&ce->guc_state.lock);
- if (rq->guc_prio != GUC_PRIO_FINI) {
- if (rq->guc_prio != GUC_PRIO_INIT)
- sub_context_inflight_prio(ce, rq->guc_prio);
- rq->guc_prio = new_guc_prio;
- add_context_inflight_prio(ce, rq->guc_prio);
- update_context_prio(ce);
- }
+
+ if (rq->guc_prio == GUC_PRIO_FINI)
+ goto exit;
+
+ if (!new_guc_prio_higher(rq->guc_prio, new_guc_prio))
+ goto exit;
+
+ if (rq->guc_prio != GUC_PRIO_INIT)
+ sub_context_inflight_prio(ce, rq->guc_prio);
+
+ rq->guc_prio = new_guc_prio;
+ add_context_inflight_prio(ce, rq->guc_prio);
+ update_context_prio(ce);
+
+exit:
spin_unlock(&ce->guc_state.lock);
}
@@ -4491,7 +4560,13 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
if (engine->class == COMPUTE_CLASS)
if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
IS_DG2(engine->i915))
- engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+ engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
+
+ /* Wa_16019325821 */
+ /* Wa_14019159160 */
+ if ((engine->class == COMPUTE_CLASS || engine->class == RENDER_CLASS) &&
+ IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
+ engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
/*
* TODO: GuC supports timeslicing and semaphores as well, but they're
@@ -4502,7 +4577,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
*/
engine->emit_bb_start = gen8_emit_bb_start;
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
engine->emit_bb_start = xehp_emit_bb_start;
}
@@ -4544,7 +4619,7 @@ static void guc_sched_engine_destroy(struct kref *kref)
int intel_guc_submission_setup(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
/*
* The setup relies on several assumptions (e.g. irqs always enabled)
@@ -5303,7 +5378,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
void intel_guc_find_hung_context(struct intel_engine_cs *engine)
{
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
struct intel_context *ce;
struct i915_request *rq;
unsigned long index;
@@ -5365,7 +5440,7 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
struct i915_request *hung_rq,
struct drm_printer *m)
{
- struct intel_guc *guc = &engine->gt->uc.guc;
+ struct intel_guc *guc = gt_to_guc(engine->gt);
struct intel_context *ce;
unsigned long index;
unsigned long flags;
@@ -5457,12 +5532,20 @@ static inline void guc_log_context(struct drm_printer *p,
{
drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
- drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
- ce->ring->head,
- ce->lrc_reg_state[CTX_RING_HEAD]);
- drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
- ce->ring->tail,
- ce->lrc_reg_state[CTX_RING_TAIL]);
+ if (intel_context_pin_if_active(ce)) {
+ drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
+ ce->ring->head,
+ ce->lrc_reg_state[CTX_RING_HEAD]);
+ drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
+ ce->ring->tail,
+ ce->lrc_reg_state[CTX_RING_TAIL]);
+ intel_context_unpin(ce);
+ } else {
+ drm_printf(p, "\t\tLRC Head: Internal %u, Memory not pinned\n",
+ ce->ring->head);
+ drm_printf(p, "\t\tLRC Tail: Internal %u, Memory not pinned\n",
+ ce->ring->tail);
+ }
drm_printf(p, "\t\tContext Pin Count: %u\n",
atomic_read(&ce->pin_count));
drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
@@ -5817,7 +5900,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
if (!ve)
return ERR_PTR(-ENOMEM);
- guc = &siblings[0]->gt->uc.guc;
+ guc = gt_to_guc(siblings[0]->gt);
ve->base.i915 = siblings[0]->i915;
ve->base.gt = siblings[0]->gt;