Diffstat (limited to 'drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 235
1 file changed, 162 insertions(+), 73 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index f3dcae4b9d45..127316d2c8aa 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -398,7 +398,7 @@ static inline void set_context_guc_id_invalid(struct intel_context *ce)
 
 static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
 {
-	return &ce->engine->gt->uc.guc;
+	return gt_to_guc(ce->engine->gt);
 }
 
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
@@ -633,7 +633,7 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
 		atomic_inc(&guc->outstanding_submission_g2h);
 
 	ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
-	if (ret)
+	if (ret && g2h_len_dw)
 		atomic_dec(&guc->outstanding_submission_g2h);
 
 	return ret;
@@ -1223,7 +1223,7 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
  * determine validity of these values. Instead we read the values multiple times
  * until they are consistent. In test runs, 3 attempts results in consistent
  * values. The upper bound is set to 6 attempts and may need to be tuned as per
- * any new occurences.
+ * any new occurrences.
  */
 static void __get_engine_usage_record(struct intel_engine_cs *engine,
 				      u32 *last_in, u32 *id, u32 *total)
@@ -1243,10 +1243,25 @@ static void __get_engine_usage_record(struct intel_engine_cs *engine,
 	} while (++i < 6);
 }
 
+static void __set_engine_usage_record(struct intel_engine_cs *engine,
+				      u32 last_in, u32 id, u32 total)
+{
+	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
+
+#define record_write(map_, field_, val_) \
+	iosys_map_wr_field(map_, 0, struct guc_engine_usage_record, field_, val_)
+
+	record_write(&rec_map, last_switch_in_stamp, last_in);
+	record_write(&rec_map, current_context_index, id);
+	record_write(&rec_map, total_runtime, total);
+
+#undef record_write
+}
+
 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
 {
 	struct intel_engine_guc_stats *stats = &engine->stats.guc;
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 	u32 last_switch, ctx_id, total;
 
 	lockdep_assert_held(&guc->timestamp.lock);
@@ -1270,15 +1285,12 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
 static u32 gpm_timestamp_shift(struct intel_gt *gt)
 {
 	intel_wakeref_t wakeref;
-	u32 reg, shift;
+	u32 reg;
 
 	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
 		reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
 
-	shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
-		GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
-
-	return 3 - shift;
+	return 3 - REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
 }
 
 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
@@ -1311,7 +1323,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 	struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
 	struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
 	struct intel_gt *gt = engine->gt;
-	struct intel_guc *guc = &gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(gt);
 	u64 total, gt_stamp_saved;
 	unsigned long flags;
 	u32 reset_count;
@@ -1339,7 +1351,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 	 * start_gt_clk is derived from GuC state. To get a consistent
 	 * view of activity, we query the GuC state only if gt is awake.
 	 */
-	wakeref = in_reset ? 0 : intel_gt_pm_get_if_awake(gt);
+	wakeref = in_reset ? NULL : intel_gt_pm_get_if_awake(gt);
 	if (wakeref) {
 		stats_saved = *stats;
 		gt_stamp_saved = guc->timestamp.gt_stamp;
@@ -1363,9 +1375,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 		total += intel_gt_clock_interval_to_ns(gt, clk);
 	}
 
+	if (total > stats->total)
+		stats->total = total;
+
 	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
 
-	return ns_to_ktime(total);
+	return ns_to_ktime(stats->total);
 }
 
 static void guc_enable_busyness_worker(struct intel_guc *guc)
@@ -1403,14 +1418,17 @@ static void guc_cancel_busyness_worker(struct intel_guc *guc)
 	 * Trying to pass a 'need_sync' or 'in_reset' flag all the way down through
 	 * every possible call stack is unfeasible. It would be too intrusive to many
 	 * areas that really don't care about the GuC backend. However, there is the
-	 * 'reset_in_progress' flag available, so just use that.
+	 * I915_RESET_BACKOFF flag and the gt->reset.mutex can be tested for is_locked.
+	 * So just use those. Note that testing both is required due to the hideously
+	 * complex nature of the i915 driver's reset code paths.
 	 *
 	 * And note that in the case of a reset occurring during driver unload
-	 * (wedge_on_fini), skipping the cancel in _prepare (when the reset flag is set
-	 * is fine because there is another cancel in _finish (when the reset flag is
-	 * not).
+	 * (wedged_on_fini), skipping the cancel in reset_prepare/reset_fini (when the
+	 * reset flag/mutex are set) is fine because there is another explicit cancel in
+	 * intel_guc_submission_fini (when the reset flag/mutex are not).
 	 */
-	if (guc_to_gt(guc)->uc.reset_in_progress)
+	if (mutex_is_locked(&guc_to_gt(guc)->reset.mutex) ||
+	    test_bit(I915_RESET_BACKOFF, &guc_to_gt(guc)->reset.flags))
 		cancel_delayed_work(&guc->timestamp.work);
 	else
 		cancel_delayed_work_sync(&guc->timestamp.work);
@@ -1424,19 +1442,43 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
 	unsigned long flags;
 	ktime_t unused;
 
-	guc_cancel_busyness_worker(guc);
-
 	spin_lock_irqsave(&guc->timestamp.lock, flags);
 
 	guc_update_pm_timestamp(guc, &unused);
 	for_each_engine(engine, gt, id) {
+		struct intel_engine_guc_stats *stats = &engine->stats.guc;
+
 		guc_update_engine_gt_clks(engine);
-		engine->stats.guc.prev_total = 0;
+
+		/*
+		 * If resetting a running context, accumulate the active
+		 * time as well since there will be no context switch.
+		 */
+		if (stats->running) {
+			u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
+
+			stats->total_gt_clks += clk;
+		}
+		stats->prev_total = 0;
+		stats->running = 0;
 	}
 
 	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
 }
 
+static void __update_guc_busyness_running_state(struct intel_guc *guc)
+{
+	struct intel_gt *gt = guc_to_gt(guc);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&guc->timestamp.lock, flags);
+	for_each_engine(engine, gt, id)
+		engine->stats.guc.running = false;
+	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+
 static void __update_guc_busyness_stats(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
@@ -1542,6 +1584,9 @@ err_trylock:
 
 static int guc_action_enable_usage_stats(struct intel_guc *guc)
 {
+	struct intel_gt *gt = guc_to_gt(guc);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	u32 offset = intel_guc_engine_usage_offset(guc);
 	u32 action[] = {
 		INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
@@ -1549,6 +1594,9 @@ static int guc_action_enable_usage_stats(struct intel_guc *guc)
 		0,
 	};
 
+	for_each_engine(engine, gt, id)
+		__set_engine_usage_record(engine, 0, 0xffffffff, 0);
+
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
@@ -1576,11 +1624,14 @@ static void guc_fini_engine_stats(struct intel_guc *guc)
 
 void intel_guc_busyness_park(struct intel_gt *gt)
 {
-	struct intel_guc *guc = &gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(gt);
 
 	if (!guc_submission_initialized(guc))
 		return;
 
+	/* Assume no engines are running and set running state to false */
+	__update_guc_busyness_running_state(guc);
+
 	/*
 	 * There is a race with suspend flow where the worker runs after suspend
 	 * and causes an unclaimed register access warning. Cancel the worker
@@ -1603,7 +1654,7 @@ void intel_guc_busyness_unpark(struct intel_gt *gt)
 {
-	struct intel_guc *guc = &gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(gt);
 	unsigned long flags;
 	ktime_t unused;
@@ -1687,6 +1738,10 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
 	spin_lock_irq(guc_to_gt(guc)->irq_lock);
 	spin_unlock_irq(guc_to_gt(guc)->irq_lock);
 
+	/* Flush tasklet */
+	tasklet_disable(&guc->ct.receive_tasklet);
+	tasklet_enable(&guc->ct.receive_tasklet);
+
 	guc_flush_submissions(guc);
 	guc_flush_destroyed_contexts(guc);
 	flush_work(&guc->ct.requests.worker);
@@ -2004,12 +2059,7 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
 
 void intel_guc_submission_reset_finish(struct intel_guc *guc)
 {
-	/*
-	 * Ensure the busyness worker gets cancelled even on a fatal wedge.
-	 * Note that reset_prepare is not allowed to because it confuses lockdep.
-	 */
-	if (guc_submission_initialized(guc))
-		guc_cancel_busyness_worker(guc);
+	int outstanding;
 
 	/* Reset called during driver load or during wedge? */
 	if (unlikely(!guc_submission_initialized(guc) ||
@@ -2020,11 +2070,14 @@
 
 	/*
 	 * Technically possible for either of these values to be non-zero here,
-	 * but very unlikely + harmless. Regardless let's add a warn so we can
+	 * but very unlikely + harmless. Regardless let's add an error so we can
	 * see in CI if this happens frequently / a precursor to taking down the
	 * machine.
 	 */
-	GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
+	outstanding = atomic_read(&guc->outstanding_submission_g2h);
+	if (outstanding)
+		guc_err(guc, "Unexpected outstanding GuC to Host response(s) in reset finish: %d\n",
+			outstanding);
 	atomic_set(&guc->outstanding_submission_g2h, 0);
 
 	intel_guc_global_policies_update(guc);
@@ -2136,6 +2189,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
 	if (!guc->submission_initialized)
 		return;
 
+	guc_fini_engine_stats(guc);
 	guc_flush_destroyed_contexts(guc);
 	guc_lrc_desc_pool_destroy_v69(guc);
 	i915_sched_engine_put(guc->sched_engine);
@@ -2194,7 +2248,7 @@ static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
 static void guc_submit_request(struct i915_request *rq)
 {
 	struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
-	struct intel_guc *guc = &rq->engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(rq->engine->gt);
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
@@ -2220,11 +2274,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
					      order_base_2(ce->parallel.number_children
							   + 1));
 	else
-		ret = ida_simple_get(&guc->submission_state.guc_ids,
-				     NUMBER_MULTI_LRC_GUC_ID(guc),
-				     guc->submission_state.num_guc_ids,
-				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
-				     __GFP_NOWARN);
+		ret = ida_alloc_range(&guc->submission_state.guc_ids,
+				      NUMBER_MULTI_LRC_GUC_ID(guc),
+				      guc->submission_state.num_guc_ids - 1,
+				      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (unlikely(ret < 0))
 		return ret;
 
@@ -2247,8 +2300,8 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
							   + 1));
 	} else {
 		--guc->submission_state.guc_ids_in_use;
-		ida_simple_remove(&guc->submission_state.guc_ids,
-				  ce->guc_id.id);
+		ida_free(&guc->submission_state.guc_ids,
+			 ce->guc_id.id);
 	}
 	clr_ctx_id_mapping(guc, ce->guc_id.id);
 	set_context_guc_id_invalid(ce);
@@ -2645,6 +2698,7 @@ MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
+MAKE_CONTEXT_POLICY_ADD(slpc_ctx_freq_req, SLPM_GT_FREQUENCY)
 
 #undef MAKE_CONTEXT_POLICY_ADD
 
@@ -2660,10 +2714,11 @@ static int __guc_context_set_context_policies(struct intel_guc *guc,
 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
 {
 	struct intel_engine_cs *engine = ce->engine;
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 	struct context_policy policy;
 	u32 execution_quantum;
 	u32 preemption_timeout;
+	u32 slpc_ctx_freq_req = 0;
 	unsigned long flags;
 	int ret;
 
@@ -2675,11 +2730,15 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
 	execution_quantum = engine->props.timeslice_duration_ms * 1000;
 	preemption_timeout = engine->props.preempt_timeout_ms * 1000;
 
+	if (ce->flags & BIT(CONTEXT_LOW_LATENCY))
+		slpc_ctx_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
+
 	__guc_context_policy_start_klv(&policy, ce->guc_id.id);
 
 	__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
 	__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
 	__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+	__guc_context_policy_add_slpc_ctx_freq_req(&policy, slpc_ctx_freq_req);
 
 	if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
 		__guc_context_policy_add_preempt_to_idle(&policy, 1);
@@ -2736,7 +2795,7 @@ static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
 static void prepare_context_registration_info_v69(struct intel_context *ce)
 {
 	struct intel_engine_cs *engine = ce->engine;
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 	u32 ctx_id = ce->guc_id.id;
 	struct guc_lrc_desc_v69 *desc;
 	struct intel_context *child;
@@ -2805,7 +2864,7 @@ static void prepare_context_registration_info_v70(struct intel_context *ce,
					struct guc_ctxt_registration_info *info)
 {
 	struct intel_engine_cs *engine = ce->engine;
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 	u32 ctx_id = ce->guc_id.id;
 
 	GEM_BUG_ON(!engine->mask);
@@ -2842,9 +2901,9 @@ static void prepare_context_registration_info_v70(struct intel_context *ce,
 		ce->parallel.guc.wqi_tail = 0;
 		ce->parallel.guc.wqi_head = 0;
 
-		wq_desc_offset = i915_ggtt_offset(ce->state) +
+		wq_desc_offset = (u64)i915_ggtt_offset(ce->state) +
				 __get_parent_scratch_offset(ce);
-		wq_base_offset = i915_ggtt_offset(ce->state) +
+		wq_base_offset = (u64)i915_ggtt_offset(ce->state) +
				 __get_wq_offset(ce);
 		info->wq_desc_lo = lower_32_bits(wq_desc_offset);
 		info->wq_desc_hi = upper_32_bits(wq_desc_offset);
@@ -2868,7 +2927,7 @@ static int try_context_registration(struct intel_context *ce, bool loop)
 {
 	struct intel_engine_cs *engine = ce->engine;
 	struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 	intel_wakeref_t wakeref;
 	u32 ctx_id = ce->guc_id.id;
 	bool context_registered;
@@ -2949,7 +3008,7 @@ static int __guc_context_pin(struct intel_context *ce,
 
 	/*
 	 * GuC context gets pinned in guc_request_alloc. See that function for
-	 * explaination of why.
+	 * explanation of why.
 	 */
 	return lrc_pin(ce, engine, vaddr);
 }
@@ -3384,18 +3443,29 @@ static inline int guc_lrc_desc_unpin(struct intel_context *ce)
	 * GuC is active, lets destroy this context, but at this point we can still be racing
	 * with suspend, so we undo everything if the H2G fails in deregister_context so
	 * that GuC reset will find this context during clean up.
+	 *
+	 * There is a race condition where the reset code could have altered
+	 * this context's state and done a wakeref put before we try to
+	 * deregister it here. So check if the context is still set to be
+	 * destroyed before undoing earlier changes, to avoid two wakeref puts
+	 * on the same context.
	 */
 	ret = deregister_context(ce, ce->guc_id.id);
 	if (ret) {
-		spin_lock(&ce->guc_state.lock);
-		set_context_registered(ce);
-		clr_context_destroyed(ce);
-		spin_unlock(&ce->guc_state.lock);
+		bool pending_destroyed;
+		spin_lock_irqsave(&ce->guc_state.lock, flags);
+		pending_destroyed = context_destroyed(ce);
+		if (pending_destroyed) {
+			set_context_registered(ce);
+			clr_context_destroyed(ce);
+		}
+		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
		/*
		 * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements
		 * the wakeref immediately but per function spec usage call this after unlock.
		 */
-		intel_wakeref_put_async(&gt->wakeref);
+		if (pending_destroyed)
+			intel_wakeref_put_async(&gt->wakeref);
 	}
 
 	return ret;
@@ -4267,20 +4337,25 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq,
 	u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
 
 	/* Short circuit function */
-	if (prio < I915_PRIORITY_NORMAL ||
-	    rq->guc_prio == GUC_PRIO_FINI ||
-	    (rq->guc_prio != GUC_PRIO_INIT &&
-	     !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
+	if (prio < I915_PRIORITY_NORMAL)
 		return;
 
 	spin_lock(&ce->guc_state.lock);
-	if (rq->guc_prio != GUC_PRIO_FINI) {
-		if (rq->guc_prio != GUC_PRIO_INIT)
-			sub_context_inflight_prio(ce, rq->guc_prio);
-		rq->guc_prio = new_guc_prio;
-		add_context_inflight_prio(ce, rq->guc_prio);
-		update_context_prio(ce);
-	}
+
+	if (rq->guc_prio == GUC_PRIO_FINI)
+		goto exit;
+
+	if (!new_guc_prio_higher(rq->guc_prio, new_guc_prio))
+		goto exit;
+
+	if (rq->guc_prio != GUC_PRIO_INIT)
+		sub_context_inflight_prio(ce, rq->guc_prio);
+
+	rq->guc_prio = new_guc_prio;
+	add_context_inflight_prio(ce, rq->guc_prio);
+	update_context_prio(ce);
+
+exit:
 	spin_unlock(&ce->guc_state.lock);
 }
 
@@ -4496,7 +4571,13 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 	if (engine->class == COMPUTE_CLASS)
 		if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
		    IS_DG2(engine->i915))
-			engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+			engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
+
+	/* Wa_16019325821 */
+	/* Wa_14019159160 */
+	if ((engine->class == COMPUTE_CLASS || engine->class == RENDER_CLASS) &&
+	    IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
+		engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
 
 	/*
 	 * TODO: GuC supports timeslicing and semaphores as well, but they're
@@ -4507,7 +4588,7 @@
	 */
 	engine->emit_bb_start = gen8_emit_bb_start;
 
-	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
		engine->emit_bb_start = xehp_emit_bb_start;
 }
 
@@ -4549,7 +4630,7 @@ static void guc_sched_engine_destroy(struct kref *kref)
 int intel_guc_submission_setup(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 
 	/*
	 * The setup relies on several assumptions (e.g. irqs always enabled)
@@ -5308,7 +5389,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
 
 void intel_guc_find_hung_context(struct intel_engine_cs *engine)
 {
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 	struct intel_context *ce;
 	struct i915_request *rq;
 	unsigned long index;
@@ -5370,7 +5451,7 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
				    struct i915_request *hung_rq,
				    struct drm_printer *m)
 {
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 	struct intel_context *ce;
 	unsigned long index;
 	unsigned long flags;
@@ -5462,12 +5543,20 @@ static inline void guc_log_context(struct drm_printer *p,
 {
 	drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
 	drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
-	drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
-		   ce->ring->head,
-		   ce->lrc_reg_state[CTX_RING_HEAD]);
-	drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
-		   ce->ring->tail,
-		   ce->lrc_reg_state[CTX_RING_TAIL]);
+	if (intel_context_pin_if_active(ce)) {
+		drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
+			   ce->ring->head,
+			   ce->lrc_reg_state[CTX_RING_HEAD]);
+		drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
+			   ce->ring->tail,
+			   ce->lrc_reg_state[CTX_RING_TAIL]);
+		intel_context_unpin(ce);
+	} else {
+		drm_printf(p, "\t\tLRC Head: Internal %u, Memory not pinned\n",
+			   ce->ring->head);
+		drm_printf(p, "\t\tLRC Tail: Internal %u, Memory not pinned\n",
+			   ce->ring->tail);
+	}
 	drm_printf(p, "\t\tContext Pin Count: %u\n",
		   atomic_read(&ce->pin_count));
 	drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
@@ -5822,7 +5911,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
 	if (!ve)
		return ERR_PTR(-ENOMEM);
 
-	guc = &siblings[0]->gt->uc.guc;
+	guc = gt_to_guc(siblings[0]->gt);
 
 	ve->base.i915 = siblings[0]->i915;
 	ve->base.gt = siblings[0]->gt;