Diffstat (limited to 'drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c')
| -rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 845 |
1 file changed, 714 insertions, 131 deletions
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 53f3ed3244d5..68f2b8d363ac 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -25,15 +25,16 @@
 #include "gt/intel_mocs.h"
 #include "gt/intel_ring.h"
 
+#include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
+#include "i915_trace.h"
+#include "i915_wait_util.h"
 #include "intel_guc_ads.h"
 #include "intel_guc_capture.h"
 #include "intel_guc_print.h"
 #include "intel_guc_submission.h"
 
-#include "i915_drv.h"
-#include "i915_reg.h"
-#include "i915_trace.h"
-
 /**
  * DOC: GuC-based command submission
  *
@@ -235,6 +236,13 @@ set_context_destroyed(struct intel_context *ce)
 	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
 }
 
+static inline void
+clr_context_destroyed(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state &= ~SCHED_STATE_DESTROYED;
+}
+
 static inline bool context_pending_disable(struct intel_context *ce)
 {
 	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
@@ -390,7 +398,7 @@ static inline void set_context_guc_id_invalid(struct intel_context *ce)
 
 static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
 {
-	return &ce->engine->gt->uc.guc;
+	return gt_to_guc(ce->engine->gt);
 }
 
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
@@ -612,6 +620,8 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
 					 u32 g2h_len_dw,
 					 bool loop)
 {
+	int ret;
+
 	/*
 	 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
 	 * so we don't handle the case where we don't get a reply because we
@@ -622,7 +632,11 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
 	if (g2h_len_dw)
 		atomic_inc(&guc->outstanding_submission_g2h);
 
-	return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+	ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+	if (ret && g2h_len_dw)
+		atomic_dec(&guc->outstanding_submission_g2h);
+
+	return ret;
 }
 
 int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
@@ -1106,7 +1120,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
 		if (deregister)
 			guc_signal_context_fence(ce);
 		if (destroyed) {
-			intel_gt_pm_put_async(guc_to_gt(guc));
+			intel_gt_pm_put_async_untracked(guc_to_gt(guc));
 			release_guc_id(guc, ce);
 			__guc_context_destroy(ce);
 		}
@@ -1209,7 +1223,7 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
 * determine validity of these values. Instead we read the values multiple times
 * until they are consistent. In test runs, 3 attempts results in consistent
 * values. The upper bound is set to 6 attempts and may need to be tuned as per
- * any new occurences.
+ * any new occurrences.
 */
 static void __get_engine_usage_record(struct intel_engine_cs *engine,
				      u32 *last_in, u32 *id, u32 *total)
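The comment corrected in the hunk above describes how the driver samples the GuC-written engine usage record: re-read until two consecutive snapshots agree, bounded at six attempts. A minimal, self-contained userspace sketch of that "read until consistent" pattern follows; the record type and reader are hypothetical stand-ins, not the driver's types.

#include <stdint.h>
#include <stdio.h>

struct usage_record {
	uint32_t last_switch_in_stamp;
	uint32_t current_context_index;
	uint32_t total_runtime;
};

/* Stand-in for reading a record that firmware may update concurrently. */
static void read_record(const volatile struct usage_record *rec,
			struct usage_record *out)
{
	out->last_switch_in_stamp = rec->last_switch_in_stamp;
	out->current_context_index = rec->current_context_index;
	out->total_runtime = rec->total_runtime;
}

static void get_consistent_record(const volatile struct usage_record *rec,
				  struct usage_record *out)
{
	struct usage_record prev;
	int i = 0;

	/* Re-read until two consecutive snapshots match, or give up after 6. */
	read_record(rec, &prev);
	do {
		read_record(rec, out);
		if (out->last_switch_in_stamp == prev.last_switch_in_stamp &&
		    out->current_context_index == prev.current_context_index &&
		    out->total_runtime == prev.total_runtime)
			break;
		prev = *out;
	} while (++i < 6);
}

int main(void)
{
	volatile struct usage_record rec = { 1, 2, 3 };
	struct usage_record snap;

	get_consistent_record(&rec, &snap);
	printf("total_runtime = %u\n", (unsigned)snap.total_runtime);
	return 0;
}

The bounded retry trades a hard consistency guarantee for wait-freedom, which is why the driver's comment notes the bound may need retuning if inconsistent reads are ever observed.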
@@ -1229,10 +1243,25 @@ static void __get_engine_usage_record(struct intel_engine_cs *engine,
 	} while (++i < 6);
 }
 
+static void __set_engine_usage_record(struct intel_engine_cs *engine,
+				      u32 last_in, u32 id, u32 total)
+{
+	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
+
+#define record_write(map_, field_, val_) \
+	iosys_map_wr_field(map_, 0, struct guc_engine_usage_record, field_, val_)
+
+	record_write(&rec_map, last_switch_in_stamp, last_in);
+	record_write(&rec_map, current_context_index, id);
+	record_write(&rec_map, total_runtime, total);
+
+#undef record_write
+}
+
 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
 {
 	struct intel_engine_guc_stats *stats = &engine->stats.guc;
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 	u32 last_switch, ctx_id, total;
 
 	lockdep_assert_held(&guc->timestamp.lock);
@@ -1256,15 +1285,12 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
 static u32 gpm_timestamp_shift(struct intel_gt *gt)
 {
 	intel_wakeref_t wakeref;
-	u32 reg, shift;
+	u32 reg;
 
 	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
 		reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
 
-	shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
-		GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
-
-	return 3 - shift;
+	return 3 - REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
}
 
 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
@@ -1297,11 +1323,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 	struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
 	struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
 	struct intel_gt *gt = engine->gt;
-	struct intel_guc *guc = &gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(gt);
 	u64 total, gt_stamp_saved;
 	unsigned long flags;
 	u32 reset_count;
 	bool in_reset;
+	intel_wakeref_t wakeref;
 
 	spin_lock_irqsave(&guc->timestamp.lock, flags);
@@ -1324,7 +1351,8 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 	 * start_gt_clk is derived from GuC state. To get a consistent
 	 * view of activity, we query the GuC state only if gt is awake.
 	 */
-	if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
+	wakeref = in_reset ? NULL : intel_gt_pm_get_if_awake(gt);
+	if (wakeref) {
 		stats_saved = *stats;
 		gt_stamp_saved = guc->timestamp.gt_stamp;
 		/*
@@ -1333,7 +1361,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 		 */
 		guc_update_engine_gt_clks(engine);
 		guc_update_pm_timestamp(guc, now);
-		intel_gt_pm_put_async(gt);
+		intel_gt_pm_put_async(gt, wakeref);
 		if (i915_reset_count(gpu_error) != reset_count) {
 			*stats = stats_saved;
 			guc->timestamp.gt_stamp = gt_stamp_saved;
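The gpm_timestamp_shift() hunk above replaces a hand-maintained MASK/SHIFT pair with REG_FIELD_GET(). A small compilable sketch of the underlying bit trick follows; the mask below is a made-up example of a two-bit field at bits [2:1], not the real RPM_CONFIG0 layout.

#include <stdint.h>
#include <stdio.h>

/* Same idea as the kernel's FIELD_GET()/REG_FIELD_GET(): dividing by the
 * mask's lowest set bit right-justifies the field, so only the mask needs
 * to be defined and the shift can never go stale. */
#define EXAMPLE_MASK	0x6u
#define FIELD_GET(mask, reg)	(((reg) & (mask)) / ((mask) & -(mask)))

int main(void)
{
	uint32_t reg = 0x4;	/* field value 0b10 stored at bits [2:1] */

	printf("field = %u\n", FIELD_GET(EXAMPLE_MASK, reg));		/* 2 */
	printf("shift = %u\n", 3 - FIELD_GET(EXAMPLE_MASK, reg));	/* 1 */
	return 0;
}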
@@ -1347,9 +1375,63 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 		total += intel_gt_clock_interval_to_ns(gt, clk);
 	}
 
+	if (total > stats->total)
+		stats->total = total;
+
 	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
 
-	return ns_to_ktime(total);
+	return ns_to_ktime(stats->total);
+}
+
+static void guc_enable_busyness_worker(struct intel_guc *guc)
+{
+	mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay);
+}
+
+static void guc_cancel_busyness_worker(struct intel_guc *guc)
+{
+	/*
+	 * There are many different call stacks that can get here. Some of them
+	 * hold the reset mutex. The busyness worker also attempts to acquire the
+	 * reset mutex. Synchronously flushing a worker thread requires acquiring
+	 * the worker mutex. Lockdep sees this as a conflict. It thinks that the
+	 * flush can deadlock because it holds the worker mutex while waiting for
+	 * the reset mutex, but another thread is holding the reset mutex and might
+	 * attempt to use other worker functions.
+	 *
+	 * In practice, this scenario does not exist because the busyness worker
+	 * does not block waiting for the reset mutex. It does a try-lock on it and
+	 * immediately exits if the lock is already held. Unfortunately, the mutex
+	 * in question (I915_RESET_BACKOFF) is an i915 implementation which has lockdep
+	 * annotation but not to the extent of explaining that the 'might lock' is also
+	 * a 'does not need to lock'. So one option would be to add more complex lockdep
+	 * annotations to ignore the issue (if at all possible). A simpler option is to
+	 * just not flush synchronously when a reset is in progress. Given that the
+	 * worker will just early exit and re-schedule itself anyway, there is no
+	 * advantage to running it immediately.
+	 *
+	 * If a reset is not in progress, then the synchronous flush may be required.
+	 * As noted, many call stacks lead here, some during suspend and driver unload
+	 * which do require a synchronous flush to make sure the worker is stopped
+	 * before memory is freed.
+	 *
+	 * Trying to pass a 'need_sync' or 'in_reset' flag all the way down through
+	 * every possible call stack is unfeasible. It would be too intrusive to many
+	 * areas that really don't care about the GuC backend. However, there is the
+	 * I915_RESET_BACKOFF flag and the gt->reset.mutex can be tested for is_locked.
+	 * So just use those. Note that testing both is required due to the hideously
+	 * complex nature of the i915 driver's reset code paths.
+	 *
+	 * And note that in the case of a reset occurring during driver unload
+	 * (wedged_on_fini), skipping the cancel in reset_prepare/reset_fini (when the
+	 * reset flag/mutex are set) is fine because there is another explicit cancel in
+	 * intel_guc_submission_fini (when the reset flag/mutex are not).
+	 */
+	if (mutex_is_locked(&guc_to_gt(guc)->reset.mutex) ||
+	    test_bit(I915_RESET_BACKOFF, &guc_to_gt(guc)->reset.flags))
+		cancel_delayed_work(&guc->timestamp.work);
+	else
+		cancel_delayed_work_sync(&guc->timestamp.work);
 }
 
 static void __reset_guc_busyness_stats(struct intel_guc *guc)
@@ -1360,19 +1442,43 @@
 	unsigned long flags;
 	ktime_t unused;
 
-	cancel_delayed_work_sync(&guc->timestamp.work);
-
 	spin_lock_irqsave(&guc->timestamp.lock, flags);
 
 	guc_update_pm_timestamp(guc, &unused);
 	for_each_engine(engine, gt, id) {
+		struct intel_engine_guc_stats *stats = &engine->stats.guc;
+
 		guc_update_engine_gt_clks(engine);
-		engine->stats.guc.prev_total = 0;
+
+		/*
+		 * If resetting a running context, accumulate the active
+		 * time as well since there will be no context switch.
+		 */
+		if (stats->running) {
+			u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
+
+			stats->total_gt_clks += clk;
+		}
+		stats->prev_total = 0;
+		stats->running = 0;
 	}
 
 	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
 }
 
+static void __update_guc_busyness_running_state(struct intel_guc *guc)
+{
+	struct intel_gt *gt = guc_to_gt(guc);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&guc->timestamp.lock, flags);
+	for_each_engine(engine, gt, id)
+		engine->stats.guc.running = false;
+	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+
 static void __update_guc_busyness_stats(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
@@ -1392,16 +1498,67 @@ static void __update_guc_busyness_stats(struct intel_guc *guc)
 	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
 }
 
+static void __guc_context_update_stats(struct intel_context *ce)
+{
+	struct intel_guc *guc = ce_to_guc(ce);
+	unsigned long flags;
+
+	spin_lock_irqsave(&guc->timestamp.lock, flags);
+	lrc_update_runtime(ce);
+	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+
+static void guc_context_update_stats(struct intel_context *ce)
+{
+	if (!intel_context_pin_if_active(ce))
+		return;
+
+	__guc_context_update_stats(ce);
+	intel_context_unpin(ce);
+}
+
 static void guc_timestamp_ping(struct work_struct *wrk)
 {
 	struct intel_guc *guc = container_of(wrk, typeof(*guc),
					     timestamp.work.work);
 	struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
 	struct intel_gt *gt = guc_to_gt(guc);
+	struct intel_context *ce;
 	intel_wakeref_t wakeref;
+	unsigned long index;
 	int srcu, ret;
 
 	/*
+	 * Ideally the busyness worker should take a gt pm wakeref because the
+	 * worker only needs to be active while gt is awake. However, the
+	 * gt_park path cancels the worker synchronously and this complicates
+	 * the flow if the worker is also running at the same time. The cancel
+	 * waits for the worker and when the worker releases the wakeref, that
+	 * would call gt_park and would lead to a deadlock.
+	 *
+	 * The resolution is to take the global pm wakeref if runtime pm is
+	 * already active. If not, we don't need to update the busyness stats as
+	 * the stats would already be updated when the gt was parked.
+	 *
+	 * Note:
+	 * - We do not requeue the worker if we cannot take a reference to runtime
+	 *   pm since intel_guc_busyness_unpark would requeue the worker in the
+	 *   resume path.
+	 *
+	 * - If the gt was parked longer than time taken for GT timestamp to roll
+	 *   over, we ignore those rollovers since we don't care about tracking
+	 *   the exact GT time. We only care about roll overs when the gt is
+	 *   active and running workloads.
+	 *
+	 * - There is a window of time between gt_park and runtime suspend,
+	 *   where the worker may run. This is acceptable since the worker will
+	 *   not find any new data to update busyness.
+	 */
+	wakeref = intel_runtime_pm_get_if_active(&gt->i915->runtime_pm);
+	if (!wakeref)
+		return;
+
+	/*
	 * Synchronize with gt reset to make sure the worker does not
	 * corrupt the engine/guc stats. NB: can't actually block waiting
	 * for a reset to complete as the reset requires flushing out
@@ -1409,19 +1566,27 @@ static void guc_timestamp_ping(struct work_struct *wrk)
	 */
 	ret = intel_gt_reset_trylock(gt, &srcu);
 	if (ret)
-		return;
+		goto err_trylock;
 
-	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
-		__update_guc_busyness_stats(guc);
+	__update_guc_busyness_stats(guc);
+
+	/* adjust context stats for overflow */
+	xa_for_each(&guc->context_lookup, index, ce)
+		guc_context_update_stats(ce);
 
 	intel_gt_reset_unlock(gt, srcu);
 
-	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
-			 guc->timestamp.ping_delay);
+	guc_enable_busyness_worker(guc);
+
+err_trylock:
+	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 }
 
 static int guc_action_enable_usage_stats(struct intel_guc *guc)
 {
+	struct intel_gt *gt = guc_to_gt(guc);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	u32 offset = intel_guc_engine_usage_offset(guc);
 	u32 action[] = {
 		INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
@@ -1429,38 +1594,50 @@ static int guc_action_enable_usage_stats(struct intel_guc *guc)
 		0,
 	};
 
+	for_each_engine(engine, gt, id)
+		__set_engine_usage_record(engine, 0, 0xffffffff, 0);
+
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
-static void guc_init_engine_stats(struct intel_guc *guc)
+static int guc_init_engine_stats(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
 	intel_wakeref_t wakeref;
+	int ret;
 
-	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
-			 guc->timestamp.ping_delay);
+	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+		ret = guc_action_enable_usage_stats(guc);
 
-	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
-		int ret = guc_action_enable_usage_stats(guc);
+	if (ret)
+		guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret));
+	else
+		guc_enable_busyness_worker(guc);
 
-		if (ret)
-			guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret));
-	}
+	return ret;
+}
+
+static void guc_fini_engine_stats(struct intel_guc *guc)
+{
+	guc_cancel_busyness_worker(guc);
 }
 
 void intel_guc_busyness_park(struct intel_gt *gt)
 {
-	struct intel_guc *guc = &gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(gt);
 
 	if (!guc_submission_initialized(guc))
 		return;
 
+	/* Assume no engines are running and set running state to false */
+	__update_guc_busyness_running_state(guc);
+
 	/*
	 * There is a race with suspend flow where the worker runs after suspend
	 * and causes an unclaimed register access warning. Cancel the worker
	 * synchronously here.
	 */
-	cancel_delayed_work_sync(&guc->timestamp.work);
+	guc_cancel_busyness_worker(guc);
 
 	/*
	 * Before parking, we should sample engine busyness stats if we need to.
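A detail worth calling out in the guc_engine_busyness() change above: the computed value is latched into stats->total and the latched value is returned, so the busyness reported to userspace can never move backwards across a reset or park/unpark cycle. A trivial compilable sketch of that latch, with hypothetical names:

#include <stdint.h>
#include <stdio.h>

struct engine_stats {
	uint64_t total;		/* last value reported to userspace */
};

/* Mirrors the "if (total > stats->total) stats->total = total" latch in
 * the hunk above: only ever report a value >= the previous one, even if
 * the current sample regressed. */
static uint64_t report_busyness(struct engine_stats *stats, uint64_t sample)
{
	if (sample > stats->total)
		stats->total = sample;
	return stats->total;
}

int main(void)
{
	struct engine_stats st = { 0 };

	printf("%llu\n", (unsigned long long)report_busyness(&st, 100));
	printf("%llu\n", (unsigned long long)report_busyness(&st, 90)); /* still 100 */
	return 0;
}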
@@ -1477,7 +1654,7 @@ void intel_guc_busyness_park(struct intel_gt *gt)
 
 void intel_guc_busyness_unpark(struct intel_gt *gt)
 {
-	struct intel_guc *guc = &gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(gt);
 	unsigned long flags;
 	ktime_t unused;
 
@@ -1487,8 +1664,7 @@ void intel_guc_busyness_unpark(struct intel_gt *gt)
 	spin_lock_irqsave(&guc->timestamp.lock, flags);
 	guc_update_pm_timestamp(guc, &unused);
 	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
-	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
-			 guc->timestamp.ping_delay);
+	guc_enable_busyness_worker(guc);
 }
 
 static inline bool
@@ -1539,6 +1715,11 @@ static void guc_flush_submissions(struct intel_guc *guc)
 	spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
 
+void intel_guc_submission_flush_work(struct intel_guc *guc)
+{
+	flush_work(&guc->submission_state.destroyed_worker);
+}
+
 static void guc_flush_destroyed_contexts(struct intel_guc *guc);
 
 void intel_guc_submission_reset_prepare(struct intel_guc *guc)
@@ -1557,6 +1738,10 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
 	spin_lock_irq(guc_to_gt(guc)->irq_lock);
 	spin_unlock_irq(guc_to_gt(guc)->irq_lock);
 
+	/* Flush tasklet */
+	tasklet_disable(&guc->ct.receive_tasklet);
+	tasklet_enable(&guc->ct.receive_tasklet);
+
 	guc_flush_submissions(guc);
 	guc_flush_destroyed_contexts(guc);
 	flush_work(&guc->ct.requests.worker);
@@ -1615,16 +1800,14 @@ static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
 
 static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
 {
-	if (!IS_GRAPHICS_VER(engine->i915, 11, 12))
-		return;
-
-	intel_engine_stop_cs(engine);
-
 	/*
	 * Wa_22011802037: In addition to stopping the cs, we need
	 * to wait for any pending mi force wakeups
	 */
-	intel_engine_wait_for_pending_mi_fw(engine);
+	if (intel_engine_reset_needs_wa_22011802037(engine->gt)) {
+		intel_engine_stop_cs(engine);
+		intel_engine_wait_for_pending_mi_fw(engine);
+	}
 }
 
 static void guc_reset_nop(struct intel_engine_cs *engine)
@@ -1727,6 +1910,20 @@ next_context:
 	intel_context_put(parent);
 }
 
+void wake_up_all_tlb_invalidate(struct intel_guc *guc)
+{
+	struct intel_guc_tlb_wait *wait;
+	unsigned long i;
+
+	if (!intel_guc_tlb_invalidation_is_available(guc))
+		return;
+
+	xa_lock_irq(&guc->tlb_lookup);
+	xa_for_each(&guc->tlb_lookup, i, wait)
+		wake_up(&wait->wq);
+	xa_unlock_irq(&guc->tlb_lookup);
+}
+
 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
 {
 	struct intel_context *ce;
@@ -1852,33 +2049,99 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
 
 	/* GuC is blown away, drop all references to contexts */
 	xa_destroy(&guc->context_lookup);
+
+	/*
	 * Wedged GT won't respond to any TLB invalidation request. Simply
	 * release all the blocked waiters.
	 */
+	wake_up_all_tlb_invalidate(guc);
 }
 
 void intel_guc_submission_reset_finish(struct intel_guc *guc)
 {
+	int outstanding;
+
 	/* Reset called during driver load or during wedge? */
 	if (unlikely(!guc_submission_initialized(guc) ||
+		     !intel_guc_is_fw_running(guc) ||
 		     intel_gt_is_wedged(guc_to_gt(guc)))) {
 		return;
 	}
 
 	/*
	 * Technically possible for either of these values to be non-zero here,
-	 * but very unlikely + harmless. Regardless let's add a warn so we can
+	 * but very unlikely + harmless. Regardless let's add an error so we can
	 * see in CI if this happens frequently / a precursor to taking down the
	 * machine.
	 */
-	GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
+	outstanding = atomic_read(&guc->outstanding_submission_g2h);
+	if (outstanding)
+		guc_err(guc, "Unexpected outstanding GuC to Host response(s) in reset finish: %d\n",
+			outstanding);
 	atomic_set(&guc->outstanding_submission_g2h, 0);
 
 	intel_guc_global_policies_update(guc);
 	enable_submission(guc);
 	intel_gt_unpark_heartbeats(guc_to_gt(guc));
+
+	/*
	 * The full GT reset will have cleared the TLB caches and flushed the
	 * G2H message queue; we can release all the blocked waiters.
	 */
+	wake_up_all_tlb_invalidate(guc);
 }
 
 static void destroyed_worker_func(struct work_struct *w);
 static void reset_fail_worker_func(struct work_struct *w);
 
+bool intel_guc_tlb_invalidation_is_available(struct intel_guc *guc)
+{
+	return HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915) &&
+		intel_guc_is_ready(guc);
+}
+
+static int init_tlb_lookup(struct intel_guc *guc)
+{
+	struct intel_guc_tlb_wait *wait;
+	int err;
+
+	if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915))
+		return 0;
+
+	xa_init_flags(&guc->tlb_lookup, XA_FLAGS_ALLOC);
+
+	wait = kzalloc(sizeof(*wait), GFP_KERNEL);
+	if (!wait)
+		return -ENOMEM;
+
+	init_waitqueue_head(&wait->wq);
+
+	/* Preallocate a shared id for use under memory pressure. */
+	err = xa_alloc_cyclic_irq(&guc->tlb_lookup, &guc->serial_slot, wait,
+				  xa_limit_32b, &guc->next_seqno, GFP_KERNEL);
+	if (err < 0) {
+		kfree(wait);
+		return err;
+	}
+
+	return 0;
+}
+
+static void fini_tlb_lookup(struct intel_guc *guc)
+{
+	struct intel_guc_tlb_wait *wait;
+
+	if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915))
+		return;
+
+	wait = xa_load(&guc->tlb_lookup, guc->serial_slot);
+	if (wait && wait->busy)
+		guc_err(guc, "Unexpected busy item in tlb_lookup on fini\n");
+	kfree(wait);
+
+	xa_destroy(&guc->tlb_lookup);
+}
+
 /*
 * Set up the memory resources to be shared with the GuC (via the GGTT)
 * at firmware loading time.
@@ -1897,11 +2160,15 @@ int intel_guc_submission_init(struct intel_guc *guc)
 		return ret;
 	}
 
+	ret = init_tlb_lookup(guc);
+	if (ret)
+		goto destroy_pool;
+
 	guc->submission_state.guc_ids_bitmap =
 		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
 	if (!guc->submission_state.guc_ids_bitmap) {
 		ret = -ENOMEM;
-		goto destroy_pool;
+		goto destroy_tlb;
 	}
 
 	guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
@@ -1910,9 +2177,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
 
 	return 0;
 
+destroy_tlb:
+	fini_tlb_lookup(guc);
 destroy_pool:
 	guc_lrc_desc_pool_destroy_v69(guc);
-
 	return ret;
 }
 
@@ -1921,10 +2189,12 @@ void intel_guc_submission_fini(struct intel_guc *guc)
 	if (!guc->submission_initialized)
 		return;
 
+	guc_fini_engine_stats(guc);
 	guc_flush_destroyed_contexts(guc);
 	guc_lrc_desc_pool_destroy_v69(guc);
 	i915_sched_engine_put(guc->sched_engine);
 	bitmap_free(guc->submission_state.guc_ids_bitmap);
+	fini_tlb_lookup(guc);
 	guc->submission_initialized = false;
 }
 
@@ -1978,7 +2248,7 @@ static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
 static void guc_submit_request(struct i915_request *rq)
 {
 	struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
-	struct intel_guc *guc = &rq->engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(rq->engine->gt);
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
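The intel_guc_submission_init() change above inserts init_tlb_lookup() into an existing goto-unwind chain, adding a destroy_tlb label ahead of destroy_pool so cleanup runs in reverse acquisition order. A compilable sketch of that idiom with hypothetical resources standing in for the pool, TLB lookup table, and ID bitmap:

#include <stdio.h>

static int acquire(const char *what, int fail) { printf("acquire %s\n", what); return fail ? -1 : 0; }
static void release(const char *what) { printf("release %s\n", what); }

/* Each failure jumps to the label that unwinds everything acquired so
 * far; labels appear in reverse acquisition order, so adding a resource
 * in the middle only requires adding one label in the right position. */
static int submission_init(int fail_at)
{
	int ret;

	ret = acquire("pool", fail_at == 1);
	if (ret)
		return ret;

	ret = acquire("tlb_lookup", fail_at == 2);
	if (ret)
		goto destroy_pool;

	ret = acquire("ids_bitmap", fail_at == 3);
	if (ret)
		goto destroy_tlb;

	return 0;

destroy_tlb:
	release("tlb_lookup");
destroy_pool:
	release("pool");
	return ret;
}

int main(void)
{
	return submission_init(3) ? 0 : 1;	/* exercises the full unwind */
}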
@@ -2004,11 +2274,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
					      order_base_2(ce->parallel.number_children
							   + 1));
 	else
-		ret = ida_simple_get(&guc->submission_state.guc_ids,
-				     NUMBER_MULTI_LRC_GUC_ID(guc),
-				     guc->submission_state.num_guc_ids,
-				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
-				     __GFP_NOWARN);
+		ret = ida_alloc_range(&guc->submission_state.guc_ids,
+				      NUMBER_MULTI_LRC_GUC_ID(guc),
+				      guc->submission_state.num_guc_ids - 1,
+				      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (unlikely(ret < 0))
 		return ret;
 
@@ -2031,8 +2300,8 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
							      + 1));
 	} else {
 		--guc->submission_state.guc_ids_in_use;
-		ida_simple_remove(&guc->submission_state.guc_ids,
-				  ce->guc_id.id);
+		ida_free(&guc->submission_state.guc_ids,
+			 ce->guc_id.id);
 	}
 	clr_ctx_id_mapping(guc, ce->guc_id.id);
 	set_context_guc_id_invalid(ce);
@@ -2429,6 +2698,7 @@ MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
+MAKE_CONTEXT_POLICY_ADD(slpc_ctx_freq_req, SLPM_GT_FREQUENCY)
 
 #undef MAKE_CONTEXT_POLICY_ADD
 
@@ -2444,10 +2714,11 @@ static int __guc_context_set_context_policies(struct intel_guc *guc,
 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
 {
 	struct intel_engine_cs *engine = ce->engine;
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 	struct context_policy policy;
 	u32 execution_quantum;
 	u32 preemption_timeout;
+	u32 slpc_ctx_freq_req = 0;
 	unsigned long flags;
 	int ret;
 
@@ -2459,11 +2730,15 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
 	execution_quantum = engine->props.timeslice_duration_ms * 1000;
 	preemption_timeout = engine->props.preempt_timeout_ms * 1000;
 
+	if (ce->flags & BIT(CONTEXT_LOW_LATENCY))
+		slpc_ctx_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
+
 	__guc_context_policy_start_klv(&policy, ce->guc_id.id);
 
 	__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
 	__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
 	__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+	__guc_context_policy_add_slpc_ctx_freq_req(&policy, slpc_ctx_freq_req);
 
 	if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
 		__guc_context_policy_add_preempt_to_idle(&policy, 1);
@@ -2520,7 +2795,7 @@ static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
 static void prepare_context_registration_info_v69(struct intel_context *ce)
 {
 	struct intel_engine_cs *engine = ce->engine;
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 	u32 ctx_id = ce->guc_id.id;
 	struct guc_lrc_desc_v69 *desc;
 	struct intel_context *child;
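Note the subtle range-semantics change in the ida conversion above: ida_simple_get() took an exclusive end, while ida_alloc_range() takes an inclusive max, which is why the patch passes num_guc_ids - 1 to keep the allocatable range identical. A kernel-style sketch (not a standalone program; names are illustrative):

#include <linux/idr.h>

static DEFINE_IDA(example_ida);

static int alloc_example_id(unsigned int min, unsigned int count)
{
	/* Old: ida_simple_get(&example_ida, min, min + count, GFP_KERNEL);
	 * New: 'max' is inclusive, so pass min + count - 1 for the same range. */
	return ida_alloc_range(&example_ida, min, min + count - 1, GFP_KERNEL);
}

static void free_example_id(int id)
{
	ida_free(&example_ida, id);	/* replaces ida_simple_remove() */
}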
@@ -2589,7 +2864,7 @@ static void prepare_context_registration_info_v70(struct intel_context *ce,
					  struct guc_ctxt_registration_info *info)
 {
 	struct intel_engine_cs *engine = ce->engine;
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 	u32 ctx_id = ce->guc_id.id;
 
 	GEM_BUG_ON(!engine->mask);
@@ -2626,9 +2901,9 @@ static void prepare_context_registration_info_v70(struct intel_context *ce,
 		ce->parallel.guc.wqi_tail = 0;
 		ce->parallel.guc.wqi_head = 0;
 
-		wq_desc_offset = i915_ggtt_offset(ce->state) +
+		wq_desc_offset = (u64)i915_ggtt_offset(ce->state) +
				 __get_parent_scratch_offset(ce);
-		wq_base_offset = i915_ggtt_offset(ce->state) +
+		wq_base_offset = (u64)i915_ggtt_offset(ce->state) +
				 __get_wq_offset(ce);
 		info->wq_desc_lo = lower_32_bits(wq_desc_offset);
 		info->wq_desc_hi = upper_32_bits(wq_desc_offset);
@@ -2652,7 +2927,7 @@ static int try_context_registration(struct intel_context *ce, bool loop)
 {
 	struct intel_engine_cs *engine = ce->engine;
 	struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 	intel_wakeref_t wakeref;
 	u32 ctx_id = ce->guc_id.id;
 	bool context_registered;
@@ -2733,7 +3008,7 @@ static int __guc_context_pin(struct intel_context *ce,
 
 	/*
	 * GuC context gets pinned in guc_request_alloc. See that function for
-	 * explaination of why.
+	 * explanation of why.
	 */
 	return lrc_pin(ce, engine, vaddr);
 }
@@ -2760,6 +3035,7 @@ static void guc_context_unpin(struct intel_context *ce)
 {
 	struct intel_guc *guc = ce_to_guc(ce);
 
+	__guc_context_update_stats(ce);
 	unpin_guc_id(guc, ce);
 	lrc_unpin(ce);
 
@@ -3130,12 +3406,13 @@ static void guc_context_close(struct intel_context *ce)
 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 }
 
-static inline void guc_lrc_desc_unpin(struct intel_context *ce)
+static inline int guc_lrc_desc_unpin(struct intel_context *ce)
 {
 	struct intel_guc *guc = ce_to_guc(ce);
 	struct intel_gt *gt = guc_to_gt(guc);
 	unsigned long flags;
 	bool disabled;
+	int ret;
 
 	GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
 	GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
@@ -3146,18 +3423,52 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
 	spin_lock_irqsave(&ce->guc_state.lock, flags);
 	disabled = submission_disabled(guc);
 	if (likely(!disabled)) {
+		/*
		 * Take a gt-pm ref and change context state to be destroyed.
		 * NOTE: a G2H IRQ that comes after will put this gt-pm ref back
		 */
 		__intel_gt_pm_get(gt);
 		set_context_destroyed(ce);
 		clr_context_registered(ce);
 	}
 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
 	if (unlikely(disabled)) {
 		release_guc_id(guc, ce);
 		__guc_context_destroy(ce);
-		return;
+		return 0;
 	}
 
-	deregister_context(ce, ce->guc_id.id);
+	/*
	 * GuC is active, let's destroy this context, but at this point we can still be racing
	 * with suspend, so we undo everything if the H2G fails in deregister_context so
	 * that GuC reset will find this context during clean up.
	 *
	 * There is a race condition where the reset code could have altered
	 * this context's state and done a wakeref put before we try to
	 * deregister it here. So check if the context is still set to be
	 * destroyed before undoing earlier changes, to avoid two wakeref puts
	 * on the same context.
	 */
+	ret = deregister_context(ce, ce->guc_id.id);
+	if (ret) {
+		bool pending_destroyed;
+
+		spin_lock_irqsave(&ce->guc_state.lock, flags);
+		pending_destroyed = context_destroyed(ce);
+		if (pending_destroyed) {
+			set_context_registered(ce);
+			clr_context_destroyed(ce);
+		}
+		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+		/*
		 * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements
		 * the wakeref immediately but per function spec usage call this after unlock.
		 */
+		if (pending_destroyed)
+			intel_wakeref_put_async(&gt->wakeref);
+	}
+
+	return ret;
 }
 
 static void __guc_context_destroy(struct intel_context *ce)
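The wq offset hunk above adds (u64) casts so the GGTT offset is widened before the addition. Without the cast, the sum is computed in 32-bit arithmetic and wraps before being assigned to the 64-bit variable. A compilable demonstration with hypothetical values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* A base near the top of a 32-bit range plus a scratch offset: the
	 * two expressions differ because the first adds in 32 bits and
	 * wraps before widening, which is what the casts above prevent. */
	uint32_t base = 0xfffff000u;
	uint32_t extra = 0x2000u;

	uint64_t wrapped = base + extra;		/* 32-bit add, wraps */
	uint64_t widened = (uint64_t)base + extra;	/* 64-bit add */

	printf("wrapped = 0x%llx\n", (unsigned long long)wrapped); /* 0x1000 */
	printf("widened = 0x%llx\n", (unsigned long long)widened); /* 0x100001000 */
	return 0;
}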
@@ -3225,7 +3536,22 @@ static void deregister_destroyed_contexts(struct intel_guc *guc)
 		if (!ce)
 			break;
 
-		guc_lrc_desc_unpin(ce);
+		if (guc_lrc_desc_unpin(ce)) {
+			/*
			 * This means GuC's CT link severed mid-way which could happen
			 * in suspend-resume corner cases. In this case, put the
			 * context back into the destroyed_contexts list which will
			 * get picked up on the next context deregistration event or
			 * purged in a GuC sanitization event (reset/unload/wedged/...).
			 */
+			spin_lock_irqsave(&guc->submission_state.lock, flags);
+			list_add_tail(&ce->destroyed_link,
+				      &guc->submission_state.destroyed_contexts);
+			spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+			/* Bail now since the list might never be emptied if h2gs fail */
+			break;
+		}
 	}
 }
 
@@ -3234,9 +3560,20 @@ static void destroyed_worker_func(struct work_struct *w)
 	struct intel_guc *guc = container_of(w, struct intel_guc,
					     submission_state.destroyed_worker);
 	struct intel_gt *gt = guc_to_gt(guc);
-	int tmp;
+	intel_wakeref_t wakeref;
+
+	/*
	 * In rare cases we can get here via async context-free fence-signals that
	 * come very late in suspend flow or very early in resume flows. In these
	 * cases, GuC won't be ready but just skipping it here is fine as these
	 * pending-destroy-contexts get destroyed totally at GuC reset time at the
	 * end of suspend, or this worker can be picked up later on the next
	 * context destruction trigger after resume completes.
	 */
+	if (!intel_guc_is_ready(guc))
+		return;
 
-	with_intel_gt_pm(gt, tmp)
+	with_intel_gt_pm(gt, wakeref)
 		deregister_destroyed_contexts(guc);
 }
 
@@ -3441,6 +3778,7 @@ static void remove_from_context(struct i915_request *rq)
 }
 
 static const struct intel_context_ops guc_context_ops = {
+	.flags = COPS_RUNTIME_CYCLES,
 	.alloc = guc_context_alloc,
 
 	.close = guc_context_close,
@@ -3459,6 +3797,8 @@ static const struct intel_context_ops guc_context_ops = {
 
 	.sched_disable = guc_context_sched_disable,
 
+	.update_stats = guc_context_update_stats,
+
 	.reset = lrc_reset,
 	.destroy = guc_context_destroy,
 
@@ -3714,6 +4054,7 @@ static int guc_virtual_context_alloc(struct intel_context *ce)
 }
 
 static const struct intel_context_ops virtual_guc_context_ops = {
+	.flags = COPS_RUNTIME_CYCLES,
 	.alloc = guc_virtual_context_alloc,
 
 	.close = guc_context_close,
@@ -3731,6 +4072,7 @@ static const struct intel_context_ops virtual_guc_context_ops = {
 	.exit = guc_virtual_context_exit,
 
 	.sched_disable = guc_context_sched_disable,
+	.update_stats = guc_context_update_stats,
 
 	.destroy = guc_context_destroy,
 
@@ -3995,20 +4337,25 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq,
 	u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
 
 	/* Short circuit function */
-	if (prio < I915_PRIORITY_NORMAL ||
-	    rq->guc_prio == GUC_PRIO_FINI ||
-	    (rq->guc_prio != GUC_PRIO_INIT &&
-	     !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
+	if (prio < I915_PRIORITY_NORMAL)
 		return;
 
 	spin_lock(&ce->guc_state.lock);
-	if (rq->guc_prio != GUC_PRIO_FINI) {
-		if (rq->guc_prio != GUC_PRIO_INIT)
-			sub_context_inflight_prio(ce, rq->guc_prio);
-		rq->guc_prio = new_guc_prio;
-		add_context_inflight_prio(ce, rq->guc_prio);
-		update_context_prio(ce);
-	}
+
+	if (rq->guc_prio == GUC_PRIO_FINI)
+		goto exit;
+
+	if (!new_guc_prio_higher(rq->guc_prio, new_guc_prio))
+		goto exit;
+
+	if (rq->guc_prio != GUC_PRIO_INIT)
+		sub_context_inflight_prio(ce, rq->guc_prio);
+
+	rq->guc_prio = new_guc_prio;
+	add_context_inflight_prio(ce, rq->guc_prio);
+	update_context_prio(ce);
+
+exit:
 	spin_unlock(&ce->guc_state.lock);
 }
@@ -4102,9 +4449,11 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
 	engine->submit_request = guc_submit_request;
 }
 
-static inline void guc_kernel_context_pin(struct intel_guc *guc,
-					  struct intel_context *ce)
+static inline int guc_kernel_context_pin(struct intel_guc *guc,
+					 struct intel_context *ce)
 {
+	int ret;
+
 	/*
	 * Note: we purposefully do not check the returns below because
	 * the registration can only fail if a reset is just starting.
@@ -4112,16 +4461,24 @@ static inline void guc_kernel_context_pin(struct intel_guc *guc,
	 * isn't happening and even it did this code would be run again.
	 */
 
-	if (context_guc_id_invalid(ce))
-		pin_guc_id(guc, ce);
+	if (context_guc_id_invalid(ce)) {
+		ret = pin_guc_id(guc, ce);
+
+		if (ret < 0)
+			return ret;
+	}
 
 	if (!test_bit(CONTEXT_GUC_INIT, &ce->flags))
 		guc_context_init(ce);
 
-	try_context_registration(ce, true);
+	ret = try_context_registration(ce, true);
+	if (ret)
+		unpin_guc_id(guc, ce);
+
+	return ret;
 }
 
-static inline void guc_init_lrc_mapping(struct intel_guc *guc)
+static inline int guc_init_submission(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
 	struct intel_engine_cs *engine;
@@ -4148,9 +4505,17 @@ static inline int guc_init_submission(struct intel_guc *guc)
 		struct intel_context *ce;
 
 		list_for_each_entry(ce, &engine->pinned_contexts_list,
-				    pinned_contexts_link)
-			guc_kernel_context_pin(guc, ce);
+				    pinned_contexts_link) {
+			int ret = guc_kernel_context_pin(guc, ce);
+
+			if (ret) {
+				/* No point in trying to clean up as i915 will wedge on failure */
+				return ret;
+			}
+		}
 	}
+
+	return 0;
 }
 
 static void guc_release(struct intel_engine_cs *engine)
@@ -4204,9 +4569,15 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 
 	/* Wa_14014475959:dg2 */
 	if (engine->class == COMPUTE_CLASS)
-		if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
+		if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
		    IS_DG2(engine->i915))
-			engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+			engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
+
+	/* Wa_16019325821 */
+	/* Wa_14019159160 */
+	if ((engine->class == COMPUTE_CLASS || engine->class == RENDER_CLASS) &&
+	    IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
+		engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT;
 
 	/*
	 * TODO: GuC supports timeslicing and semaphores as well, but they're
@@ -4217,7 +4588,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
	 */
 	engine->emit_bb_start = gen8_emit_bb_start;
 
-	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
 		engine->emit_bb_start = xehp_emit_bb_start;
 }
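The version checks above work because i915 packs the graphics IP version and release into a single integer, so full versions compare with ordinary operators. A small compilable sketch; the macro definition is mirrored from the driver, so verify it against the tree you are reading:

#include <stdio.h>

#define IP_VER(ver, rel)	((ver) << 8 | (rel))

int main(void)
{
	/* The hunk above raises the xehp_emit_bb_start cutoff: 12.50..12.54
	 * parts no longer take the Xe_HP path, while 12.55+ still do. */
	printf("12.50 >= 12.55? %d\n", IP_VER(12, 50) >= IP_VER(12, 55)); /* 0 */
	printf("12.55 >= 12.55? %d\n", IP_VER(12, 55) >= IP_VER(12, 55)); /* 1 */
	printf("12.70 >= 12.55? %d\n", IP_VER(12, 70) >= IP_VER(12, 55)); /* 1 */
	return 0;
}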
@@ -4259,7 +4630,7 @@ static void guc_sched_engine_destroy(struct kref *kref)
 int intel_guc_submission_setup(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 
 	/*
	 * The setup relies on several assumptions (e.g. irqs always enabled)
@@ -4393,42 +4764,69 @@ static int guc_init_global_schedule_policy(struct intel_guc *guc)
 	return ret;
 }
 
-void intel_guc_submission_enable(struct intel_guc *guc)
+static void guc_route_semaphores(struct intel_guc *guc, bool to_guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
+	u32 val;
 
-	/* Enable and route to GuC */
-	if (GRAPHICS_VER(gt->i915) >= 12)
-		intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES,
-				   GUC_SEM_INTR_ROUTE_TO_GUC |
-				   GUC_SEM_INTR_ENABLE_ALL);
+	if (GRAPHICS_VER(gt->i915) < 12)
+		return;
 
-	guc_init_lrc_mapping(guc);
-	guc_init_engine_stats(guc);
-	guc_init_global_schedule_policy(guc);
+	if (to_guc)
+		val = GUC_SEM_INTR_ROUTE_TO_GUC | GUC_SEM_INTR_ENABLE_ALL;
+	else
+		val = 0;
+
+	intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, val);
 }
 
-void intel_guc_submission_disable(struct intel_guc *guc)
+int intel_guc_submission_enable(struct intel_guc *guc)
 {
-	struct intel_gt *gt = guc_to_gt(guc);
+	int ret;
+
+	/* Semaphore interrupt enable and route to GuC */
+	guc_route_semaphores(guc, true);
+
+	ret = guc_init_submission(guc);
+	if (ret)
+		goto fail_sem;
+
+	ret = guc_init_engine_stats(guc);
+	if (ret)
+		goto fail_sem;
 
-	/* Note: By the time we're here, GuC may have already been reset */
+	ret = guc_init_global_schedule_policy(guc);
+	if (ret)
+		goto fail_stats;
+
+	return 0;
+
+fail_stats:
+	guc_fini_engine_stats(guc);
+fail_sem:
+	guc_route_semaphores(guc, false);
+	return ret;
+}
+
+/* Note: By the time we're here, GuC may have already been reset */
+void intel_guc_submission_disable(struct intel_guc *guc)
+{
+	guc_cancel_busyness_worker(guc);
 
-	/* Disable and route to host */
-	if (GRAPHICS_VER(gt->i915) >= 12)
-		intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 0x0);
+	/* Semaphore interrupt disable and route to host */
+	guc_route_semaphores(guc, false);
 }
 
 static bool __guc_submission_supported(struct intel_guc *guc)
 {
 	/* GuC submission is unavailable for pre-Gen11 */
 	return intel_guc_is_supported(guc) &&
-	       GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
+	       GRAPHICS_VER(guc_to_i915(guc)) >= 11;
 }
 
 static bool __guc_submission_selected(struct intel_guc *guc)
 {
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 
 	if (!intel_guc_submission_is_supported(guc))
 		return false;
@@ -4504,6 +4902,154 @@ g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
 	return ce;
 }
 
+static void wait_wake_outstanding_tlb_g2h(struct intel_guc *guc, u32 seqno)
+{
+	struct intel_guc_tlb_wait *wait;
+	unsigned long flags;
+
+	xa_lock_irqsave(&guc->tlb_lookup, flags);
+	wait = xa_load(&guc->tlb_lookup, seqno);
+
+	if (wait)
+		wake_up(&wait->wq);
+	else
+		guc_dbg(guc,
+			"Stale TLB invalidation response with seqno %d\n", seqno);
+
+	xa_unlock_irqrestore(&guc->tlb_lookup, flags);
+}
+
+int intel_guc_tlb_invalidation_done(struct intel_guc *guc,
+				    const u32 *payload, u32 len)
+{
+	if (len < 1)
+		return -EPROTO;
+
+	wait_wake_outstanding_tlb_g2h(guc, payload[0]);
+	return 0;
+}
+
+static long must_wait_woken(struct wait_queue_entry *wq_entry, long timeout)
+{
+	/*
	 * This is equivalent to wait_woken() with the exception that
	 * we do not wake up early if the kthread task has been completed.
	 * As we are called from page reclaim in any task context,
	 * we may be invoked from stopped kthreads, but we *must*
	 * complete the wait from the HW.
	 */
+	do {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (wq_entry->flags & WQ_FLAG_WOKEN)
+			break;
+
+		timeout = schedule_timeout(timeout);
+	} while (timeout);
+
+	/* See wait_woken() and woken_wake_function() */
+	__set_current_state(TASK_RUNNING);
+	smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN);
+
+	return timeout;
+}
+
+static bool intel_gt_is_enabled(const struct intel_gt *gt)
+{
+	/* Check if GT is wedged or suspended */
+	if (intel_gt_is_wedged(gt) || !intel_irqs_enabled(gt->i915))
+		return false;
+	return true;
+}
+
+static int guc_send_invalidate_tlb(struct intel_guc *guc,
+				   enum intel_guc_tlb_invalidation_type type)
+{
+	struct intel_guc_tlb_wait _wq, *wq = &_wq;
+	struct intel_gt *gt = guc_to_gt(guc);
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	int err;
+	u32 seqno;
+	u32 action[] = {
+		INTEL_GUC_ACTION_TLB_INVALIDATION,
+		0,
+		REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_TYPE_MASK, type) |
+			REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_MODE_MASK,
+				       INTEL_GUC_TLB_INVAL_MODE_HEAVY) |
+			INTEL_GUC_TLB_INVAL_FLUSH_CACHE,
+	};
+	u32 size = ARRAY_SIZE(action);
+
+	/*
	 * Early guard against GT enablement. TLB invalidation should not be
	 * attempted if the GT is disabled due to suspend/wedge.
	 */
+	if (!intel_gt_is_enabled(gt))
+		return -EINVAL;
+
+	init_waitqueue_head(&_wq.wq);
+
+	if (xa_alloc_cyclic_irq(&guc->tlb_lookup, &seqno, wq,
+				xa_limit_32b, &guc->next_seqno,
+				GFP_ATOMIC | __GFP_NOWARN) < 0) {
+		/* Under severe memory pressure? Serialise TLB allocations */
+		xa_lock_irq(&guc->tlb_lookup);
+		wq = xa_load(&guc->tlb_lookup, guc->serial_slot);
+		wait_event_lock_irq(wq->wq,
+				    !READ_ONCE(wq->busy),
+				    guc->tlb_lookup.xa_lock);
+		/*
		 * Update wq->busy under lock to ensure only one waiter can
		 * issue the TLB invalidation command using the serial slot at a
		 * time. The condition is set to true before releasing the lock
		 * so that other callers continue to wait until woken up again.
		 */
+		wq->busy = true;
+		xa_unlock_irq(&guc->tlb_lookup);
+
+		seqno = guc->serial_slot;
+	}
+
+	action[1] = seqno;
+
+	add_wait_queue(&wq->wq, &wait);
+
+	/* This is a critical reclaim path and thus we must loop here. */
+	err = intel_guc_send_busy_loop(guc, action, size, G2H_LEN_DW_INVALIDATE_TLB, true);
+	if (err)
+		goto out;
+
+	/*
	 * Late guard against GT enablement. It is not an error for the TLB
	 * invalidation to time out if the GT is disabled during the process
	 * due to suspend/wedge. In fact, the TLB invalidation is cancelled
	 * in this case.
	 */
+	if (!must_wait_woken(&wait, intel_guc_ct_max_queue_time_jiffies()) &&
+	    intel_gt_is_enabled(gt)) {
+		guc_err(guc,
+			"TLB invalidation response timed out for seqno %u\n", seqno);
+		err = -ETIME;
+	}
+out:
+	remove_wait_queue(&wq->wq, &wait);
+	if (seqno != guc->serial_slot)
+		xa_erase_irq(&guc->tlb_lookup, seqno);
+
+	return err;
+}
+
+/* Send a H2G command to invalidate the TLBs at engine level and beyond. */
+int intel_guc_invalidate_tlb_engines(struct intel_guc *guc)
+{
+	return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_ENGINES);
+}
+
+/* Send a H2G command to invalidate the GuC's internal TLB. */
+int intel_guc_invalidate_tlb_guc(struct intel_guc *guc)
+{
+	return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_GUC);
+}
+
 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
					  const u32 *msg,
					  u32 len)
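The core protocol in guc_send_invalidate_tlb() above is: allocate a cyclic sequence number pointing at a waiter, send the H2G request carrying that seqno, block until the G2H handler wakes that exact waiter, then erase the entry. A condensed kernel-style sketch of that seqno/waiter pattern follows (not a standalone program; names and the simplified wait are illustrative, and the driver additionally handles the shared serial slot and the must-complete wait semantics shown above):

#include <linux/xarray.h>
#include <linux/wait.h>
#include <linux/slab.h>

struct inval_wait {
	struct wait_queue_head wq;
	bool done;
};

static DEFINE_XARRAY_ALLOC(wait_lookup);
static u32 next_seqno;

/* Response (G2H) side: look the waiter up by seqno and wake it. */
static void response_handler(u32 seqno)
{
	struct inval_wait *iw;
	unsigned long flags;

	xa_lock_irqsave(&wait_lookup, flags);
	iw = xa_load(&wait_lookup, seqno);
	if (iw) {
		WRITE_ONCE(iw->done, true);
		wake_up(&iw->wq);
	}
	xa_unlock_irqrestore(&wait_lookup, flags);
}

/* Request (H2G) side: register a waiter, send, wait, unregister. */
static int send_and_wait(long timeout)
{
	struct inval_wait iw = { .done = false };
	u32 seqno;
	int err;

	init_waitqueue_head(&iw.wq);

	err = xa_alloc_cyclic_irq(&wait_lookup, &seqno, &iw,
				  xa_limit_32b, &next_seqno, GFP_KERNEL);
	if (err < 0)
		return err;

	/* ...send the request carrying 'seqno' here... */

	wait_event_timeout(iw.wq, READ_ONCE(iw.done), timeout);

	xa_erase_irq(&wait_lookup, seqno);
	return READ_ONCE(iw.done) ? 0 : -ETIME;
}

The cyclic allocation matters here: by advancing next_seqno instead of reusing the lowest free index, a stale or duplicated response is very unlikely to alias a freshly allocated waiter.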
@@ -4545,7 +5091,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
 		intel_context_put(ce);
 	} else if (context_destroyed(ce)) {
 		/* Context has been destroyed */
-		intel_gt_pm_put_async(guc_to_gt(guc));
+		intel_gt_pm_put_async_untracked(guc_to_gt(guc));
 		release_guc_id(guc, ce);
 		__guc_context_destroy(ce);
 	}
@@ -4638,13 +5184,38 @@ static void capture_error_state(struct intel_guc *guc,
 {
 	struct intel_gt *gt = guc_to_gt(guc);
 	struct drm_i915_private *i915 = gt->i915;
-	struct intel_engine_cs *engine = __context_to_physical_engine(ce);
 	intel_wakeref_t wakeref;
+	intel_engine_mask_t engine_mask;
+
+	if (intel_engine_is_virtual(ce->engine)) {
+		struct intel_engine_cs *e;
+		intel_engine_mask_t tmp, virtual_mask = ce->engine->mask;
+
+		engine_mask = 0;
+		for_each_engine_masked(e, ce->engine->gt, virtual_mask, tmp) {
+			bool match = intel_guc_capture_is_matching_engine(gt, ce, e);
+
+			if (match) {
+				intel_engine_set_hung_context(e, ce);
+				engine_mask |= e->mask;
+				i915_increase_reset_engine_count(&i915->gpu_error,
+								 e);
+			}
+		}
+
+		if (!engine_mask) {
+			guc_warn(guc, "No matching physical engine capture for virtual engine context 0x%04X / %s",
+				 ce->guc_id.id, ce->engine->name);
+			engine_mask = ~0U;
+		}
+	} else {
+		intel_engine_set_hung_context(ce->engine, ce);
+		engine_mask = ce->engine->mask;
+		i915_increase_reset_engine_count(&i915->gpu_error, ce->engine);
+	}
 
-	intel_engine_set_hung_context(engine, ce);
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-		i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
-	atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
+		i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
 }
 
 static void guc_context_replay(struct intel_context *ce)
@@ -4658,18 +5229,19 @@ static void guc_context_replay(struct intel_context *ce)
 static void guc_handle_context_reset(struct intel_guc *guc,
				     struct intel_context *ce)
 {
+	bool capture = intel_context_is_schedulable(ce);
+
 	trace_intel_context_reset(ce);
 
-	drm_dbg(&guc_to_gt(guc)->i915->drm, "Got GuC reset of 0x%04X, exiting = %d, banned = %d\n",
-		ce->guc_id.id, test_bit(CONTEXT_EXITING, &ce->flags),
-		test_bit(CONTEXT_BANNED, &ce->flags));
+	guc_dbg(guc, "%s context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n",
+		capture ? "Got" : "Ignoring",
+		ce->guc_id.id, ce->engine->name,
+		str_yes_no(intel_context_is_exiting(ce)),
+		str_yes_no(intel_context_is_banned(ce)));
 
-	if (likely(intel_context_is_schedulable(ce))) {
+	if (capture) {
 		capture_error_state(guc, ce);
 		guc_context_replay(ce);
-	} else {
-		guc_info(guc, "Ignoring context reset notification of exiting context 0x%04X on %s",
-			 ce->guc_id.id, ce->engine->name);
 	}
 }
 
@@ -4817,7 +5389,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
 
 void intel_guc_find_hung_context(struct intel_engine_cs *engine)
 {
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 	struct intel_context *ce;
 	struct i915_request *rq;
 	unsigned long index;
@@ -4879,7 +5451,7 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
				    struct i915_request *hung_rq,
				    struct drm_printer *m)
 {
-	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct intel_guc *guc = gt_to_guc(engine->gt);
 	struct intel_context *ce;
 	unsigned long index;
 	unsigned long flags;
@@ -4971,12 +5543,20 @@ static inline void guc_log_context(struct drm_printer *p,
 {
 	drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
 	drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
-	drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
-		   ce->ring->head,
-		   ce->lrc_reg_state[CTX_RING_HEAD]);
-	drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
-		   ce->ring->tail,
-		   ce->lrc_reg_state[CTX_RING_TAIL]);
+	if (intel_context_pin_if_active(ce)) {
+		drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
+			   ce->ring->head,
+			   ce->lrc_reg_state[CTX_RING_HEAD]);
+		drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
+			   ce->ring->tail,
+			   ce->lrc_reg_state[CTX_RING_TAIL]);
+		intel_context_unpin(ce);
+	} else {
+		drm_printf(p, "\t\tLRC Head: Internal %u, Memory not pinned\n",
+			   ce->ring->head);
+		drm_printf(p, "\t\tLRC Tail: Internal %u, Memory not pinned\n",
+			   ce->ring->tail);
+	}
 	drm_printf(p, "\t\tContext Pin Count: %u\n",
		   atomic_read(&ce->pin_count));
 	drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
@@ -5331,7 +5911,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
 	if (!ve)
 		return ERR_PTR(-ENOMEM);
 
-	guc = &siblings[0]->gt->uc.guc;
+	guc = gt_to_guc(siblings[0]->gt);
 
 	ve->base.i915 = siblings[0]->i915;
 	ve->base.gt = siblings[0]->gt;
@@ -5355,6 +5935,9 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
 
 	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
 
+	BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES);
+	ve->base.mask = VIRTUAL_ENGINES;
+
 	intel_context_init(&ve->context, &ve->base);
 
 	for (n = 0; n < count; n++) {
