Diffstat (limited to 'drivers/gpu/drm/xe/xe_guc_submit.c')
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c | 1728
1 file changed, 1372 insertions, 356 deletions
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index e4e3658e6a13..ed7be50b2f72 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -10,10 +10,12 @@ #include <linux/circ_buf.h> #include <linux/delay.h> #include <linux/dma-fence-array.h> +#include <linux/math64.h> #include <drm/drm_managed.h> #include "abi/guc_actions_abi.h" +#include "abi/guc_actions_slpc_abi.h" #include "abi/guc_klvs_abi.h" #include "regs/xe_lrc_layout.h" #include "xe_assert.h" @@ -23,11 +25,14 @@ #include "xe_force_wake.h" #include "xe_gpu_scheduler.h" #include "xe_gt.h" +#include "xe_gt_clock.h" #include "xe_gt_printk.h" #include "xe_guc.h" +#include "xe_guc_capture.h" #include "xe_guc_ct.h" #include "xe_guc_exec_queue_types.h" #include "xe_guc_id_mgr.h" +#include "xe_guc_klv_helpers.h" #include "xe_guc_submit_types.h" #include "xe_hw_engine.h" #include "xe_hw_fence.h" @@ -35,9 +40,11 @@ #include "xe_macros.h" #include "xe_map.h" #include "xe_mocs.h" +#include "xe_pm.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_trace.h" +#include "xe_uc_fw.h" #include "xe_vm.h" static struct xe_guc * @@ -52,13 +59,19 @@ exec_queue_to_guc(struct xe_exec_queue *q) * engine done being processed). */ #define EXEC_QUEUE_STATE_REGISTERED (1 << 0) -#define ENGINE_STATE_ENABLED (1 << 1) -#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) +#define EXEC_QUEUE_STATE_ENABLED (1 << 1) +#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) #define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) -#define ENGINE_STATE_SUSPENDED (1 << 5) -#define EXEC_QUEUE_STATE_RESET (1 << 6) -#define ENGINE_STATE_KILLED (1 << 7) +#define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) +#define EXEC_QUEUE_STATE_RESET (1 << 6) +#define EXEC_QUEUE_STATE_KILLED (1 << 7) +#define EXEC_QUEUE_STATE_WEDGED (1 << 8) +#define EXEC_QUEUE_STATE_BANNED (1 << 9) +#define EXEC_QUEUE_STATE_CHECK_TIMEOUT (1 << 10) +#define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11) +#define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 12) +#define EXEC_QUEUE_STATE_PENDING_TDR_EXIT (1 << 13) static bool exec_queue_registered(struct xe_exec_queue *q) { @@ -77,17 +90,17 @@ static void clear_exec_queue_registered(struct xe_exec_queue *q) static bool exec_queue_enabled(struct xe_exec_queue *q) { - return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED; } static void set_exec_queue_enabled(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_ENABLED, &q->guc->state); + atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state); } static void clear_exec_queue_enabled(struct xe_exec_queue *q) { - atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state); + atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state); } static bool exec_queue_pending_enable(struct xe_exec_queue *q) @@ -130,29 +143,34 @@ static void set_exec_queue_destroyed(struct xe_exec_queue *q) atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); } +static void clear_exec_queue_destroyed(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); +} + static bool exec_queue_banned(struct xe_exec_queue *q) { - return (q->flags & EXEC_QUEUE_FLAG_BANNED); + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; } static void set_exec_queue_banned(struct xe_exec_queue *q) { - q->flags |= EXEC_QUEUE_FLAG_BANNED; + atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state); } static bool exec_queue_suspended(struct xe_exec_queue *q) { - return 
atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED; } static void set_exec_queue_suspended(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state); + atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); } static void clear_exec_queue_suspended(struct xe_exec_queue *q) { - atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state); + atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); } static bool exec_queue_reset(struct xe_exec_queue *q) @@ -167,77 +185,124 @@ static void set_exec_queue_reset(struct xe_exec_queue *q) static bool exec_queue_killed(struct xe_exec_queue *q) { - return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED; } static void set_exec_queue_killed(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_KILLED, &q->guc->state); + atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state); } -static bool exec_queue_killed_or_banned(struct xe_exec_queue *q) +static bool exec_queue_wedged(struct xe_exec_queue *q) { - return exec_queue_killed(q) || exec_queue_banned(q); + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED; } -#ifdef CONFIG_PROVE_LOCKING -static int alloc_submit_wq(struct xe_guc *guc) +static void set_exec_queue_wedged(struct xe_exec_queue *q) { - int i; + atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); +} - for (i = 0; i < NUM_SUBMIT_WQ; ++i) { - guc->submission_state.submit_wq_pool[i] = - alloc_ordered_workqueue("submit_wq", 0); - if (!guc->submission_state.submit_wq_pool[i]) - goto err_free; - } +static bool exec_queue_check_timeout(struct xe_exec_queue *q) +{ + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT; +} - return 0; +static void set_exec_queue_check_timeout(struct xe_exec_queue *q) +{ + atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); +} -err_free: - while (i) - destroy_workqueue(guc->submission_state.submit_wq_pool[--i]); +static void clear_exec_queue_check_timeout(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); +} - return -ENOMEM; +static bool exec_queue_extra_ref(struct xe_exec_queue *q) +{ + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF; } -static void free_submit_wq(struct xe_guc *guc) +static void set_exec_queue_extra_ref(struct xe_exec_queue *q) { - int i; + atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); +} + +static void clear_exec_queue_extra_ref(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); +} - for (i = 0; i < NUM_SUBMIT_WQ; ++i) - destroy_workqueue(guc->submission_state.submit_wq_pool[i]); +static bool exec_queue_pending_resume(struct xe_exec_queue *q) +{ + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME; } -static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) +static void set_exec_queue_pending_resume(struct xe_exec_queue *q) { - int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ; + atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); +} - return guc->submission_state.submit_wq_pool[idx]; +static void clear_exec_queue_pending_resume(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); } -#else -static int alloc_submit_wq(struct xe_guc *guc) + +static bool exec_queue_pending_tdr_exit(struct xe_exec_queue *q) { - return 0; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_TDR_EXIT; } -static void free_submit_wq(struct xe_guc *guc) 
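An aside on the EXEC_QUEUE_STATE_* helpers being added above: they all follow one pattern, a single atomic word per queue whose bits are set, cleared and tested with atomic OR/AND/read. A minimal user-space sketch of that pattern (struct and bit names invented here, C11 atomics standing in for the kernel's atomic_t):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define DEMO_STATE_ENABLED (1 << 1)   /* illustrative bit values only */
#define DEMO_STATE_BANNED  (1 << 9)

struct demo_queue { atomic_int state; };

static void set_enabled(struct demo_queue *q)   { atomic_fetch_or(&q->state, DEMO_STATE_ENABLED); }
static void clear_enabled(struct demo_queue *q) { atomic_fetch_and(&q->state, ~DEMO_STATE_ENABLED); }
static bool enabled(struct demo_queue *q)       { return atomic_load(&q->state) & DEMO_STATE_ENABLED; }
static void set_banned(struct demo_queue *q)    { atomic_fetch_or(&q->state, DEMO_STATE_BANNED); }
static bool banned(struct demo_queue *q)        { return atomic_load(&q->state) & DEMO_STATE_BANNED; }

int main(void)
{
	struct demo_queue q = { 0 };

	set_enabled(&q);
	set_banned(&q);
	clear_enabled(&q);
	printf("enabled=%d banned=%d\n", enabled(&q), banned(&q));
	return 0;
}

Keeping every flag in one word is what lets helpers such as exec_queue_killed_or_banned_or_wedged() test several states with a single atomic_read().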
+static void set_exec_queue_pending_tdr_exit(struct xe_exec_queue *q) { + atomic_or(EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state); +} +static void clear_exec_queue_pending_tdr_exit(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state); } -static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) +static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) { - return NULL; + return (atomic_read(&q->guc->state) & + (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED | + EXEC_QUEUE_STATE_BANNED)); } -#endif static void guc_submit_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + ret = wait_event_timeout(guc->submission_state.fini_wq, + xa_empty(&guc->submission_state.exec_queue_lookup), + HZ * 5); + + drain_workqueue(xe->destroy_wq); + + xe_gt_assert(gt, ret); xa_destroy(&guc->submission_state.exec_queue_lookup); - free_submit_wq(guc); +} + +static void guc_submit_wedged_fini(void *arg) +{ + struct xe_guc *guc = arg; + struct xe_exec_queue *q; + unsigned long index; + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + if (exec_queue_wedged(q)) { + mutex_unlock(&guc->submission_state.lock); + xe_exec_queue_put(q); + mutex_lock(&guc->submission_state.lock); + } + } + mutex_unlock(&guc->submission_state.lock); } static const struct xe_exec_queue_ops guc_exec_queue_ops; @@ -250,13 +315,25 @@ static void primelockdep(struct xe_guc *guc) fs_reclaim_acquire(GFP_KERNEL); mutex_lock(&guc->submission_state.lock); - might_lock(&guc->submission_state.suspend.lock); mutex_unlock(&guc->submission_state.lock); fs_reclaim_release(GFP_KERNEL); } -int xe_guc_submit_init(struct xe_guc *guc) +/** + * xe_guc_submit_init() - Initialize GuC submission. + * @guc: the &xe_guc to initialize + * @num_ids: number of GuC context IDs to use + * + * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all + * GuC context IDs supported by the GuC firmware should be used for submission. + * + * Only VF drivers will have to provide explicit number of GuC context IDs + * that they can use for submission. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) { struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); @@ -266,11 +343,7 @@ int xe_guc_submit_init(struct xe_guc *guc) if (err) return err; - err = xe_guc_id_mgr_init(&guc->submission_state.idm, ~0); - if (err) - return err; - - err = alloc_submit_wq(guc); + err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids); if (err) return err; @@ -278,14 +351,80 @@ int xe_guc_submit_init(struct xe_guc *guc) xa_init(&guc->submission_state.exec_queue_lookup); - spin_lock_init(&guc->submission_state.suspend.lock); - guc->submission_state.suspend.context = dma_fence_context_alloc(1); + init_waitqueue_head(&guc->submission_state.fini_wq); primelockdep(guc); + guc->submission_state.initialized = true; + return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); } +/* + * Given that we want to guarantee enough RCS throughput to avoid missing + * frames, we set the yield policy to 20% of each 80ms interval. 
+ */ +#define RC_YIELD_DURATION 80 /* in ms */ +#define RC_YIELD_RATIO 20 /* in percent */ +static u32 *emit_render_compute_yield_klv(u32 *emit) +{ + *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD); + *emit++ = RC_YIELD_DURATION; + *emit++ = RC_YIELD_RATIO; + + return emit; +} + +#define SCHEDULING_POLICY_MAX_DWORDS 16 +static int guc_init_global_schedule_policy(struct xe_guc *guc) +{ + u32 data[SCHEDULING_POLICY_MAX_DWORDS]; + u32 *emit = data; + u32 count = 0; + int ret; + + if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) + return 0; + + *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; + + if (CCS_MASK(guc_to_gt(guc))) + emit = emit_render_compute_yield_klv(emit); + + count = emit - data; + if (count > 1) { + xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS); + + ret = xe_guc_ct_send_block(&guc->ct, data, count); + if (ret < 0) { + xe_gt_err(guc_to_gt(guc), + "failed to enable GuC scheduling policies: %pe\n", + ERR_PTR(ret)); + return ret; + } + } + + return 0; +} + +int xe_guc_submit_enable(struct xe_guc *guc) +{ + int ret; + + ret = guc_init_global_schedule_policy(guc); + if (ret) + return ret; + + guc->submission_state.enabled = true; + + return 0; +} + +void xe_guc_submit_disable(struct xe_guc *guc) +{ + guc->submission_state.enabled = false; +} + static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) { int i; @@ -297,12 +436,14 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa xe_guc_id_mgr_release_locked(&guc->submission_state.idm, q->guc->id, q->width); + + if (xa_empty(&guc->submission_state.exec_queue_lookup)) + wake_up(&guc->submission_state.fini_wq); } static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) { int ret; - void *ptr; int i; /* @@ -322,12 +463,10 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) q->guc->id = ret; for (i = 0; i < q->width; ++i) { - ptr = xa_store(&guc->submission_state.exec_queue_lookup, - q->guc->id + i, q, GFP_NOWAIT); - if (IS_ERR(ptr)) { - ret = PTR_ERR(ptr); + ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup, + q->guc->id + i, q, GFP_NOWAIT)); + if (ret) goto err_release; - } } return 0; @@ -384,6 +523,7 @@ static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) +MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY) #undef MAKE_EXEC_QUEUE_POLICY_ADD static const int xe_exec_queue_prio_to_guc[] = { @@ -396,17 +536,22 @@ static const int xe_exec_queue_prio_to_guc[] = { static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) { struct exec_queue_policy policy; - struct xe_device *xe = guc_to_xe(guc); enum xe_exec_queue_priority prio = q->sched_props.priority; u32 timeslice_us = q->sched_props.timeslice_us; + u32 slpc_exec_queue_freq_req = 0; u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; - xe_assert(xe, exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + + if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY) + slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE; __guc_exec_queue_policy_start_klv(&policy, q->guc->id); __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); 
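A note on guc_init_global_schedule_policy() just above: it builds the H2G message with an emit cursor walking a fixed u32 buffer, derives the message length from the cursor, and only sends if at least one KLV was appended after the action word. A standalone sketch of that buffer-assembly idiom (the action and tag values below are placeholders, not real GuC ABI constants):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_ACTION_UPDATE_POLICIES 0x1001u /* placeholder action id */
#define DEMO_KLV_YIELD_TAG          0x2002u /* placeholder KLV tag */
#define DEMO_MAX_DWORDS             16

static uint32_t *emit_yield_klv(uint32_t *emit, uint32_t duration_ms, uint32_t ratio_pct)
{
	*emit++ = DEMO_KLV_YIELD_TAG; /* a real KLV packs key and length here */
	*emit++ = duration_ms;
	*emit++ = ratio_pct;
	return emit;
}

int main(void)
{
	uint32_t data[DEMO_MAX_DWORDS];
	uint32_t *emit = data;
	size_t count;

	*emit++ = DEMO_ACTION_UPDATE_POLICIES;
	emit = emit_yield_klv(emit, 80, 20);   /* 20% yield per 80 ms window */

	count = emit - data;                   /* dwords actually written */
	assert(count <= DEMO_MAX_DWORDS);      /* mirrors the bound check in the patch */
	if (count > 1)                         /* only send when a KLV follows the action */
		printf("would send %zu dwords\n", count);
	return 0;
}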
__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); + __guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy, + slpc_exec_queue_freq_req); xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, __guc_exec_queue_policy_action_size(&policy), 0, 0); @@ -430,17 +575,16 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ field_, val_) -static void __register_mlrc_engine(struct xe_guc *guc, - struct xe_exec_queue *q, - struct guc_ctxt_registration_info *info) +static void __register_mlrc_exec_queue(struct xe_guc *guc, + struct xe_exec_queue *q, + struct guc_ctxt_registration_info *info) { #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) - struct xe_device *xe = guc_to_xe(guc); u32 action[MAX_MLRC_REG_SIZE]; int len = 0; int i; - xe_assert(xe, xe_exec_queue_is_parallel(q)); + xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q)); action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; action[len++] = info->flags; @@ -457,20 +601,29 @@ static void __register_mlrc_engine(struct xe_guc *guc, action[len++] = info->hwlrca_hi; for (i = 1; i < q->width; ++i) { - struct xe_lrc *lrc = q->lrc + i; + struct xe_lrc *lrc = q->lrc[i]; action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); } - xe_assert(xe, len <= MAX_MLRC_REG_SIZE); + /* explicitly checks some fields that we might fixup later */ + xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]); + xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]); + xe_gt_assert(guc_to_gt(guc), q->width == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]); + xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]); + xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE); #undef MAX_MLRC_REG_SIZE xe_guc_ct_send(&guc->ct, action, len, 0, 0); } -static void __register_engine(struct xe_guc *guc, - struct guc_ctxt_registration_info *info) +static void __register_exec_queue(struct xe_guc *guc, + struct guc_ctxt_registration_info *info) { u32 action[] = { XE_GUC_ACTION_REGISTER_CONTEXT, @@ -487,17 +640,26 @@ static void __register_engine(struct xe_guc *guc, info->hwlrca_hi, }; + /* explicitly checks some fields that we might fixup later */ + xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == + action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]); + xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == + action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]); + xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == + action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]); + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); } -static void register_engine(struct xe_exec_queue *q) +static void register_exec_queue(struct xe_exec_queue *q, int ctx_type) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - struct xe_lrc *lrc = q->lrc; + struct xe_lrc *lrc = q->lrc[0]; struct guc_ctxt_registration_info info; - xe_assert(xe, !exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT); memset(&info, 0, sizeof(info)); info.context_idx = q->guc->id; @@ -505,7 +667,8 @@ static void register_engine(struct xe_exec_queue *q) info.engine_submit_mask = q->logical_mask; 
info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); - info.flags = CONTEXT_REGISTRATION_FLAG_KMD; + info.flags = CONTEXT_REGISTRATION_FLAG_KMD | + FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type); if (xe_exec_queue_is_parallel(q)) { u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); @@ -538,9 +701,9 @@ static void register_engine(struct xe_exec_queue *q) set_exec_queue_registered(q); trace_xe_exec_queue_register(q); if (xe_exec_queue_is_parallel(q)) - __register_mlrc_engine(guc, q, &info); + __register_mlrc_exec_queue(guc, q, &info); else - __register_engine(guc, &info); + __register_exec_queue(guc, &info); init_policies(guc, q); } @@ -549,16 +712,21 @@ static u32 wq_space_until_wrap(struct xe_exec_queue *q) return (WQ_SIZE - q->guc->wqi_tail); } +static bool vf_recovery(struct xe_guc *guc) +{ + return xe_gt_recovery_pending(guc_to_gt(guc)); +} + static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc); + struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); unsigned int sleep_period_ms = 1; #define AVAILABLE_SPACE \ CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE) - if (wqi_size > AVAILABLE_SPACE) { + if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) { try_again: q->guc->wqi_head = parallel_read(xe, map, wq_desc.head); if (wqi_size > AVAILABLE_SPACE) { @@ -581,13 +749,13 @@ static int wq_noop_append(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc); + struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1; if (wq_wait_for_space(q, wq_space_until_wrap(q))) return -ENODEV; - xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw)); + xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw)); parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | @@ -601,7 +769,7 @@ static void wq_item_append(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc); + struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); #define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */ u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)]; u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32); @@ -617,48 +785,50 @@ static void wq_item_append(struct xe_exec_queue *q) wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | FIELD_PREP(WQ_LEN_MASK, len_dw); - wqi[i++] = xe_lrc_descriptor(q->lrc); + wqi[i++] = xe_lrc_descriptor(q->lrc[0]); wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | - FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64)); + FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64)); wqi[i++] = 0; for (j = 1; j < q->width; ++j) { - struct xe_lrc *lrc = q->lrc + j; + struct xe_lrc *lrc = q->lrc[j]; wqi[i++] = lrc->ring.tail / sizeof(u64); } - xe_assert(xe, i == wqi_size / sizeof(u32)); + xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32)); iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, wq[q->guc->wqi_tail / sizeof(u32)])); xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); q->guc->wqi_tail += wqi_size; - xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE); + xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE); xe_device_wmb(xe); - 
map = xe_lrc_parallel_map(q->lrc); + map = xe_lrc_parallel_map(q->lrc[0]); parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); } #define RESUME_PENDING ~0x0ull -static void submit_exec_queue(struct xe_exec_queue *q) +static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job) { struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct xe_lrc *lrc = q->lrc; + struct xe_lrc *lrc = q->lrc[0]; u32 action[3]; u32 g2h_len = 0; u32 num_g2h = 0; int len = 0; bool extra_submit = false; - xe_assert(xe, exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - if (xe_exec_queue_is_parallel(q)) - wq_item_append(q); - else - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + if (!job->restore_replay || job->last_replay) { + if (xe_exec_queue_is_parallel(q)) + wq_item_append(q); + else + xe_lrc_set_ring_tail(lrc, lrc->ring.tail); + job->last_replay = false; + } if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) return; @@ -700,30 +870,33 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) struct xe_sched_job *job = to_xe_sched_job(drm_job); struct xe_exec_queue *q = job->q; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - bool lr = xe_exec_queue_is_lr(q); + bool lr = xe_exec_queue_is_lr(q), killed_or_banned_or_wedged = + exec_queue_killed_or_banned_or_wedged(q); - xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || - exec_queue_banned(q) || exec_queue_suspended(q)); + xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || + exec_queue_banned(q) || exec_queue_suspended(q)); trace_xe_sched_job_run(job); - if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) { + if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) { if (!exec_queue_registered(q)) - register_engine(q); - if (!lr) /* LR jobs are emitted in the exec IOCTL */ + register_exec_queue(q, GUC_CONTEXT_NORMAL); + if (!job->restore_replay) q->ring_ops->emit_job(job); - submit_exec_queue(q); + submit_exec_queue(q, job); + job->restore_replay = false; } - if (lr) { - xe_sched_job_set_error(job, -EOPNOTSUPP); - return NULL; - } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) { - return job->fence; - } else { - return dma_fence_get(job->fence); - } + /* + * We don't care about job-fence ordering in LR VMs because these fences + * are never exported; they are used solely to keep jobs on the pending + * list. Once a queue enters an error state, there's no need to track + * them. 
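For the work-queue accounting shown earlier in this hunk (wq_wait_for_space() and wq_noop_append()): free space comes from the circ_buf formula over the software head/tail copies, and an item that would cross the end of the buffer is preceded by a NOOP that pads up to the wrap point. A rough user-space model, assuming a power-of-two buffer size and with all names invented here:

#include <stdio.h>

#define DEMO_WQ_SIZE 1024u /* bytes, power of two, stand-in for WQ_SIZE */

/* same arithmetic as CIRC_SPACE() in linux/circ_buf.h: wr is the producer
 * offset, rd the consumer offset, one byte stays unused so a full buffer
 * can be told apart from an empty one */
#define DEMO_CIRC_SPACE(wr, rd, size) (((rd) - (wr) - 1) & ((size) - 1))

struct demo_wq { unsigned int head, tail; };   /* head = consumer, tail = producer */

int main(void)
{
	struct demo_wq wq = { .head = 512, .tail = 960 };
	unsigned int item_bytes = 96;
	unsigned int until_wrap = DEMO_WQ_SIZE - wq.tail;

	printf("free space: %u bytes\n", DEMO_CIRC_SPACE(wq.tail, wq.head, DEMO_WQ_SIZE));

	if (until_wrap < item_bytes) {
		/* a NOOP entry covering the remaining until_wrap bytes would be
		 * written here, then the next item starts again at offset 0 */
		wq.tail = 0;
	}
	printf("tail after wrap handling: %u\n", wq.tail);
	return 0;
}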
+ */ + if (killed_or_banned_or_wedged && lr) + xe_sched_job_set_error(job, -ECANCELED); + + return job->fence; } static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) @@ -734,7 +907,7 @@ static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) xe_sched_job_put(job); } -static int guc_read_stopped(struct xe_guc *guc) +int xe_guc_read_stopped(struct xe_guc *guc) { return atomic_read(&guc->submission_state.stopped); } @@ -750,20 +923,24 @@ static void disable_scheduling_deregister(struct xe_guc *guc, struct xe_exec_queue *q) { MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); - struct xe_device *xe = guc_to_xe(guc); int ret; set_min_preemption_timeout(guc, q); smp_rmb(); - ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret) { + ret = wait_event_timeout(guc->ct.wq, + (!exec_queue_pending_enable(q) && + !exec_queue_pending_disable(q)) || + xe_guc_read_stopped(guc) || + vf_recovery(guc), + HZ * 5); + if (!ret && !vf_recovery(guc)) { struct xe_gpu_scheduler *sched = &q->guc->sched; - drm_warn(&xe->drm, "Pending enable failed to respond"); + xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n"); xe_sched_submission_start(sched); xe_gt_reset_async(q->gt); - xe_sched_tdr_queue_imm(sched); + if (!xe_exec_queue_is_lr(q)) + xe_sched_tdr_queue_imm(sched); return; } @@ -781,55 +958,6 @@ static void disable_scheduling_deregister(struct xe_guc *guc, G2H_LEN_DW_DEREGISTER_CONTEXT, 2); } -static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p); - -#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE) -static void simple_error_capture(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct drm_printer p = drm_err_printer(&xe->drm, NULL); - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - u32 adj_logical_mask = q->logical_mask; - u32 width_mask = (0x1 << q->width) - 1; - int i; - bool cookie; - - if (q->vm && !q->vm->error_capture.capture_once) { - q->vm->error_capture.capture_once = true; - cookie = dma_fence_begin_signalling(); - for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { - if (adj_logical_mask & BIT(i)) { - adj_logical_mask |= width_mask << i; - i += q->width; - } else { - ++i; - } - } - - if (xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL)) - xe_gt_info(guc_to_gt(guc), - "failed to get forcewake for error capture"); - xe_guc_ct_print(&guc->ct, &p, true); - guc_exec_queue_print(q, &p); - for_each_hw_engine(hwe, guc_to_gt(guc), id) { - if (hwe->class != q->hwe->class || - !(BIT(hwe->logical_instance) & adj_logical_mask)) - continue; - xe_hw_engine_print(hwe, &p); - } - xe_analyze_vm(&p, q->vm, q->gt->info.id); - xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); - dma_fence_end_signalling(cookie); - } -} -#else -static void simple_error_capture(struct xe_exec_queue *q) -{ -} -#endif - static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); @@ -844,18 +972,79 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) xe_sched_tdr_queue_imm(&q->guc->sched); } +/** + * xe_guc_submit_wedge() - Wedge GuC submission + * @guc: the GuC object + * + * Save exec queue's registered with GuC state by taking a ref to each queue. + * Register a DRMM handler to drop refs upon driver unload. 
+ */ +void xe_guc_submit_wedge(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_exec_queue *q; + unsigned long index; + int err; + + xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); + + /* + * If device is being wedged even before submission_state is + * initialized, there's nothing to do here. + */ + if (!guc->submission_state.initialized) + return; + + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, + guc_submit_wedged_fini, guc); + if (err) { + xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; " + "Although device is wedged.\n"); + return; + } + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + if (xe_exec_queue_get_unless_zero(q)) + set_exec_queue_wedged(q); + mutex_unlock(&guc->submission_state.lock); +} + +static bool guc_submit_hint_wedged(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + + if (xe->wedged.mode != 2) + return false; + + if (xe_device_wedged(xe)) + return true; + + xe_device_declare_wedged(xe); + + return true; +} + static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) { struct xe_guc_exec_queue *ge = container_of(w, struct xe_guc_exec_queue, lr_tdr); struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); struct xe_gpu_scheduler *sched = &ge->sched; + struct xe_sched_job *job; + bool wedged = false; + + xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); + + if (vf_recovery(guc)) + return; - xe_assert(xe, xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); @@ -870,7 +1059,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) * xe_guc_deregister_done_handler() which treats it as an unexpected * state. 
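On the wedge path added above (xe_guc_submit_wedge() together with guc_submit_wedged_fini() earlier in the diff): every still-live queue gets an extra reference when the device is declared wedged, and a cleanup action registered against the device drops exactly those references at unload. A toy, single-threaded sketch of that bookkeeping (all names invented, no locking):

#include <stdbool.h>
#include <stdio.h>

struct toy_queue { int refcount; bool wedged; };

static bool get_unless_zero(struct toy_queue *q)
{
	if (!q->refcount)
		return false;
	q->refcount++;
	return true;
}

static void put(struct toy_queue *q)
{
	if (!--q->refcount)
		printf("queue freed\n");
}

static void wedge_all(struct toy_queue *qs, int n)
{
	for (int i = 0; i < n; i++)
		if (get_unless_zero(&qs[i]))   /* skip queues already on their way out */
			qs[i].wedged = true;
}

static void wedged_fini(struct toy_queue *qs, int n)
{
	for (int i = 0; i < n; i++)
		if (qs[i].wedged)
			put(&qs[i]);           /* drop the extra wedge reference */
}

int main(void)
{
	struct toy_queue qs[2] = { { .refcount = 1 }, { .refcount = 0 } };

	wedge_all(qs, 2);
	wedged_fini(qs, 2);
	return 0;
}

The extra reference keeps each wedged queue alive so its state is preserved until driver unload, matching the kerneldoc above.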
*/ - if (exec_queue_registered(q) && !exec_queue_destroyed(q)) { + if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) { struct xe_guc *guc = exec_queue_to_guc(q); int ret; @@ -883,16 +1072,147 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) */ ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_disable(q) || - guc_read_stopped(guc), HZ * 5); + xe_guc_read_stopped(guc) || + vf_recovery(guc), HZ * 5); + if (vf_recovery(guc)) + return; + if (!ret) { - drm_warn(&xe->drm, "Schedule disable failed to respond"); + xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n", + q->guc->id); + xe_devcoredump(q, NULL, "Schedule disable failed to respond, guc_id=%d\n", + q->guc->id); xe_sched_submission_start(sched); xe_gt_reset_async(q->gt); return; } } + if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0])) + xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id); + + xe_hw_fence_irq_stop(q->fence_irq); + xe_sched_submission_start(sched); + + spin_lock(&sched->base.job_list_lock); + list_for_each_entry(job, &sched->base.pending_list, drm.list) + xe_sched_job_set_error(job, -ECANCELED); + spin_unlock(&sched->base.job_list_lock); + + xe_hw_fence_irq_start(q->fence_irq); +} + +#define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100) + +static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) +{ + struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q)); + u32 ctx_timestamp, ctx_job_timestamp; + u32 timeout_ms = q->sched_props.job_timeout_ms; + u32 diff; + u64 running_time_ms; + + if (!xe_sched_job_started(job)) { + xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started", + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), + q->guc->id); + + return xe_sched_invalidate_job(job, 2); + } + + ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0])); + ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); + + /* + * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch + * possible overflows with a high timeout. 
+ */ + xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); + + diff = ctx_timestamp - ctx_job_timestamp; + + /* + * Ensure timeout is within 5% to account for an GuC scheduling latency + */ + running_time_ms = + ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff)); + + xe_gt_dbg(gt, + "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x", + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), + q->guc->id, running_time_ms, timeout_ms, diff); + + return running_time_ms >= timeout_ms; +} + +static void enable_scheduling(struct xe_exec_queue *q) +{ + MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); + struct xe_guc *guc = exec_queue_to_guc(q); + int ret; + + xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); + + set_exec_queue_pending_enable(q); + set_exec_queue_enabled(q); + trace_xe_exec_queue_scheduling_enable(q); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + + ret = wait_event_timeout(guc->ct.wq, + !exec_queue_pending_enable(q) || + xe_guc_read_stopped(guc) || + vf_recovery(guc), HZ * 5); + if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) { + xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond"); + set_exec_queue_banned(q); + xe_gt_reset_async(q->gt); + if (!xe_exec_queue_is_lr(q)) + xe_sched_tdr_queue_imm(&q->guc->sched); + } +} + +static void disable_scheduling(struct xe_exec_queue *q, bool immediate) +{ + MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); + struct xe_guc *guc = exec_queue_to_guc(q); + + xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); + + if (immediate) + set_min_preemption_timeout(guc, q); + clear_exec_queue_enabled(q); + set_exec_queue_pending_disable(q); + trace_xe_exec_queue_scheduling_disable(q); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); +} + +static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) +{ + u32 action[] = { + XE_GUC_ACTION_DEREGISTER_CONTEXT, + q->guc->id, + }; + + xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); + + set_exec_queue_destroyed(q); + trace_xe_exec_queue_deregister(q); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_DEREGISTER_CONTEXT, 1); } static enum drm_gpu_sched_stat @@ -902,60 +1222,91 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) struct xe_sched_job *tmp_job; struct xe_exec_queue *q = job->q; struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q)); + struct xe_guc *guc = exec_queue_to_guc(q); + const char *process_name = "no process"; + struct xe_device *xe = guc_to_xe(guc); + unsigned int fw_ref; int err = -ETIME; + pid_t pid = -1; int i = 0; + bool wedged = false, skip_timeout_check; + + xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_lr(q)); /* * TDR has fired before free job worker. Common if exec queue - * immediately closed after last fence signaled. + * immediately closed after last fence signaled. 
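The math in check_timeout() above deserves a closer look: the two timestamps are 32-bit GT clock samples, so the plain unsigned subtraction stays correct across one counter wrap, and the resulting running time is inflated by 5% before being compared against the job timeout. A hand-rolled sketch of that arithmetic, assuming the 19.2 MHz clock mentioned in the comment (helper names are mine):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_CLOCK_HZ 19200000ull   /* assumed 19.2 MHz GT timestamp clock */

static uint64_t ticks_to_ms(uint32_t ticks)
{
	return (uint64_t)ticks * 1000 / DEMO_CLOCK_HZ;
}

static uint64_t adjust_five_percent(uint64_t ms)
{
	return ms * 105 / 100;          /* same scaling as ADJUST_FIVE_PERCENT() */
}

static bool timed_out(uint32_t ctx_timestamp, uint32_t ctx_job_timestamp, uint32_t timeout_ms)
{
	uint32_t diff = ctx_timestamp - ctx_job_timestamp; /* wrap-safe u32 subtraction */

	return adjust_five_percent(ticks_to_ms(diff)) >= timeout_ms;
}

int main(void)
{
	/* job started near the top of the counter; the counter has since wrapped */
	uint32_t job_start = 0xfffff000u, now = 0x02000000u;

	printf("timed out: %d\n", timed_out(now, job_start, 5000));
	return 0;
}

This is also why the patch asserts timeout_ms < 100 * MSEC_PER_SEC: at 19.2 MHz the 32-bit counter wraps after roughly 223 seconds, so a very large timeout could alias across more than one wrap.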
Add back to pending + * list so job can be freed and kick scheduler ensuring free job is not + * lost. */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - guc_exec_queue_free_job(drm_job); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) || + vf_recovery(guc)) + return DRM_GPU_SCHED_STAT_NO_HANG; - return DRM_GPU_SCHED_STAT_NOMINAL; - } + /* Kill the run_job entry point */ + xe_sched_submission_stop(sched); - drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", - xe_sched_job_seqno(job), q->guc->id, q->flags); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, - "Kernel-submitted job timed out\n"); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), - "VM job timed out on non-killed execqueue\n"); + /* Must check all state after stopping scheduler */ + skip_timeout_check = exec_queue_reset(q) || + exec_queue_killed_or_banned_or_wedged(q) || + exec_queue_destroyed(q); - simple_error_capture(q); - xe_devcoredump(job); + /* + * If devcoredump not captured and GuC capture for the job is not ready + * do manual capture first and decide later if we need to use it + */ + if (!exec_queue_killed(q) && !xe->devcoredump.captured && + !xe_guc_capture_get_matching_and_lock(q)) { + /* take force wake before engine register manual capture */ + fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) + xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n"); - trace_xe_sched_job_timedout(job); + xe_engine_snapshot_capture_for_queue(q); - /* Kill the run_job entry point */ - xe_sched_submission_stop(sched); + xe_force_wake_put(gt_to_fw(q->gt), fw_ref); + } /* - * Kernel jobs should never fail, nor should VM jobs if they do - * somethings has gone wrong and the GT needs a reset + * XXX: Sampling timeout doesn't work in wedged mode as we have to + * modify scheduling state to read timestamp. We could read the + * timestamp from a register to accumulate current running time but this + * doesn't work for SRIOV. For now assuming timeouts in wedged mode are + * genuine timeouts. */ - if (q->flags & EXEC_QUEUE_FLAG_KERNEL || - (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) { - if (!xe_sched_invalidate_job(job, 2)) { - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - xe_gt_reset_async(q->gt); - goto out; - } - } + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); - /* Engine state now stable, disable scheduling if needed */ - if (exec_queue_registered(q)) { - struct xe_guc *guc = exec_queue_to_guc(q); + /* Engine state now stable, disable scheduling to check timestamp */ + if (!wedged && exec_queue_registered(q)) { int ret; if (exec_queue_reset(q)) err = -EIO; - set_exec_queue_banned(q); + if (!exec_queue_destroyed(q)) { - xe_exec_queue_get(q); - disable_scheduling_deregister(guc, q); + /* + * Wait for any pending G2H to flush out before + * modifying state + */ + ret = wait_event_timeout(guc->ct.wq, + (!exec_queue_pending_enable(q) && + !exec_queue_pending_disable(q)) || + xe_guc_read_stopped(guc) || + vf_recovery(guc), HZ * 5); + if (vf_recovery(guc)) + goto handle_vf_resume; + if (!ret || xe_guc_read_stopped(guc)) + goto trigger_reset; + + /* + * Flag communicates to G2H handler that schedule + * disable originated from a timeout check. The G2H then + * avoid triggering cleanup or deregistering the exec + * queue. 
+ */ + set_exec_queue_check_timeout(q); + disable_scheduling(q, skip_timeout_check); } /* @@ -969,17 +1320,81 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) smp_rmb(); ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_disable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret || guc_read_stopped(guc)) { - drm_warn(&xe->drm, "Schedule disable failed to respond"); - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); + xe_guc_read_stopped(guc) || + vf_recovery(guc), HZ * 5); + if (vf_recovery(guc)) + goto handle_vf_resume; + if (!ret || xe_guc_read_stopped(guc)) { +trigger_reset: + if (!ret) + xe_gt_warn(guc_to_gt(guc), + "Schedule disable failed to respond, guc_id=%d", + q->guc->id); + xe_devcoredump(q, job, + "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d", + q->guc->id, ret, xe_guc_read_stopped(guc)); + set_exec_queue_extra_ref(q); + xe_exec_queue_get(q); /* GT reset owns this */ + set_exec_queue_banned(q); xe_gt_reset_async(q->gt); xe_sched_tdr_queue_imm(sched); - goto out; + goto rearm; } } + /* + * Check if job is actually timed out, if so restart job execution and TDR + */ + if (!wedged && !skip_timeout_check && !check_timeout(q, job) && + !exec_queue_reset(q) && exec_queue_registered(q)) { + clear_exec_queue_check_timeout(q); + goto sched_enable; + } + + if (q->vm && q->vm->xef) { + process_name = q->vm->xef->process_name; + pid = q->vm->xef->pid; + } + + if (!exec_queue_killed(q)) + xe_gt_notice(guc_to_gt(guc), + "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), + q->guc->id, q->flags, process_name, pid); + + trace_xe_sched_job_timedout(job); + + if (!exec_queue_killed(q)) + xe_devcoredump(q, job, + "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), + q->guc->id, q->flags); + + /* + * Kernel jobs should never fail, nor should VM jobs if they do + * somethings has gone wrong and the GT needs a reset + */ + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, + "Kernel-submitted job timed out\n"); + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), + "VM job timed out on non-killed execqueue\n"); + if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || + (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { + if (!xe_sched_invalidate_job(job, 2)) { + clear_exec_queue_check_timeout(q); + xe_gt_reset_async(q->gt); + goto rearm; + } + } + + /* Finish cleaning up exec queue via deregister */ + set_exec_queue_banned(q); + if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) { + set_exec_queue_extra_ref(q); + xe_exec_queue_get(q); + __deregister_exec_queue(guc, q); + } + /* Stop fence signaling */ xe_hw_fence_irq_stop(q->fence_irq); @@ -989,6 +1404,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) */ xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); + xe_guc_exec_queue_trigger_cleanup(q); /* Mark all outstanding jobs as bad, thus completing them */ @@ -1000,41 +1416,73 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) /* Start fence signaling */ xe_hw_fence_irq_start(q->fence_irq); -out: - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; + +sched_enable: + set_exec_queue_pending_tdr_exit(q); + enable_scheduling(q); +rearm: + /* + * XXX: Ideally want to adjust timeout based on current execution time + * but there is not currently an easy way to do in DRM scheduler. 
With + * some thought, do this in a follow up. + */ + xe_sched_submission_start(sched); +handle_vf_resume: + return DRM_GPU_SCHED_STAT_NO_HANG; } -static void __guc_exec_queue_fini_async(struct work_struct *w) +static void guc_exec_queue_fini(struct xe_exec_queue *q) +{ + struct xe_guc_exec_queue *ge = q->guc; + struct xe_guc *guc = exec_queue_to_guc(q); + + release_guc_id(guc, q); + xe_sched_entity_fini(&ge->entity); + xe_sched_fini(&ge->sched); + + /* + * RCU free due sched being exported via DRM scheduler fences + * (timeline name). + */ + kfree_rcu(ge, rcu); +} + +static void __guc_exec_queue_destroy_async(struct work_struct *w) { struct xe_guc_exec_queue *ge = - container_of(w, struct xe_guc_exec_queue, fini_async); + container_of(w, struct xe_guc_exec_queue, destroy_async); struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); + xe_pm_runtime_get(guc_to_xe(guc)); trace_xe_exec_queue_destroy(q); if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); - release_guc_id(guc, q); - xe_sched_entity_fini(&ge->entity); - xe_sched_fini(&ge->sched); + /* Confirm no work left behind accessing device structures */ + cancel_delayed_work_sync(&ge->sched.base.work_tdr); - kfree(ge); xe_exec_queue_fini(q); + + xe_pm_runtime_put(guc_to_xe(guc)); } -static void guc_exec_queue_fini_async(struct xe_exec_queue *q) +static void guc_exec_queue_destroy_async(struct xe_exec_queue *q) { - INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + + INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async); /* We must block on kernel engines so slabs are empty on driver unload */ - if (q->flags & EXEC_QUEUE_FLAG_PERMANENT) - __guc_exec_queue_fini_async(&q->guc->fini_async); + if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) + __guc_exec_queue_destroy_async(&q->guc->destroy_async); else - queue_work(system_wq, &q->guc->fini_async); + queue_work(xe->destroy_wq, &q->guc->destroy_async); } -static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) +static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q) { /* * Might be done from within the GPU scheduler, need to do async as we @@ -1043,27 +1491,36 @@ static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) * this we and don't really care when everything is fini'd, just that it * is. */ - guc_exec_queue_fini_async(q); + guc_exec_queue_destroy_async(q); } static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) { struct xe_exec_queue *q = msg->private_data; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); + xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); trace_xe_exec_queue_cleanup_entity(q); - if (exec_queue_registered(q)) + /* + * Expected state transitions for cleanup: + * - If the exec queue is registered and GuC firmware is running, we must first + * disable scheduling and deregister the queue to ensure proper teardown and + * resource release in the GuC, then destroy the exec queue on driver side. + * - If the GuC is already stopped (e.g., during driver unload or GPU reset), + * we cannot expect a response for the deregister request. In this case, + * it is safe to directly destroy the exec queue on driver side, as the GuC + * will not process further requests and all resources must be cleaned up locally. 
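Regarding the destroy path reworked above (guc_exec_queue_destroy_async() queueing onto xe->destroy_wq, permanent or wedged queues torn down synchronously, and guc_submit_fini() draining the workqueue): the shape of that pattern can be shown with a toy, single-threaded stand-in for a workqueue (everything below is invented for illustration):

#include <stdbool.h>
#include <stdio.h>

struct toy_work { void (*fn)(void *arg); void *arg; };

static struct toy_work toy_destroy_wq[8];   /* stand-in for the dedicated destroy workqueue */
static int toy_destroy_wq_len;

static void toy_queue_work(void (*fn)(void *), void *arg)
{
	toy_destroy_wq[toy_destroy_wq_len++] = (struct toy_work){ fn, arg };
}

static void toy_drain_workqueue(void)
{
	for (int i = 0; i < toy_destroy_wq_len; i++)
		toy_destroy_wq[i].fn(toy_destroy_wq[i].arg);
	toy_destroy_wq_len = 0;
}

struct toy_queue { const char *name; bool permanent; };

static void do_destroy(void *arg)
{
	struct toy_queue *q = arg;
	printf("destroying %s\n", q->name);
}

static void toy_queue_destroy(struct toy_queue *q)
{
	if (q->permanent)
		do_destroy(q);                  /* synchronous: unload must not race this */
	else
		toy_queue_work(do_destroy, q);  /* deferred to the destroy workqueue */
}

int main(void)
{
	struct toy_queue kernel_q = { "kernel queue", true };
	struct toy_queue user_q = { "user queue", false };

	toy_queue_destroy(&kernel_q);
	toy_queue_destroy(&user_q);
	toy_drain_workqueue();                  /* like draining destroy_wq in guc_submit_fini() */
	return 0;
}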
+ */ + if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw)) disable_scheduling_deregister(guc, q); else - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) { - return !exec_queue_killed_or_banned(q) && exec_queue_registered(q); + return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); } static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) @@ -1076,18 +1533,37 @@ static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *ms kfree(msg); } -static void suspend_fence_signal(struct xe_exec_queue *q) +static void __suspend_fence_signal(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) || - guc_read_stopped(guc)); - xe_assert(xe, q->guc->suspend_pending); + if (!q->guc->suspend_pending) + return; + + WRITE_ONCE(q->guc->suspend_pending, false); - q->guc->suspend_pending = false; - smp_wmb(); - wake_up(&q->guc->suspend_wait); + /* + * We use a GuC shared wait queue for VFs because the VF resfix start + * interrupt must be able to wake all instances of suspend_wait. This + * prevents the VF migration worker from being starved during + * scheduling. + */ + if (IS_SRIOV_VF(xe)) + wake_up_all(&guc->ct.wq); + else + wake_up(&q->guc->suspend_wait); +} + +static void suspend_fence_signal(struct xe_exec_queue *q) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + + xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) || + xe_guc_read_stopped(guc)); + xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending); + + __suspend_fence_signal(q); } static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) @@ -1097,11 +1573,11 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && exec_queue_enabled(q)) { - wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING || - guc_read_stopped(guc)); + wait_event(guc->ct.wq, vf_recovery(guc) || + ((q->guc->resume_time != RESUME_PENDING || + xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q))); - if (!guc_read_stopped(guc)) { - MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); + if (!xe_guc_read_stopped(guc)) { s64 since_resume_ms = ktime_ms_delta(ktime_get(), q->guc->resume_time); @@ -1112,12 +1588,7 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) msleep(wait_ms); set_exec_queue_suspended(q); - clear_exec_queue_enabled(q); - set_exec_queue_pending_disable(q); - trace_xe_exec_queue_scheduling_disable(q); - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + disable_scheduling(q, false); } } else if (q->guc->suspend_pending) { set_exec_queue_suspended(q); @@ -1128,19 +1599,14 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) { struct xe_exec_queue *q = msg->private_data; - struct xe_guc *guc = exec_queue_to_guc(q); if (guc_exec_queue_allowed_to_change_state(q)) { - MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); - - q->guc->resume_time = RESUME_PENDING; clear_exec_queue_suspended(q); - set_exec_queue_pending_enable(q); - set_exec_queue_enabled(q); - trace_xe_exec_queue_scheduling_enable(q); - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 
1); + if (!exec_queue_enabled(q)) { + q->guc->resume_time = RESUME_PENDING; + set_exec_queue_pending_resume(q); + enable_scheduling(q); + } } else { clear_exec_queue_suspended(q); } @@ -1150,9 +1616,14 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) #define SET_SCHED_PROPS 2 #define SUSPEND 3 #define RESUME 4 +#define OPCODE_MASK 0xf +#define MSG_LOCKED BIT(8) +#define MSG_HEAD BIT(9) static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) { + struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data)); + trace_xe_sched_msg_recv(msg); switch (msg->opcode) { @@ -1171,6 +1642,8 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) default: XE_WARN_ON("Unknown message type"); } + + xe_pm_runtime_put(xe); } static const struct drm_sched_backend_ops drm_sched_ops = { @@ -1187,12 +1660,11 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); struct xe_guc_exec_queue *ge; long timeout; - int err; + int err, i; - xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); + xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc))); ge = kzalloc(sizeof(*ge), GFP_KERNEL); if (!ge) @@ -1200,13 +1672,16 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) q->guc = ge; ge->q = q; + init_rcu_head(&ge->rcu); init_waitqueue_head(&ge->suspend_wait); + for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) + INIT_LIST_HEAD(&ge->static_msgs[i].link); + timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(q->sched_props.job_timeout_ms); err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, - get_submit_wq(guc), - q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64, + NULL, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64, timeout, guc_to_gt(guc)->ordered_wq, NULL, q->name, gt_to_xe(q->gt)->drm.dev); if (err) @@ -1228,7 +1703,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) q->entity = &ge->entity; - if (guc_read_stopped(guc)) + if (xe_guc_read_stopped(guc) || vf_recovery(guc)) xe_sched_stop(sched); mutex_unlock(&guc->submission_state.lock); @@ -1254,31 +1729,61 @@ static void guc_exec_queue_kill(struct xe_exec_queue *q) { trace_xe_exec_queue_kill(q); set_exec_queue_killed(q); + __suspend_fence_signal(q); xe_guc_exec_queue_trigger_cleanup(q); } static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, u32 opcode) { + xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); + INIT_LIST_HEAD(&msg->link); - msg->opcode = opcode; + msg->opcode = opcode & OPCODE_MASK; msg->private_data = q; trace_xe_sched_msg_add(msg); - xe_sched_add_msg(&q->guc->sched, msg); + if (opcode & MSG_HEAD) + xe_sched_add_msg_head(&q->guc->sched, msg); + else if (opcode & MSG_LOCKED) + xe_sched_add_msg_locked(&q->guc->sched, msg); + else + xe_sched_add_msg(&q->guc->sched, msg); +} + +static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q, + struct xe_sched_msg *msg, + u32 opcode) +{ + if (!list_empty(&msg->link)) + return; + + guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD); +} + +static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, + struct xe_sched_msg *msg, + u32 opcode) +{ + if (!list_empty(&msg->link)) + return false; + + guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); + + return true; } #define STATIC_MSG_CLEANUP 0 #define STATIC_MSG_SUSPEND 1 #define STATIC_MSG_RESUME 2 -static void guc_exec_queue_fini(struct xe_exec_queue *q) +static 
void guc_exec_queue_destroy(struct xe_exec_queue *q)
 {
 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
 
-	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT))
+	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
 		guc_exec_queue_add_msg(q, msg, CLEANUP);
 	else
-		__guc_exec_queue_fini(exec_queue_to_guc(q), q);
+		__guc_exec_queue_destroy(exec_queue_to_guc(q), q);
 }
 
 static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
@@ -1286,7 +1791,8 @@ static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
 {
 	struct xe_sched_msg *msg;
 
-	if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q))
+	if (q->sched_props.priority == priority ||
+	    exec_queue_killed_or_banned_or_wedged(q))
 		return 0;
 
 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
@@ -1304,7 +1810,7 @@ static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_u
 	struct xe_sched_msg *msg;
 
 	if (q->sched_props.timeslice_us == timeslice_us ||
-	    exec_queue_killed_or_banned(q))
+	    exec_queue_killed_or_banned_or_wedged(q))
 		return 0;
 
 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
@@ -1323,7 +1829,7 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
 	struct xe_sched_msg *msg;
 
 	if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
-	    exec_queue_killed_or_banned(q))
+	    exec_queue_killed_or_banned_or_wedged(q))
 		return 0;
 
 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
@@ -1338,43 +1844,83 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
 
 static int guc_exec_queue_suspend(struct xe_exec_queue *q)
 {
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
 
-	if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending)
+	if (exec_queue_killed_or_banned_or_wedged(q))
 		return -EINVAL;
 
-	q->guc->suspend_pending = true;
-	guc_exec_queue_add_msg(q, msg, SUSPEND);
+	xe_sched_msg_lock(sched);
+	if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
+		q->guc->suspend_pending = true;
+	xe_sched_msg_unlock(sched);
 
 	return 0;
 }
 
-static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
+static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
 {
 	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	int ret;
+
+	/*
+	 * Likely don't need to check exec_queue_killed() as we clear
+	 * suspend_pending upon kill, but to be paranoid about races in which
+	 * suspend_pending is set after kill, also check kill here.
+	 */
+#define WAIT_COND \
+	(!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \
+	 xe_guc_read_stopped(guc))
+
+retry:
+	if (IS_SRIOV_VF(xe))
+		ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND ||
+						       vf_recovery(guc),
+						       HZ * 5);
+	else
+		ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
+						       WAIT_COND, HZ * 5);
+
+	if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc))))
+		return -EAGAIN;
+
+	if (!ret) {
+		xe_gt_warn(guc_to_gt(guc),
+			   "Suspend fence, guc_id=%d, failed to respond",
+			   q->guc->id);
+		/* XXX: Trigger GT reset? */
+		return -ETIME;
+	} else if (IS_SRIOV_VF(xe) && !WAIT_COND) {
+		/* Corner case on RESFIX DONE where vf_recovery() changes */
+		goto retry;
+	}
+
+#undef WAIT_COND
 
-	wait_event(q->guc->suspend_wait, !q->guc->suspend_pending ||
-		   guc_read_stopped(guc));
+	return ret < 0 ? ret : 0;
 }
 
 static void guc_exec_queue_resume(struct xe_exec_queue *q)
 {
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
 	struct xe_guc *guc = exec_queue_to_guc(q);
-	struct xe_device *xe = guc_to_xe(guc);
 
-	xe_assert(xe, !q->guc->suspend_pending);
+	xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending);
 
-	guc_exec_queue_add_msg(q, msg, RESUME);
+	xe_sched_msg_lock(sched);
+	guc_exec_queue_try_add_msg(q, msg, RESUME);
+	xe_sched_msg_unlock(sched);
 }
 
 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
 {
-	return exec_queue_reset(q);
+	return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
 }
 
 /*
- * All of these functions are an abstraction layer which other parts of XE can
+ * All of these functions are an abstraction layer which other parts of Xe can
  * use to trap into the GuC backend. All of these functions, aside from init,
 * really shouldn't do much other than trap into the DRM scheduler which
 * synchronizes these operations.
@@ -1383,6 +1929,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
 	.init = guc_exec_queue_init,
 	.kill = guc_exec_queue_kill,
 	.fini = guc_exec_queue_fini,
+	.destroy = guc_exec_queue_destroy,
 	.set_priority = guc_exec_queue_set_priority,
 	.set_timeslice = guc_exec_queue_set_timeslice,
 	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
@@ -1401,17 +1948,18 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
 
 	/* Clean up lost G2H + reset engine state */
 	if (exec_queue_registered(q)) {
-		if ((exec_queue_banned(q) && exec_queue_destroyed(q)) ||
-		    xe_exec_queue_is_lr(q))
+		if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
 			xe_exec_queue_put(q);
 		else if (exec_queue_destroyed(q))
-			__guc_exec_queue_fini(guc, q);
+			__guc_exec_queue_destroy(guc, q);
 	}
 	if (q->guc->suspend_pending) {
 		set_exec_queue_suspended(q);
 		suspend_fence_signal(q);
 	}
-	atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
+	atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
+		   EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
+		   EXEC_QUEUE_STATE_SUSPENDED,
 		   &q->guc->state);
 	q->guc->resume_time = 0;
 	trace_xe_exec_queue_stop(q);
@@ -1423,15 +1971,23 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
 	 */
 	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
 		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
+		bool ban = false;
 
 		if (job) {
 			if ((xe_sched_job_started(job) &&
 			    !xe_sched_job_completed(job)) ||
 			    xe_sched_invalidate_job(job, 2)) {
 				trace_xe_sched_job_ban(job);
-				xe_sched_tdr_queue_imm(&q->guc->sched);
-				set_exec_queue_banned(q);
+				ban = true;
 			}
+		} else if (xe_exec_queue_is_lr(q) &&
+			   !xe_lrc_ring_is_idle(q->lrc[0])) {
+			ban = true;
+		}
+
+		if (ban) {
+			set_exec_queue_banned(q);
+			xe_guc_exec_queue_trigger_cleanup(q);
 		}
 	}
 }
@@ -1440,6 +1996,12 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc)
 {
 	int ret;
 
+	if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
+		return 0;
+
+	if (!guc->submission_state.initialized)
+		return 0;
+
 	/*
 	 * Using an atomic here rather than submission_state.lock as this
 	 * function can be called while holding the CT lock (engine reset
@@ -1456,21 +2018,26 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc)
 
 void xe_guc_submit_reset_wait(struct xe_guc *guc)
 {
-	wait_event(guc->ct.wq, !guc_read_stopped(guc));
+	wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
+		   !xe_guc_read_stopped(guc));
 }
 
-int xe_guc_submit_stop(struct xe_guc *guc)
+void xe_guc_submit_stop(struct xe_guc *guc)
 {
 	struct xe_exec_queue *q;
 	unsigned long index;
-	struct xe_device *xe = guc_to_xe(guc);
 
-	xe_assert(xe, guc_read_stopped(guc) == 1);
+	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
 
 	mutex_lock(&guc->submission_state.lock);
 
-	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+		/* Prevent redundant attempts to stop parallel queues */
+		if (q->guc->id != index)
+			continue;
+
 		guc_exec_queue_stop(guc, q);
+	}
 
 	mutex_unlock(&guc->submission_state.lock);
 
@@ -1479,37 +2046,202 @@ int xe_guc_submit_stop(struct xe_guc *guc)
 	 * creation which is protected by guc->submission_state.lock.
 	 */
 
-	return 0;
+}
+
+static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
+							struct xe_exec_queue *q)
+{
+	bool pending_enable, pending_disable, pending_resume;
+
+	pending_enable = exec_queue_pending_enable(q);
+	pending_resume = exec_queue_pending_resume(q);
+
+	if (pending_enable && pending_resume) {
+		q->guc->needs_resume = true;
+		xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d",
+			  q->guc->id);
+	}
+
+	if (pending_enable && !pending_resume &&
+	    !exec_queue_pending_tdr_exit(q)) {
+		clear_exec_queue_registered(q);
+		if (xe_exec_queue_is_lr(q))
+			xe_exec_queue_put(q);
+		xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
+			  q->guc->id);
+	}
+
+	if (pending_enable) {
+		clear_exec_queue_enabled(q);
+		clear_exec_queue_pending_resume(q);
+		clear_exec_queue_pending_tdr_exit(q);
+		clear_exec_queue_pending_enable(q);
+		xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
+			  q->guc->id);
+	}
+
+	if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
+		clear_exec_queue_destroyed(q);
+		if (exec_queue_extra_ref(q))
+			xe_exec_queue_put(q);
+		else
+			q->guc->needs_cleanup = true;
+		clear_exec_queue_extra_ref(q);
+		xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
+			  q->guc->id);
+	}
+
+	pending_disable = exec_queue_pending_disable(q);
+
+	if (pending_disable && exec_queue_suspended(q)) {
+		clear_exec_queue_suspended(q);
+		q->guc->needs_suspend = true;
+		xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d",
+			  q->guc->id);
+	}
+
+	if (pending_disable) {
+		if (!pending_enable)
+			set_exec_queue_enabled(q);
+		clear_exec_queue_pending_disable(q);
+		clear_exec_queue_check_timeout(q);
+		xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
+			  q->guc->id);
+	}
+
+	q->guc->resume_time = 0;
+}
+
+static void lrc_parallel_clear(struct xe_lrc *lrc)
+{
+	struct xe_device *xe = gt_to_xe(lrc->gt);
+	struct iosys_map map = xe_lrc_parallel_map(lrc);
+	int i;
+
+	for (i = 0; i < WQ_SIZE / sizeof(u32); ++i)
+		parallel_write(xe, map, wq[i],
+			       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
+			       FIELD_PREP(WQ_LEN_MASK, 0));
+}
+
+/*
+ * This function is quite complex but it is the only real way to ensure no
+ * state is lost during VF resume flows. The function scans the queue state,
+ * makes adjustments as needed, and queues jobs / messages to be replayed upon
+ * unpause.
+ */
+static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+	struct xe_sched_job *job;
+	int i;
+
+	lockdep_assert_held(&guc->submission_state.lock);
+
+	/* Stop scheduling + flush any DRM scheduler operations */
+	xe_sched_submission_stop(sched);
+	if (xe_exec_queue_is_lr(q))
+		cancel_work_sync(&q->guc->lr_tdr);
+	else
+		cancel_delayed_work_sync(&sched->base.work_tdr);
+
+	guc_exec_queue_revert_pending_state_change(guc, q);
+
+	if (xe_exec_queue_is_parallel(q)) {
+		/* Pairs with WRITE_ONCE in __xe_exec_queue_init */
+		struct xe_lrc *lrc = READ_ONCE(q->lrc[0]);
+
+		/*
+		 * NOP existing WQ commands that may contain stale GGTT
+		 * addresses. These will be replayed upon unpause. The hardware
+		 * seems to get confused if the WQ head/tail pointers are
+		 * adjusted.
+		 */
+		if (lrc)
+			lrc_parallel_clear(lrc);
+	}
+
+	job = xe_sched_first_pending_job(sched);
+	if (job) {
+		job->restore_replay = true;
+
+		/*
+		 * Adjust software tail so jobs submitted overwrite previous
+		 * position in ring buffer with new GGTT addresses.
+		 */
+		for (i = 0; i < q->width; ++i)
+			q->lrc[i]->ring.tail = job->ptrs[i].head;
+	}
+}
+
+/**
+ * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
+ * @guc: the &xe_guc struct instance whose scheduler is to be disabled
+ */
+void xe_guc_submit_pause(struct xe_guc *guc)
+{
+	struct xe_exec_queue *q;
+	unsigned long index;
+
+	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));
+
+	mutex_lock(&guc->submission_state.lock);
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+		/* Prevent redundant attempts to stop parallel queues */
+		if (q->guc->id != index)
+			continue;
+
+		guc_exec_queue_pause(guc, q);
+	}
+	mutex_unlock(&guc->submission_state.lock);
+}
 
 static void guc_exec_queue_start(struct xe_exec_queue *q)
 {
 	struct xe_gpu_scheduler *sched = &q->guc->sched;
 
-	if (!exec_queue_killed_or_banned(q)) {
+	if (!exec_queue_killed_or_banned_or_wedged(q)) {
+		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
 		int i;
 
 		trace_xe_exec_queue_resubmit(q);
-		for (i = 0; i < q->width; ++i)
-			xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail);
+		if (job) {
+			for (i = 0; i < q->width; ++i) {
+				/*
+				 * The GuC context is unregistered at this
+				 * point in time; adjusting the software ring
+				 * tail ensures jobs are rewritten in their
+				 * original placement, and adjusting the LRC
+				 * tail ensures the newly loaded GuC / contexts
+				 * only view the LRC tail increasing as jobs
+				 * are written out.
+				 */
+				q->lrc[i]->ring.tail = job->ptrs[i].head;
+				xe_lrc_set_ring_tail(q->lrc[i],
+						     xe_lrc_ring_head(q->lrc[i]));
+			}
+		}
 
 		xe_sched_resubmit_jobs(sched);
 	}
 
 	xe_sched_submission_start(sched);
+	xe_sched_submission_resume_tdr(sched);
 }
 
 int xe_guc_submit_start(struct xe_guc *guc)
 {
 	struct xe_exec_queue *q;
 	unsigned long index;
-	struct xe_device *xe = guc_to_xe(guc);
 
-	xe_assert(xe, guc_read_stopped(guc) == 1);
+	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
 
 	mutex_lock(&guc->submission_state.lock);
 	atomic_dec(&guc->submission_state.stopped);
-	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+		/* Prevent redundant attempts to start parallel queues */
+		if (q->guc->id != index)
+			continue;
+
 		guc_exec_queue_start(q);
+	}
 	mutex_unlock(&guc->submission_state.lock);
 
 	wake_up_all(&guc->ct.wq);
@@ -1517,25 +2249,170 @@ int xe_guc_submit_start(struct xe_guc *guc)
 	return 0;
 }
 
+static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
+					   struct xe_exec_queue *q)
+{
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+	struct xe_sched_job *job = NULL;
+	bool restore_replay = false;
+
+	list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+		restore_replay |= job->restore_replay;
+		if (restore_replay) {
+			xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
+				  q->guc->id, xe_sched_job_seqno(job));
+
+			q->ring_ops->emit_job(job);
+			job->restore_replay = true;
+		}
+	}
+
+	if (job)
+		job->last_replay = true;
+}
+
+/**
+ * xe_guc_submit_unpause_prepare - Prepare unpause submission tasks on given GuC.
+ * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause
+ */
+void xe_guc_submit_unpause_prepare(struct xe_guc *guc)
+{
+	struct xe_exec_queue *q;
+	unsigned long index;
+
+	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));
+
+	mutex_lock(&guc->submission_state.lock);
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+		/* Prevent redundant attempts to stop parallel queues */
+		if (q->guc->id != index)
+			continue;
+
+		guc_exec_queue_unpause_prepare(guc, q);
+	}
+	mutex_unlock(&guc->submission_state.lock);
+}
+
+static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q)
+{
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+	struct xe_sched_msg *msg;
+
+	if (q->guc->needs_cleanup) {
+		msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
+
+		guc_exec_queue_add_msg(q, msg, CLEANUP);
+		q->guc->needs_cleanup = false;
+	}
+
+	if (q->guc->needs_suspend) {
+		msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
+
+		xe_sched_msg_lock(sched);
+		guc_exec_queue_try_add_msg_head(q, msg, SUSPEND);
+		xe_sched_msg_unlock(sched);
+
+		q->guc->needs_suspend = false;
+	}
+
+	/*
+	 * The resume must be in the message queue before the suspend, as it is
+	 * not possible for a resume to be issued if a suspend is pending, but
+	 * the inverse is possible.
+	 */
+	if (q->guc->needs_resume) {
+		msg = q->guc->static_msgs + STATIC_MSG_RESUME;
+
+		xe_sched_msg_lock(sched);
+		guc_exec_queue_try_add_msg_head(q, msg, RESUME);
+		xe_sched_msg_unlock(sched);
+
+		q->guc->needs_resume = false;
+	}
+}
+
+static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+	bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q);
+
+	lockdep_assert_held(&guc->submission_state.lock);
+
+	xe_sched_resubmit_jobs(sched);
+	guc_exec_queue_replay_pending_state_change(q);
+	xe_sched_submission_start(sched);
+	if (needs_tdr)
+		xe_guc_exec_queue_trigger_cleanup(q);
+	xe_sched_submission_resume_tdr(sched);
+}
+
+/**
+ * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
+ * @guc: the &xe_guc struct instance whose scheduler is to be enabled
+ */
+void xe_guc_submit_unpause(struct xe_guc *guc)
+{
+	struct xe_exec_queue *q;
+	unsigned long index;
+
+	mutex_lock(&guc->submission_state.lock);
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+		/*
+		 * Prevent redundant attempts to stop parallel queues, or queues
+		 * created after resfix done.
+		 */
+		if (q->guc->id != index ||
+		    !READ_ONCE(q->guc->sched.base.pause_submit))
+			continue;
+
+		guc_exec_queue_unpause(guc, q);
+	}
+	mutex_unlock(&guc->submission_state.lock);
+}
+
+/**
+ * xe_guc_submit_pause_abort - Abort all paused submission tasks on given GuC.
+ * @guc: the &xe_guc struct instance whose scheduler is to be aborted
+ */
+void xe_guc_submit_pause_abort(struct xe_guc *guc)
+{
+	struct xe_exec_queue *q;
+	unsigned long index;
+
+	mutex_lock(&guc->submission_state.lock);
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+		struct xe_gpu_scheduler *sched = &q->guc->sched;
+
+		/* Prevent redundant attempts to stop parallel queues */
+		if (q->guc->id != index)
+			continue;
+
+		xe_sched_submission_start(sched);
+		if (exec_queue_killed_or_banned_or_wedged(q))
+			xe_guc_exec_queue_trigger_cleanup(q);
+	}
+	mutex_unlock(&guc->submission_state.lock);
+}
+
 static struct xe_exec_queue *
 g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
 {
-	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
 	struct xe_exec_queue *q;
 
 	if (unlikely(guc_id >= GUC_ID_MAX)) {
-		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
+		xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
 		return NULL;
 	}
 
 	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
 	if (unlikely(!q)) {
-		drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id);
+		xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
 		return NULL;
 	}
 
-	xe_assert(xe, guc_id >= q->guc->id);
-	xe_assert(xe, guc_id < (q->guc->id + q->width));
+	xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
+	xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));
 
 	return q;
 }
@@ -1547,44 +2424,73 @@ static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
 		q->guc->id,
 	};
 
+	xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
+	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
+	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
+	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
+
 	trace_xe_exec_queue_deregister(q);
 
 	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
 }
 
-static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q)
+static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
+			      u32 runnable_state)
 {
 	trace_xe_exec_queue_scheduling_done(q);
 
-	if (exec_queue_pending_enable(q)) {
+	if (runnable_state == 1) {
+		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));
+
 		q->guc->resume_time = ktime_get();
+		clear_exec_queue_pending_resume(q);
+		clear_exec_queue_pending_tdr_exit(q);
 		clear_exec_queue_pending_enable(q);
 		smp_wmb();
 		wake_up_all(&guc->ct.wq);
 	} else {
-		clear_exec_queue_pending_disable(q);
+		bool check_timeout = exec_queue_check_timeout(q);
+
+		xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
+		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));
+
 		if (q->guc->suspend_pending) {
 			suspend_fence_signal(q);
+			clear_exec_queue_pending_disable(q);
 		} else {
-			if (exec_queue_banned(q)) {
+			if (exec_queue_banned(q) || check_timeout) {
 				smp_wmb();
 				wake_up_all(&guc->ct.wq);
 			}
-			deregister_exec_queue(guc, q);
+			if (!check_timeout && exec_queue_destroyed(q)) {
+				/*
+				 * Make sure to clear the pending_disable only
+				 * after sampling the destroyed state. We want
+				 * to ensure we don't trigger the unregister too
+				 * early with something intending to only
+				 * disable scheduling. The caller doing the
+				 * destroy must wait for an ongoing
+				 * pending_disable before marking as destroyed.
+				 */
+				clear_exec_queue_pending_disable(q);
+				deregister_exec_queue(guc, q);
+			} else {
+				clear_exec_queue_pending_disable(q);
+			}
 		}
 	}
 }
 
 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 {
-	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_exec_queue *q;
-	u32 guc_id = msg[0];
+	u32 guc_id, runnable_state;
 
-	if (unlikely(len < 2)) {
-		drm_err(&xe->drm, "Invalid length %u", len);
+	if (unlikely(len < 2))
 		return -EPROTO;
-	}
+
+	guc_id = msg[0];
+	runnable_state = msg[1];
 
 	q = g2h_exec_queue_lookup(guc, guc_id);
 	if (unlikely(!q))
@@ -1592,12 +2498,14 @@ int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 
 	if (unlikely(!exec_queue_pending_enable(q) &&
 	    !exec_queue_pending_disable(q))) {
-		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
-			atomic_read(&q->guc->state));
+		xe_gt_err(guc_to_gt(guc),
+			  "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
+			  atomic_read(&q->guc->state), q->guc->id,
+			  runnable_state);
 		return -EPROTO;
 	}
 
-	handle_sched_done(guc, q);
+	handle_sched_done(guc, q, runnable_state);
 
 	return 0;
 }
@@ -1608,22 +2516,21 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
 
 	clear_exec_queue_registered(q);
 
-	if (exec_queue_banned(q) || xe_exec_queue_is_lr(q))
+	if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
 		xe_exec_queue_put(q);
 	else
-		__guc_exec_queue_fini(guc, q);
+		__guc_exec_queue_destroy(guc, q);
 }
 
 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 {
-	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_exec_queue *q;
-	u32 guc_id = msg[0];
+	u32 guc_id;
 
-	if (unlikely(len < 1)) {
-		drm_err(&xe->drm, "Invalid length %u", len);
+	if (unlikely(len < 1))
 		return -EPROTO;
-	}
+
+	guc_id = msg[0];
 
 	q = g2h_exec_queue_lookup(guc, guc_id);
 	if (unlikely(!q))
@@ -1631,8 +2538,9 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 
 	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
 	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
-		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
-			atomic_read(&q->guc->state));
+		xe_gt_err(guc_to_gt(guc),
+			  "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
+			  atomic_read(&q->guc->state), q->guc->id);
 		return -EPROTO;
 	}
 
@@ -1643,22 +2551,21 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
 {
-	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
 	struct xe_exec_queue *q;
-	u32 guc_id = msg[0];
+	u32 guc_id;
 
-	if (unlikely(len < 1)) {
-		drm_err(&xe->drm, "Invalid length %u", len);
+	if (unlikely(len < 1))
 		return -EPROTO;
-	}
+
+	guc_id = msg[0];
 
 	q = g2h_exec_queue_lookup(guc, guc_id);
 	if (unlikely(!q))
 		return -EPROTO;
 
-	drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id);
-
-	/* FIXME: Do error capture, most likely async */
+	xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
+		   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
 
 	trace_xe_exec_queue_reset(q);
 
@@ -1669,34 +2576,88 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	 * guc_exec_queue_timedout_job.
 	 */
 	set_exec_queue_reset(q);
-	if (!exec_queue_banned(q))
+	if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
 		xe_guc_exec_queue_trigger_cleanup(q);
 
 	return 0;
 }
 
+/*
+ * xe_guc_error_capture_handler - Handler of GuC captured message
+ * @guc: The GuC object
+ * @msg: Pointer to the message
+ * @len: The message length
+ *
+ * When GuC captured data is ready, GuC will send message
+ * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host; this function is called
+ * first to check the status before processing the data that comes with the
+ * message.
+ *
+ * Returns: error code. 0 if success
+ */
+int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	u32 status;
+
+	if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN))
+		return -EPROTO;
+
+	status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
+	if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
+		xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space");
+
+	xe_guc_capture_process(guc);
+
+	return 0;
+}
+
 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
 					       u32 len)
 {
-	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
 	struct xe_exec_queue *q;
-	u32 guc_id = msg[0];
+	u32 guc_id;
+	u32 type = XE_GUC_CAT_ERR_TYPE_INVALID;
 
-	if (unlikely(len < 1)) {
-		drm_err(&xe->drm, "Invalid length %u", len);
+	if (unlikely(!len || len > 2))
 		return -EPROTO;
+
+	guc_id = msg[0];
+
+	if (len == 2)
+		type = msg[1];
+
+	if (guc_id == GUC_ID_UNKNOWN) {
+		/*
+		 * GuC uses GUC_ID_UNKNOWN if it cannot map the CAT fault to any
+		 * PF/VF context. In such a case only the PF will be notified
+		 * about that fault.
+		 */
+		xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n");
+		return 0;
 	}
 
 	q = g2h_exec_queue_lookup(guc, guc_id);
 	if (unlikely(!q))
 		return -EPROTO;
 
-	drm_dbg(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
+	/*
+	 * The type is HW-defined and changes based on platform, so we don't
+	 * decode it in the kernel and only check if it is valid.
	 * See bspec 54047 and 72187 for details.
+	 */
+	if (type != XE_GUC_CAT_ERR_TYPE_INVALID)
+		xe_gt_dbg(gt,
+			  "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d",
+			  type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
+	else
+		xe_gt_dbg(gt,
+			  "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d",
+			  xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
+
 	trace_xe_exec_queue_memory_cat_error(q);
 
 	/* Treat the same as engine reset */
 	set_exec_queue_reset(q);
-	if (!exec_queue_banned(q))
+	if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
 		xe_guc_exec_queue_trigger_cleanup(q);
 
 	return 0;
@@ -1704,24 +2665,22 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
 
 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
 {
-	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
 	u8 guc_class, instance;
 	u32 reason;
 
-	if (unlikely(len != 3)) {
-		drm_err(&xe->drm, "Invalid length %u", len);
+	if (unlikely(len != 3))
 		return -EPROTO;
-	}
 
 	guc_class = msg[0];
 	instance = msg[1];
 	reason = msg[2];
 
 	/* Unexpected failure of a hardware feature, log an actual error */
-	drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
-		guc_class, instance, reason);
+	xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X",
+		  guc_class, instance, reason);
 
-	xe_gt_reset_async(guc_to_gt(guc));
+	xe_gt_reset_async(gt);
 
 	return 0;
 }
@@ -1732,7 +2691,7 @@ guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
 {
 	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_device *xe = guc_to_xe(guc);
-	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
+	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
 	int i;
 
 	snapshot->guc.wqi_head = q->guc->wqi_head;
@@ -1812,7 +2771,7 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
 
 	if (snapshot->lrc) {
 		for (i = 0; i < q->width; ++i) {
-			struct xe_lrc *lrc = q->lrc + i;
+			struct xe_lrc *lrc = q->lrc[i];
 
 			snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
 		}
@@ -1886,7 +2845,7 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
 	if (!snapshot)
 		return;
 
-	drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
+	drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
 	drm_printf(p, "\tName: %s\n", snapshot->name);
 	drm_printf(p, "\tClass: %d\n", snapshot->class);
 	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
@@ -1949,6 +2908,34 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
 }
 
 /**
+ * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
+ * @q: Execution queue
+ * @ctx_type: Type of the context
+ *
+ * This function registers the execution queue with the GuC. Special context
+ * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
+ * are only applicable for IGPU and in the VF.
+ * Submits the execution queue to the GuC after registering it.
+ *
+ * Return: None.
+ */
+void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+
+	xe_gt_assert(gt, IS_SRIOV_VF(xe));
+	xe_gt_assert(gt, !IS_DGFX(xe));
+	xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
+		     ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
+	xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0));
+
+	register_exec_queue(q, ctx_type);
+	enable_scheduling(q);
+}
+
+/**
  * xe_guc_submit_print - GuC Submit Print.
 * @guc: GuC.
 * @p: drm_printer where it will be printed out.
@@ -1968,3 +2955,32 @@ void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
 		guc_exec_queue_print(q, p);
 	mutex_unlock(&guc->submission_state.lock);
 }
+
+/**
+ * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all
+ * exec queues registered to given GuC.
+ * @guc: the &xe_guc struct instance
+ * @scratch: scratch buffer to be used as temporary storage
+ *
+ * Returns: zero on success, negative error code on failure.
+ */
+int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
+{
+	struct xe_exec_queue *q;
+	unsigned long index;
+	int err = 0;
+
+	mutex_lock(&guc->submission_state.lock);
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+		/* Prevent redundant attempts to stop parallel queues */
+		if (q->guc->id != index)
+			continue;
+
+		err = xe_exec_queue_contexts_hwsp_rebase(q, scratch);
+		if (err)
+			break;
+	}
+	mutex_unlock(&guc->submission_state.lock);
+
+	return err;
+}
