diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_gt.c')
| -rw-r--r-- | drivers/gpu/drm/xe/xe_gt.c | 705 |
1 files changed, 481 insertions, 224 deletions
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 491d0413de15..dbb5e7a9bc6a 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -8,15 +8,20 @@ #include <linux/minmax.h> #include <drm/drm_managed.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> +#include <generated/xe_wa_oob.h> + +#include "instructions/xe_alu_commands.h" #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_mi_commands.h" +#include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_bb.h" #include "xe_bo.h" #include "xe_device.h" +#include "xe_eu_stall.h" #include "xe_exec_queue.h" #include "xe_execlist.h" #include "xe_force_wake.h" @@ -27,14 +32,14 @@ #include "xe_gt_freq.h" #include "xe_gt_idle.h" #include "xe_gt_mcr.h" -#include "xe_gt_pagefault.h" #include "xe_gt_printk.h" #include "xe_gt_sriov_pf.h" +#include "xe_gt_sriov_vf.h" #include "xe_gt_sysfs.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" #include "xe_guc_exec_queue_types.h" #include "xe_guc_pc.h" +#include "xe_guc_submit.h" #include "xe_hw_fence.h" #include "xe_hw_engine_class_sysfs.h" #include "xe_irq.h" @@ -43,6 +48,7 @@ #include "xe_map.h" #include "xe_migrate.h" #include "xe_mmio.h" +#include "xe_pagefault.h" #include "xe_pat.h" #include "xe_pm.h" #include "xe_mocs.h" @@ -51,22 +57,37 @@ #include "xe_sa.h" #include "xe_sched_job.h" #include "xe_sriov.h" +#include "xe_tlb_inval.h" #include "xe_tuning.h" #include "xe_uc.h" +#include "xe_uc_fw.h" #include "xe_vm.h" #include "xe_wa.h" #include "xe_wopcm.h" struct xe_gt *xe_gt_alloc(struct xe_tile *tile) { + struct xe_device *xe = tile_to_xe(tile); + struct drm_device *drm = &xe->drm; + bool shared_wq = xe->info.needs_shared_vf_gt_wq && tile->primary_gt && + IS_SRIOV_VF(xe); + struct workqueue_struct *ordered_wq; struct xe_gt *gt; - gt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*gt), GFP_KERNEL); + gt = drmm_kzalloc(drm, sizeof(*gt), GFP_KERNEL); if (!gt) return ERR_PTR(-ENOMEM); gt->tile = tile; - gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); + if (shared_wq && tile->primary_gt->ordered_wq) + ordered_wq = tile->primary_gt->ordered_wq; + else + ordered_wq = drmm_alloc_ordered_workqueue(drm, "gt-ordered-wq", + WQ_MEM_RECLAIM); + if (IS_ERR(ordered_wq)) + return ERR_CAST(ordered_wq); + + gt->ordered_wq = ordered_wq; return gt; } @@ -77,59 +98,72 @@ void xe_gt_sanitize(struct xe_gt *gt) * FIXME: if xe_uc_sanitize is called here, on TGL driver will not * reload */ - gt->uc.guc.submission_state.enabled = false; + xe_guc_submit_disable(>->uc.guc); } -/** - * xe_gt_remove() - Clean up the GT structures before driver removal - * @gt: the GT object - * - * This function should only act on objects/structures that must be cleaned - * before the driver removal callback is complete and therefore can't be - * deferred to a drmm action. - */ -void xe_gt_remove(struct xe_gt *gt) +static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) { - xe_uc_remove(>->uc); + unsigned int fw_ref; + u32 reg; + + if (!XE_GT_WA(gt, 16023588340)) + return; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return; + + if (xe_gt_is_main_type(gt)) { + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); + reg |= CG_DIS_CNTLBUS; + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); + } + + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } -static void gt_fini(struct drm_device *drm, void *arg) +static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) { - struct xe_gt *gt = arg; - int i; + unsigned int fw_ref; + u32 reg; - destroy_workqueue(gt->ordered_wq); + if (!XE_GT_WA(gt, 16023588340)) + return; - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); + if (xe_gt_is_media_type(gt)) + return; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return; + + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); + reg &= ~CG_DIS_CNTLBUS; + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); + + xe_force_wake_put(gt_to_fw(gt), fw_ref); } static void gt_reset_worker(struct work_struct *w); -static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) +static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb, + long timeout_jiffies) { struct xe_sched_job *job; - struct xe_bb *bb; struct dma_fence *fence; long timeout; - bb = xe_bb_new(gt, 4, false); - if (IS_ERR(bb)) - return PTR_ERR(bb); - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); + if (IS_ERR(job)) return PTR_ERR(job); - } xe_sched_job_arm(job); fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); - timeout = dma_fence_wait_timeout(fence, false, HZ); + timeout = dma_fence_wait_timeout(fence, false, timeout_jiffies); dma_fence_put(fence); - xe_bb_free(bb, NULL); if (timeout < 0) return timeout; else if (!timeout) @@ -138,90 +172,143 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) return 0; } -/* - * Convert back from encoded value to type-safe, only to be used when reg.mcr - * is true - */ -static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg) +static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) { - return (const struct xe_reg_mcr){.__reg.raw = reg.raw }; + struct xe_bb *bb; + int ret; + + bb = xe_bb_new(gt, 4, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + ret = emit_job_sync(q, bb, HZ); + xe_bb_free(bb, NULL); + + return ret; } static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { struct xe_reg_sr *sr = &q->hwe->reg_lrc; struct xe_reg_sr_entry *entry; + int count_rmw = 0, count = 0, ret; unsigned long idx; - struct xe_sched_job *job; struct xe_bb *bb; - struct dma_fence *fence; - long timeout; - int count = 0; + size_t bb_len = 0; + u32 *cs; + + /* count RMW registers as those will be handled separately */ + xa_for_each(&sr->xa, idx, entry) { + if (entry->reg.masked || entry->clr_bits == ~0) + ++count; + else + ++count_rmw; + } + + if (count) + bb_len += count * 2 + 1; + + if (count_rmw) + bb_len += count_rmw * 20 + 7; if (q->hwe->class == XE_ENGINE_CLASS_RENDER) - /* Big enough to emit all of the context's 3DSTATE */ - bb = xe_bb_new(gt, xe_lrc_size(gt_to_xe(gt), q->hwe->class), false); - else - /* Just pick a large BB size */ - bb = xe_bb_new(gt, SZ_4K, false); + /* + * Big enough to emit all of the context's 3DSTATE via + * xe_lrc_emit_hwe_state_instructions() + */ + bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32); + xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len); + + bb = xe_bb_new(gt, bb_len, false); if (IS_ERR(bb)) return PTR_ERR(bb); - xa_for_each(&sr->xa, idx, entry) - ++count; + cs = bb->cs; if (count) { - xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name); + /* + * Emit single LRI with all non RMW regs: 1 leading dw + 2dw per + * reg + 1 + */ - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); xa_for_each(&sr->xa, idx, entry) { struct xe_reg reg = entry->reg; - struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg); u32 val; - /* - * Skip reading the register if it's not really needed - */ if (reg.masked) val = entry->clr_bits << 16; - else if (entry->clr_bits + 1) - val = (reg.mcr ? - xe_gt_mcr_unicast_read_any(gt, reg_mcr) : - xe_mmio_read32(gt, reg)) & (~entry->clr_bits); - else + else if (entry->clr_bits == ~0) val = 0; + else + continue; val |= entry->set_bits; - bb->cs[bb->len++] = reg.addr; - bb->cs[bb->len++] = val; + *cs++ = reg.addr; + *cs++ = val; xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val); } } - xe_lrc_emit_hwe_state_instructions(q, bb); + if (count_rmw) { + /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */ - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); - return PTR_ERR(job); + xa_for_each(&sr->xa, idx, entry) { + if (entry->reg.masked || entry->clr_bits == ~0) + continue; + + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; + *cs++ = entry->reg.addr; + *cs++ = CS_GPR_REG(0, 0).addr; + + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | + MI_LRI_LRM_CS_MMIO; + *cs++ = CS_GPR_REG(0, 1).addr; + *cs++ = entry->clr_bits; + *cs++ = CS_GPR_REG(0, 2).addr; + *cs++ = entry->set_bits; + + *cs++ = MI_MATH(8); + *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0); + *cs++ = CS_ALU_INSTR_LOADINV(SRCB, REG1); + *cs++ = CS_ALU_INSTR_AND; + *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU); + *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0); + *cs++ = CS_ALU_INSTR_LOAD(SRCB, REG2); + *cs++ = CS_ALU_INSTR_OR; + *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU); + + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO; + *cs++ = CS_GPR_REG(0, 0).addr; + *cs++ = entry->reg.addr; + + xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", + entry->reg.addr, entry->clr_bits, entry->set_bits); + } + + /* reset used GPR */ + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | + MI_LRI_LRM_CS_MMIO; + *cs++ = CS_GPR_REG(0, 0).addr; + *cs++ = 0; + *cs++ = CS_GPR_REG(0, 1).addr; + *cs++ = 0; + *cs++ = CS_GPR_REG(0, 2).addr; + *cs++ = 0; } - xe_sched_job_arm(job); - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); + cs = xe_lrc_emit_hwe_state_instructions(q, cs); + + bb->len = cs - bb->cs; + + ret = emit_job_sync(q, bb, HZ); - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); xe_bb_free(bb, NULL); - if (timeout < 0) - return timeout; - else if (!timeout) - return -ETIME; - return 0; + return ret; } int xe_gt_record_default_lrcs(struct xe_gt *gt) @@ -244,7 +331,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) xe_tuning_process_lrc(hwe); default_lrc = drmm_kzalloc(&xe->drm, - xe_lrc_size(xe, hwe->class), + xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL); if (!default_lrc) return -ENOMEM; @@ -283,18 +370,10 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) goto put_nop_q; } - /* Reload golden LRC to record the effect of any indirect W/A */ - err = emit_nop_job(gt, q); - if (err) { - xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), q->guc->id); - goto put_nop_q; - } - xe_map_memcpy_from(xe, default_lrc, - &q->lrc[0].bo->vmap, - xe_lrc_pphwsp_offset(&q->lrc[0]), - xe_lrc_size(xe, hwe->class)); + &q->lrc[0]->bo->vmap, + xe_lrc_pphwsp_offset(q->lrc[0]), + xe_gt_lrc_size(gt, hwe->class)); gt->default_lrc[hwe->class] = default_lrc; put_nop_q: @@ -310,6 +389,7 @@ put_exec_queue: int xe_gt_init_early(struct xe_gt *gt) { + unsigned int fw_ref; int err; if (IS_SRIOV_PF(gt_to_xe(gt))) { @@ -318,23 +398,51 @@ int xe_gt_init_early(struct xe_gt *gt) return err; } - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (IS_SRIOV_VF(gt_to_xe(gt))) { + err = xe_gt_sriov_vf_init_early(gt); + if (err) + return err; + } + + xe_reg_sr_init(>->reg_sr, "GT", gt_to_xe(gt)); + + err = xe_wa_gt_init(gt); if (err) return err; - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + err = xe_tuning_init(gt); if (err) return err; - xe_reg_sr_init(>->reg_sr, "GT", gt_to_xe(gt)); + xe_wa_process_gt_oob(gt); - err = xe_wa_init(gt); + xe_force_wake_init_gt(gt, gt_to_fw(gt)); + spin_lock_init(>->global_invl_lock); + + err = xe_gt_tlb_inval_init_early(gt); if (err) return err; - xe_wa_process_gt(gt); - xe_wa_process_oob(gt); - xe_tuning_process_gt(gt); + xe_mocs_init_early(gt); + + /* + * Only after this point can GT-specific MMIO operations + * (including things like communication with the GuC) + * be performed. + */ + xe_gt_mmio_init(gt); + + err = xe_uc_init_noalloc(>->uc); + if (err) + return err; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; + + xe_gt_mcr_init_early(gt); + xe_pat_init(gt); + xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; } @@ -350,15 +458,24 @@ static void dump_pat_on_error(struct xe_gt *gt) xe_pat_dump(gt, &p); } -static int gt_fw_domain_init(struct xe_gt *gt) +static int gt_init_with_gt_forcewake(struct xe_gt *gt) { - int err, i; + unsigned int fw_ref; + int err; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + err = xe_uc_init(>->uc); if (err) - goto err_hw_fence_irq; + goto err_force_wake; - if (!xe_gt_is_media_type(gt)) { + xe_gt_topology_init(gt); + xe_gt_mcr_init(gt); + xe_gt_enable_host_l2_vram(gt); + + if (xe_gt_is_main_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); if (err) goto err_force_wake; @@ -366,10 +483,6 @@ static int gt_fw_domain_init(struct xe_gt *gt) xe_lmtt_init(>_to_tile(gt)->sriov.pf.lmtt); } - err = xe_gt_idle_sysfs_init(>->gtidle); - if (err) - goto err_force_wake; - /* Enable per hw engine IRQs */ xe_irq_enable_hwe(gt); @@ -377,8 +490,10 @@ static int gt_fw_domain_init(struct xe_gt *gt) xe_gt_mcr_init(gt); err = xe_hw_engines_init_early(gt); - if (err) + if (err) { + dump_pat_on_error(gt); goto err_force_wake; + } err = xe_hw_engine_class_sysfs_init(gt); if (err) @@ -393,32 +508,31 @@ static int gt_fw_domain_init(struct xe_gt *gt) * Stash hardware-reported version. Since this register does not exist * on pre-MTL platforms, reading it there will (correctly) return 0. */ - gt->info.gmdid = xe_mmio_read32(gt, GMD_ID); - - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - XE_WARN_ON(err); + gt->info.gmdid = xe_mmio_read32(>->mmio, GMD_ID); + xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; err_force_wake: - dump_pat_on_error(gt); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -err_hw_fence_irq: - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); + xe_force_wake_put(gt_to_fw(gt), fw_ref); return err; } -static int all_fw_domain_init(struct xe_gt *gt) +static int gt_init_with_all_forcewake(struct xe_gt *gt) { - int err, i; + unsigned int fw_ref; + int err; - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) - goto err_hw_fence_irq; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + err = -ETIMEDOUT; + goto err_force_wake; + } xe_gt_mcr_set_implicit_defaults(gt); + xe_wa_process_gt(gt); + xe_tuning_process_gt(gt); xe_reg_sr_apply_mmio(>->reg_sr, gt); err = xe_gt_clock_init(gt); @@ -434,7 +548,11 @@ static int all_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; - if (!xe_gt_is_media_type(gt)) { + err = xe_uc_init_post_hwconfig(>->uc); + if (err) + goto err_force_wake; + + if (xe_gt_is_main_type(gt)) { /* * USM has its only SA pool to non-block behind user operations */ @@ -450,21 +568,15 @@ static int all_fw_domain_init(struct xe_gt *gt) } } - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { struct xe_tile *tile = gt_to_tile(gt); - tile->migrate = xe_migrate_init(tile); - if (IS_ERR(tile->migrate)) { - err = PTR_ERR(tile->migrate); + err = xe_migrate_init(tile->migrate); + if (err) goto err_force_wake; - } } - err = xe_uc_init_post_hwconfig(>->uc); - if (err) - goto err_force_wake; - - err = xe_uc_init_hw(>->uc); + err = xe_uc_load_hw(>->uc); if (err) goto err_force_wake; @@ -474,54 +586,38 @@ static int all_fw_domain_init(struct xe_gt *gt) xe_gt_apply_ccs_mode(gt); } - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - XE_WARN_ON(err); + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_init_hw(gt); + + xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; err_force_wake: - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); -err_hw_fence_irq: - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); + xe_force_wake_put(gt_to_fw(gt), fw_ref); return err; } -/* - * Initialize enough GT to be able to load GuC in order to obtain hwconfig and - * enable CTB communication. - */ -int xe_gt_init_hwconfig(struct xe_gt *gt) +static void xe_gt_fini(void *arg) { - int err; - - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - goto out; - - xe_gt_topology_init(gt); - xe_gt_mcr_init(gt); - xe_pat_init(gt); - - err = xe_uc_init(>->uc); - if (err) - goto out_fw; + struct xe_gt *gt = arg; + int i; - err = xe_uc_init_hwconfig(>->uc); - if (err) - goto out_fw; + if (disable_work_sync(>->reset.worker)) + /* + * If gt_reset_worker was halted from executing, take care of + * releasing the rpm reference here. + */ + xe_pm_runtime_put(gt_to_xe(gt)); - /* XXX: Fake that we pull the engine mask from hwconfig blob */ - gt->info.engine_mask = gt->info.__engine_mask; + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) + xe_hw_fence_irq_finish(>->fence_irq[i]); -out_fw: - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -out: - return err; + xe_gt_disable_host_l2_vram(gt); } int xe_gt_init(struct xe_gt *gt) @@ -536,21 +632,19 @@ int xe_gt_init(struct xe_gt *gt) xe_hw_fence_irq_init(>->fence_irq[i]); } - err = xe_gt_tlb_invalidation_init(gt); + err = devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, xe_gt_fini, gt); if (err) return err; - err = xe_gt_pagefault_init(gt); + err = xe_gt_sysfs_init(gt); if (err) return err; - xe_mocs_init_early(gt); - - err = xe_gt_sysfs_init(gt); + err = gt_init_with_gt_forcewake(gt); if (err) return err; - err = gt_fw_domain_init(gt); + err = xe_gt_idle_init(>->gtidle); if (err) return err; @@ -560,21 +654,83 @@ int xe_gt_init(struct xe_gt *gt) xe_force_wake_init_engines(gt, gt_to_fw(gt)); - err = all_fw_domain_init(gt); + err = gt_init_with_all_forcewake(gt); if (err) return err; - return drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_fini, gt); + xe_gt_record_user_engines(gt); + + err = xe_eu_stall_init(gt); + if (err) + return err; + + if (IS_SRIOV_VF(gt_to_xe(gt))) { + err = xe_gt_sriov_vf_init(gt); + if (err) + return err; + } + + return 0; +} + +/** + * xe_gt_mmio_init() - Initialize GT's MMIO access + * @gt: the GT object + * + * Initialize GT's MMIO accessor, which will be used to access registers inside + * this GT. + */ +void xe_gt_mmio_init(struct xe_gt *gt) +{ + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = tile_to_xe(tile); + + xe_mmio_init(>->mmio, tile, tile->mmio.regs, tile->mmio.regs_size); + + if (gt->info.type == XE_GT_TYPE_MEDIA) { + gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; + gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; + } else { + gt->mmio.adj_offset = 0; + gt->mmio.adj_limit = 0; + } + + if (IS_SRIOV_VF(xe)) + gt->mmio.sriov_vf_gt = gt; +} + +void xe_gt_record_user_engines(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + gt->user_engines.mask = 0; + memset(gt->user_engines.instances_per_class, 0, + sizeof(gt->user_engines.instances_per_class)); + + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + + gt->user_engines.mask |= BIT_ULL(id); + gt->user_engines.instances_per_class[hwe->class]++; + } + + xe_gt_assert(gt, (gt->user_engines.mask | gt->info.engine_mask) + == gt->info.engine_mask); } static int do_gt_reset(struct xe_gt *gt) { int err; + if (IS_SRIOV_VF(gt_to_xe(gt))) + return xe_gt_sriov_vf_reset(gt); + xe_gsc_wa_14015076503(gt, true); - xe_mmio_write32(gt, GDRST, GRDOM_FULL); - err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false); + xe_mmio_write32(>->mmio, GDRST, GRDOM_FULL); + err = xe_mmio_wait32(>->mmio, GDRST, GRDOM_FULL, 0, 5000, NULL, false); if (err) xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n", ERR_PTR(err)); @@ -584,14 +740,38 @@ static int do_gt_reset(struct xe_gt *gt) return err; } +static int vf_gt_restart(struct xe_gt *gt) +{ + int err; + + err = xe_uc_sanitize_reset(>->uc); + if (err) + return err; + + err = xe_uc_load_hw(>->uc); + if (err) + return err; + + err = xe_uc_start(>->uc); + if (err) + return err; + + return 0; +} + static int do_gt_restart(struct xe_gt *gt) { struct xe_hw_engine *hwe; enum xe_hw_engine_id id; int err; + if (IS_SRIOV_VF(gt_to_xe(gt))) + return vf_gt_restart(gt); + xe_pat_init(gt); + xe_gt_enable_host_l2_vram(gt); + xe_gt_mcr_set_implicit_defaults(gt); xe_reg_sr_apply_mmio(>->reg_sr, gt); @@ -606,36 +786,48 @@ static int do_gt_restart(struct xe_gt *gt) if (err) return err; - err = xe_uc_init_hw(>->uc); + err = xe_uc_load_hw(>->uc); if (err) return err; - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_init_hw(gt); + xe_mocs_init(gt); err = xe_uc_start(>->uc); if (err) return err; - for_each_hw_engine(hwe, gt, id) { + for_each_hw_engine(hwe, gt, id) xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); - xe_reg_sr_apply_whitelist(hwe); - } /* Get CCS mode in sync between sw/hw */ xe_gt_apply_ccs_mode(gt); + /* Restore GT freq to expected values */ + xe_gt_sanitize_freq(gt); + + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_restart(gt); + return 0; } -static int gt_reset(struct xe_gt *gt) +static void gt_reset_worker(struct work_struct *w) { + struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker); + unsigned int fw_ref; int err; + if (xe_device_wedged(gt_to_xe(gt))) + goto err_pm_put; + /* We only support GT resets with GuC submission */ if (!xe_device_uc_enabled(gt_to_xe(gt))) - return -ENODEV; + goto err_pm_put; xe_gt_info(gt, "reset started\n"); @@ -644,22 +836,24 @@ static int gt_reset(struct xe_gt *gt) goto err_fail; } - xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_sanitize(gt); - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) - goto err_msg; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + err = -ETIMEDOUT; + goto err_out; + } + + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_stop_prepare(gt); xe_uc_gucrc_disable(>->uc); xe_uc_stop_prepare(>->uc); - xe_gt_pagefault_reset(gt); + xe_pagefault_reset(gt_to_xe(gt), gt); - err = xe_uc_stop(>->uc); - if (err) - goto err_out; + xe_uc_stop(>->uc); - xe_gt_tlb_invalidation_reset(gt); + xe_tlb_inval_reset(>->tlb_inval); err = do_gt_reset(gt); if (err) @@ -669,104 +863,141 @@ static int gt_reset(struct xe_gt *gt) if (err) goto err_out; - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - XE_WARN_ON(err); + xe_force_wake_put(gt_to_fw(gt), fw_ref); + + /* Pair with get while enqueueing the work in xe_gt_reset_async() */ xe_pm_runtime_put(gt_to_xe(gt)); xe_gt_info(gt, "reset done\n"); - return 0; + return; err_out: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -err_msg: + xe_force_wake_put(gt_to_fw(gt), fw_ref); XE_WARN_ON(xe_uc_start(>->uc)); - xe_pm_runtime_put(gt_to_xe(gt)); + err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); - - gt_to_xe(gt)->needs_flr_on_fini = true; - - return err; -} - -static void gt_reset_worker(struct work_struct *w) -{ - struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker); - - gt_reset(gt); + xe_device_declare_wedged(gt_to_xe(gt)); +err_pm_put: + xe_pm_runtime_put(gt_to_xe(gt)); } void xe_gt_reset_async(struct xe_gt *gt) { - xe_gt_info(gt, "trying reset\n"); + xe_gt_info(gt, "trying reset from %ps\n", __builtin_return_address(0)); /* Don't do a reset while one is already in flight */ if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(>->uc)) return; xe_gt_info(gt, "reset queued\n"); - queue_work(gt->ordered_wq, >->reset.worker); + + /* Pair with put in gt_reset_worker() if work is enqueued */ + xe_pm_runtime_get_noresume(gt_to_xe(gt)); + if (!queue_work(gt->ordered_wq, >->reset.worker)) + xe_pm_runtime_put(gt_to_xe(gt)); } void xe_gt_suspend_prepare(struct xe_gt *gt) { - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + unsigned int fw_ref; - xe_uc_stop_prepare(>->uc); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + + xe_uc_suspend_prepare(>->uc); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } int xe_gt_suspend(struct xe_gt *gt) { + unsigned int fw_ref; int err; xe_gt_dbg(gt, "suspending\n"); xe_gt_sanitize(gt); - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) goto err_msg; err = xe_uc_suspend(>->uc); if (err) goto err_force_wake; - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_gt_idle_disable_pg(gt); + + xe_gt_disable_host_l2_vram(gt); + + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_gt_dbg(gt, "suspended\n"); return 0; -err_force_wake: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: + err = -ETIMEDOUT; +err_force_wake: + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err)); return err; } +void xe_gt_shutdown(struct xe_gt *gt) +{ + unsigned int fw_ref; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + do_gt_reset(gt); + xe_force_wake_put(gt_to_fw(gt), fw_ref); +} + +/** + * xe_gt_sanitize_freq() - Restore saved frequencies if necessary. + * @gt: the GT object + * + * Called after driver init/GSC load completes to restore GT frequencies if we + * limited them for any WAs. + */ +int xe_gt_sanitize_freq(struct xe_gt *gt) +{ + int ret = 0; + + if ((!xe_uc_fw_is_available(>->uc.gsc.fw) || + xe_uc_fw_is_loaded(>->uc.gsc.fw) || + xe_uc_fw_is_in_error_state(>->uc.gsc.fw)) && + XE_GT_WA(gt, 22019338487)) + ret = xe_guc_pc_restore_stashed_freq(>->uc.guc.pc); + + return ret; +} + int xe_gt_resume(struct xe_gt *gt) { + unsigned int fw_ref; int err; xe_gt_dbg(gt, "resuming\n"); - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) goto err_msg; err = do_gt_restart(gt); if (err) goto err_force_wake; - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_gt_idle_enable_pg(gt); + + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_gt_dbg(gt, "resumed\n"); return 0; -err_force_wake: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: + err = -ETIMEDOUT; +err_force_wake: + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err)); return err; @@ -810,3 +1041,29 @@ struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, return NULL; } + +struct xe_hw_engine *xe_gt_any_hw_engine(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) + return hwe; + + return NULL; +} + +/** + * xe_gt_declare_wedged() - Declare GT wedged + * @gt: the GT object + * + * Wedge the GT which stops all submission, saves desired debug state, and + * cleans up anything which could timeout. + */ +void xe_gt_declare_wedged(struct xe_gt *gt) +{ + xe_gt_assert(gt, gt_to_xe(gt)->wedged.mode); + + xe_uc_declare_wedged(>->uc); + xe_tlb_inval_reset(>->tlb_inval); +} |
