diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_hw_engine.c')
| -rw-r--r-- | drivers/gpu/drm/xe/xe_hw_engine.c | 171 |
1 files changed, 126 insertions, 45 deletions
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 1557acee3523..6a9e2a4272dd 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -8,19 +8,23 @@ #include <linux/nospec.h> #include <drm/drm_managed.h> +#include <drm/drm_print.h> #include <uapi/drm/xe_drm.h> +#include <generated/xe_wa_oob.h> #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "regs/xe_irq_regs.h" #include "xe_assert.h" #include "xe_bo.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_execlist.h" #include "xe_force_wake.h" #include "xe_gsc.h" #include "xe_gt.h" #include "xe_gt_ccs_mode.h" +#include "xe_gt_clock.h" #include "xe_gt_printk.h" #include "xe_gt_mcr.h" #include "xe_gt_topology.h" @@ -324,6 +328,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) { u32 ccs_mask = xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE); + u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE); if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask) xe_mmio_write32(&hwe->gt->mmio, RCU_MODE, @@ -332,24 +337,35 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), xe_bo_ggtt_addr(hwe->hwsp)); - xe_hw_engine_mmio_write32(hwe, RING_MODE(0), - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); + + if (xe_device_has_msix(gt_to_xe(hwe->gt))) + ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE); + xe_hw_engine_mmio_write32(hwe, RING_MODE(0), ring_mode); xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), _MASKED_BIT_DISABLE(STOP_RING)); xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); } -static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, +static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { + /* + * Xe3p no longer supports load balance mode, so "fixed cslice" mode + * is automatic and no RCU_MODE programming is required. + */ + if (GRAPHICS_VER(gt_to_xe(gt)) >= 35) + return false; + return xe_gt_ccs_mode_enabled(gt) && - xe_rtp_match_first_render_or_compute(gt, hwe); + xe_rtp_match_first_render_or_compute(xe, gt, hwe); } -static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt, +static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_device *xe, + const struct xe_gt *gt, const struct xe_hw_engine *hwe) { - if (GRAPHICS_VER(gt_to_xe(gt)) < 20) + if (GRAPHICS_VER(xe) < 20) return false; if (hwe->class != XE_ENGINE_CLASS_COMPUTE && @@ -383,12 +399,6 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) blit_cctl_val, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - /* Use Fixed slice CCS mode */ - { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), - XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), - XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, - RCU_MODE_FIXED_SLICE_CCS_MODE)) - }, /* Disable WMTP if HW doesn't support it */ { XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"), XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)), @@ -397,10 +407,9 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) PREEMPT_GPGPU_THREAD_GROUP_LEVEL)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE) }, - {} }; - xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_setup, ARRAY_SIZE(lrc_setup), &hwe->reg_lrc); } static void @@ -419,7 +428,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) * Bspec: 72161 */ const u8 mocs_write_idx = gt->mocs.uc_index; - const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && + const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) && (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ? gt->mocs.wb_index : gt->mocs.uc_index; u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | @@ -456,10 +465,15 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - {} + /* Use Fixed slice CCS mode */ + { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), + XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), + XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, + RCU_MODE_FIXED_SLICE_CCS_MODE)) + }, }; - xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr); } static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance) @@ -563,6 +577,33 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, xe_reg_whitelist_process_engine(hwe); } +static void adjust_idledly(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + u32 idledly, maxcnt; + u32 idledly_units_ps = 8 * gt->info.timestamp_base; + u32 maxcnt_units_ns = 640; + bool inhibit_switch = 0; + + if (!IS_SRIOV_VF(gt_to_xe(hwe->gt)) && XE_GT_WA(gt, 16023105232)) { + idledly = xe_mmio_read32(>->mmio, RING_IDLEDLY(hwe->mmio_base)); + maxcnt = xe_mmio_read32(>->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base)); + + inhibit_switch = idledly & INHIBIT_SWITCH_UNTIL_PREEMPTED; + idledly = REG_FIELD_GET(IDLE_DELAY, idledly); + idledly = DIV_ROUND_CLOSEST(idledly * idledly_units_ps, 1000); + maxcnt = REG_FIELD_GET(IDLE_WAIT_TIME, maxcnt); + maxcnt *= maxcnt_units_ns; + + if (xe_gt_WARN_ON(gt, idledly >= maxcnt || inhibit_switch)) { + idledly = DIV_ROUND_CLOSEST(((maxcnt - 1) * maxcnt_units_ns), + idledly_units_ps); + idledly = DIV_ROUND_CLOSEST(idledly, 1000); + xe_mmio_write32(>->mmio, RING_IDLEDLY(hwe->mmio_base), idledly); + } + } +} + static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, enum xe_hw_engine_id id) { @@ -574,7 +615,6 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, xe_gt_assert(gt, gt->info.engine_mask & BIT(id)); xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); - xe_reg_sr_apply_whitelist(hwe); hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K, XE_BO_FLAG_VRAM_IF_DGFX(tile) | @@ -604,6 +644,9 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) gt->usm.reserved_bcs_instance = hwe->instance; + /* Ensure IDLEDLY is lower than MAXCNT */ + adjust_idledly(hwe); + return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe); err_hwsp: @@ -660,7 +703,7 @@ static void read_media_fuses(struct xe_gt *gt) if (!(BIT(j) & vdbox_mask)) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "vcs%u fused off\n", j); + xe_gt_info(gt, "vcs%u fused off\n", j); } } @@ -670,40 +713,63 @@ static void read_media_fuses(struct xe_gt *gt) if (!(BIT(j) & vebox_mask)) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "vecs%u fused off\n", j); + xe_gt_info(gt, "vecs%u fused off\n", j); } } } +static u32 infer_svccopy_from_meml3(struct xe_gt *gt) +{ + u32 meml3 = REG_FIELD_GET(MEML3_EN_MASK, + xe_mmio_read32(>->mmio, MIRROR_FUSE3)); + u32 svccopy_mask = 0; + + /* + * Each of the four meml3 bits determines the fusing of two service + * copy engines. + */ + for (int i = 0; i < 4; i++) + svccopy_mask |= (meml3 & BIT(i)) ? 0b11 << 2 * i : 0; + + return svccopy_mask; +} + +static u32 read_svccopy_fuses(struct xe_gt *gt) +{ + return REG_FIELD_GET(FUSE_SERVICE_COPY_ENABLE_MASK, + xe_mmio_read32(>->mmio, SERVICE_COPY_ENABLE)); +} + static void read_copy_fuses(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); u32 bcs_mask; - if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270) - return; - xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - bcs_mask = xe_mmio_read32(>->mmio, MIRROR_FUSE3); - bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask); + if (GRAPHICS_VER(xe) >= 35) + bcs_mask = read_svccopy_fuses(gt); + else if (GRAPHICS_VERx100(xe) == 1260) + bcs_mask = infer_svccopy_from_meml3(gt); + else + return; - /* BCS0 is always present; only BCS1-BCS8 may be fused off */ - for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) { + /* Only BCS1-BCS8 may be fused off */ + bcs_mask <<= XE_HW_ENGINE_BCS1; + for (int i = XE_HW_ENGINE_BCS1; i <= XE_HW_ENGINE_BCS8; ++i) { if (!(gt->info.engine_mask & BIT(i))) continue; - if (!(BIT(j / 2) & bcs_mask)) { + if (!(bcs_mask & BIT(i))) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "bcs%u fused off\n", j); + xe_gt_info(gt, "bcs%u fused off\n", + i - XE_HW_ENGINE_BCS0); } } } static void read_compute_fuses_from_dss(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); - /* * CCS fusing based on DSS masks only applies to platforms that can * have more than one CCS. @@ -722,14 +788,13 @@ static void read_compute_fuses_from_dss(struct xe_gt *gt) if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "ccs%u fused off\n", j); + xe_gt_info(gt, "ccs%u fused off\n", j); } } } static void read_compute_fuses_from_reg(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); u32 ccs_mask; ccs_mask = xe_mmio_read32(>->mmio, XEHP_FUSE4); @@ -741,7 +806,7 @@ static void read_compute_fuses_from_reg(struct xe_gt *gt) if ((ccs_mask & BIT(j)) == 0) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "ccs%u fused off\n", j); + xe_gt_info(gt, "ccs%u fused off\n", j); } } } @@ -756,8 +821,6 @@ static void read_compute_fuses(struct xe_gt *gt) static void check_gsc_availability(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); - if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))) return; @@ -773,7 +836,25 @@ static void check_gsc_availability(struct xe_gt *gt) xe_mmio_write32(>->mmio, GUNIT_GSC_INTR_ENABLE, 0); xe_mmio_write32(>->mmio, GUNIT_GSC_INTR_MASK, ~0); - drm_info(&xe->drm, "gsccs disabled due to lack of FW\n"); + xe_gt_dbg(gt, "GSC FW not used, disabling gsccs\n"); + } +} + +static void check_sw_disable(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + u64 sw_allowed = xe_configfs_get_engines_allowed(to_pci_dev(xe->drm.dev)); + enum xe_hw_engine_id id; + + for (id = 0; id < XE_NUM_HW_ENGINES; ++id) { + if (!(gt->info.engine_mask & BIT(id))) + continue; + + if (!(sw_allowed & BIT(id))) { + gt->info.engine_mask &= ~BIT(id); + xe_gt_info(gt, "%s disabled via configfs\n", + engine_infos[id].name); + } } } @@ -785,6 +866,7 @@ int xe_hw_engines_init_early(struct xe_gt *gt) read_copy_fuses(gt); read_compute_fuses(gt); check_gsc_availability(gt); + check_sw_disable(gt); BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN); BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX); @@ -822,14 +904,14 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) if (hwe->irq_handler) hwe->irq_handler(hwe, intr_vec); - if (intr_vec & GT_RENDER_USER_INTERRUPT) + if (intr_vec & GT_MI_USER_INTERRUPT) xe_hw_fence_irq_run(hwe->fence_irq); } /** * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine. * @hwe: Xe HW Engine. - * @job: The job object. + * @q: The exec queue object. * * This can be printed out in a later stage like during dev_coredump * analysis. @@ -838,7 +920,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) * caller, using `xe_hw_engine_snapshot_free`. */ struct xe_hw_engine_snapshot * -xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job) +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q) { struct xe_hw_engine_snapshot *snapshot; struct __guc_capture_parsed_output *node; @@ -864,15 +946,14 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job if (IS_SRIOV_VF(gt_to_xe(hwe->gt))) return snapshot; - if (job) { + if (q) { /* If got guc capture, set source to GuC */ - node = xe_guc_capture_get_matching_and_lock(job); + node = xe_guc_capture_get_matching_and_lock(q); if (node) { struct xe_device *xe = gt_to_xe(hwe->gt); struct xe_devcoredump *coredump = &xe->devcoredump; coredump->snapshot.matched_node = node; - snapshot->source = XE_ENGINE_CAPTURE_SOURCE_GUC; xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node"); return snapshot; } @@ -880,7 +961,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job /* otherwise, do manual capture */ xe_engine_manual_capture(hwe, snapshot); - snapshot->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL; xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot"); return snapshot; @@ -1013,12 +1093,13 @@ struct xe_hw_engine * xe_hw_engine_lookup(struct xe_device *xe, struct drm_xe_engine_class_instance eci) { + struct xe_gt *gt = xe_device_get_gt(xe, eci.gt_id); unsigned int idx; if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) return NULL; - if (eci.gt_id >= xe->info.gt_count) + if (!gt) return NULL; idx = array_index_nospec(eci.engine_class, |
