Diffstat (limited to 'drivers/gpu/drm/xe/xe_hw_engine.c')
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine.c | 171
1 file changed, 126 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 1557acee3523..6a9e2a4272dd 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -8,19 +8,23 @@
#include <linux/nospec.h>
#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
#include <uapi/drm/xe_drm.h>
+#include <generated/xe_wa_oob.h>
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_irq_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
+#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
#include "xe_gsc.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
+#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_gt_mcr.h"
#include "xe_gt_topology.h"
@@ -324,6 +328,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
{
u32 ccs_mask =
xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
+ u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);
if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
xe_mmio_write32(&hwe->gt->mmio, RCU_MODE,
@@ -332,24 +337,35 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
xe_bo_ggtt_addr(hwe->hwsp));
- xe_hw_engine_mmio_write32(hwe, RING_MODE(0),
- _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
+
+ if (xe_device_has_msix(gt_to_xe(hwe->gt)))
+ ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
+ xe_hw_engine_mmio_write32(hwe, RING_MODE(0), ring_mode);
xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
_MASKED_BIT_DISABLE(STOP_RING));
xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
}
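
Aside, to illustrate the ring_mode handling above (a sketch of the usual xe/i915 masked-register convention, not part of the diff): RING_MODE and RING_MI_MODE are masked registers, where the upper 16 bits select which of the lower 16 bits the hardware actually updates, so independently computed enables can be OR'ed into one write. The SKETCH_* names below are placeholders for the real helpers.

/* Sketch of the masked-register helpers assumed above (illustrative only). */
#define SKETCH_MASKED_BIT_ENABLE(a)   (((a) << 16) | (a))   /* set bit(s) a   */
#define SKETCH_MASKED_BIT_DISABLE(a)  ((a) << 16)           /* clear bit(s) a */

/*
 * Because each value carries its own write mask, ring_mode can start as
 * enable(GFX_DISABLE_LEGACY_MODE) and, on MSI-X capable devices, also pick
 * up enable(GFX_MSIX_INTERRUPT_ENABLE) before the single RING_MODE(0) write.
 */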
-static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
+static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_device *xe,
+ const struct xe_gt *gt,
const struct xe_hw_engine *hwe)
{
+ /*
+ * Xe3p no longer supports load balance mode, so "fixed cslice" mode
+ * is automatic and no RCU_MODE programming is required.
+ */
+ if (GRAPHICS_VER(gt_to_xe(gt)) >= 35)
+ return false;
+
return xe_gt_ccs_mode_enabled(gt) &&
- xe_rtp_match_first_render_or_compute(gt, hwe);
+ xe_rtp_match_first_render_or_compute(xe, gt, hwe);
}
-static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
+static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_device *xe,
+ const struct xe_gt *gt,
const struct xe_hw_engine *hwe)
{
- if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
+ if (GRAPHICS_VER(xe) < 20)
return false;
if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
@@ -383,12 +399,6 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
blit_cctl_val,
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
- /* Use Fixed slice CCS mode */
- { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
- XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
- XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
- RCU_MODE_FIXED_SLICE_CCS_MODE))
- },
/* Disable WMTP if HW doesn't support it */
{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
@@ -397,10 +407,9 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
},
- {}
};
- xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc);
+ xe_rtp_process_to_sr(&ctx, lrc_setup, ARRAY_SIZE(lrc_setup), &hwe->reg_lrc);
}
static void
@@ -419,7 +428,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
* Bspec: 72161
*/
const u8 mocs_write_idx = gt->mocs.uc_index;
- const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
+ const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) &&
(GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
gt->mocs.wb_index : gt->mocs.uc_index;
u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
@@ -456,10 +465,15 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
- {}
+ /* Use Fixed slice CCS mode */
+ { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
+ XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
+ XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
+ RCU_MODE_FIXED_SLICE_CCS_MODE))
+ },
};
- xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
+ xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr);
}
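
Aside on the xe_rtp_process_to_sr() signature change (illustrative, not part of the diff): the rule tables above lose their empty {} terminator and the walker is instead handed ARRAY_SIZE(table), so iteration is bounded by an explicit count rather than a sentinel entry. A minimal sketch of the two styles, with hypothetical types:

/* Hypothetical sketch; the real xe_rtp entry types and processing differ. */
#include <stddef.h>
#include <stdio.h>

struct sketch_rule { const char *name; };

/* Old style: walk until the empty sentinel entry terminates the table. */
static void sketch_process_sentinel(const struct sketch_rule *rules)
{
	for (; rules->name; rules++)
		printf("apply %s\n", rules->name);
}

/* New style: walk an explicit count, typically ARRAY_SIZE(table). */
static void sketch_process_counted(const struct sketch_rule *rules, size_t n)
{
	for (size_t i = 0; i < n; i++)
		printf("apply %s\n", rules[i].name);
}

Dropping the sentinel keeps each table exactly sized and makes the compiler-known ARRAY_SIZE the single source of truth for its length.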
static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance)
@@ -563,6 +577,33 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
xe_reg_whitelist_process_engine(hwe);
}
+static void adjust_idledly(struct xe_hw_engine *hwe)
+{
+ struct xe_gt *gt = hwe->gt;
+ u32 idledly, maxcnt;
+ u32 idledly_units_ps = 8 * gt->info.timestamp_base;
+ u32 maxcnt_units_ns = 640;
+ bool inhibit_switch = 0;
+
+ if (!IS_SRIOV_VF(gt_to_xe(hwe->gt)) && XE_GT_WA(gt, 16023105232)) {
+ idledly = xe_mmio_read32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base));
+ maxcnt = xe_mmio_read32(&gt->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base));
+
+ inhibit_switch = idledly & INHIBIT_SWITCH_UNTIL_PREEMPTED;
+ idledly = REG_FIELD_GET(IDLE_DELAY, idledly);
+ idledly = DIV_ROUND_CLOSEST(idledly * idledly_units_ps, 1000);
+ maxcnt = REG_FIELD_GET(IDLE_WAIT_TIME, maxcnt);
+ maxcnt *= maxcnt_units_ns;
+
+ if (xe_gt_WARN_ON(gt, idledly >= maxcnt || inhibit_switch)) {
+ idledly = DIV_ROUND_CLOSEST(((maxcnt - 1) * maxcnt_units_ns),
+ idledly_units_ps);
+ idledly = DIV_ROUND_CLOSEST(idledly, 1000);
+ xe_mmio_write32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base), idledly);
+ }
+ }
+}
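
Aside on the unit handling in adjust_idledly() (illustrative, not part of the diff): the two register fields are scaled differently, so both are converted to nanoseconds before the comparison; IDLE_DELAY ticks are 8 * gt->info.timestamp_base picoseconds each, while IDLE_WAIT_TIME counts 640 ns units. A sketch of just that conversion, assuming the same scaling:

/* Illustrative helpers mirroring the scaling used in adjust_idledly(). */
static inline unsigned int sketch_idledly_to_ns(unsigned int field,
						unsigned int timestamp_base)
{
	unsigned int units_ps = 8 * timestamp_base;	/* ps per IDLE_DELAY tick */

	return (field * units_ps + 500) / 1000;		/* ps -> ns, round to closest */
}

static inline unsigned int sketch_maxcnt_to_ns(unsigned int field)
{
	return field * 640;	/* 640 ns per IDLE_WAIT_TIME unit */
}

/* The workaround requires idledly_ns < maxcnt_ns (and no inhibit-switch). */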
+
static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
enum xe_hw_engine_id id)
{
@@ -574,7 +615,6 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
xe_gt_assert(gt, gt->info.engine_mask & BIT(id));
xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
- xe_reg_sr_apply_whitelist(hwe);
hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
@@ -604,6 +644,9 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
gt->usm.reserved_bcs_instance = hwe->instance;
+ /* Ensure IDLEDLY is lower than MAXCNT */
+ adjust_idledly(hwe);
+
return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);
err_hwsp:
@@ -660,7 +703,7 @@ static void read_media_fuses(struct xe_gt *gt)
if (!(BIT(j) & vdbox_mask)) {
gt->info.engine_mask &= ~BIT(i);
- drm_info(&xe->drm, "vcs%u fused off\n", j);
+ xe_gt_info(gt, "vcs%u fused off\n", j);
}
}
@@ -670,40 +713,63 @@ static void read_media_fuses(struct xe_gt *gt)
if (!(BIT(j) & vebox_mask)) {
gt->info.engine_mask &= ~BIT(i);
- drm_info(&xe->drm, "vecs%u fused off\n", j);
+ xe_gt_info(gt, "vecs%u fused off\n", j);
}
}
}
+static u32 infer_svccopy_from_meml3(struct xe_gt *gt)
+{
+ u32 meml3 = REG_FIELD_GET(MEML3_EN_MASK,
+ xe_mmio_read32(&gt->mmio, MIRROR_FUSE3));
+ u32 svccopy_mask = 0;
+
+ /*
+ * Each of the four meml3 bits determines the fusing of two service
+ * copy engines.
+ */
+ for (int i = 0; i < 4; i++)
+ svccopy_mask |= (meml3 & BIT(i)) ? 0b11 << 2 * i : 0;
+
+ return svccopy_mask;
+}
+
+static u32 read_svccopy_fuses(struct xe_gt *gt)
+{
+ return REG_FIELD_GET(FUSE_SERVICE_COPY_ENABLE_MASK,
+ xe_mmio_read32(&gt->mmio, SERVICE_COPY_ENABLE));
+}
+
static void read_copy_fuses(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
u32 bcs_mask;
- if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
- return;
-
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
- bcs_mask = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
- bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
+ if (GRAPHICS_VER(xe) >= 35)
+ bcs_mask = read_svccopy_fuses(gt);
+ else if (GRAPHICS_VERx100(xe) == 1260)
+ bcs_mask = infer_svccopy_from_meml3(gt);
+ else
+ return;
- /* BCS0 is always present; only BCS1-BCS8 may be fused off */
- for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
+ /* Only BCS1-BCS8 may be fused off */
+ bcs_mask <<= XE_HW_ENGINE_BCS1;
+ for (int i = XE_HW_ENGINE_BCS1; i <= XE_HW_ENGINE_BCS8; ++i) {
if (!(gt->info.engine_mask & BIT(i)))
continue;
- if (!(BIT(j / 2) & bcs_mask)) {
+ if (!(bcs_mask & BIT(i))) {
gt->info.engine_mask &= ~BIT(i);
- drm_info(&xe->drm, "bcs%u fused off\n", j);
+ xe_gt_info(gt, "bcs%u fused off\n",
+ i - XE_HW_ENGINE_BCS0);
}
}
}
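
Worked example for infer_svccopy_from_meml3() (illustrative, not part of the diff): each MEML3_EN bit accounts for a pair of service copy engines, so a hypothetical fuse value of 0b0101 expands as follows before read_copy_fuses() shifts the result up to the BCS1-BCS8 bit positions:

/*
 * meml3 = 0b0101 (bits 0 and 2 set, hypothetical value):
 *   bit 0 -> svccopy_mask |= 0b11 << 0 = 0b00000011
 *   bit 2 -> svccopy_mask |= 0b11 << 4 = 0b00110000
 *   svccopy_mask             = 0b00110011
 *
 * bcs_mask <<= XE_HW_ENGINE_BCS1 then lines this up with BCS1..BCS8,
 * since BCS0 is never fused off.
 */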
static void read_compute_fuses_from_dss(struct xe_gt *gt)
{
- struct xe_device *xe = gt_to_xe(gt);
-
/*
* CCS fusing based on DSS masks only applies to platforms that can
* have more than one CCS.
@@ -722,14 +788,13 @@ static void read_compute_fuses_from_dss(struct xe_gt *gt)
if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
gt->info.engine_mask &= ~BIT(i);
- drm_info(&xe->drm, "ccs%u fused off\n", j);
+ xe_gt_info(gt, "ccs%u fused off\n", j);
}
}
}
static void read_compute_fuses_from_reg(struct xe_gt *gt)
{
- struct xe_device *xe = gt_to_xe(gt);
u32 ccs_mask;
ccs_mask = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
@@ -741,7 +806,7 @@ static void read_compute_fuses_from_reg(struct xe_gt *gt)
if ((ccs_mask & BIT(j)) == 0) {
gt->info.engine_mask &= ~BIT(i);
- drm_info(&xe->drm, "ccs%u fused off\n", j);
+ xe_gt_info(gt, "ccs%u fused off\n", j);
}
}
}
@@ -756,8 +821,6 @@ static void read_compute_fuses(struct xe_gt *gt)
static void check_gsc_availability(struct xe_gt *gt)
{
- struct xe_device *xe = gt_to_xe(gt);
-
if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
return;
@@ -773,7 +836,25 @@ static void check_gsc_availability(struct xe_gt *gt)
xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0);
xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0);
- drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
+ xe_gt_dbg(gt, "GSC FW not used, disabling gsccs\n");
+ }
+}
+
+static void check_sw_disable(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ u64 sw_allowed = xe_configfs_get_engines_allowed(to_pci_dev(xe->drm.dev));
+ enum xe_hw_engine_id id;
+
+ for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
+ if (!(gt->info.engine_mask & BIT(id)))
+ continue;
+
+ if (!(sw_allowed & BIT(id))) {
+ gt->info.engine_mask &= ~BIT(id);
+ xe_gt_info(gt, "%s disabled via configfs\n",
+ engine_infos[id].name);
+ }
}
}
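
Aside on check_sw_disable() (illustrative, not part of the diff): the configfs allow-list acts as a software mask on top of the fuse-derived engine list, so a clear bit removes the engine exactly as if it had been fused off. For instance, with a hypothetical allow-list of only RCS0 and BCS0:

/*
 * sw_allowed = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0);
 *
 * Every other engine still present in gt->info.engine_mask (VCS, VECS,
 * CCS, GSCCS, ...) has its bit cleared and is logged as
 * "<name> disabled via configfs".
 */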
@@ -785,6 +866,7 @@ int xe_hw_engines_init_early(struct xe_gt *gt)
read_copy_fuses(gt);
read_compute_fuses(gt);
check_gsc_availability(gt);
+ check_sw_disable(gt);
BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);
@@ -822,14 +904,14 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
if (hwe->irq_handler)
hwe->irq_handler(hwe, intr_vec);
- if (intr_vec & GT_RENDER_USER_INTERRUPT)
+ if (intr_vec & GT_MI_USER_INTERRUPT)
xe_hw_fence_irq_run(hwe->fence_irq);
}
/**
* xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
* @hwe: Xe HW Engine.
- * @job: The job object.
+ * @q: The exec queue object.
*
* This can be printed out in a later stage like during dev_coredump
* analysis.
@@ -838,7 +920,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
* caller, using `xe_hw_engine_snapshot_free`.
*/
struct xe_hw_engine_snapshot *
-xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job)
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q)
{
struct xe_hw_engine_snapshot *snapshot;
struct __guc_capture_parsed_output *node;
@@ -864,15 +946,14 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job
if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
return snapshot;
- if (job) {
+ if (q) {
/* If got guc capture, set source to GuC */
- node = xe_guc_capture_get_matching_and_lock(job);
+ node = xe_guc_capture_get_matching_and_lock(q);
if (node) {
struct xe_device *xe = gt_to_xe(hwe->gt);
struct xe_devcoredump *coredump = &xe->devcoredump;
coredump->snapshot.matched_node = node;
- snapshot->source = XE_ENGINE_CAPTURE_SOURCE_GUC;
xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node");
return snapshot;
}
@@ -880,7 +961,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job
/* otherwise, do manual capture */
xe_engine_manual_capture(hwe, snapshot);
- snapshot->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot");
return snapshot;
@@ -1013,12 +1093,13 @@ struct xe_hw_engine *
xe_hw_engine_lookup(struct xe_device *xe,
struct drm_xe_engine_class_instance eci)
{
+ struct xe_gt *gt = xe_device_get_gt(xe, eci.gt_id);
unsigned int idx;
if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
return NULL;
- if (eci.gt_id >= xe->info.gt_count)
+ if (!gt)
return NULL;
idx = array_index_nospec(eci.engine_class,