Diffstat (limited to 'drivers/gpu/drm/xe')
-rw-r--r-- drivers/gpu/drm/xe/Kconfig.debug | 12
-rw-r--r-- drivers/gpu/drm/xe/Makefile | 1
-rw-r--r-- drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 61
-rw-r--r-- drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h | 17
-rw-r--r-- drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h | 36
-rw-r--r-- drivers/gpu/drm/xe/display/xe_display.c | 95
-rw-r--r-- drivers/gpu/drm/xe/display/xe_display.h | 12
-rw-r--r-- drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 9
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 77
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_irq_regs.h | 82
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_reg_defs.h | 2
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_regs.h | 14
-rw-r--r-- drivers/gpu/drm/xe/tests/xe_bo.c | 239
-rw-r--r-- drivers/gpu/drm/xe/tests/xe_mocs.c | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_assert.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_bo.c | 38
-rw-r--r-- drivers/gpu/drm/xe/xe_device.c | 113
-rw-r--r-- drivers/gpu/drm/xe/xe_device.h | 15
-rw-r--r-- drivers/gpu/drm/xe/xe_device_types.h | 81
-rw-r--r-- drivers/gpu/drm/xe/xe_drm_client.c | 9
-rw-r--r-- drivers/gpu/drm/xe/xe_exec_queue.c | 6
-rw-r--r-- drivers/gpu/drm/xe/xe_execlist.c | 19
-rw-r--r-- drivers/gpu/drm/xe/xe_force_wake.c | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_ggtt.c | 10
-rw-r--r-- drivers/gpu/drm/xe/xe_gpu_scheduler.c | 5
-rw-r--r-- drivers/gpu/drm/xe/xe_gpu_scheduler.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_gsc.c | 24
-rw-r--r-- drivers/gpu/drm/xe/xe_gsc_proxy.c | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_gt.c | 45
-rw-r--r-- drivers/gpu/drm/xe/xe_gt.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_clock.c | 6
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_debugfs.c | 13
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_freq.c | 6
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_idle.c | 123
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_idle.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_idle_types.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_mcr.c | 41
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_pagefault.c | 39
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_printk.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 56
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 44
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 49
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c | 419
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h | 24
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h | 40
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c | 6
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h | 6
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sysfs.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_throttle.c | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 8
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_topology.c | 8
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_types.h | 22
-rw-r--r-- drivers/gpu/drm/xe/xe_guc.c | 65
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_ads.c | 5
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_ct.c | 54
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_log.c | 3
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_pc.c | 34
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_relay.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_submit.c | 46
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_types.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_huc.c | 6
-rw-r--r-- drivers/gpu/drm/xe/xe_hw_engine.c | 61
-rw-r--r-- drivers/gpu/drm/xe/xe_hw_engine_types.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_hwmon.c | 16
-rw-r--r-- drivers/gpu/drm/xe/xe_irq.c | 78
-rw-r--r-- drivers/gpu/drm/xe/xe_lmtt.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_lrc.c | 8
-rw-r--r-- drivers/gpu/drm/xe/xe_memirq.c | 192
-rw-r--r-- drivers/gpu/drm/xe/xe_memirq.h | 6
-rw-r--r-- drivers/gpu/drm/xe/xe_memirq_types.h | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_mmio.c | 139
-rw-r--r-- drivers/gpu/drm/xe/xe_mmio.h | 39
-rw-r--r-- drivers/gpu/drm/xe/xe_mocs.c | 16
-rw-r--r-- drivers/gpu/drm/xe/xe_oa.c | 57
-rw-r--r-- drivers/gpu/drm/xe/xe_pat.c | 21
-rw-r--r-- drivers/gpu/drm/xe/xe_pci.c | 49
-rw-r--r-- drivers/gpu/drm/xe/xe_pcode.c | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_pm.c | 8
-rw-r--r-- drivers/gpu/drm/xe/xe_pt.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_query.c | 7
-rw-r--r-- drivers/gpu/drm/xe/xe_reg_sr.c | 17
-rw-r--r-- drivers/gpu/drm/xe/xe_rtp.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_sa.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_sriov.c | 5
-rw-r--r-- drivers/gpu/drm/xe/xe_tile.c | 3
-rw-r--r-- drivers/gpu/drm/xe/xe_trace.h | 7
-rw-r--r-- drivers/gpu/drm/xe/xe_trace_bo.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 8
-rw-r--r-- drivers/gpu/drm/xe/xe_tuning.c | 30
-rw-r--r-- drivers/gpu/drm/xe/xe_uc_fw.c | 19
-rw-r--r-- drivers/gpu/drm/xe/xe_vm.c | 28
-rw-r--r-- drivers/gpu/drm/xe/xe_vram.c | 7
-rw-r--r-- drivers/gpu/drm/xe/xe_wa.c | 6
-rw-r--r-- drivers/gpu/drm/xe/xe_wopcm.c | 15
99 files changed, 2311 insertions, 741 deletions
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index bc177368af6c..2de0de41b8dd 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -40,9 +40,21 @@ config DRM_XE_DEBUG_VM
If in doubt, say "N".
+config DRM_XE_DEBUG_MEMIRQ
+ bool "Enable extra memirq debugging"
+ default n
+ help
+ Choose this option to enable additional debugging info for
+ memory based interrupts.
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
config DRM_XE_DEBUG_SRIOV
bool "Enable extra SR-IOV debugging"
default n
+ select DRM_XE_DEBUG_MEMIRQ
help
Enable extra SR-IOV debugging info.
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index edfd812e0f41..8f1c5c329f79 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -129,6 +129,7 @@ xe-$(CONFIG_PCI_IOV) += \
xe_gt_sriov_pf.o \
xe_gt_sriov_pf_config.o \
xe_gt_sriov_pf_control.o \
+ xe_gt_sriov_pf_migration.o \
xe_gt_sriov_pf_monitor.o \
xe_gt_sriov_pf_policy.o \
xe_gt_sriov_pf_service.o \
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
index 181180f5945c..b6a1852749dd 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
@@ -557,4 +557,65 @@
#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_2_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn
#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_3_VALUE96 GUC_HXG_REQUEST_MSG_n_DATAn
+/**
+ * DOC: PF2GUC_SAVE_RESTORE_VF
+ *
+ * This message is used by the PF to migrate VF info state maintained by the GuC.
+ *
+ * This message must be sent as `CTB HXG Message`_.
+ *
+ * Available since GuC version 70.25.0
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 = **OPCODE** - operation to take: |
+ * | | | |
+ * | | | - _`GUC_PF_OPCODE_VF_SAVE` = 0 |
+ * | | | - _`GUC_PF_OPCODE_VF_RESTORE` = 1 |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_ACTION_PF2GUC_SAVE_RESTORE_VF` = 0x550B |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | **VFID** - VF identifier |
+ * +---+-------+--------------------------------------------------------------+
+ * | 2 | 31:0 | **ADDR_LO** - lower 32-bits of GGTT offset to the buffer |
+ * | | | where the VF info will be saved to or restored from. |
+ * +---+-------+--------------------------------------------------------------+
+ * | 3 | 31:0 | **ADDR_HI** - upper 32-bits of GGTT offset to the buffer |
+ * | | | where the VF info will be saved to or restored from. |
+ * +---+-------+--------------------------------------------------------------+
+ * | 4 | 27:0 | **SIZE** - size of the buffer (in dwords) |
+ * | +-------+--------------------------------------------------------------+
+ * | | 31:28 | MBZ |
+ * +---+-------+--------------------------------------------------------------+
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | DATA0 = **USED** - size of used buffer space (in dwords) |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_PF2GUC_SAVE_RESTORE_VF 0x550Bu
+
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 4u)
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_0_OPCODE GUC_HXG_EVENT_MSG_0_DATA0
+#define GUC_PF_OPCODE_VF_SAVE 0u
+#define GUC_PF_OPCODE_VF_RESTORE 1u
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_1_VFID GUC_HXG_EVENT_MSG_n_DATAn
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_2_ADDR_LO GUC_HXG_EVENT_MSG_n_DATAn
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_3_ADDR_HI GUC_HXG_EVENT_MSG_n_DATAn
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_SIZE (0xfffffffu << 0)
+#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_MBZ (0xfu << 28)
+
+#define PF2GUC_SAVE_RESTORE_VF_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define PF2GUC_SAVE_RESTORE_VF_RESPONSE_MSG_0_USED GUC_HXG_RESPONSE_MSG_0_DATA0
+
#endif
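
A rough sketch of how the PF side could encode a save request with these
macros (the GUC_HXG_* helpers come from guc_messages_abi.h; the CTB call
and the vfid/ggtt_addr/size_in_dwords variables are illustrative
assumptions, not part of this patch):

    u32 request[PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_LEN] = {
        FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
        FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
        FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, GUC_PF_OPCODE_VF_SAVE) |
        FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
                   GUC_ACTION_PF2GUC_SAVE_RESTORE_VF),
        vfid,                          /* MSG_1_VFID */
        lower_32_bits(ggtt_addr),      /* MSG_2_ADDR_LO */
        upper_32_bits(ggtt_addr),      /* MSG_3_ADDR_HI */
        FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_SIZE,
                   size_in_dwords),    /* MBZ bits 31:28 stay zero */
    };
    int ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));

On success, DATA0 of the response reports how many dwords of the buffer
were actually used, letting the caller trim the saved blob.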
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h
deleted file mode 100644
index 98e9dd78f670..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _I915_GPU_ERROR_H_
-#define _I915_GPU_ERROR_H_
-
-struct drm_i915_error_state_buf;
-
-__printf(2, 3)
-static inline void
-i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
-{
-}
-
-#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
index eb5b5f0e4bd9..ee3469d4ae73 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
@@ -10,11 +10,11 @@
#include "xe_device_types.h"
#include "xe_mmio.h"
-static inline struct xe_gt *__compat_uncore_to_gt(struct intel_uncore *uncore)
+static inline struct xe_mmio *__compat_uncore_to_mmio(struct intel_uncore *uncore)
{
struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
- return xe_root_mmio_gt(xe);
+ return xe_root_tile_mmio(xe);
}
static inline struct xe_tile *__compat_uncore_to_tile(struct intel_uncore *uncore)
@@ -29,7 +29,7 @@ static inline u32 intel_uncore_read(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg);
}
static inline u8 intel_uncore_read8(struct intel_uncore *uncore,
@@ -37,7 +37,7 @@ static inline u8 intel_uncore_read8(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read8(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read8(__compat_uncore_to_mmio(uncore), reg);
}
static inline u16 intel_uncore_read16(struct intel_uncore *uncore,
@@ -45,7 +45,7 @@ static inline u16 intel_uncore_read16(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read16(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read16(__compat_uncore_to_mmio(uncore), reg);
}
static inline u64
@@ -57,11 +57,11 @@ intel_uncore_read64_2x32(struct intel_uncore *uncore,
u32 upper, lower, old_upper;
int loop = 0;
- upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg);
+ upper = xe_mmio_read32(__compat_uncore_to_mmio(uncore), upper_reg);
do {
old_upper = upper;
- lower = xe_mmio_read32(__compat_uncore_to_gt(uncore), lower_reg);
- upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg);
+ lower = xe_mmio_read32(__compat_uncore_to_mmio(uncore), lower_reg);
+ upper = xe_mmio_read32(__compat_uncore_to_mmio(uncore), upper_reg);
} while (upper != old_upper && loop++ < 2);
return (u64)upper << 32 | lower;
@@ -72,7 +72,7 @@ static inline void intel_uncore_posting_read(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+ xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg);
}
static inline void intel_uncore_write(struct intel_uncore *uncore,
@@ -80,7 +80,7 @@ static inline void intel_uncore_write(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+ xe_mmio_write32(__compat_uncore_to_mmio(uncore), reg, val);
}
static inline u32 intel_uncore_rmw(struct intel_uncore *uncore,
@@ -88,7 +88,7 @@ static inline u32 intel_uncore_rmw(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_rmw32(__compat_uncore_to_gt(uncore), reg, clear, set);
+ return xe_mmio_rmw32(__compat_uncore_to_mmio(uncore), reg, clear, set);
}
static inline int intel_wait_for_register(struct intel_uncore *uncore,
@@ -97,7 +97,7 @@ static inline int intel_wait_for_register(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+ return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value,
timeout * USEC_PER_MSEC, NULL, false);
}
@@ -107,7 +107,7 @@ static inline int intel_wait_for_register_fw(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+ return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value,
timeout * USEC_PER_MSEC, NULL, false);
}
@@ -118,7 +118,7 @@ __intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+ return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value,
fast_timeout_us + 1000 * slow_timeout_ms,
out_value, false);
}
@@ -128,7 +128,7 @@ static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg);
}
static inline void intel_uncore_write_fw(struct intel_uncore *uncore,
@@ -136,7 +136,7 @@ static inline void intel_uncore_write_fw(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+ xe_mmio_write32(__compat_uncore_to_mmio(uncore), reg, val);
}
static inline u32 intel_uncore_read_notrace(struct intel_uncore *uncore,
@@ -144,7 +144,7 @@ static inline u32 intel_uncore_read_notrace(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+ return xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg);
}
static inline void intel_uncore_write_notrace(struct intel_uncore *uncore,
@@ -152,7 +152,7 @@ static inline void intel_uncore_write_notrace(struct intel_uncore *uncore,
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
- xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+ xe_mmio_write32(__compat_uncore_to_mmio(uncore), reg, val);
}
static inline void __iomem *intel_uncore_regs(struct intel_uncore *uncore)
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index 75736faf2a80..26b2cae11d46 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -4,7 +4,7 @@
*/
#include "xe_display.h"
-#include "regs/xe_regs.h"
+#include "regs/xe_irq_regs.h"
#include <linux/fb.h>
@@ -13,7 +13,6 @@
#include <uapi/drm/xe_drm.h>
#include "soc/intel_dram.h"
-#include "i915_drv.h" /* FIXME: HAS_DISPLAY() depends on this */
#include "intel_acpi.h"
#include "intel_audio.h"
#include "intel_bw.h"
@@ -34,7 +33,7 @@
static bool has_display(struct xe_device *xe)
{
- return HAS_DISPLAY(xe);
+ return HAS_DISPLAY(&xe->display);
}
/**
@@ -309,18 +308,7 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe)
}
/* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */
-void xe_display_pm_runtime_suspend(struct xe_device *xe)
-{
- if (!xe->info.probe_display)
- return;
-
- if (xe->d3cold.allowed)
- xe_display_pm_suspend(xe, true);
-
- intel_hpd_poll_enable(xe);
-}
-
-void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
+static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
{
struct intel_display *display = &xe->display;
bool s2idle = suspend_to_idle();
@@ -355,6 +343,52 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
intel_dmc_suspend(xe);
}
+void xe_display_pm_suspend(struct xe_device *xe)
+{
+ __xe_display_pm_suspend(xe, false);
+}
+
+void xe_display_pm_shutdown(struct xe_device *xe)
+{
+ struct intel_display *display = &xe->display;
+
+ if (!xe->info.probe_display)
+ return;
+
+ intel_power_domains_disable(xe);
+ intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+ if (has_display(xe)) {
+ drm_kms_helper_poll_disable(&xe->drm);
+ intel_display_driver_disable_user_access(xe);
+ intel_display_driver_suspend(xe);
+ }
+
+ xe_display_flush_cleanup_work(xe);
+ intel_dp_mst_suspend(xe);
+ intel_hpd_cancel_work(xe);
+
+ if (has_display(xe))
+ intel_display_driver_suspend_access(xe);
+
+ intel_encoder_suspend_all(display);
+ intel_encoder_shutdown_all(display);
+
+ intel_opregion_suspend(display, PCI_D3cold);
+
+ intel_dmc_suspend(xe);
+}
+
+void xe_display_pm_runtime_suspend(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ if (xe->d3cold.allowed)
+ __xe_display_pm_suspend(xe, true);
+
+ intel_hpd_poll_enable(xe);
+}
+
void xe_display_pm_suspend_late(struct xe_device *xe)
{
bool s2idle = suspend_to_idle();
@@ -366,15 +400,17 @@ void xe_display_pm_suspend_late(struct xe_device *xe)
intel_display_power_suspend_late(xe);
}
-void xe_display_pm_runtime_resume(struct xe_device *xe)
+void xe_display_pm_shutdown_late(struct xe_device *xe)
{
if (!xe->info.probe_display)
return;
- intel_hpd_poll_disable(xe);
-
- if (xe->d3cold.allowed)
- xe_display_pm_resume(xe, true);
+ /*
+ * The only requirement is to reboot with display DC states disabled,
+ * for now leaving all display power wells in the INIT power domain
+ * enabled.
+ */
+ intel_power_domains_driver_remove(xe);
}
void xe_display_pm_resume_early(struct xe_device *xe)
@@ -387,7 +423,7 @@ void xe_display_pm_resume_early(struct xe_device *xe)
intel_power_domains_resume(xe);
}
-void xe_display_pm_resume(struct xe_device *xe, bool runtime)
+static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
{
struct intel_display *display = &xe->display;
@@ -421,6 +457,23 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime)
intel_power_domains_enable(xe);
}
+void xe_display_pm_resume(struct xe_device *xe)
+{
+ __xe_display_pm_resume(xe, false);
+}
+
+void xe_display_pm_runtime_resume(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ intel_hpd_poll_disable(xe);
+
+ if (xe->d3cold.allowed)
+ __xe_display_pm_resume(xe, true);
+}
+
static void display_device_remove(struct drm_device *dev, void *arg)
{
struct xe_device *xe = arg;
diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h
index 53d727fd792b..17afa537aee5 100644
--- a/drivers/gpu/drm/xe/display/xe_display.h
+++ b/drivers/gpu/drm/xe/display/xe_display.h
@@ -34,10 +34,12 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir);
void xe_display_irq_reset(struct xe_device *xe);
void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt);
-void xe_display_pm_suspend(struct xe_device *xe, bool runtime);
+void xe_display_pm_suspend(struct xe_device *xe);
+void xe_display_pm_shutdown(struct xe_device *xe);
void xe_display_pm_suspend_late(struct xe_device *xe);
+void xe_display_pm_shutdown_late(struct xe_device *xe);
void xe_display_pm_resume_early(struct xe_device *xe);
-void xe_display_pm_resume(struct xe_device *xe, bool runtime);
+void xe_display_pm_resume(struct xe_device *xe);
void xe_display_pm_runtime_suspend(struct xe_device *xe);
void xe_display_pm_runtime_resume(struct xe_device *xe);
@@ -65,10 +67,12 @@ static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
static inline void xe_display_irq_reset(struct xe_device *xe) {}
static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {}
-static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {}
+static inline void xe_display_pm_suspend(struct xe_device *xe) {}
+static inline void xe_display_pm_shutdown(struct xe_device *xe) {}
static inline void xe_display_pm_suspend_late(struct xe_device *xe) {}
+static inline void xe_display_pm_shutdown_late(struct xe_device *xe) {}
static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
-static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {}
+static inline void xe_display_pm_resume(struct xe_device *xe) {}
static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {}
static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {}
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index f99d901a3214..f95375451e2f 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -48,11 +48,12 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
if (!vma)
return false;
+ /* Set scanout flag for WC mapping */
obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
NULL, PAGE_ALIGN(size),
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
- XE_BO_FLAG_GGTT);
+ XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
if (IS_ERR(obj)) {
kfree(vma);
return false;
@@ -73,5 +74,9 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
{
- /* TODO: add xe specific flush_map() for dsb buffer object. */
+ /*
+ * The memory barrier here is to ensure coherency of DSB vs MMIO,
+ * both for weak ordering archs and discrete cards.
+ */
+ xe_device_wmb(dsb_buf->vma->bo->tile->xe);
}
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 660ff42e45a6..fb80042cbe0d 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -169,6 +169,8 @@
#define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED)
#define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14)
+#define XE2LPM_CCCHKNREG1 XE_REG(0x82a8)
+
#define VF_PREEMPTION XE_REG(0x83a4, XE_REG_OPTION_MASKED)
#define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0)
@@ -342,6 +344,14 @@
#define CTC_SOURCE_DIVIDE_LOGIC REG_BIT(0)
#define FORCEWAKE_RENDER XE_REG(0xa278)
+
+#define POWERGATE_DOMAIN_STATUS XE_REG(0xa2a0)
+#define MEDIA_SLICE3_AWAKE_STATUS REG_BIT(4)
+#define MEDIA_SLICE2_AWAKE_STATUS REG_BIT(3)
+#define MEDIA_SLICE1_AWAKE_STATUS REG_BIT(2)
+#define RENDER_AWAKE_STATUS REG_BIT(1)
+#define MEDIA_SLICE0_AWAKE_STATUS REG_BIT(0)
+
#define FORCEWAKE_MEDIA_VDBOX(n) XE_REG(0xa540 + (n) * 4)
#define FORCEWAKE_MEDIA_VEBOX(n) XE_REG(0xa560 + (n) * 4)
#define FORCEWAKE_GSC XE_REG(0xa618)
@@ -378,6 +388,9 @@
#define L3SQCREG3 XE_REG_MCR(0xb108)
#define COMPPWOVERFETCHEN REG_BIT(28)
+#define SCRATCH3_LBCF XE_REG_MCR(0xb154)
+#define RWFLUSHALLEN REG_BIT(17)
+
#define XEHP_L3SQCREG5 XE_REG_MCR(0xb158)
#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0)
@@ -391,6 +404,12 @@
#define SCRATCH1LPFC XE_REG(0xb474)
#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0)
+#define XE2LPM_L3SQCREG2 XE_REG_MCR(0xb604)
+
+#define XE2LPM_L3SQCREG3 XE_REG_MCR(0xb608)
+
+#define XE2LPM_SCRATCH3_LBCF XE_REG_MCR(0xb654)
+
#define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658)
#define XE2_TDF_CTRL XE_REG(0xb418)
@@ -548,62 +567,4 @@
#define GT_PERF_STATUS XE_REG(0x1381b4)
#define VOLTAGE_MASK REG_GENMASK(10, 0)
-/*
- * Note: Interrupt registers 1900xx are VF accessible only until version 12.50.
- * On newer platforms, VFs are using memory-based interrupts instead.
- * However, for simplicity we keep this XE_REG_OPTION_VF tag intact.
- */
-
-#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4), XE_REG_OPTION_VF)
-#define INTR_GSC REG_BIT(31)
-#define INTR_GUC REG_BIT(25)
-#define INTR_MGUC REG_BIT(24)
-#define INTR_BCS8 REG_BIT(23)
-#define INTR_BCS(x) REG_BIT(15 - (x))
-#define INTR_CCS(x) REG_BIT(4 + (x))
-#define INTR_RCS0 REG_BIT(0)
-#define INTR_VECS(x) REG_BIT(31 - (x))
-#define INTR_VCS(x) REG_BIT(x)
-
-#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030, XE_REG_OPTION_VF)
-#define VCS_VECS_INTR_ENABLE XE_REG(0x190034, XE_REG_OPTION_VF)
-#define GUC_SG_INTR_ENABLE XE_REG(0x190038, XE_REG_OPTION_VF)
-#define ENGINE1_MASK REG_GENMASK(31, 16)
-#define ENGINE0_MASK REG_GENMASK(15, 0)
-#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c, XE_REG_OPTION_VF)
-#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044, XE_REG_OPTION_VF)
-#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048, XE_REG_OPTION_VF)
-
-#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4), XE_REG_OPTION_VF)
-#define INTR_DATA_VALID REG_BIT(31)
-#define INTR_ENGINE_INSTANCE(x) REG_FIELD_GET(GENMASK(25, 20), x)
-#define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x)
-#define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x)
-#define OTHER_GUC_INSTANCE 0
-#define OTHER_GSC_HECI2_INSTANCE 3
-#define OTHER_GSC_INSTANCE 6
-
-#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4), XE_REG_OPTION_VF)
-#define RCS0_RSVD_INTR_MASK XE_REG(0x190090, XE_REG_OPTION_VF)
-#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF)
-#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF)
-#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF)
-#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF)
-#define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4)
-#define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF)
-#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF)
-#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4, XE_REG_OPTION_VF)
-#define CCS0_CCS1_INTR_MASK XE_REG(0x190100)
-#define CCS2_CCS3_INTR_MASK XE_REG(0x190104)
-#define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110)
-#define XEHPC_BCS3_BCS4_INTR_MASK XE_REG(0x190114)
-#define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118)
-#define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c)
-#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11)
-#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8)
-#define GSC_ER_COMPLETE REG_BIT(5)
-#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4)
-#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3)
-#define GT_RENDER_USER_INTERRUPT REG_BIT(0)
-
#endif
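
The new POWERGATE_DOMAIN_STATUS bits report which render/media power
domains are currently awake, presumably for the xe_gt_idle work elsewhere
in this range. A minimal hedged sketch of consuming them (gt assumed in
scope):

    u32 status = xe_mmio_read32(&gt->mmio, POWERGATE_DOMAIN_STATUS);
    bool render_awake = status & RENDER_AWAKE_STATUS;    /* powergate not engaged */
    bool media0_awake = status & MEDIA_SLICE0_AWAKE_STATUS;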
diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h
new file mode 100644
index 000000000000..1776b3f78ccb
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+#ifndef _XE_IRQ_REGS_H_
+#define _XE_IRQ_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+#define PCU_IRQ_OFFSET 0x444e0
+#define GU_MISC_IRQ_OFFSET 0x444f0
+#define GU_MISC_GSE REG_BIT(27)
+
+#define DG1_MSTR_TILE_INTR XE_REG(0x190008)
+#define DG1_MSTR_IRQ REG_BIT(31)
+#define DG1_MSTR_TILE(t) REG_BIT(t)
+
+#define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF)
+#define MASTER_IRQ REG_BIT(31)
+#define GU_MISC_IRQ REG_BIT(29)
+#define DISPLAY_IRQ REG_BIT(16)
+#define GT_DW_IRQ(x) REG_BIT(x)
+
+/*
+ * Note: Interrupt registers 1900xx are VF accessible only until version 12.50.
+ * On newer platforms, VFs are using memory-based interrupts instead.
+ * However, for simplicity we keep this XE_REG_OPTION_VF tag intact.
+ */
+
+#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4), XE_REG_OPTION_VF)
+#define INTR_GSC REG_BIT(31)
+#define INTR_GUC REG_BIT(25)
+#define INTR_MGUC REG_BIT(24)
+#define INTR_BCS8 REG_BIT(23)
+#define INTR_BCS(x) REG_BIT(15 - (x))
+#define INTR_CCS(x) REG_BIT(4 + (x))
+#define INTR_RCS0 REG_BIT(0)
+#define INTR_VECS(x) REG_BIT(31 - (x))
+#define INTR_VCS(x) REG_BIT(x)
+
+#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030, XE_REG_OPTION_VF)
+#define VCS_VECS_INTR_ENABLE XE_REG(0x190034, XE_REG_OPTION_VF)
+#define GUC_SG_INTR_ENABLE XE_REG(0x190038, XE_REG_OPTION_VF)
+#define ENGINE1_MASK REG_GENMASK(31, 16)
+#define ENGINE0_MASK REG_GENMASK(15, 0)
+#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c, XE_REG_OPTION_VF)
+#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044, XE_REG_OPTION_VF)
+#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048, XE_REG_OPTION_VF)
+
+#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4), XE_REG_OPTION_VF)
+#define INTR_DATA_VALID REG_BIT(31)
+#define INTR_ENGINE_INSTANCE(x) REG_FIELD_GET(GENMASK(25, 20), x)
+#define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x)
+#define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x)
+#define OTHER_GUC_INSTANCE 0
+#define OTHER_GSC_HECI2_INSTANCE 3
+#define OTHER_GSC_INSTANCE 6
+
+#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4), XE_REG_OPTION_VF)
+#define RCS0_RSVD_INTR_MASK XE_REG(0x190090, XE_REG_OPTION_VF)
+#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF)
+#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF)
+#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF)
+#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF)
+#define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4)
+#define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF)
+#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF)
+#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4, XE_REG_OPTION_VF)
+#define CCS0_CCS1_INTR_MASK XE_REG(0x190100)
+#define CCS2_CCS3_INTR_MASK XE_REG(0x190104)
+#define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110)
+#define XEHPC_BCS3_BCS4_INTR_MASK XE_REG(0x190114)
+#define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118)
+#define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c)
+#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11)
+#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8)
+#define GSC_ER_COMPLETE REG_BIT(5)
+#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4)
+#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3)
+#define GT_RENDER_USER_INTERRUPT REG_BIT(0)
+
+#endif
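
The INTR_IDENTITY_REG helpers carried over into this header are what the
IRQ handler uses to decode a pending interrupt. In outline (mmio, bank and
handle_engine_irq() are hypothetical stand-ins):

    u32 ident = xe_mmio_read32(mmio, INTR_IDENTITY_REG(bank));

    if (ident & INTR_DATA_VALID) {
        u32 class = INTR_ENGINE_CLASS(ident);    /* engine class, bits 18:16 */
        u32 inst = INTR_ENGINE_INSTANCE(ident);  /* engine instance, bits 25:20 */
        u32 vec = INTR_ENGINE_INTR(ident);       /* per-engine interrupt bits */

        handle_engine_irq(class, inst, vec);     /* e.g. vec & GT_RENDER_USER_INTERRUPT */
    }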
diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index 23f7dc5bbe99..51fd40ffafcb 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -128,7 +128,7 @@ struct xe_reg_mcr {
* options.
*/
#define XE_REG_MCR(r_, ...) ((const struct xe_reg_mcr){ \
- .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \
+ .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \
})
static inline bool xe_reg_is_valid(struct xe_reg r)
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index dfa869f0dddd..3293172b0128 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -11,10 +11,6 @@
#define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK REG_GENMASK(15, 12)
#define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK REG_GENMASK(9, 0)
-#define PCU_IRQ_OFFSET 0x444e0
-#define GU_MISC_IRQ_OFFSET 0x444f0
-#define GU_MISC_GSE REG_BIT(27)
-
#define GU_CNTL_PROTECTED XE_REG(0x10100C)
#define DRIVERINT_FLR_DIS REG_BIT(31)
@@ -57,16 +53,6 @@
#define MTL_MPE_FREQUENCY XE_REG(0x13802c)
#define MTL_RPE_MASK REG_GENMASK(8, 0)
-#define DG1_MSTR_TILE_INTR XE_REG(0x190008)
-#define DG1_MSTR_IRQ REG_BIT(31)
-#define DG1_MSTR_TILE(t) REG_BIT(t)
-
-#define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF)
-#define MASTER_IRQ REG_BIT(31)
-#define GU_MISC_IRQ REG_BIT(29)
-#define DISPLAY_IRQ REG_BIT(16)
-#define GT_DW_IRQ(x) REG_BIT(x)
-
#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF)
#define VF_CAP REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 8dac069483e8..7d3fd720478b 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -6,6 +6,12 @@
#include <kunit/test.h>
#include <kunit/visibility.h>
+#include <linux/iosys-map.h>
+#include <linux/random.h>
+#include <linux/swap.h>
+
+#include <uapi/linux/sysinfo.h>
+
#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"
@@ -358,9 +364,242 @@ static void xe_bo_evict_kunit(struct kunit *test)
evict_test_run_device(xe);
}
+struct xe_bo_link {
+ struct list_head link;
+ struct xe_bo *bo;
+ u32 val;
+};
+
+#define XE_BO_SHRINK_SIZE ((unsigned long)SZ_64M)
+
+static int shrink_test_fill_random(struct xe_bo *bo, struct rnd_state *state,
+ struct xe_bo_link *link)
+{
+ struct iosys_map map;
+ int ret = ttm_bo_vmap(&bo->ttm, &map);
+ size_t __maybe_unused i;
+
+ if (ret)
+ return ret;
+
+ for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) {
+ u32 val = prandom_u32_state(state);
+
+ iosys_map_wr(&map, i, u32, val);
+ if (i == 0)
+ link->val = val;
+ }
+
+ ttm_bo_vunmap(&bo->ttm, &map);
+ return 0;
+}
+
+static bool shrink_test_verify(struct kunit *test, struct xe_bo *bo,
+ unsigned int bo_nr, struct rnd_state *state,
+ struct xe_bo_link *link)
+{
+ struct iosys_map map;
+ int ret = ttm_bo_vmap(&bo->ttm, &map);
+ size_t i;
+ bool failed = false;
+
+ if (ret) {
+ KUNIT_FAIL(test, "Error mapping bo %u for content check.\n", bo_nr);
+ return true;
+ }
+
+ for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) {
+ u32 val = prandom_u32_state(state);
+
+ if (iosys_map_rd(&map, i, u32) != val) {
+ KUNIT_FAIL(test, "Content not preserved, bo %u offset 0x%016llx",
+ bo_nr, (unsigned long long)i);
+ kunit_info(test, "Failed value is 0x%08x, recorded 0x%08x\n",
+ (unsigned int)iosys_map_rd(&map, i, u32), val);
+ if (i == 0 && val != link->val)
+ kunit_info(test, "Looks like PRNG is out of sync.\n");
+ failed = true;
+ break;
+ }
+ }
+
+ ttm_bo_vunmap(&bo->ttm, &map);
+
+ return failed;
+}
+
+/*
+ * Try to create system bos corresponding to twice the amount
+ * of available system memory to test shrinker functionality.
+ * If no swap space is available to accommodate the
+ * memory overcommit, mark bos purgeable.
+ */
+static int shrink_test_run_device(struct xe_device *xe)
+{
+ struct kunit *test = kunit_get_current_test();
+ LIST_HEAD(bos);
+ struct xe_bo_link *link, *next;
+ struct sysinfo si;
+ size_t ram, ram_and_swap, purgeable = 0, alloced, to_alloc, limit;
+ unsigned int interrupted = 0, successful = 0, count = 0;
+ struct rnd_state prng;
+ u64 rand_seed;
+ bool failed = false;
+
+ rand_seed = get_random_u64();
+ prandom_seed_state(&prng, rand_seed);
+ kunit_info(test, "Random seed is 0x%016llx.\n",
+ (unsigned long long)rand_seed);
+
+ /* Skip if execution time is expected to be too long. */
+
+ limit = SZ_32G;
+ /* IGFX with flat CCS needs to copy when swapping / shrinking */
+ if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe))
+ limit = SZ_16G;
+
+ si_meminfo(&si);
+ ram = (size_t)si.freeram * si.mem_unit;
+ if (ram > limit) {
+ kunit_skip(test, "Too long expected execution time.\n");
+ return 0;
+ }
+ to_alloc = ram * 2;
+
+ ram_and_swap = ram + get_nr_swap_pages() * PAGE_SIZE;
+ if (to_alloc > ram_and_swap)
+ purgeable = to_alloc - ram_and_swap;
+ purgeable += purgeable / 5;
+
+ kunit_info(test, "Free ram is %lu bytes. Will allocate twice of that.\n",
+ (unsigned long)ram);
+ for (alloced = 0; alloced < to_alloc; alloced += XE_BO_SHRINK_SIZE) {
+ struct xe_bo *bo;
+ unsigned int mem_type;
+ struct xe_ttm_tt *xe_tt;
+
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ if (!link) {
+ KUNIT_FAIL(test, "Unexpected link allocation failure\n");
+ failed = true;
+ break;
+ }
+
+ INIT_LIST_HEAD(&link->link);
+
+ /* We can create bos using WC caching here. But it is slower. */
+ bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE,
+ DRM_XE_GEM_CPU_CACHING_WB,
+ XE_BO_FLAG_SYSTEM);
+ if (IS_ERR(bo)) {
+ if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) &&
+ bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
+ KUNIT_FAIL(test, "Error creating bo: %pe\n", bo);
+ kfree(link);
+ failed = true;
+ break;
+ }
+ xe_bo_lock(bo, false);
+ xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
+
+ /*
+ * Allocate purgeable bos first, because if we do it the
+ * other way around, they may not be subject to swapping...
+ */
+ if (alloced < purgeable) {
+ xe_tt->purgeable = true;
+ bo->ttm.priority = 0;
+ } else {
+ int ret = shrink_test_fill_random(bo, &prng, link);
+
+ if (ret) {
+ xe_bo_unlock(bo);
+ xe_bo_put(bo);
+ KUNIT_FAIL(test, "Error filling bo with random data: %pe\n",
+ ERR_PTR(ret));
+ kfree(link);
+ failed = true;
+ break;
+ }
+ }
+
+ mem_type = bo->ttm.resource->mem_type;
+ xe_bo_unlock(bo);
+ link->bo = bo;
+ list_add_tail(&link->link, &bos);
+
+ if (mem_type != XE_PL_TT) {
+ KUNIT_FAIL(test, "Bo in incorrect memory type: %u\n",
+ bo->ttm.resource->mem_type);
+ failed = true;
+ }
+ cond_resched();
+ if (signal_pending(current))
+ break;
+ }
+
+ /*
+ * Read back and destroy bos. Reset the pseudo-random seed to get an
+ * identical pseudo-random number sequence for readback.
+ */
+ prandom_seed_state(&prng, rand_seed);
+ list_for_each_entry_safe(link, next, &bos, link) {
+ static struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct xe_bo *bo = link->bo;
+ struct xe_ttm_tt *xe_tt;
+ int ret;
+
+ count++;
+ if (!signal_pending(current) && !failed) {
+ bool purgeable, intr = false;
+
+ xe_bo_lock(bo, false);
+
+ /* xe_tt->purgeable is cleared on validate. */
+ xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
+ purgeable = xe_tt->purgeable;
+ do {
+ ret = ttm_bo_validate(&bo->ttm, &tt_placement, &ctx);
+ if (ret == -EINTR)
+ intr = true;
+ } while (ret == -EINTR && !signal_pending(current));
+
+ if (!ret && !purgeable)
+ failed = shrink_test_verify(test, bo, count, &prng, link);
+
+ xe_bo_unlock(bo);
+ if (ret) {
+ KUNIT_FAIL(test, "Validation failed: %pe\n",
+ ERR_PTR(ret));
+ failed = true;
+ } else if (intr) {
+ interrupted++;
+ } else {
+ successful++;
+ }
+ }
+ xe_bo_put(link->bo);
+ list_del(&link->link);
+ kfree(link);
+ }
+ kunit_info(test, "Readbacks interrupted: %u successful: %u\n",
+ interrupted, successful);
+
+ return 0;
+}
+
+static void xe_bo_shrink_kunit(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+
+ shrink_test_run_device(xe);
+}
+
static struct kunit_case xe_bo_tests[] = {
KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
+ KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param,
+ {.speed = KUNIT_SPEED_SLOW}),
{}
};
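
The test's correctness rests on prandom_u32_state() producing a
deterministic sequence: reseeding with the recorded seed replays exactly
the values written before shrinking. The pattern in miniature (fill_bos()
and verify_bos() stand in for the helpers above):

    struct rnd_state prng;
    u64 seed = get_random_u64();

    prandom_seed_state(&prng, seed);
    fill_bos(&prng);                  /* write prandom_u32_state() values */
    prandom_seed_state(&prng, seed);  /* rewind the sequence */
    verify_bos(&prng);                /* regenerate and compare */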
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index 79be73b4a02b..ea932c051cc7 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -55,7 +55,7 @@ static void read_l3cc_table(struct xe_gt *gt,
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i >> 1));
else
- reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i >> 1));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_LNCFCMOCS(i >> 1));
mocs_dbg(gt, "reg_val=0x%x\n", reg_val);
} else {
@@ -94,7 +94,7 @@ static void read_mocs_table(struct xe_gt *gt,
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_GLOBAL_MOCS(i));
mocs_expected = get_entry_control(info, i);
mocs = reg_val;
diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h
index e22bbf57fca7..04d6b95c6d87 100644
--- a/drivers/gpu/drm/xe/xe_assert.h
+++ b/drivers/gpu/drm/xe/xe_assert.h
@@ -10,7 +10,7 @@
#include <drm/drm_print.h>
-#include "xe_device_types.h"
+#include "xe_gt_types.h"
#include "xe_step.h"
/**
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index f379df3a12bf..5e8f60a8d431 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -283,6 +283,8 @@ struct xe_ttm_tt {
struct device *dev;
struct sg_table sgt;
struct sg_table *sg;
+ /** @purgeable: Whether the content of the pages of @ttm is purgeable. */
+ bool purgeable;
};
static int xe_tt_map_sg(struct ttm_tt *tt)
@@ -468,7 +470,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
mem->bus.offset += vram->io_start;
mem->bus.is_iomem = true;
-#if !defined(CONFIG_X86)
+#if !IS_ENABLED(CONFIG_X86)
mem->bus.caching = ttm_write_combined;
#endif
return 0;
@@ -680,8 +682,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
(ttm->page_flags & TTM_TT_FLAG_SWAPPED));
- move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) :
- (!mem_type_is_vram(old_mem_type) && !tt_has_data);
+ move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
+ (!mem_type_is_vram(old_mem_type) && !tt_has_data));
needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
(!ttm && ttm_bo->type == ttm_bo_type_device);
@@ -761,7 +763,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
if (xe_rpm_reclaim_safe(xe)) {
/*
* We might be called through swapout in the validation path of
- * another TTM device, so unconditionally acquire rpm here.
+ * another TTM device, so acquire rpm here.
*/
xe_pm_runtime_get(xe);
} else {
@@ -1082,6 +1084,33 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
}
}
+static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
+{
+ struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+
+ if (ttm_bo->ttm) {
+ struct ttm_placement place = {};
+ int ret = ttm_bo_validate(ttm_bo, &place, ctx);
+
+ drm_WARN_ON(&xe->drm, ret);
+ }
+}
+
+static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
+{
+ struct ttm_operation_ctx ctx = {
+ .interruptible = false
+ };
+
+ if (ttm_bo->ttm) {
+ struct xe_ttm_tt *xe_tt =
+ container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
+
+ if (xe_tt->purgeable)
+ xe_ttm_bo_purge(ttm_bo, &ctx);
+ }
+}
+
const struct ttm_device_funcs xe_ttm_funcs = {
.ttm_tt_create = xe_ttm_tt_create,
.ttm_tt_populate = xe_ttm_tt_populate,
@@ -1094,6 +1123,7 @@ const struct ttm_device_funcs xe_ttm_funcs = {
.release_notify = xe_ttm_bo_release_notify,
.eviction_valuable = ttm_bo_eviction_valuable,
.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
+ .swap_notify = xe_ttm_bo_swap_notify,
};
static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
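
The purge path above leans on a TTM convention: validating a bo against an
empty placement list asks TTM to drop its backing store entirely. So when
the shrinker swaps a bo out, swap_notify can purge instead, provided the
pages were marked purgeable. Condensed (sketch only):

    struct ttm_placement empty = {};  /* no placements == purge request */
    struct ttm_operation_ctx ctx = { .interruptible = false };

    if (xe_tt->purgeable)
        ttm_bo_validate(ttm_bo, &empty, &ctx);  /* drop pages, skip swap */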
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 70d4e4d46c3c..1940cd3229f4 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -6,6 +6,7 @@
#include "xe_device.h"
#include <linux/delay.h>
+#include <linux/fault-inject.h>
#include <linux/units.h>
#include <drm/drm_aperture.h>
@@ -171,10 +172,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
xe_exec_queue_kill(q);
xe_exec_queue_put(q);
}
- mutex_lock(&xef->vm.lock);
xa_for_each(&xef->vm.xa, idx, vm)
xe_vm_close_and_put(vm);
- mutex_unlock(&xef->vm.lock);
xe_file_put(xef);
@@ -298,6 +297,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
if (xe->unordered_wq)
destroy_workqueue(xe->unordered_wq);
+ if (xe->destroy_wq)
+ destroy_workqueue(xe->destroy_wq);
+
ttm_device_fini(&xe->ttm);
}
@@ -336,9 +338,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
init_waitqueue_head(&xe->ufence_wq);
- err = drmm_mutex_init(&xe->drm, &xe->usm.lock);
- if (err)
- goto err;
+ init_rwsem(&xe->usm.lock);
xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
@@ -363,8 +363,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
+ xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
if (!xe->ordered_wq || !xe->unordered_wq ||
- !xe->preempt_fence_wq) {
+ !xe->preempt_fence_wq || !xe->destroy_wq) {
/*
* Cleanup done in xe_device_destroy via
* drmm_add_action_or_reset register above
@@ -383,6 +384,12 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
err:
return ERR_PTR(err);
}
+ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */
+
+static bool xe_driver_flr_disabled(struct xe_device *xe)
+{
+ return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS;
+}
/*
* The driver-initiated FLR is the highest level of reset that we can trigger
@@ -397,17 +404,12 @@ err:
* if/when a new instance of i915 is bound to the device it will do a full
* re-init anyway.
*/
-static void xe_driver_flr(struct xe_device *xe)
+static void __xe_driver_flr(struct xe_device *xe)
{
const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
- struct xe_gt *gt = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
int ret;
- if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
- drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
- return;
- }
-
drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
/*
@@ -419,25 +421,25 @@ static void xe_driver_flr(struct xe_device *xe)
* is still pending (unless the HW is totally dead), but better to be
* safe in case something unexpected happens
*/
- ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
if (ret) {
drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
return;
}
- xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+ xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
/* Trigger the actual Driver-FLR */
- xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);
+ xe_mmio_rmw32(mmio, GU_CNTL, 0, DRIVERFLR);
/* Wait for hardware teardown to complete */
- ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+ ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
if (ret) {
drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
return;
}
/* Wait for hardware/firmware re-init to complete */
- ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
+ ret = xe_mmio_wait32(mmio, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
flr_timeout, NULL, false);
if (ret) {
drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
@@ -445,7 +447,17 @@ static void xe_driver_flr(struct xe_device *xe)
}
/* Clear sticky completion status */
- xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+ xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
+}
+
+static void xe_driver_flr(struct xe_device *xe)
+{
+ if (xe_driver_flr_disabled(xe)) {
+ drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
+ return;
+ }
+
+ __xe_driver_flr(xe);
}
static void xe_driver_flr_fini(void *arg)
@@ -488,16 +500,15 @@ mask_err:
return err;
}
-static bool verify_lmem_ready(struct xe_gt *gt)
+static bool verify_lmem_ready(struct xe_device *xe)
{
- u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT;
+ u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT;
return !!val;
}
static int wait_for_lmem_ready(struct xe_device *xe)
{
- struct xe_gt *gt = xe_root_mmio_gt(xe);
unsigned long timeout, start;
if (!IS_DGFX(xe))
@@ -506,7 +517,7 @@ static int wait_for_lmem_ready(struct xe_device *xe)
if (IS_SRIOV_VF(xe))
return 0;
- if (verify_lmem_ready(gt))
+ if (verify_lmem_ready(xe))
return 0;
drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
@@ -535,13 +546,14 @@ static int wait_for_lmem_ready(struct xe_device *xe)
msleep(20);
- } while (!verify_lmem_ready(gt));
+ } while (!verify_lmem_ready(xe));
drm_dbg(&xe->drm, "lmem ready after %ums",
jiffies_to_msecs(jiffies - start));
return 0;
}
+ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */
static void update_device_info(struct xe_device *xe)
{
@@ -589,15 +601,17 @@ int xe_device_probe_early(struct xe_device *xe)
return 0;
}
-static int xe_device_set_has_flat_ccs(struct xe_device *xe)
+static int probe_has_flat_ccs(struct xe_device *xe)
{
+ struct xe_gt *gt;
u32 reg;
int err;
+ /* Always enabled/disabled, no runtime check to do */
if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
return 0;
- struct xe_gt *gt = xe_root_mmio_gt(xe);
+ gt = xe_root_mmio_gt(xe);
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (err)
@@ -646,6 +660,13 @@ int xe_device_probe(struct xe_device *xe)
err = xe_gt_init_early(gt);
if (err)
return err;
+
+ /*
+ * Only after this point can GT-specific MMIO operations
+ * (including things like communication with the GuC)
+ * be performed.
+ */
+ xe_gt_mmio_init(gt);
}
for_each_tile(tile, xe, id) {
@@ -661,11 +682,9 @@ int xe_device_probe(struct xe_device *xe)
err = xe_ggtt_init_early(tile->mem.ggtt);
if (err)
return err;
- if (IS_SRIOV_VF(xe)) {
- err = xe_memirq_init(&tile->sriov.vf.memirq);
- if (err)
- return err;
- }
+ err = xe_memirq_init(&tile->memirq);
+ if (err)
+ return err;
}
for_each_gt(gt, xe, id) {
@@ -689,7 +708,7 @@ int xe_device_probe(struct xe_device *xe)
if (err)
goto err;
- err = xe_device_set_has_flat_ccs(xe);
+ err = probe_has_flat_ccs(xe);
if (err)
goto err;
@@ -799,6 +818,24 @@ void xe_device_remove(struct xe_device *xe)
void xe_device_shutdown(struct xe_device *xe)
{
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_dbg(&xe->drm, "Shutting down device\n");
+
+ if (xe_driver_flr_disabled(xe)) {
+ xe_display_pm_shutdown(xe);
+
+ xe_irq_suspend(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_shutdown(gt);
+
+ xe_display_pm_shutdown_late(xe);
+ } else {
+ /* BOOM! */
+ __xe_driver_flr(xe);
+ }
}
/**
@@ -812,11 +849,9 @@ void xe_device_shutdown(struct xe_device *xe)
*/
void xe_device_wmb(struct xe_device *xe)
{
- struct xe_gt *gt = xe_root_mmio_gt(xe);
-
wmb();
if (IS_DGFX(xe))
- xe_mmio_write32(gt, VF_CAP_REG, 0);
+ xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
}
/**
@@ -857,7 +892,7 @@ void xe_device_td_flush(struct xe_device *xe)
if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
return;
- xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+ xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
/*
* FIXME: We can likely do better here with our choice of
* timeout. Currently we just assume the worst case, i.e. 150us,
@@ -865,7 +900,7 @@ void xe_device_td_flush(struct xe_device *xe)
* scenario on current platforms if all cache entries are
* transient and need to be flushed..
*/
- if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
+ if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
150, NULL, false))
xe_gt_err_once(gt, "TD flush timeout\n");
@@ -888,9 +923,9 @@ void xe_device_l2_flush(struct xe_device *xe)
return;
spin_lock(&gt->global_invl_lock);
- xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
+ xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
- if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true))
+ if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true))
xe_gt_err_once(gt, "Global invalidation timeout\n");
spin_unlock(&gt->global_invl_lock);
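
The mechanical change running through this file (and much of the series)
is that the xe_mmio_* accessors now take a struct xe_mmio rather than a
struct xe_gt, so tile-level registers no longer need a GT detour. Roughly:

    /* tile-scoped register: no GT involved */
    u32 cntl = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL);

    /* GT-scoped register: goes through the GT's embedded xe_mmio */
    struct xe_gt *gt = xe_root_mmio_gt(xe);
    u32 tdf = xe_mmio_read32(&gt->mmio, XE2_TDF_CTRL);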
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 894f04770454..4c3f0ebe78a9 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -9,6 +9,8 @@
#include <drm/drm_util.h>
#include "xe_device_types.h"
+#include "xe_gt_types.h"
+#include "xe_sriov.h"
static inline struct xe_device *to_xe_device(const struct drm_device *dev)
{
@@ -138,7 +140,7 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe)
static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
{
- return &gt->mmio.fw;
+ return &gt->pm.fw;
}
void xe_device_assert_mem_access(struct xe_device *xe);
@@ -153,11 +155,22 @@ static inline bool xe_device_has_sriov(struct xe_device *xe)
return xe->info.has_sriov;
}
+static inline bool xe_device_has_msix(struct xe_device *xe)
+{
+ /* TODO: change this when MSI-X support is fully integrated */
+ return false;
+}
+
static inline bool xe_device_has_memirq(struct xe_device *xe)
{
return GRAPHICS_VERx100(xe) >= 1250;
}
+static inline bool xe_device_uses_memirq(struct xe_device *xe)
+{
+ return xe_device_has_memirq(xe) && (IS_SRIOV_VF(xe) || xe_device_has_msix(xe));
+}
+
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index ec7eb7811126..85bede4dd646 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -14,7 +14,6 @@
#include "xe_devcoredump_types.h"
#include "xe_heci_gsc.h"
-#include "xe_gt_types.h"
#include "xe_lmtt_types.h"
#include "xe_memirq_types.h"
#include "xe_oa.h"
@@ -108,6 +107,45 @@ struct xe_mem_region {
};
/**
+ * struct xe_mmio - register mmio structure
+ *
+ * Represents an MMIO region that the CPU may use to access registers. A
+ * region may share its IO map with other regions (e.g., all GTs within a
+ * tile share the same map with their parent tile, but represent different
+ * subregions of the overall IO space).
+ */
+struct xe_mmio {
+ /** @tile: Backpointer to tile, used for tracing */
+ struct xe_tile *tile;
+
+ /** @regs: Map used to access registers. */
+ void __iomem *regs;
+
+ /**
+ * @sriov_vf_gt: Backpointer to GT.
+ *
+ * This pointer is only set for GT MMIO regions and only when running
+ * as an SRIOV VF.
+ */
+ struct xe_gt *sriov_vf_gt;
+
+ /**
+ * @regs_size: Length of the register region within the map.
+ *
+ * The size of the iomap set in *regs is generally larger than the
+ * register mmio space since it includes unused regions and/or
+ * non-register regions such as the GGTT PTEs.
+ */
+ size_t regs_size;
+
+ /** @adj_limit: adjust MMIO address if address is below this value */
+ u32 adj_limit;
+
+ /** @adj_offset: offset to add to MMIO address when adjusting */
+ u32 adj_offset;
+};
+
+/**
* struct xe_tile - hardware tile structure
*
* From a driver perspective, a "tile" is effectively a complete GPU, containing
@@ -148,26 +186,14 @@ struct xe_tile {
* * 4MB-8MB: reserved
* * 8MB-16MB: global GTT
*/
- struct {
- /** @mmio.size: size of tile's MMIO space */
- size_t size;
-
- /** @mmio.regs: pointer to tile's MMIO space (starting with registers) */
- void __iomem *regs;
- } mmio;
+ struct xe_mmio mmio;
/**
* @mmio_ext: MMIO-extension info for a tile.
*
* Each tile has its own additional 256MB (28-bit) MMIO-extension space.
*/
- struct {
- /** @mmio_ext.size: size of tile's additional MMIO-extension space */
- size_t size;
-
- /** @mmio_ext.regs: pointer to tile's additional MMIO-extension space */
- void __iomem *regs;
- } mmio_ext;
+ struct xe_mmio mmio_ext;
/** @mem: memory management info for tile */
struct {
@@ -200,14 +226,14 @@ struct xe_tile {
struct xe_lmtt lmtt;
} pf;
struct {
- /** @sriov.vf.memirq: Memory Based Interrupts. */
- struct xe_memirq memirq;
-
/** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */
struct xe_ggtt_node *ggtt_balloon[2];
} vf;
} sriov;
+ /** @memirq: Memory Based Interrupts. */
+ struct xe_memirq memirq;
+
/** @pcode: tile's PCODE */
struct {
/** @pcode.lock: protecting tile's PCODE mailbox data */
@@ -369,7 +395,7 @@ struct xe_device {
/** @usm.next_asid: next ASID, used to cyclical alloc asids */
u32 next_asid;
/** @usm.lock: protects UM state */
- struct mutex lock;
+ struct rw_semaphore lock;
} usm;
/** @pinned: pinned BO state */
@@ -396,6 +422,9 @@ struct xe_device {
/** @unordered_wq: used to serialize unordered work, mostly display */
struct workqueue_struct *unordered_wq;
+ /** @destroy_wq: used to serialize user destroy work, like queue */
+ struct workqueue_struct *destroy_wq;
+
/** @tiles: device tiles */
struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE];
@@ -567,15 +596,23 @@ struct xe_file {
struct {
/** @vm.xe: xarray to store VMs */
struct xarray xa;
- /** @vm.lock: protects file VM state */
+ /**
+ * @vm.lock: Protects VM lookup + reference and removal from the
+ * file xarray. Not intended to be an outer lock which does
+ * things while being held.
+ */
struct mutex lock;
} vm;
/** @exec_queue: Submission exec queue state for file */
struct {
- /** @exec_queue.xe: xarray to store engines */
+ /** @exec_queue.xa: xarray to store exec queues */
struct xarray xa;
- /** @exec_queue.lock: protects file engine state */
+ /**
+ * @exec_queue.lock: Protects exec queue lookup + reference and
+ * removal from the file xarray. Not intended to be an outer
+ * lock which does anything while being held.
+ */
struct mutex lock;
} exec_queue;
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index c4add8b38bbd..fb52a23e28f8 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -283,8 +283,15 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
/* Accumulate all the exec queues from this client */
mutex_lock(&xef->exec_queue.lock);
- xa_for_each(&xef->exec_queue.xa, i, q)
+ xa_for_each(&xef->exec_queue.xa, i, q) {
+ xe_exec_queue_get(q);
+ mutex_unlock(&xef->exec_queue.lock);
+
xe_exec_queue_update_run_ticks(q);
+
+ mutex_lock(&xef->exec_queue.lock);
+ xe_exec_queue_put(q);
+ }
mutex_unlock(&xef->exec_queue.lock);
/* Get the total GPU cycles */
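The hunk above pins each queue before dropping the lock, since xe_exec_queue_update_run_ticks() may sleep. A condensed sketch of the pattern with explanatory comments (illustrative only):

	mutex_lock(&xef->exec_queue.lock);
	xa_for_each(&xef->exec_queue.xa, i, q) {
		xe_exec_queue_get(q);			/* keep q alive across the unlock */
		mutex_unlock(&xef->exec_queue.lock);

		xe_exec_queue_update_run_ticks(q);	/* may sleep / take other locks */

		mutex_lock(&xef->exec_queue.lock);
		xe_exec_queue_put(q);			/* final put is safe: lock held again */
	}
	mutex_unlock(&xef->exec_queue.lock);

Dropping the lock mid-walk is safe here because xa_for_each() iterates by index and simply resumes at the next present entry once the lock is re-taken.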
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 7f28b7fc68d5..d098d2dd1b2d 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -635,14 +635,14 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
}
}
- mutex_lock(&xef->exec_queue.lock);
+ q->xef = xe_file_get(xef);
+
+ /* user id alloc must always be last in ioctl to prevent UAF */
err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
- mutex_unlock(&xef->exec_queue.lock);
if (err)
goto kill_exec_queue;
args->exec_queue_id = id;
- q->xef = xe_file_get(xef);
return 0;
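To make the use-after-free that the new comment guards against concrete, a hypothetical interleaving with the old ordering (ID published before q->xef was set):

	/*
	 *   CREATE ioctl                      DESTROY ioctl (another thread)
	 *   ------------                      ------------------------------
	 *   xa_alloc() -> id visible
	 *                                     xa_erase(id)
	 *                                     xe_exec_queue_put(q) -> q freed
	 *   q->xef = xe_file_get(xef)         <-- write to freed memory
	 */

Initializing everything before xa_alloc() closes that window, which is why the user-visible ID allocation must come last in the ioctl.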
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 6a59165b9569..f3b71fe7a96d 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -44,6 +44,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
u32 ctx_id)
{
struct xe_gt *gt = hwe->gt;
+ struct xe_mmio *mmio = &gt->mmio;
struct xe_device *xe = gt_to_xe(gt);
u64 lrc_desc;
@@ -58,7 +59,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
}
if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
- xe_mmio_write32(hwe->gt, RCU_MODE,
+ xe_mmio_write32(mmio, RCU_MODE,
_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
@@ -76,17 +77,17 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
*/
wmb();
- xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base),
+ xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
xe_bo_ggtt_addr(hwe->hwsp));
- xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base));
- xe_mmio_write32(gt, RING_MODE(hwe->mmio_base),
+ xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));
+ xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base),
_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
- xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
+ xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
lower_32_bits(lrc_desc));
- xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
+ xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
upper_32_bits(lrc_desc));
- xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base),
+ xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base),
EL_CTRL_LOAD);
}
@@ -168,8 +169,8 @@ static u64 read_execlist_status(struct xe_hw_engine *hwe)
struct xe_gt *gt = hwe->gt;
u32 hi, lo;
- lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
- hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
+ lo = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
+ hi = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
return lo | (u64)hi << 32;
}
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index b263fff15273..a64c14757c84 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -100,7 +100,7 @@ static void __domain_ctl(struct xe_gt *gt, struct xe_force_wake_domain *domain,
if (IS_SRIOV_VF(gt_to_xe(gt)))
return;
- xe_mmio_write32(gt, domain->reg_ctl, domain->mask | (wake ? domain->val : 0));
+ xe_mmio_write32(&gt->mmio, domain->reg_ctl, domain->mask | (wake ? domain->val : 0));
}
static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain, bool wake)
@@ -111,7 +111,7 @@ static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain,
if (IS_SRIOV_VF(gt_to_xe(gt)))
return 0;
- ret = xe_mmio_wait32(gt, domain->reg_ack, domain->val, wake ? domain->val : 0,
+ ret = xe_mmio_wait32(&gt->mmio, domain->reg_ack, domain->val, wake ? domain->val : 0,
XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
&value, true);
if (ret)
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 2895f154654c..47bfd9d2635d 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -5,6 +5,7 @@
#include "xe_ggtt.h"
+#include <linux/fault-inject.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/sizes.h>
@@ -107,8 +108,10 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev)
static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
{
- struct xe_gt *gt = XE_WA(ggtt->tile->primary_gt, 22019338487) ? ggtt->tile->primary_gt :
- ggtt->tile->media_gt;
+ struct xe_tile *tile = ggtt->tile;
+ struct xe_gt *affected_gt = XE_WA(tile->primary_gt, 22019338487) ?
+ tile->primary_gt : tile->media_gt;
+ struct xe_mmio *mmio = &affected_gt->mmio;
u32 max_gtt_writes = XE_WA(ggtt->tile->primary_gt, 22019338487) ? 1100 : 63;
/*
* Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit
@@ -118,7 +121,7 @@ static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
lockdep_assert_held(&ggtt->lock);
if ((++ggtt->access_count % max_gtt_writes) == 0) {
- xe_mmio_write32(gt, GMD_ID, 0x0);
+ xe_mmio_write32(mmio, GMD_ID, 0x0);
ggtt->access_count = 0;
}
}
@@ -262,6 +265,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_ggtt_init_early, ERRNO); /* See xe_pci_probe() */
static void xe_ggtt_invalidate(struct xe_ggtt *ggtt);
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
index c518d1d16d82..50361b4638f9 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
@@ -90,6 +90,11 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched)
cancel_work_sync(&sched->work_process_msg);
}
+void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched)
+{
+ drm_sched_resume_timeout(&sched->base, sched->base.timeout);
+}
+
void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
struct xe_sched_msg *msg)
{
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
index cee9c6809fc0..5ad5629a6c60 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -22,6 +22,8 @@ void xe_sched_fini(struct xe_gpu_scheduler *sched);
void xe_sched_submission_start(struct xe_gpu_scheduler *sched);
void xe_sched_submission_stop(struct xe_gpu_scheduler *sched);
+void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched);
+
void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
struct xe_sched_msg *msg);
void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched,
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index 6fbea70d3d36..783b09bf3681 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -34,6 +34,7 @@
#include "instructions/xe_gsc_commands.h"
#include "regs/xe_gsc_regs.h"
#include "regs/xe_gt_regs.h"
+#include "regs/xe_irq_regs.h"
static struct xe_gt *
gsc_to_gt(struct xe_gsc *gsc)
@@ -179,7 +180,7 @@ out_bo:
static int gsc_fw_is_loaded(struct xe_gt *gt)
{
- return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
+ return xe_mmio_read32(&gt->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
HECI1_FWSTS1_INIT_COMPLETE;
}
@@ -190,7 +191,7 @@ static int gsc_fw_wait(struct xe_gt *gt)
* executed by the GSCCS. To account for possible submission delays or
* other issues, we use a 500ms timeout in the wait here.
*/
- return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
+ return xe_mmio_wait32(&gt->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
HECI1_FWSTS1_INIT_COMPLETE,
HECI1_FWSTS1_INIT_COMPLETE,
500 * USEC_PER_MSEC, NULL, false);
@@ -330,7 +331,7 @@ static int gsc_er_complete(struct xe_gt *gt)
* so in that scenario we're always guaranteed to find the correct
* value.
*/
- er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE;
+ er_status = xe_mmio_read32(&gt->mmio, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE;
if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) {
/*
@@ -581,11 +582,11 @@ void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
return;
- xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);
+ xe_mmio_rmw32(&gt->mmio, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);
if (prep) {
/* make sure the reset bit is clear when writing the CSR reg */
- xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
+ xe_mmio_rmw32(&gt->mmio, HECI_H_CSR(MTL_GSC_HECI2_BASE),
HECI_H_CSR_RST, HECI_H_CSR_IG);
msleep(200);
}
@@ -599,6 +600,7 @@ void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p)
{
struct xe_gt *gt = gsc_to_gt(gsc);
+ struct xe_mmio *mmio = &gt->mmio;
int err;
xe_uc_fw_print(&gsc->fw, p);
@@ -613,12 +615,12 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p)
return;
drm_printf(p, "\nHECI1 FWSTS: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
- xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS2(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS3(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS4(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS5(MTL_GSC_HECI1_BASE)),
- xe_mmio_read32(gt, HECI_FWSTS6(MTL_GSC_HECI1_BASE)));
+ xe_mmio_read32(mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS2(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS3(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS4(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS5(MTL_GSC_HECI1_BASE)),
+ xe_mmio_read32(mmio, HECI_FWSTS6(MTL_GSC_HECI1_BASE)));
xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
}
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index 2d6ea8c01445..6d89c22ae811 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -65,7 +65,7 @@ gsc_to_gt(struct xe_gsc *gsc)
bool xe_gsc_proxy_init_done(struct xe_gsc *gsc)
{
struct xe_gt *gt = gsc_to_gt(gsc);
- u32 fwsts1 = xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE));
+ u32 fwsts1 = xe_mmio_read32(&gt->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE));
return REG_FIELD_GET(HECI1_FWSTS1_CURRENT_STATE, fwsts1) ==
HECI1_FWSTS1_PROXY_STATE_NORMAL;
@@ -78,7 +78,7 @@ static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set)
/* make sure we never accidentally write the RST bit */
clr |= HECI_H_CSR_RST;
- xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), clr, set);
+ xe_mmio_rmw32(&gt->mmio, HECI_H_CSR(MTL_GSC_HECI2_BASE), clr, set);
}
static void gsc_proxy_irq_clear(struct xe_gsc *gsc)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index f0dc2bf24c7b..2d5e78311b76 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -108,7 +108,7 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
return;
if (!xe_gt_is_media_type(gt)) {
- xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH);
+ xe_mmio_write32(&gt->mmio, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH);
reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
reg |= CG_DIS_CNTLBUS;
xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
@@ -245,7 +245,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
else if (entry->clr_bits + 1)
val = (reg.mcr ?
xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
- xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+ xe_mmio_read32(&gt->mmio, reg)) & (~entry->clr_bits);
else
val = 0;
@@ -440,7 +440,7 @@ static int gt_fw_domain_init(struct xe_gt *gt)
* Stash hardware-reported version. Since this register does not exist
* on pre-MTL platforms, reading it there will (correctly) return 0.
*/
- gt->info.gmdid = xe_mmio_read32(gt, GMD_ID);
+ gt->info.gmdid = xe_mmio_read32(&gt->mmio, GMD_ID);
err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
XE_WARN_ON(err);
@@ -623,6 +623,30 @@ int xe_gt_init(struct xe_gt *gt)
return 0;
}
+/**
+ * xe_gt_mmio_init() - Initialize GT's MMIO access
+ * @gt: the GT object
+ *
+ * Initialize GT's MMIO accessor, which will be used to access registers inside
+ * this GT.
+ */
+void xe_gt_mmio_init(struct xe_gt *gt)
+{
+ struct xe_tile *tile = gt_to_tile(gt);
+
+ gt->mmio.regs = tile->mmio.regs;
+ gt->mmio.regs_size = tile->mmio.regs_size;
+ gt->mmio.tile = tile;
+
+ if (gt->info.type == XE_GT_TYPE_MEDIA) {
+ gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
+ gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
+ }
+
+ if (IS_SRIOV_VF(gt_to_xe(gt)))
+ gt->mmio.sriov_vf_gt = gt;
+}
+
void xe_gt_record_user_engines(struct xe_gt *gt)
{
struct xe_hw_engine *hwe;
@@ -650,8 +674,8 @@ static int do_gt_reset(struct xe_gt *gt)
xe_gsc_wa_14015076503(gt, true);
- xe_mmio_write32(gt, GDRST, GRDOM_FULL);
- err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
+ xe_mmio_write32(&gt->mmio, GDRST, GRDOM_FULL);
+ err = xe_mmio_wait32(&gt->mmio, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
if (err)
xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n",
ERR_PTR(err));
@@ -862,6 +886,13 @@ err_msg:
return err;
}
+void xe_gt_shutdown(struct xe_gt *gt)
+{
+ xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ do_gt_reset(gt);
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+}
+
/**
* xe_gt_sanitize_freq() - Restore saved frequencies if necessary.
* @gt: the GT object
@@ -874,7 +905,9 @@ int xe_gt_sanitize_freq(struct xe_gt *gt)
int ret = 0;
if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) ||
- xe_uc_fw_is_loaded(&gt->uc.gsc.fw)) && XE_WA(gt, 22019338487))
+ xe_uc_fw_is_loaded(&gt->uc.gsc.fw) ||
+ xe_uc_fw_is_in_error_state(&gt->uc.gsc.fw)) &&
+ XE_WA(gt, 22019338487))
ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc);
return ret;
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index ee138e9768a2..82b9b7f82fca 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -31,6 +31,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
int xe_gt_init_hwconfig(struct xe_gt *gt);
int xe_gt_init_early(struct xe_gt *gt);
int xe_gt_init(struct xe_gt *gt);
+void xe_gt_mmio_init(struct xe_gt *gt);
void xe_gt_declare_wedged(struct xe_gt *gt);
int xe_gt_record_default_lrcs(struct xe_gt *gt);
@@ -48,6 +49,7 @@ void xe_gt_record_user_engines(struct xe_gt *gt);
void xe_gt_suspend_prepare(struct xe_gt *gt);
int xe_gt_suspend(struct xe_gt *gt);
+void xe_gt_shutdown(struct xe_gt *gt);
int xe_gt_resume(struct xe_gt *gt);
void xe_gt_reset_async(struct xe_gt *gt);
void xe_gt_sanitize(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
index d2e4dc3aaf61..9360ac4de489 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -68,7 +68,7 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
}
}
- xe_mmio_write32(gt, CCS_MODE, mode);
+ xe_mmio_write32(&gt->mmio, CCS_MODE, mode);
xe_gt_dbg(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
mode, config, num_engines, num_slices);
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 86c2d62b4bdc..cc2ae159298e 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -17,7 +17,7 @@
static u32 read_reference_ts_freq(struct xe_gt *gt)
{
- u32 ts_override = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE);
+ u32 ts_override = xe_mmio_read32(&gt->mmio, TIMESTAMP_OVERRIDE);
u32 base_freq, frac_freq;
base_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK,
@@ -57,7 +57,7 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg)
int xe_gt_clock_init(struct xe_gt *gt)
{
- u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE);
+ u32 ctc_reg = xe_mmio_read32(&gt->mmio, CTC_MODE);
u32 freq = 0;
/* Assuming gen11+ so assert this assumption is correct */
@@ -66,7 +66,7 @@ int xe_gt_clock_init(struct xe_gt *gt)
if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) {
freq = read_reference_ts_freq(gt);
} else {
- u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0);
+ u32 c0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
freq = get_crystal_clock_freq(c0);
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 8f95d3a5949b..cbc43973ff7e 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -15,6 +15,7 @@
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
+#include "xe_gt_idle.h"
#include "xe_gt_sriov_pf_debugfs.h"
#include "xe_gt_sriov_vf_debugfs.h"
#include "xe_gt_stats.h"
@@ -109,6 +110,17 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p)
return 0;
}
+static int powergate_info(struct xe_gt *gt, struct drm_printer *p)
+{
+ int ret;
+
+ xe_pm_runtime_get(gt_to_xe(gt));
+ ret = xe_gt_idle_pg_print(gt, p);
+ xe_pm_runtime_put(gt_to_xe(gt));
+
+ return ret;
+}
+
static int force_reset(struct xe_gt *gt, struct drm_printer *p)
{
xe_pm_runtime_get(gt_to_xe(gt));
@@ -288,6 +300,7 @@ static const struct drm_info_list debugfs_list[] = {
{"topology", .show = xe_gt_debugfs_simple_show, .data = topology},
{"steering", .show = xe_gt_debugfs_simple_show, .data = steering},
{"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt},
+ {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info},
{"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore},
{"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds},
{"pat", .show = xe_gt_debugfs_simple_show, .data = pat},
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index 68a5778b4319..6bd39b2c5003 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -11,9 +11,9 @@
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
-#include "xe_device_types.h"
#include "xe_gt_sysfs.h"
#include "xe_gt_throttle.h"
+#include "xe_gt_types.h"
#include "xe_guc_pc.h"
#include "xe_pm.h"
@@ -237,11 +237,11 @@ int xe_gt_freq_init(struct xe_gt *gt)
if (!gt->freq)
return -ENOMEM;
- err = devm_add_action(xe->drm.dev, freq_fini, gt->freq);
+ err = sysfs_create_files(gt->freq, freq_attrs);
if (err)
return err;
- err = sysfs_create_files(gt->freq, freq_attrs);
+ err = devm_add_action_or_reset(xe->drm.dev, freq_fini, gt->freq);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index 67aba4140510..746812aee8ff 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -98,7 +98,9 @@ static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency)
void xe_gt_idle_enable_pg(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
- u32 pg_enable;
+ struct xe_gt_idle *gtidle = &gt->gtidle;
+ struct xe_mmio *mmio = &gt->mmio;
+ u32 vcs_mask, vecs_mask;
int i, j;
if (IS_SRIOV_VF(xe))
@@ -110,12 +112,19 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
xe_device_assert_mem_access(gt_to_xe(gt));
- pg_enable = RENDER_POWERGATE_ENABLE | MEDIA_POWERGATE_ENABLE;
+ vcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_DECODE);
+ vecs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE);
+
+ if (vcs_mask || vecs_mask)
+ gtidle->powergate_enable = MEDIA_POWERGATE_ENABLE;
+
+ if (!xe_gt_is_media_type(gt))
+ gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE;
for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
if ((gt->info.engine_mask & BIT(i)))
- pg_enable |= (VDN_HCP_POWERGATE_ENABLE(j) |
- VDN_MFXVDENC_POWERGATE_ENABLE(j));
+ gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) |
+ VDN_MFXVDENC_POWERGATE_ENABLE(j));
}
XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
@@ -124,25 +133,113 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
* GuC sets the hysteresis value when GuC PC is enabled
* else set it to 25 (25 * 1.28us)
*/
- xe_mmio_write32(gt, MEDIA_POWERGATE_IDLE_HYSTERESIS, 25);
- xe_mmio_write32(gt, RENDER_POWERGATE_IDLE_HYSTERESIS, 25);
+ xe_mmio_write32(mmio, MEDIA_POWERGATE_IDLE_HYSTERESIS, 25);
+ xe_mmio_write32(mmio, RENDER_POWERGATE_IDLE_HYSTERESIS, 25);
}
- xe_mmio_write32(gt, POWERGATE_ENABLE, pg_enable);
+ xe_mmio_write32(mmio, POWERGATE_ENABLE, gtidle->powergate_enable);
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
}
void xe_gt_idle_disable_pg(struct xe_gt *gt)
{
+ struct xe_gt_idle *gtidle = &gt->gtidle;
+
if (IS_SRIOV_VF(gt_to_xe(gt)))
return;
xe_device_assert_mem_access(gt_to_xe(gt));
+ gtidle->powergate_enable = 0;
+
XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ xe_mmio_write32(&gt->mmio, POWERGATE_ENABLE, gtidle->powergate_enable);
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
+}
- xe_mmio_write32(gt, POWERGATE_ENABLE, 0);
+/**
+ * xe_gt_idle_pg_print() - Print GT power gating info
+ * @gt: GT object
+ * @p: drm_printer
+ *
+ * This function prints the power gating information.
+ *
+ * Return: 0 on success, negative error code otherwise
+ */
+int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p)
+{
+ struct xe_gt_idle *gtidle = &gt->gtidle;
+ struct xe_device *xe = gt_to_xe(gt);
+ enum xe_gt_idle_state state;
+ u32 pg_enabled, pg_status = 0;
+ u32 vcs_mask, vecs_mask;
+ int err, n;
+ /*
+ * Media Slices
+ *
+ * Slice 0: VCS0, VCS1, VECS0
+ * Slice 1: VCS2, VCS3, VECS1
+ * Slice 2: VCS4, VCS5, VECS2
+ * Slice 3: VCS6, VCS7, VECS3
+ */
+ static const struct {
+ u64 engines;
+ u32 status_bit;
+ } media_slices[] = {
+ {(BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS1) |
+ BIT(XE_HW_ENGINE_VECS0)), MEDIA_SLICE0_AWAKE_STATUS},
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
+ {(BIT(XE_HW_ENGINE_VCS2) | BIT(XE_HW_ENGINE_VCS3) |
+ BIT(XE_HW_ENGINE_VECS1)), MEDIA_SLICE1_AWAKE_STATUS},
+
+ {(BIT(XE_HW_ENGINE_VCS4) | BIT(XE_HW_ENGINE_VCS5) |
+ BIT(XE_HW_ENGINE_VECS2)), MEDIA_SLICE2_AWAKE_STATUS},
+
+ {(BIT(XE_HW_ENGINE_VCS6) | BIT(XE_HW_ENGINE_VCS7) |
+ BIT(XE_HW_ENGINE_VECS3)), MEDIA_SLICE3_AWAKE_STATUS},
+ };
+
+ if (xe->info.platform == XE_PVC) {
+ drm_printf(p, "Power Gating not supported\n");
+ return 0;
+ }
+
+ state = gtidle->idle_status(gtidle_to_pc(gtidle));
+ pg_enabled = gtidle->powergate_enable;
+
+ /* Do not wake the GT to read powergating status */
+ if (state != GT_IDLE_C6) {
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return err;
+
+ pg_enabled = xe_mmio_read32(&gt->mmio, POWERGATE_ENABLE);
+ pg_status = xe_mmio_read32(&gt->mmio, POWERGATE_DOMAIN_STATUS);
+
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
+ }
+
+ if (gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK) {
+ drm_printf(p, "Render Power Gating Enabled: %s\n",
+ str_yes_no(pg_enabled & RENDER_POWERGATE_ENABLE));
+
+ drm_printf(p, "Render Power Gate Status: %s\n",
+ str_up_down(pg_status & RENDER_AWAKE_STATUS));
+ }
+
+ vcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_DECODE);
+ vecs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE);
+
+ /* Print media CPG status only if media is present */
+ if (vcs_mask || vecs_mask) {
+ drm_printf(p, "Media Power Gating Enabled: %s\n",
+ str_yes_no(pg_enabled & MEDIA_POWERGATE_ENABLE));
+
+ for (n = 0; n < ARRAY_SIZE(media_slices); n++)
+ if (gt->info.engine_mask & media_slices[n].engines)
+ drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n,
+ str_up_down(pg_status & media_slices[n].status_bit));
+ }
+ return 0;
}
static ssize_t name_show(struct device *dev,
@@ -260,9 +357,9 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt)
return;
/* Units of 1280 ns for a total of 5s */
- xe_mmio_write32(gt, RC_IDLE_HYSTERSIS, 0x3B9ACA);
+ xe_mmio_write32(&gt->mmio, RC_IDLE_HYSTERSIS, 0x3B9ACA);
/* Enable RC6 */
- xe_mmio_write32(gt, RC_CONTROL,
+ xe_mmio_write32(&gt->mmio, RC_CONTROL,
RC_CTL_HW_ENABLE | RC_CTL_TO_MODE | RC_CTL_RC6_ENABLE);
}
@@ -274,6 +371,6 @@ void xe_gt_idle_disable_c6(struct xe_gt *gt)
if (IS_SRIOV_VF(gt_to_xe(gt)))
return;
- xe_mmio_write32(gt, RC_CONTROL, 0);
- xe_mmio_write32(gt, RC_STATE, 0);
+ xe_mmio_write32(&gt->mmio, RC_CONTROL, 0);
+ xe_mmio_write32(&gt->mmio, RC_STATE, 0);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h
index 554447b5d46d..4455a6501cb0 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.h
+++ b/drivers/gpu/drm/xe/xe_gt_idle.h
@@ -8,6 +8,7 @@
#include "xe_gt_idle_types.h"
+struct drm_printer;
struct xe_gt;
int xe_gt_idle_init(struct xe_gt_idle *gtidle);
@@ -15,5 +16,6 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt);
void xe_gt_idle_disable_c6(struct xe_gt *gt);
void xe_gt_idle_enable_pg(struct xe_gt *gt);
void xe_gt_idle_disable_pg(struct xe_gt *gt);
+int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p);
#endif /* _XE_GT_IDLE_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h
index f99b447534f3..b8b297a3f884 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h
@@ -23,6 +23,8 @@ enum xe_gt_idle_state {
struct xe_gt_idle {
/** @name: name */
char name[16];
+ /** @powergate_enable: copy of powergate enable bits */
+ u32 powergate_enable;
/** @residency_multiplier: residency multiplier in ns */
u32 residency_multiplier;
/** @cur_residency: raw driver copy of idle residency */
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 7d7bd0be6233..4c0767403881 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -239,11 +239,13 @@ static const struct xe_mmio_range xe2lpm_instance0_steering_table[] = {
static void init_steering_l3bank(struct xe_gt *gt)
{
+ struct xe_mmio *mmio = &gt->mmio;
+
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
- xe_mmio_read32(gt, MIRROR_FUSE3));
+ xe_mmio_read32(mmio, MIRROR_FUSE3));
u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK,
- xe_mmio_read32(gt, XEHP_FUSE4));
+ xe_mmio_read32(mmio, XEHP_FUSE4));
/*
* Group selects mslice, instance selects bank within mslice.
@@ -254,7 +256,7 @@ static void init_steering_l3bank(struct xe_gt *gt)
bank_mask & BIT(0) ? 0 : 2;
} else if (gt_to_xe(gt)->info.platform == XE_DG2) {
u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
- xe_mmio_read32(gt, MIRROR_FUSE3));
+ xe_mmio_read32(mmio, MIRROR_FUSE3));
u32 bank = __ffs(mslice_mask) * 8;
/*
@@ -266,7 +268,7 @@ static void init_steering_l3bank(struct xe_gt *gt)
gt->steering[L3BANK].instance_target = bank & 0x3;
} else {
u32 fuse = REG_FIELD_GET(L3BANK_MASK,
- ~xe_mmio_read32(gt, MIRROR_FUSE3));
+ ~xe_mmio_read32(mmio, MIRROR_FUSE3));
gt->steering[L3BANK].group_target = 0; /* unused */
gt->steering[L3BANK].instance_target = __ffs(fuse);
@@ -276,7 +278,7 @@ static void init_steering_l3bank(struct xe_gt *gt)
static void init_steering_mslice(struct xe_gt *gt)
{
u32 mask = REG_FIELD_GET(MEML3_EN_MASK,
- xe_mmio_read32(gt, MIRROR_FUSE3));
+ xe_mmio_read32(&gt->mmio, MIRROR_FUSE3));
/*
* mslice registers are valid (not terminated) if either the meml3
@@ -380,7 +382,7 @@ static void init_steering_oaddrm(struct xe_gt *gt)
static void init_steering_sqidi_psmi(struct xe_gt *gt)
{
u32 mask = REG_FIELD_GET(XE2_NODE_ENABLE_MASK,
- xe_mmio_read32(gt, MIRROR_FUSE3));
+ xe_mmio_read32(&gt->mmio, MIRROR_FUSE3));
u32 select = __ffs(mask);
gt->steering[SQIDI_PSMI].group_target = select >> 1;
@@ -439,7 +441,7 @@ void xe_gt_mcr_init(struct xe_gt *gt)
if (gt->info.type == XE_GT_TYPE_MEDIA) {
drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13);
- if (MEDIA_VER(xe) >= 20) {
+ if (MEDIA_VERx100(xe) >= 1301) {
gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table;
gt->steering[INSTANCE0].ranges = xe2lpm_instance0_steering_table;
} else {
@@ -494,8 +496,8 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) |
REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2);
- xe_mmio_write32(gt, MCFG_MCR_SELECTOR, steer_val);
- xe_mmio_write32(gt, SF_MCR_SELECTOR, steer_val);
+ xe_mmio_write32(&gt->mmio, MCFG_MCR_SELECTOR, steer_val);
+ xe_mmio_write32(&gt->mmio, SF_MCR_SELECTOR, steer_val);
/*
* For GAM registers, all reads should be directed to instance 1
* (unicast reads against other instances are not allowed),
@@ -533,7 +535,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
continue;
for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) {
- if (xe_mmio_in_range(gt, &gt->steering[type].ranges[i], reg)) {
+ if (xe_mmio_in_range(&gt->mmio, &gt->steering[type].ranges[i], reg)) {
*group = gt->steering[type].group_target;
*instance = gt->steering[type].instance_target;
return true;
@@ -544,7 +546,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
implicit_ranges = gt->steering[IMPLICIT_STEERING].ranges;
if (implicit_ranges)
for (int i = 0; implicit_ranges[i].end > 0; i++)
- if (xe_mmio_in_range(gt, &implicit_ranges[i], reg))
+ if (xe_mmio_in_range(&gt->mmio, &implicit_ranges[i], reg))
return false;
/*
@@ -579,7 +581,7 @@ static void mcr_lock(struct xe_gt *gt) __acquires(&gt->mcr_lock)
* when a read to the relevant register returns 1.
*/
if (GRAPHICS_VERx100(xe) >= 1270)
- ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL,
+ ret = xe_mmio_wait32(&gt->mmio, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL,
true);
drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
@@ -589,7 +591,7 @@ static void mcr_unlock(struct xe_gt *gt) __releases(&gt->mcr_lock)
{
/* Release hardware semaphore - this is done by writing 1 to the register */
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
- xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1);
+ xe_mmio_write32(&gt->mmio, STEER_SEMAPHORE, 0x1);
spin_unlock(&gt->mcr_lock);
}
@@ -603,6 +605,7 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
u8 rw_flag, int group, int instance, u32 value)
{
const struct xe_reg reg = to_xe_reg(reg_mcr);
+ struct xe_mmio *mmio = &gt->mmio;
struct xe_reg steer_reg;
u32 steer_val, val = 0;
@@ -635,12 +638,12 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
if (rw_flag == MCR_OP_READ)
steer_val |= MCR_MULTICAST;
- xe_mmio_write32(gt, steer_reg, steer_val);
+ xe_mmio_write32(mmio, steer_reg, steer_val);
if (rw_flag == MCR_OP_READ)
- val = xe_mmio_read32(gt, reg);
+ val = xe_mmio_read32(mmio, reg);
else
- xe_mmio_write32(gt, reg, value);
+ xe_mmio_write32(mmio, reg, value);
/*
* If we turned off the multicast bit (during a write) we're required
@@ -649,7 +652,7 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
* operation.
*/
if (rw_flag == MCR_OP_WRITE)
- xe_mmio_write32(gt, steer_reg, MCR_MULTICAST);
+ xe_mmio_write32(mmio, steer_reg, MCR_MULTICAST);
return val;
}
@@ -684,7 +687,7 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr reg_mcr)
group, instance, 0);
mcr_unlock(gt);
} else {
- val = xe_mmio_read32(gt, reg);
+ val = xe_mmio_read32(&gt->mmio, reg);
}
return val;
@@ -757,7 +760,7 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
* to touch the steering register.
*/
mcr_lock(gt);
- xe_mmio_write32(gt, reg, value);
+ xe_mmio_write32(&gt->mmio, reg, value);
mcr_unlock(gt);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 00af059a8971..79c426dc2505 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -185,6 +185,21 @@ unlock_dma_resv:
return err;
}
+static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid)
+{
+ struct xe_vm *vm;
+
+ down_read(&xe->usm.lock);
+ vm = xa_load(&xe->usm.asid_to_vm, asid);
+ if (vm && xe_vm_in_fault_mode(vm))
+ xe_vm_get(vm);
+ else
+ vm = ERR_PTR(-EINVAL);
+ up_read(&xe->usm.lock);
+
+ return vm;
+}
+
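This helper pairs with the earlier switch of usm.lock from a mutex to an rw_semaphore: page-fault and access-counter handlers on different GTs can now perform ASID lookups concurrently under the read side, while ASID allocation takes the write side. A sketch of the allocation site under the new locking (assumed shape, using the xarray cyclic-alloc API):

	down_write(&xe->usm.lock);
	err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
			      XA_LIMIT(1, XE_MAX_ASID - 1),
			      &xe->usm.next_asid, GFP_KERNEL);
	up_write(&xe->usm.lock);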
static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
{
struct xe_device *xe = gt_to_xe(gt);
@@ -197,16 +212,9 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
if (pf->trva_fault)
return -EFAULT;
- /* ASID to VM */
- mutex_lock(&xe->usm.lock);
- vm = xa_load(&xe->usm.asid_to_vm, pf->asid);
- if (vm && xe_vm_in_fault_mode(vm))
- xe_vm_get(vm);
- else
- vm = NULL;
- mutex_unlock(&xe->usm.lock);
- if (!vm)
- return -EINVAL;
+ vm = asid_to_vm(xe, pf->asid);
+ if (IS_ERR(vm))
+ return PTR_ERR(vm);
/*
* TODO: Change to read lock? Using write lock for simplicity.
@@ -548,14 +556,9 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
if (acc->access_type != ACC_TRIGGER)
return -EINVAL;
- /* ASID to VM */
- mutex_lock(&xe->usm.lock);
- vm = xa_load(&xe->usm.asid_to_vm, acc->asid);
- if (vm)
- xe_vm_get(vm);
- mutex_unlock(&xe->usm.lock);
- if (!vm || !xe_vm_in_fault_mode(vm))
- return -EINVAL;
+ vm = asid_to_vm(xe, acc->asid);
+ if (IS_ERR(vm))
+ return PTR_ERR(vm);
down_read(&vm->lock);
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
index d6228baaff1e..5dc71394372d 100644
--- a/drivers/gpu/drm/xe/xe_gt_printk.h
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -8,7 +8,7 @@
#include <drm/drm_print.h>
-#include "xe_device_types.h"
+#include "xe_gt_types.h"
#define xe_gt_printk(_gt, _level, _fmt, ...) \
drm_##_level(&gt_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index 905f409db74b..e71fc3d2bda2 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -5,12 +5,15 @@
#include <drm/drm_managed.h>
+#include "regs/xe_guc_regs.h"
#include "regs/xe_regs.h"
+#include "xe_gt.h"
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_pf_config.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_pf_service.h"
#include "xe_mmio.h"
@@ -72,7 +75,7 @@ static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe)
static void pf_enable_ggtt_guest_update(struct xe_gt *gt)
{
- xe_mmio_write32(gt, VIRTUAL_CTRL_REG, GUEST_GTT_UPDATE_EN);
+ xe_mmio_write32(&gt->mmio, VIRTUAL_CTRL_REG, GUEST_GTT_UPDATE_EN);
}
/**
@@ -87,6 +90,57 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
pf_enable_ggtt_guest_update(gt);
xe_gt_sriov_pf_service_update(gt);
+ xe_gt_sriov_pf_migration_init(gt);
+}
+
+static u32 pf_get_vf_regs_stride(struct xe_device *xe)
+{
+ return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000;
+}
+
+static struct xe_reg xe_reg_vf_to_pf(struct xe_reg vf_reg, unsigned int vfid, u32 stride)
+{
+ struct xe_reg pf_reg = vf_reg;
+
+ pf_reg.vf = 0;
+ pf_reg.addr += stride * vfid;
+
+ return pf_reg;
+}
+
+static void pf_clear_vf_scratch_regs(struct xe_gt *gt, unsigned int vfid)
+{
+ u32 stride = pf_get_vf_regs_stride(gt_to_xe(gt));
+ struct xe_reg scratch;
+ int n, count;
+
+ if (xe_gt_is_media_type(gt)) {
+ count = MED_VF_SW_FLAG_COUNT;
+ for (n = 0; n < count; n++) {
+ scratch = xe_reg_vf_to_pf(MED_VF_SW_FLAG(n), vfid, stride);
+ xe_mmio_write32(&gt->mmio, scratch, 0);
+ }
+ } else {
+ count = VF_SW_FLAG_COUNT;
+ for (n = 0; n < count; n++) {
+ scratch = xe_reg_vf_to_pf(VF_SW_FLAG(n), vfid, stride);
+ xe_mmio_write32(&gt->mmio, scratch, 0);
+ }
+ }
+}
+
+/**
+ * xe_gt_sriov_pf_sanitize_hw() - Reset hardware state related to a VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ */
+void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ pf_clear_vf_scratch_regs(gt, vfid);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
index f0cb726a6919..96fab779a906 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -11,6 +11,7 @@ struct xe_gt;
#ifdef CONFIG_PCI_IOV
int xe_gt_sriov_pf_init_early(struct xe_gt *gt);
void xe_gt_sriov_pf_init_hw(struct xe_gt *gt);
+void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid);
void xe_gt_sriov_pf_restart(struct xe_gt *gt);
#else
static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
index 02f7328bd6ce..1f50aec3a059 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
@@ -9,9 +9,11 @@
#include "xe_device.h"
#include "xe_gt.h"
+#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_pf_config.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_gt_sriov_pf_service.h"
#include "xe_gt_sriov_printk.h"
@@ -176,6 +178,7 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
CASE2STR(PAUSE_SEND_PAUSE);
CASE2STR(PAUSE_WAIT_GUC);
CASE2STR(PAUSE_GUC_DONE);
+ CASE2STR(PAUSE_SAVE_GUC);
CASE2STR(PAUSE_FAILED);
CASE2STR(PAUSED);
CASE2STR(RESUME_WIP);
@@ -415,6 +418,10 @@ static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
* : | : /
* : v : /
* : PAUSE_GUC_DONE o-----restart
+ * : | :
+ * : | o---<--busy :
+ * : v / / :
+ * : PAUSE_SAVE_GUC :
* : / :
* : / :
* :....o..............o...............o...........:
@@ -434,6 +441,7 @@ static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
+ pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC);
}
}
@@ -464,12 +472,41 @@ static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
pf_enter_vf_pause_failed(gt, vfid);
}
+static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
+ pf_enter_vf_state_machine_bug(gt, vfid);
+}
+
+static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
+{
+ int err;
+
+ if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
+ return false;
+
+ err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid);
+ if (err) {
+ /* retry if busy */
+ if (err == -EBUSY) {
+ pf_enter_vf_pause_save_guc(gt, vfid);
+ return true;
+ }
+ /* give up on error */
+ if (err == -EIO)
+ pf_enter_vf_mismatch(gt, vfid);
+ }
+
+ pf_enter_vf_pause_completed(gt, vfid);
+ return true;
+}
+
static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
return false;
- pf_enter_vf_pause_completed(gt, vfid);
+ pf_enter_vf_pause_save_guc(gt, vfid);
return true;
}
@@ -1008,7 +1045,7 @@ static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
return false;
- /* XXX: placeholder */
+ xe_gt_sriov_pf_sanitize_hw(gt, vfid);
pf_enter_vf_flr_send_finish(gt, vfid);
return true;
@@ -1338,6 +1375,9 @@ static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
if (pf_exit_vf_pause_guc_done(gt, vfid))
return true;
+ if (pf_exit_vf_pause_save_guc(gt, vfid))
+ return true;
+
if (pf_exit_vf_resume_send_resume(gt, vfid))
return true;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
index 11830aafea45..f02f941b4ad2 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
@@ -27,6 +27,7 @@
* @XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE: indicates that the PF is about to send a PAUSE command.
* @XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC: indicates that the PF awaits for a response from the GuC.
* @XE_GT_SRIOV_STATE_PAUSE_GUC_DONE: indicates that the PF has received a response from the GuC.
+ * @XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC: indicates that the PF needs to save the VF GuC state.
* @XE_GT_SRIOV_STATE_PAUSE_FAILED: indicates that a VF pause operation has failed.
* @XE_GT_SRIOV_STATE_PAUSED: indicates that the VF is paused.
* @XE_GT_SRIOV_STATE_RESUME_WIP: indicates that a VF resume operation is in progress.
@@ -56,6 +57,7 @@ enum xe_gt_sriov_control_bits {
XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE,
XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC,
XE_GT_SRIOV_STATE_PAUSE_GUC_DONE,
+ XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC,
XE_GT_SRIOV_STATE_PAUSE_FAILED,
XE_GT_SRIOV_STATE_PAUSED,
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
index 2290ddaf9594..ccbcf6e572d0 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
@@ -17,6 +17,7 @@
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_debugfs.h"
#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_gt_sriov_pf_policy.h"
#include "xe_gt_sriov_pf_service.h"
@@ -312,6 +313,9 @@ static const struct {
{ "stop", xe_gt_sriov_pf_control_stop_vf },
{ "pause", xe_gt_sriov_pf_control_pause_vf },
{ "resume", xe_gt_sriov_pf_control_resume_vf },
+#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
+ { "restore!", xe_gt_sriov_pf_migration_restore_guc_state },
+#endif
};
static ssize_t control_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
@@ -375,6 +379,44 @@ static const struct file_operations control_ops = {
.llseek = default_llseek,
};
+/*
+ * /sys/kernel/debug/dri/0/
+ * ├── gt0
+ * │   ├── vf1
+ * │   │   ├── guc_state
+ */
+static ssize_t guc_state_read(struct file *file, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct dentry *dent = file_dentry(file);
+ struct dentry *parent = dent->d_parent;
+ struct xe_gt *gt = extract_gt(parent);
+ unsigned int vfid = extract_vfid(parent);
+
+ return xe_gt_sriov_pf_migration_read_guc_state(gt, vfid, buf, count, pos);
+}
+
+static ssize_t guc_state_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct dentry *dent = file_dentry(file);
+ struct dentry *parent = dent->d_parent;
+ struct xe_gt *gt = extract_gt(parent);
+ unsigned int vfid = extract_vfid(parent);
+
+ if (*pos)
+ return -EINVAL;
+
+ return xe_gt_sriov_pf_migration_write_guc_state(gt, vfid, buf, count);
+}
+
+static const struct file_operations guc_state_ops = {
+ .owner = THIS_MODULE,
+ .read = guc_state_read,
+ .write = guc_state_write,
+ .llseek = default_llseek,
+};
+
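For illustration, a minimal userspace sketch (hypothetical card/VF paths) of how the read and write entries could be combined to move VF1's GuC state between two cards:

	#include <fcntl.h>
	#include <unistd.h>

	static int copy_vf1_guc_state(void)
	{
		static char buf[2 * 1024 * 1024];	/* PF caps the state at 2 MiB */
		int src = open("/sys/kernel/debug/dri/0/gt0/vf1/guc_state", O_RDONLY);
		int dst = open("/sys/kernel/debug/dri/1/gt0/vf1/guc_state", O_WRONLY);
		ssize_t n = -1;

		if (src >= 0 && dst >= 0) {
			n = read(src, buf, sizeof(buf));
			if (n > 0)
				n = write(dst, buf, n);	/* write must start at pos 0 */
		}
		if (src >= 0)
			close(src);
		if (dst >= 0)
			close(dst);
		return n < 0 ? -1 : 0;
	}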
/**
* xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs.
* @gt: the &xe_gt to register
@@ -423,5 +465,12 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root)
pf_add_config_attrs(gt, vfdentry, VFID(n));
debugfs_create_file("control", 0600, vfdentry, NULL, &control_ops);
+
+ /* for testing/debugging purposes only! */
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ debugfs_create_file("guc_state",
+ IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400,
+ vfdentry, NULL, &guc_state_ops);
+ }
}
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
new file mode 100644
index 000000000000..c712111aa30d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
@@ -0,0 +1,419 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_sriov_abi.h"
+#include "xe_bo.h"
+#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_migration.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_sriov.h"
+
+/* Return: number of dwords saved/restored/required or a negative error code on failure */
+static int guc_action_vf_save_restore(struct xe_guc *guc, u32 vfid, u32 opcode,
+ u64 addr, u32 ndwords)
+{
+ u32 request[PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_SAVE_RESTORE_VF) |
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_0_OPCODE, opcode),
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_1_VFID, vfid),
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_2_ADDR_LO, lower_32_bits(addr)),
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_3_ADDR_HI, upper_32_bits(addr)),
+ FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_SIZE, ndwords),
+ };
+
+ return xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
+}
+
+/* Return: size of the state in dwords or a negative error code on failure */
+static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid)
+{
+ int ret;
+
+ ret = guc_action_vf_save_restore(&gt->uc.guc, vfid, GUC_PF_OPCODE_VF_SAVE, 0, 0);
+ return ret ?: -ENODATA;
+}
+
+/* Return: number of state dwords saved or a negative error code on failure */
+static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid,
+ void *buff, size_t size)
+{
+ const int ndwords = size / sizeof(u32);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_guc *guc = &gt->uc.guc;
+ struct xe_bo *bo;
+ int ret;
+
+ xe_gt_assert(gt, size % sizeof(u32) == 0);
+ xe_gt_assert(gt, size == ndwords * sizeof(u32));
+
+ bo = xe_bo_create_pin_map(xe, tile, NULL,
+ ALIGN(size, PAGE_SIZE),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_SAVE,
+ xe_bo_ggtt_addr(bo), ndwords);
+ if (!ret)
+ ret = -ENODATA;
+ else if (ret > ndwords)
+ ret = -EPROTO;
+ else if (ret > 0)
+ xe_map_memcpy_from(xe, buff, &bo->vmap, 0, ret * sizeof(u32));
+
+ xe_bo_unpin_map_no_vm(bo);
+ return ret;
+}
+
+/* Return: number of state dwords restored or a negative error code on failure */
+static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid,
+ const void *buff, size_t size)
+{
+ const int ndwords = size / sizeof(u32);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_guc *guc = &gt->uc.guc;
+ struct xe_bo *bo;
+ int ret;
+
+ xe_gt_assert(gt, size % sizeof(u32) == 0);
+ xe_gt_assert(gt, size == ndwords * sizeof(u32));
+
+ bo = xe_bo_create_pin_map(xe, tile, NULL,
+ ALIGN(size, PAGE_SIZE),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ xe_map_memcpy_to(xe, &bo->vmap, 0, buff, size);
+
+ ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_RESTORE,
+ xe_bo_ggtt_addr(bo), ndwords);
+ if (!ret)
+ ret = -ENODATA;
+ else if (ret > ndwords)
+ ret = -EPROTO;
+
+ xe_bo_unpin_map_no_vm(bo);
+ return ret;
+}
+
+static bool pf_migration_supported(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ return gt->sriov.pf.migration.supported;
+}
+
+static struct mutex *pf_migration_mutex(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ return &gt->sriov.pf.migration.snapshot_lock;
+}
+
+static struct xe_gt_sriov_state_snapshot *pf_pick_vf_snapshot(struct xe_gt *gt,
+ unsigned int vfid)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+ lockdep_assert_held(pf_migration_mutex(gt));
+
+ return &gt->sriov.pf.vfs[vfid].snapshot;
+}
+
+static unsigned int pf_snapshot_index(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
+{
+ return container_of(snapshot, struct xe_gt_sriov_metadata, snapshot) - gt->sriov.pf.vfs;
+}
+
+static void pf_free_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ drmm_kfree(&xe->drm, snapshot->guc.buff);
+ snapshot->guc.buff = NULL;
+ snapshot->guc.size = 0;
+}
+
+static int pf_alloc_guc_state(struct xe_gt *gt,
+ struct xe_gt_sriov_state_snapshot *snapshot,
+ size_t size)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ void *p;
+
+ pf_free_guc_state(gt, snapshot);
+
+ if (!size)
+ return -ENODATA;
+
+ if (size % sizeof(u32))
+ return -EINVAL;
+
+ if (size > SZ_2M)
+ return -EFBIG;
+
+ p = drmm_kzalloc(&xe->drm, size, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ snapshot->guc.buff = p;
+ snapshot->guc.size = size;
+ return 0;
+}
+
+static void pf_dump_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
+{
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) {
+ unsigned int vfid __maybe_unused = pf_snapshot_index(gt, snapshot);
+
+ xe_gt_sriov_dbg_verbose(gt, "VF%u GuC state is %zu dwords:\n",
+ vfid, snapshot->guc.size / sizeof(u32));
+ print_hex_dump_bytes("state: ", DUMP_PREFIX_OFFSET,
+ snapshot->guc.buff, min(SZ_64, snapshot->guc.size));
+ }
+}
+
+static int pf_save_vf_guc_state(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid);
+ size_t size;
+ int ret;
+
+ ret = pf_send_guc_query_vf_state_size(gt, vfid);
+ if (ret < 0)
+ goto fail;
+ size = ret * sizeof(u32);
+ xe_gt_sriov_dbg_verbose(gt, "VF%u state size is %d dwords (%zu bytes)\n", vfid, ret, size);
+
+ ret = pf_alloc_guc_state(gt, snapshot, size);
+ if (ret < 0)
+ goto fail;
+
+ ret = pf_send_guc_save_vf_state(gt, vfid, snapshot->guc.buff, size);
+ if (ret < 0)
+ goto fail;
+ size = ret * sizeof(u32);
+ xe_gt_assert(gt, size);
+ xe_gt_assert(gt, size <= snapshot->guc.size);
+ snapshot->guc.size = size;
+
+ pf_dump_guc_state(gt, snapshot);
+ return 0;
+
+fail:
+ xe_gt_sriov_dbg(gt, "Unable to save VF%u state (%pe)\n", vfid, ERR_PTR(ret));
+ pf_free_guc_state(gt, snapshot);
+ return ret;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_save_guc_state() - Take a GuC VF state snapshot.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid)
+{
+ int err;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ mutex_lock(pf_migration_mutex(gt));
+ err = pf_save_vf_guc_state(gt, vfid);
+ mutex_unlock(pf_migration_mutex(gt));
+
+ return err;
+}
+
+static int pf_restore_vf_guc_state(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid);
+ int ret;
+
+ if (!snapshot->guc.size)
+ return -ENODATA;
+
+ xe_gt_sriov_dbg_verbose(gt, "restoring %zu dwords of VF%u GuC state\n",
+ snapshot->guc.size / sizeof(u32), vfid);
+ ret = pf_send_guc_restore_vf_state(gt, vfid, snapshot->guc.buff, snapshot->guc.size);
+ if (ret < 0)
+ goto fail;
+
+ xe_gt_sriov_dbg_verbose(gt, "restored %d dwords of VF%u GuC state\n", ret, vfid);
+ return 0;
+
+fail:
+ xe_gt_sriov_dbg(gt, "Failed to restore VF%u GuC state (%pe)\n", vfid, ERR_PTR(ret));
+ return ret;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_restore_guc_state() - Restore a GuC VF state.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid)
+{
+ int ret;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ mutex_lock(pf_migration_mutex(gt));
+ ret = pf_restore_vf_guc_state(gt, vfid);
+ mutex_unlock(pf_migration_mutex(gt));
+
+ return ret;
+}
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * xe_gt_sriov_pf_migration_read_guc_state() - Read a GuC VF state.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @buf: the user space buffer to read to
+ * @count: the maximum number of bytes to read
+ * @pos: the current position in the buffer
+ *
+ * This function is for PF only.
+ *
+ * This function reads up to @count bytes from the saved VF GuC state buffer
+ * at offset @pos into the user space address starting at @buf.
+ *
+ * Return: the number of bytes read or a negative error code on failure.
+ */
+ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid,
+ char __user *buf, size_t count, loff_t *pos)
+{
+ struct xe_gt_sriov_state_snapshot *snapshot;
+ ssize_t ret;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ mutex_lock(pf_migration_mutex(gt));
+ snapshot = pf_pick_vf_snapshot(gt, vfid);
+ if (snapshot->guc.size)
+ ret = simple_read_from_buffer(buf, count, pos, snapshot->guc.buff,
+ snapshot->guc.size);
+ else
+ ret = -ENODATA;
+ mutex_unlock(pf_migration_mutex(gt));
+
+ return ret;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_write_guc_state() - Write a GuC VF state.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @buf: the user space buffer with GuC VF state
+ * @size: the size of GuC VF state (in bytes)
+ *
+ * This function is for PF only.
+ *
+ * This function reads @size bytes of the VF GuC state stored at user space
+ * address @buf and writes it into an internal VF state buffer.
+ *
+ * Return: the number of bytes used or a negative error code on failure.
+ */
+ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid,
+ const char __user *buf, size_t size)
+{
+ struct xe_gt_sriov_state_snapshot *snapshot;
+ loff_t pos = 0;
+ ssize_t ret;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ mutex_lock(pf_migration_mutex(gt));
+ snapshot = pf_pick_vf_snapshot(gt, vfid);
+ ret = pf_alloc_guc_state(gt, snapshot, size);
+ if (!ret) {
+ ret = simple_write_to_buffer(snapshot->guc.buff, size, &pos, buf, size);
+ if (ret < 0)
+ pf_free_guc_state(gt, snapshot);
+ else
+ pf_dump_guc_state(gt, snapshot);
+ }
+ mutex_unlock(pf_migration_mutex(gt));
+
+ return ret;
+}
+#endif /* CONFIG_DEBUG_FS */
+
+static bool pf_check_migration_support(struct xe_gt *gt)
+{
+ /* GuC 70.25 with save/restore v2 is required */
+ xe_gt_assert(gt, GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 25, 0));
+
+ /* XXX: for now this is for feature enabling only */
+ return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
+}
+
+/**
+ * xe_gt_sriov_pf_migration_init() - Initialize support for VF migration.
+ * @gt: the &xe_gt
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_init(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(xe));
+
+ gt->sriov.pf.migration.supported = pf_check_migration_support(gt);
+
+ if (!pf_migration_supported(gt))
+ return 0;
+
+ err = drmm_mutex_init(&xe->drm, &gt->sriov.pf.migration.snapshot_lock);
+ if (err)
+ return err;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
new file mode 100644
index 000000000000..09faeae00ddb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_MIGRATION_H_
+#define _XE_GT_SRIOV_PF_MIGRATION_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+
+int xe_gt_sriov_pf_migration_init(struct xe_gt *gt);
+int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid);
+
+#ifdef CONFIG_DEBUG_FS
+ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid,
+ char __user *buf, size_t count, loff_t *pos);
+ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid,
+ const char __user *buf, size_t count);
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
new file mode 100644
index 000000000000..1f3110b6d44f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_
+#define _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/**
+ * struct xe_gt_sriov_state_snapshot - GT-level per-VF state snapshot data.
+ *
+ * Used by the PF driver to maintain per-VF migration data.
+ */
+struct xe_gt_sriov_state_snapshot {
+ /** @guc: GuC VF state snapshot */
+ struct {
+ /** @guc.buff: buffer with the VF state */
+ u32 *buff;
+ /** @guc.size: size of the buffer (must be dword-aligned) */
+ u32 size;
+ } guc;
+};
+
+/**
+ * struct xe_gt_sriov_pf_migration - GT-level VF migration data.
+ *
+ * Used by the PF driver to maintain per-GT migration data that is not specific to a single VF.
+ */
+struct xe_gt_sriov_pf_migration {
+ /** @supported: indicates whether the feature is supported */
+ bool supported;
+
+ /** @snapshot_lock: protects all VFs snapshots */
+ struct mutex snapshot_lock;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
index 0e23b7ea4f3e..924e75b94aec 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
@@ -237,7 +237,7 @@ static void read_many(struct xe_gt *gt, unsigned int count,
const struct xe_reg *regs, u32 *values)
{
while (count--)
- *values++ = xe_mmio_read32(gt, *regs++);
+ *values++ = xe_mmio_read32(&gt->mmio, *regs++);
}
static void pf_prepare_runtime_info(struct xe_gt *gt)
@@ -402,7 +402,7 @@ static int pf_service_runtime_query(struct xe_gt *gt, u32 start, u32 limit,
for (i = 0; i < count; ++i, ++data) {
addr = runtime->regs[start + i].addr;
- data->offset = xe_mmio_adjusted_addr(gt, addr);
+ data->offset = xe_mmio_adjusted_addr(&gt->mmio, addr);
data->value = runtime->values[start + i];
}
@@ -513,7 +513,7 @@ int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p
for (; size--; regs++, values++) {
drm_printf(p, "reg[%#x] = %#x\n",
- xe_mmio_adjusted_addr(gt, regs->addr), *values);
+ xe_mmio_adjusted_addr(&gt->mmio, regs->addr), *values);
}
return 0;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
index 28e1b130bf87..0426b1a77069 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
@@ -10,6 +10,7 @@
#include "xe_gt_sriov_pf_config_types.h"
#include "xe_gt_sriov_pf_control_types.h"
+#include "xe_gt_sriov_pf_migration_types.h"
#include "xe_gt_sriov_pf_monitor_types.h"
#include "xe_gt_sriov_pf_policy_types.h"
#include "xe_gt_sriov_pf_service_types.h"
@@ -29,6 +30,9 @@ struct xe_gt_sriov_metadata {
/** @version: negotiated VF/PF ABI version */
struct xe_gt_sriov_pf_service_version version;
+
+ /** @snapshot: snapshot of the VF state data */
+ struct xe_gt_sriov_state_snapshot snapshot;
};
/**
@@ -36,6 +40,7 @@ struct xe_gt_sriov_metadata {
* @service: service data.
* @control: control data.
* @policy: policy data.
+ * @migration: migration data.
* @spare: PF-only provisioning configuration.
* @vfs: metadata for all VFs.
*/
@@ -43,6 +48,7 @@ struct xe_gt_sriov_pf {
struct xe_gt_sriov_pf_service service;
struct xe_gt_sriov_pf_control control;
struct xe_gt_sriov_pf_policy policy;
+ struct xe_gt_sriov_pf_migration migration;
struct xe_gt_sriov_spare_config spare;
struct xe_gt_sriov_metadata *vfs;
};
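With @snapshot embedded in the per-VF metadata, the pf_pick_vf_snapshot() helper used by the migration code presumably reduces to an array lookup performed under the snapshot lock; a sketch under the assumption that the vfs[] array is indexed directly by VF ID:

	static struct xe_gt_sriov_state_snapshot *
	pf_pick_vf_snapshot(struct xe_gt *gt, unsigned int vfid)
	{
		lockdep_assert_held(&gt->sriov.pf.migration.snapshot_lock);

		return &gt->sriov.pf.vfs[vfid].snapshot;
	}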
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 4ebc82e607af..d3baba50f085 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -881,7 +881,7 @@ static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr)
*/
u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg)
{
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(&gt->mmio, reg.addr);
struct vf_runtime_reg *rr;
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
@@ -917,7 +917,7 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg)
*/
void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
{
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(&gt->mmio, reg.addr);
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
xe_gt_assert(gt, !reg.vf);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c
index f3ddcbefc6bc..2ed5b6780d30 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c
@@ -33,7 +33,7 @@ static const struct drm_info_list vf_info[] = {
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_vf_print_version,
},
-#if defined(CONFIG_DRM_XE_DEBUG) || defined(CONFIG_DRM_XE_DEBUG_SRIOV)
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) || IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)
{
"runtime_regs",
.show = xe_gt_debugfs_simple_show,
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
index a05c3699e8b9..ec2b8246204b 100644
--- a/drivers/gpu/drm/xe/xe_gt_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -51,5 +51,5 @@ int xe_gt_sysfs_init(struct xe_gt *gt)
gt->sysfs = &kg->base;
- return devm_add_action(xe->drm.dev, gt_sysfs_fini, gt);
+ return devm_add_action_or_reset(xe->drm.dev, gt_sysfs_fini, gt);
}
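This one-liner closes an error-path leak: devm_add_action_or_reset() runs the release action immediately if registration fails, instead of returning with the action silently dropped. Its semantics are roughly the following (a sketch, not the kernel's actual implementation):

	static int add_action_or_reset_sketch(struct device *dev,
					      void (*action)(void *), void *data)
	{
		int ret = devm_add_action(dev, action, data);

		if (ret)
			action(data);	/* run the cleanup now rather than leak */

		return ret;
	}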
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index 25963e33a383..03b225364101 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -41,9 +41,9 @@ u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt)
xe_pm_runtime_get(gt_to_xe(gt));
if (xe_gt_is_media_type(gt))
- reg = xe_mmio_read32(gt, MTL_MEDIA_PERF_LIMIT_REASONS);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MEDIA_PERF_LIMIT_REASONS);
else
- reg = xe_mmio_read32(gt, GT0_PERF_LIMIT_REASONS);
+ reg = xe_mmio_read32(&gt->mmio, GT0_PERF_LIMIT_REASONS);
xe_pm_runtime_put(gt_to_xe(gt));
return reg;
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index cca9cf536f76..98616de0c5bb 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -274,17 +274,19 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
xe_gt_tlb_invalidation_fence_wait(&fence);
} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
+ struct xe_mmio *mmio = &gt->mmio;
+
if (IS_SRIOV_VF(xe))
return 0;
xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
- xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
+ xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
PVC_GUC_TLB_INV_DESC1_INVALIDATE);
- xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0,
+ xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0,
PVC_GUC_TLB_INV_DESC0_VALID);
} else {
- xe_mmio_write32(gt, GUC_TLB_INV_CR,
+ xe_mmio_write32(mmio, GUC_TLB_INV_CR,
GUC_TLB_INV_CR_INVALIDATE);
}
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 0662f71c6ede..651ba53623e5 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -25,7 +25,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
va_start(argp, numregs);
for (i = 0; i < numregs; i++)
- fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, struct xe_reg));
+ fuse_val[i] = xe_mmio_read32(&gt->mmio, va_arg(argp, struct xe_reg));
va_end(argp);
bitmap_from_arr32(mask, fuse_val, numregs * 32);
@@ -35,7 +35,7 @@ static void
load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
{
struct xe_device *xe = gt_to_xe(gt);
- u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
+ u32 reg_val = xe_mmio_read32(&gt->mmio, XELP_EU_ENABLE);
u32 val = 0;
int i;
@@ -127,7 +127,7 @@ static void
load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
{
struct xe_device *xe = gt_to_xe(gt);
- u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);
+ u32 fuse3 = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
if (GRAPHICS_VER(xe) >= 20) {
xe_l3_bank_mask_t per_node = {};
@@ -141,7 +141,7 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
xe_l3_bank_mask_t per_node = {};
xe_l3_bank_mask_t per_mask_bit = {};
u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
- u32 fuse4 = xe_mmio_read32(gt, XEHP_FUSE4);
+ u32 fuse4 = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);
bitmap_set_value8(per_mask_bit, 0x3, 0);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 3d1c51de0268..a287b98ee70b 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -6,6 +6,7 @@
#ifndef _XE_GT_TYPES_H_
#define _XE_GT_TYPES_H_
+#include "xe_device_types.h"
#include "xe_force_wake_types.h"
#include "xe_gt_idle_types.h"
#include "xe_gt_sriov_pf_types.h"
@@ -145,19 +146,20 @@ struct xe_gt {
/**
* @mmio: mmio info for GT. All GTs within a tile share the same
* register space, but have their own copy of GSI registers at a
- * specific offset, as well as their own forcewake handling.
+ * specific offset.
+ */
+ struct xe_mmio mmio;
+
+ /**
+ * @pm: power management info for GT. The driver uses the GT's
+ * "force wake" interface to wake up specific parts of the GT hardware
+ * from C6 sleep states and ensure the hardware remains awake while it
+ * is being actively used.
*/
struct {
- /** @mmio.fw: force wake for GT */
+ /** @pm.fw: force wake for GT */
struct xe_force_wake fw;
- /**
- * @mmio.adj_limit: adjust MMIO address if address is below this
- * value
- */
- u32 adj_limit;
- /** @mmio.adj_offset: offect to add to MMIO address when adjusting */
- u32 adj_offset;
- } mmio;
+ } pm;
/** @sriov: virtualization data related to GT */
union {
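The net effect of this xe_gt_types.h change is visible in every conversion in the series: register accessors no longer take the GT, callers pass the embedded xe_mmio view instead.

	/* before: the accessor took the GT and dug out the mmio state itself */
	reg = xe_mmio_read32(gt, GT_PERF_STATUS);

	/* after: the xe_mmio view is passed explicitly */
	reg = xe_mmio_read32(&gt->mmio, GT_PERF_STATUS);

The removed adj_limit/adj_offset fields presumably move into struct xe_mmio itself, which is what xe_mmio_adjusted_addr(&gt->mmio, ...) now consumes, while forcewake stays behind in the new pm sub-struct.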
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 52df28032a6f..c2ddf883702b 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -14,6 +14,7 @@
#include "regs/xe_gt_regs.h"
#include "regs/xe_gtt_defs.h"
#include "regs/xe_guc_regs.h"
+#include "regs/xe_irq_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_force_wake.h"
@@ -236,10 +237,10 @@ static void guc_write_params(struct xe_guc *guc)
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
- xe_mmio_write32(gt, SOFT_SCRATCH(0), 0);
+ xe_mmio_write32(&gt->mmio, SOFT_SCRATCH(0), 0);
for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
- xe_mmio_write32(gt, SOFT_SCRATCH(1 + i), guc->params[i]);
+ xe_mmio_write32(&gt->mmio, SOFT_SCRATCH(1 + i), guc->params[i]);
}
static void guc_fini_hw(void *arg)
@@ -425,6 +426,7 @@ int xe_guc_post_load_init(struct xe_guc *guc)
int xe_guc_reset(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_mmio *mmio = &gt->mmio;
u32 guc_status, gdrst;
int ret;
@@ -433,15 +435,15 @@ int xe_guc_reset(struct xe_guc *guc)
if (IS_SRIOV_VF(gt_to_xe(gt)))
return xe_gt_sriov_vf_bootstrap(gt);
- xe_mmio_write32(gt, GDRST, GRDOM_GUC);
+ xe_mmio_write32(mmio, GDRST, GRDOM_GUC);
- ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false);
+ ret = xe_mmio_wait32(mmio, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false);
if (ret) {
xe_gt_err(gt, "GuC reset timed out, GDRST=%#x\n", gdrst);
goto err_out;
}
- guc_status = xe_mmio_read32(gt, GUC_STATUS);
+ guc_status = xe_mmio_read32(mmio, GUC_STATUS);
if (!(guc_status & GS_MIA_IN_RESET)) {
xe_gt_err(gt, "GuC status: %#x, MIA core expected to be in reset\n",
guc_status);
@@ -459,6 +461,7 @@ err_out:
static void guc_prepare_xfer(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_mmio *mmio = &gt->mmio;
struct xe_device *xe = guc_to_xe(guc);
u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
@@ -473,12 +476,12 @@ static void guc_prepare_xfer(struct xe_guc *guc)
shim_flags |= REG_FIELD_PREP(GUC_MOCS_INDEX_MASK, gt->mocs.uc_index);
/* Must program this register before loading the ucode with DMA */
- xe_mmio_write32(gt, GUC_SHIM_CONTROL, shim_flags);
+ xe_mmio_write32(mmio, GUC_SHIM_CONTROL, shim_flags);
- xe_mmio_write32(gt, GT_PM_CONFIG, GT_DOORBELL_ENABLE);
+ xe_mmio_write32(mmio, GT_PM_CONFIG, GT_DOORBELL_ENABLE);
/* Make sure GuC receives ARAT interrupts */
- xe_mmio_rmw32(gt, PMINTRMSK, ARAT_EXPIRED_INTRMSK, 0);
+ xe_mmio_rmw32(mmio, PMINTRMSK, ARAT_EXPIRED_INTRMSK, 0);
}
/*
@@ -494,7 +497,7 @@ static int guc_xfer_rsa(struct xe_guc *guc)
if (guc->fw.rsa_size > 256) {
u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) +
xe_uc_fw_rsa_offset(&guc->fw);
- xe_mmio_write32(gt, UOS_RSA_SCRATCH(0), rsa_ggtt_addr);
+ xe_mmio_write32(&gt->mmio, UOS_RSA_SCRATCH(0), rsa_ggtt_addr);
return 0;
}
@@ -503,7 +506,7 @@ static int guc_xfer_rsa(struct xe_guc *guc)
return -ENOMEM;
for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
- xe_mmio_write32(gt, UOS_RSA_SCRATCH(i), rsa[i]);
+ xe_mmio_write32(&gt->mmio, UOS_RSA_SCRATCH(i), rsa[i]);
return 0;
}
@@ -583,7 +586,7 @@ static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc)
* extreme thermal throttling. And a system that is that hot during boot is probably
* dead anyway!
*/
-#if defined(CONFIG_DRM_XE_DEBUG)
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
#define GUC_LOAD_RETRY_LIMIT 20
#else
#define GUC_LOAD_RETRY_LIMIT 3
@@ -593,6 +596,7 @@ static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc)
static void guc_wait_ucode(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_mmio *mmio = &gt->mmio;
struct xe_guc_pc *guc_pc = &gt->uc.guc.pc;
ktime_t before, after, delta;
int load_done;
@@ -619,7 +623,7 @@ static void guc_wait_ucode(struct xe_guc *guc)
* timeouts rather than allowing a huge timeout each time. So basically, we need
* to treat a timeout no differently from a value change.
*/
- ret = xe_mmio_wait32_not(gt, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK,
+ ret = xe_mmio_wait32_not(mmio, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK,
last_status, 1000 * 1000, &status, false);
if (ret < 0)
count++;
@@ -657,7 +661,7 @@ static void guc_wait_ucode(struct xe_guc *guc)
switch (bootrom) {
case XE_BOOTROM_STATUS_NO_KEY_FOUND:
xe_gt_err(gt, "invalid key requested, header = 0x%08X\n",
- xe_mmio_read32(gt, GUC_HEADER_INFO));
+ xe_mmio_read32(mmio, GUC_HEADER_INFO));
break;
case XE_BOOTROM_STATUS_RSA_FAILED:
@@ -672,7 +676,7 @@ static void guc_wait_ucode(struct xe_guc *guc)
switch (ukernel) {
case XE_GUC_LOAD_STATUS_EXCEPTION:
xe_gt_err(gt, "firmware exception. EIP: %#x\n",
- xe_mmio_read32(gt, SOFT_SCRATCH(13)));
+ xe_mmio_read32(mmio, SOFT_SCRATCH(13)));
break;
case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
@@ -824,10 +828,10 @@ static void guc_handle_mmio_msg(struct xe_guc *guc)
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
- msg = xe_mmio_read32(gt, SOFT_SCRATCH(15));
+ msg = xe_mmio_read32(&gt->mmio, SOFT_SCRATCH(15));
msg &= XE_GUC_RECV_MSG_EXCEPTION |
XE_GUC_RECV_MSG_CRASH_DUMP_POSTED;
- xe_mmio_write32(gt, SOFT_SCRATCH(15), 0);
+ xe_mmio_write32(&gt->mmio, SOFT_SCRATCH(15), 0);
if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED)
xe_gt_err(gt, "Received early GuC crash dump notification!\n");
@@ -844,14 +848,14 @@ static void guc_enable_irq(struct xe_guc *guc)
REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
/* Primary GuC and media GuC share a single enable bit */
- xe_mmio_write32(gt, GUC_SG_INTR_ENABLE,
+ xe_mmio_write32(&gt->mmio, GUC_SG_INTR_ENABLE,
REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST));
/*
* There are separate mask bits for primary and media GuCs, so use
* a RMW operation to avoid clobbering the other GuC's setting.
*/
- xe_mmio_rmw32(gt, GUC_SG_INTR_MASK, events, 0);
+ xe_mmio_rmw32(&gt->mmio, GUC_SG_INTR_MASK, events, 0);
}
int xe_guc_enable_communication(struct xe_guc *guc)
@@ -863,7 +867,7 @@ int xe_guc_enable_communication(struct xe_guc *guc)
struct xe_gt *gt = guc_to_gt(guc);
struct xe_tile *tile = gt_to_tile(gt);
- err = xe_memirq_init_guc(&tile->sriov.vf.memirq, guc);
+ err = xe_memirq_init_guc(&tile->memirq, guc);
if (err)
return err;
} else {
@@ -907,7 +911,7 @@ void xe_guc_notify(struct xe_guc *guc)
* additional payload data to the GuC but this capability is not
* used by the firmware yet. Use default value in the meantime.
*/
- xe_mmio_write32(gt, guc->notify_reg, default_notify_data);
+ xe_mmio_write32(&gt->mmio, guc->notify_reg, default_notify_data);
}
int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr)
@@ -925,6 +929,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_mmio *mmio = &gt->mmio;
u32 header, reply;
struct xe_reg reply_reg = xe_gt_is_media_type(gt) ?
MED_VF_SW_FLAG(0) : VF_SW_FLAG(0);
@@ -947,19 +952,19 @@ retry:
/* Not in critical data-path, just do if else for GT type */
if (xe_gt_is_media_type(gt)) {
for (i = 0; i < len; ++i)
- xe_mmio_write32(gt, MED_VF_SW_FLAG(i),
+ xe_mmio_write32(mmio, MED_VF_SW_FLAG(i),
request[i]);
- xe_mmio_read32(gt, MED_VF_SW_FLAG(LAST_INDEX));
+ xe_mmio_read32(mmio, MED_VF_SW_FLAG(LAST_INDEX));
} else {
for (i = 0; i < len; ++i)
- xe_mmio_write32(gt, VF_SW_FLAG(i),
+ xe_mmio_write32(mmio, VF_SW_FLAG(i),
request[i]);
- xe_mmio_read32(gt, VF_SW_FLAG(LAST_INDEX));
+ xe_mmio_read32(mmio, VF_SW_FLAG(LAST_INDEX));
}
xe_guc_notify(guc);
- ret = xe_mmio_wait32(gt, reply_reg, GUC_HXG_MSG_0_ORIGIN,
+ ret = xe_mmio_wait32(mmio, reply_reg, GUC_HXG_MSG_0_ORIGIN,
FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC),
50000, &reply, false);
if (ret) {
@@ -969,7 +974,7 @@ timeout:
return ret;
}
- header = xe_mmio_read32(gt, reply_reg);
+ header = xe_mmio_read32(mmio, reply_reg);
if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
GUC_HXG_TYPE_NO_RESPONSE_BUSY) {
/*
@@ -985,7 +990,7 @@ timeout:
BUILD_BUG_ON(FIELD_MAX(GUC_HXG_MSG_0_TYPE) != GUC_HXG_TYPE_RESPONSE_SUCCESS);
BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1);
- ret = xe_mmio_wait32(gt, reply_reg, resp_mask, resp_mask,
+ ret = xe_mmio_wait32(mmio, reply_reg, resp_mask, resp_mask,
1000000, &header, false);
if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
@@ -1032,7 +1037,7 @@ proto:
for (i = 1; i < VF_SW_FLAG_COUNT; i++) {
reply_reg.addr += sizeof(u32);
- response_buf[i] = xe_mmio_read32(gt, reply_reg);
+ response_buf[i] = xe_mmio_read32(mmio, reply_reg);
}
}
@@ -1155,7 +1160,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
if (err)
return;
- status = xe_mmio_read32(gt, GUC_STATUS);
+ status = xe_mmio_read32(&gt->mmio, GUC_STATUS);
drm_printf(p, "\nGuC status 0x%08x:\n", status);
drm_printf(p, "\tBootrom status = 0x%x\n",
@@ -1170,7 +1175,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
drm_puts(p, "\nScratch registers:\n");
for (i = 0; i < SOFT_SCRATCH_COUNT; i++) {
drm_printf(p, "\t%2d: \t0x%x\n",
- i, xe_mmio_read32(gt, SOFT_SCRATCH(i)));
+ i, xe_mmio_read32(&gt->mmio, SOFT_SCRATCH(i)));
}
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index d1902a8581ca..04485461aa20 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -5,6 +5,8 @@
#include "xe_guc_ads.h"
+#include <linux/fault-inject.h>
+
#include <drm/drm_managed.h>
#include <generated/xe_wa_oob.h>
@@ -418,6 +420,7 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_guc_ads_init, ERRNO); /* See xe_pci_probe() */
/**
* xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load
@@ -684,7 +687,7 @@ static void guc_doorbell_init(struct xe_guc_ads *ads)
if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) {
u32 distdbreg =
- xe_mmio_read32(gt, DIST_DBS_POPULATED);
+ xe_mmio_read32(&gt->mmio, DIST_DBS_POPULATED);
ads_blob_write(ads,
system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
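The ALLOW_ERROR_INJECTION() annotations added across this series opt the init functions into the kernel's function-level fault-injection framework, so probe-time error paths can be forced without real failures; the recurring comment points at xe_pci_probe() for where this matters. The pattern, shown on a hypothetical function:

	#include <linux/fault-inject.h>

	static int xe_example_init(void)	/* hypothetical */
	{
		/* must report failure as a negative errno for ERRNO injection */
		return 0;
	}
	ALLOW_ERROR_INJECTION(xe_example_init, ERRNO);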
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index f24dd5223926..5aef2be2d027 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -8,6 +8,7 @@
#include <linux/bitfield.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
+#include <linux/fault-inject.h>
#include <kunit/static_stub.h>
@@ -182,7 +183,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
spin_lock_init(&ct->fast_lock);
xa_init(&ct->fence_lookup);
INIT_WORK(&ct->g2h_worker, g2h_worker_func);
- INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func);
+ INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func);
init_waitqueue_head(&ct->wq);
init_waitqueue_head(&ct->g2h_fence_wq);
@@ -209,6 +210,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
ct->state = XE_GUC_CT_STATE_DISABLED;
return 0;
}
+ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */
#define desc_read(xe_, guc_ctb__, field_) \
xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \
@@ -667,16 +669,12 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
num_g2h = 1;
if (g2h_fence_needs_alloc(g2h_fence)) {
- void *ptr;
-
g2h_fence->seqno = next_ct_seqno(ct, true);
- ptr = xa_store(&ct->fence_lookup,
- g2h_fence->seqno,
- g2h_fence, GFP_ATOMIC);
- if (IS_ERR(ptr)) {
- ret = PTR_ERR(ptr);
+ ret = xa_err(xa_store(&ct->fence_lookup,
+ g2h_fence->seqno, g2h_fence,
+ GFP_ATOMIC));
+ if (ret)
goto out;
- }
}
seqno = g2h_fence->seqno;
@@ -852,7 +850,7 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret)
#define ct_alive(ct) \
(xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \
!ct->ctbs.g2h.info.broken)
- if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
+ if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
return false;
#undef ct_alive
@@ -879,14 +877,11 @@ retry:
retry_same_fence:
ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
if (unlikely(ret == -ENOMEM)) {
- void *ptr;
-
/* Retry allocation /w GFP_KERNEL */
- ptr = xa_store(&ct->fence_lookup,
- g2h_fence.seqno,
- &g2h_fence, GFP_KERNEL);
- if (IS_ERR(ptr))
- return PTR_ERR(ptr);
+ ret = xa_err(xa_store(&ct->fence_lookup, g2h_fence.seqno,
+ &g2h_fence, GFP_KERNEL));
+ if (ret)
+ return ret;
goto retry_same_fence;
} else if (unlikely(ret)) {
@@ -897,22 +892,32 @@ retry_same_fence:
goto retry_same_fence;
if (!g2h_fence_needs_alloc(&g2h_fence))
- xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+ xa_erase(&ct->fence_lookup, g2h_fence.seqno);
return ret;
}
ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
+
+ /*
+ * Ensure we serialize with the completion side to prevent UAF with the fence going out of
+ * scope on the stack, since we have no clue whether it will fire after the timeout but before
+ * we can erase it from the xa. Also we have some dependent loads and stores below for which
+ * we need the correct ordering, and we lack the needed barriers.
+ */
+ mutex_lock(&ct->lock);
if (!ret) {
- xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x",
- g2h_fence.seqno, action[0]);
- xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+ xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
+ g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
+ xa_erase(&ct->fence_lookup, g2h_fence.seqno);
+ mutex_unlock(&ct->lock);
return -ETIME;
}
if (g2h_fence.retry) {
xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
action[0], g2h_fence.reason);
+ mutex_unlock(&ct->lock);
goto retry;
}
if (g2h_fence.fail) {
@@ -921,7 +926,12 @@ retry_same_fence:
ret = -EIO;
}
- return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret;
+ if (ret > 0)
+ ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
+
+ mutex_unlock(&ct->lock);
+
+ return ret;
}
/**
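The xa_store() conversions in this file all fix the same subtle bug: xa_store() returns the previous entry, and a failure comes back as an xa_err()-tagged pointer rather than an ERR_PTR(). IS_ERR() happens to trip on the tag, but PTR_ERR() then decodes the wrong errno; xa_err() is the correct accessor. In isolation:

	/* wrong: PTR_ERR() mis-decodes an xa_err()-tagged pointer */
	ptr = xa_store(&xa, index, entry, GFP_KERNEL);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	/* right: xa_err() extracts the errno, or 0 on success */
	ret = xa_err(xa_store(&xa, index, entry, GFP_KERNEL));
	if (ret)
		return ret;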
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index a37ee3419428..651543721ce5 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -5,6 +5,8 @@
#include "xe_guc_log.h"
+#include <linux/fault-inject.h>
+
#include <drm/drm_managed.h>
#include "xe_bo.h"
@@ -96,3 +98,4 @@ int xe_guc_log_init(struct xe_guc_log *log)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_guc_log_init, ERRNO); /* See xe_pci_probe() */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 034b29984d5e..2b654f820ae2 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -262,7 +262,7 @@ static void pc_set_manual_rp_ctrl(struct xe_guc_pc *pc, bool enable)
u32 state = enable ? RPSWCTL_ENABLE : RPSWCTL_DISABLE;
/* Allow/Disallow punit to process software freq requests */
- xe_mmio_write32(gt, RP_CONTROL, state);
+ xe_mmio_write32(&gt->mmio, RP_CONTROL, state);
}
static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq)
@@ -274,7 +274,7 @@ static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq)
/* Req freq is in units of 16.66 Mhz */
rpnswreq = REG_FIELD_PREP(REQ_RATIO_MASK, encode_freq(freq));
- xe_mmio_write32(gt, RPNSWREQ, rpnswreq);
+ xe_mmio_write32(&gt->mmio, RPNSWREQ, rpnswreq);
/* Sleep for a small time to allow pcode to respond */
usleep_range(100, 300);
@@ -334,9 +334,9 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc)
u32 reg;
if (xe_gt_is_media_type(gt))
- reg = xe_mmio_read32(gt, MTL_MPE_FREQUENCY);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MPE_FREQUENCY);
else
- reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY);
+ reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPE_FREQUENCY);
pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg));
}
@@ -353,9 +353,9 @@ static void tgl_update_rpe_value(struct xe_guc_pc *pc)
* PCODE at a different register
*/
if (xe->info.platform == XE_PVC)
- reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
else
- reg = xe_mmio_read32(gt, FREQ_INFO_REC);
+ reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC);
pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
}
@@ -392,10 +392,10 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
/* When in RC6, actual frequency reported will be 0. */
if (GRAPHICS_VERx100(xe) >= 1270) {
- freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
+ freq = xe_mmio_read32(&gt->mmio, MTL_MIRROR_TARGET_WP1);
freq = REG_FIELD_GET(MTL_CAGF_MASK, freq);
} else {
- freq = xe_mmio_read32(gt, GT_PERF_STATUS);
+ freq = xe_mmio_read32(&gt->mmio, GT_PERF_STATUS);
freq = REG_FIELD_GET(CAGF_MASK, freq);
}
@@ -425,7 +425,7 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
if (ret)
return ret;
- *freq = xe_mmio_read32(gt, RPNSWREQ);
+ *freq = xe_mmio_read32(&gt->mmio, RPNSWREQ);
*freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq);
*freq = decode_freq(*freq);
@@ -612,10 +612,10 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc)
u32 reg, gt_c_state;
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
- reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MIRROR_TARGET_WP1);
gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg);
} else {
- reg = xe_mmio_read32(gt, GT_CORE_STATUS);
+ reg = xe_mmio_read32(&gt->mmio, GT_CORE_STATUS);
gt_c_state = REG_FIELD_GET(RCN_MASK, reg);
}
@@ -638,7 +638,7 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u32 reg;
- reg = xe_mmio_read32(gt, GT_GFX_RC6);
+ reg = xe_mmio_read32(&gt->mmio, GT_GFX_RC6);
return reg;
}
@@ -652,7 +652,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u64 reg;
- reg = xe_mmio_read32(gt, MTL_MEDIA_MC6);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MEDIA_MC6);
return reg;
}
@@ -665,9 +665,9 @@ static void mtl_init_fused_rp_values(struct xe_guc_pc *pc)
xe_device_assert_mem_access(pc_to_xe(pc));
if (xe_gt_is_media_type(gt))
- reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, MTL_MEDIAP_STATE_CAP);
else
- reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, MTL_RP_STATE_CAP);
pc->rp0_freq = decode_freq(REG_FIELD_GET(MTL_RP0_CAP_MASK, reg));
@@ -683,9 +683,9 @@ static void tgl_init_fused_rp_values(struct xe_guc_pc *pc)
xe_device_assert_mem_access(pc_to_xe(pc));
if (xe->info.platform == XE_PVC)
- reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
else
- reg = xe_mmio_read32(gt, RP_STATE_CAP);
+ reg = xe_mmio_read32(&gt->mmio, RP_STATE_CAP);
pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
}
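The "units of 16.66 Mhz" noted in pc_set_cur_freq() is the 50/3 MHz granularity that encode_freq()/decode_freq() convert to and from. Assuming the usual 50-multiplier/3-scaler arithmetic (a sketch of the helpers elsewhere in this file), a worked example:

	/* MHz -> 16.66 MHz ratio units, and back; e.g. 300 MHz <-> ratio 18 */
	static u32 encode_freq(u32 freq_mhz)
	{
		return DIV_ROUND_CLOSEST(freq_mhz * 3, 50);
	}

	static u32 decode_freq(u32 ratio)
	{
		return DIV_ROUND_CLOSEST(ratio * 50, 3);
	}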
diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c
index ade6162dc259..8f62de026724 100644
--- a/drivers/gpu/drm/xe/xe_guc_relay.c
+++ b/drivers/gpu/drm/xe/xe_guc_relay.c
@@ -5,6 +5,7 @@
#include <linux/bitfield.h>
#include <linux/delay.h>
+#include <linux/fault-inject.h>
#include <drm/drm_managed.h>
@@ -355,6 +356,7 @@ int xe_guc_relay_init(struct xe_guc_relay *relay)
return drmm_add_action_or_reset(&xe->drm, __fini_relay, relay);
}
+ALLOW_ERROR_INJECTION(xe_guc_relay_init, ERRNO); /* See xe_pci_probe() */
static u32 to_relay_error(int err)
{
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index fbbe6a487bbb..8a5c21a87977 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -276,10 +276,26 @@ static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
}
#endif
+static void xe_guc_submit_fini(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ int ret;
+
+ ret = wait_event_timeout(guc->submission_state.fini_wq,
+ xa_empty(&guc->submission_state.exec_queue_lookup),
+ HZ * 5);
+
+ drain_workqueue(xe->destroy_wq);
+
+ xe_gt_assert(gt, ret);
+}
+
static void guc_submit_fini(struct drm_device *drm, void *arg)
{
struct xe_guc *guc = arg;
+ xe_guc_submit_fini(guc);
xa_destroy(&guc->submission_state.exec_queue_lookup);
free_submit_wq(guc);
}
@@ -290,9 +306,15 @@ static void guc_submit_wedged_fini(void *arg)
struct xe_exec_queue *q;
unsigned long index;
- xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
- if (exec_queue_wedged(q))
+ mutex_lock(&guc->submission_state.lock);
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+ if (exec_queue_wedged(q)) {
+ mutex_unlock(&guc->submission_state.lock);
xe_exec_queue_put(q);
+ mutex_lock(&guc->submission_state.lock);
+ }
+ }
+ mutex_unlock(&guc->submission_state.lock);
}
static const struct xe_exec_queue_ops guc_exec_queue_ops;
@@ -345,6 +367,8 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
xa_init(&guc->submission_state.exec_queue_lookup);
+ init_waitqueue_head(&guc->submission_state.fini_wq);
+
primelockdep(guc);
return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
@@ -361,12 +385,14 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa
xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
q->guc->id, q->width);
+
+ if (xa_empty(&guc->submission_state.exec_queue_lookup))
+ wake_up(&guc->submission_state.fini_wq);
}
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
int ret;
- void *ptr;
int i;
/*
@@ -386,12 +412,10 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
q->guc->id = ret;
for (i = 0; i < q->width; ++i) {
- ptr = xa_store(&guc->submission_state.exec_queue_lookup,
- q->guc->id + i, q, GFP_NOWAIT);
- if (IS_ERR(ptr)) {
- ret = PTR_ERR(ptr);
+ ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
+ q->guc->id + i, q, GFP_NOWAIT));
+ if (ret)
goto err_release;
- }
}
return 0;
@@ -1268,13 +1292,16 @@ static void __guc_exec_queue_fini_async(struct work_struct *w)
static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
{
+ struct xe_guc *guc = exec_queue_to_guc(q);
+ struct xe_device *xe = guc_to_xe(guc);
+
INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
/* We must block on kernel engines so slabs are empty on driver unload */
if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
__guc_exec_queue_fini_async(&q->guc->fini_async);
else
- queue_work(system_wq, &q->guc->fini_async);
+ queue_work(xe->destroy_wq, &q->guc->fini_async);
}
static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
@@ -1796,6 +1823,7 @@ static void guc_exec_queue_start(struct xe_exec_queue *q)
}
xe_sched_submission_start(sched);
+ xe_sched_submission_resume_tdr(sched);
}
int xe_guc_submit_start(struct xe_guc *guc)
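Note the lock dance in guc_submit_wedged_fini() above: the xa walk runs under submission_state.lock, but the lock is dropped around each xe_exec_queue_put() because the final put can re-enter submission code that takes the same lock, and xa_for_each() is safe against dropping locks between iterations (it re-finds the next entry from the current index). The shape of the pattern, with needs_put() standing in for exec_queue_wedged():

	mutex_lock(&lock);
	xa_for_each(&xa, index, q) {
		if (needs_put(q)) {
			mutex_unlock(&lock);	/* put may take this lock */
			xe_exec_queue_put(q);
			mutex_lock(&lock);	/* walk resumes from @index */
		}
	}
	mutex_unlock(&lock);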
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index 546ac6350a31..69046f698271 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -81,6 +81,8 @@ struct xe_guc {
#endif
/** @submission_state.enabled: submission is enabled */
bool enabled;
+ /** @submission_state.fini_wq: submit fini wait queue */
+ wait_queue_head_t fini_wq;
} submission_state;
/** @hwconfig: Hardware config state */
struct {
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index f5459f97af23..77c5830309cf 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -229,7 +229,7 @@ bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type)
{
struct xe_gt *gt = huc_to_gt(huc);
- return xe_mmio_read32(gt, huc_auth_modes[type].reg) & huc_auth_modes[type].val;
+ return xe_mmio_read32(&gt->mmio, huc_auth_modes[type].reg) & huc_auth_modes[type].val;
}
int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type)
@@ -268,7 +268,7 @@ int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type)
goto fail;
}
- ret = xe_mmio_wait32(gt, huc_auth_modes[type].reg, huc_auth_modes[type].val,
+ ret = xe_mmio_wait32(&gt->mmio, huc_auth_modes[type].reg, huc_auth_modes[type].val,
huc_auth_modes[type].val, 100000, NULL, false);
if (ret) {
xe_gt_err(gt, "HuC: firmware not verified: %pe\n", ERR_PTR(ret));
@@ -308,7 +308,7 @@ void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
return;
drm_printf(p, "\nHuC status: 0x%08x\n",
- xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO));
+ xe_mmio_read32(&gt->mmio, HUC_KERNEL_LOAD_INFO));
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index c9c3beb3ce8d..ea6d9ef7fab6 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -12,6 +12,7 @@
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
+#include "regs/xe_irq_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
@@ -295,7 +296,7 @@ void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe,
reg.addr += hwe->mmio_base;
- xe_mmio_write32(hwe->gt, reg, val);
+ xe_mmio_write32(&hwe->gt->mmio, reg, val);
}
/**
@@ -315,7 +316,7 @@ u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
reg.addr += hwe->mmio_base;
- return xe_mmio_read32(hwe->gt, reg);
+ return xe_mmio_read32(&hwe->gt->mmio, reg);
}
void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
@@ -324,7 +325,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
- xe_mmio_write32(hwe->gt, RCU_MODE,
+ xe_mmio_write32(&hwe->gt->mmio, RCU_MODE,
_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
@@ -354,7 +355,7 @@ static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
hwe->class != XE_ENGINE_CLASS_RENDER)
return false;
- return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
+ return xe_mmio_read32(&hwe->gt->mmio, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
}
void
@@ -460,6 +461,30 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
}
+static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance)
+{
+ const struct engine_info *info;
+ enum xe_hw_engine_id id;
+
+ for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
+ info = &engine_infos[id];
+ if (info->class == class && info->instance == instance)
+ return info;
+ }
+
+ return NULL;
+}
+
+static u16 get_msix_irq_offset(struct xe_gt *gt, enum xe_engine_class class)
+{
+ /* For MSI-X, hw engines report to the offset of engine instance zero */
+ const struct engine_info *info = find_engine_info(class, 0);
+
+ xe_gt_assert(gt, info);
+
+ return info ? info->irq_offset : 0;
+}
+
static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
enum xe_hw_engine_id id)
{
@@ -479,7 +504,9 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
hwe->class = info->class;
hwe->instance = info->instance;
hwe->mmio_base = info->mmio_base;
- hwe->irq_offset = info->irq_offset;
+ hwe->irq_offset = xe_device_has_msix(gt_to_xe(gt)) ?
+ get_msix_irq_offset(gt, info->class) :
+ info->irq_offset;
hwe->domain = info->domain;
hwe->name = info->name;
hwe->fence_irq = &gt->fence_irq[info->class];
@@ -612,7 +639,7 @@ static void read_media_fuses(struct xe_gt *gt)
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
- media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);
+ media_fuse = xe_mmio_read32(&gt->mmio, GT_VEBOX_VDBOX_DISABLE);
/*
* Pre-Xe_HP platforms had register bits representing absent engines,
@@ -657,7 +684,7 @@ static void read_copy_fuses(struct xe_gt *gt)
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
- bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
+ bcs_mask = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
/* BCS0 is always present; only BCS1-BCS8 may be fused off */
@@ -704,7 +731,7 @@ static void read_compute_fuses_from_reg(struct xe_gt *gt)
struct xe_device *xe = gt_to_xe(gt);
u32 ccs_mask;
- ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
+ ccs_mask = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);
for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
@@ -742,8 +769,8 @@ static void check_gsc_availability(struct xe_gt *gt)
gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);
/* interrupts were previously enabled, so turn them off */
- xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0);
- xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0);
+ xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0);
+ xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0);
drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
}
@@ -809,6 +836,7 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
struct xe_hw_engine_snapshot *snapshot)
{
struct xe_gt *gt = hwe->gt;
+ struct xe_mmio *mmio = &gt->mmio;
struct xe_device *xe = gt_to_xe(gt);
unsigned int dss;
u16 group, instance;
@@ -820,11 +848,11 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
if (is_slice_common_per_gslice(xe) == false) {
snapshot->reg.instdone.slice_common[0] =
- xe_mmio_read32(gt, SC_INSTDONE);
+ xe_mmio_read32(mmio, SC_INSTDONE);
snapshot->reg.instdone.slice_common_extra[0] =
- xe_mmio_read32(gt, SC_INSTDONE_EXTRA);
+ xe_mmio_read32(mmio, SC_INSTDONE_EXTRA);
snapshot->reg.instdone.slice_common_extra2[0] =
- xe_mmio_read32(gt, SC_INSTDONE_EXTRA2);
+ xe_mmio_read32(mmio, SC_INSTDONE_EXTRA2);
} else {
for_each_geometry_dss(dss, gt, group, instance) {
snapshot->reg.instdone.slice_common[dss] =
@@ -903,6 +931,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
hwe->domain);
snapshot->mmio_base = hwe->mmio_base;
+ snapshot->kernel_reserved = xe_hw_engine_is_reserved(hwe);
/* no more VF accessible data below this point */
if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
@@ -959,7 +988,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
xe_hw_engine_snapshot_instdone_capture(hwe, snapshot);
if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
- snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
+ snapshot->reg.rcu_mode = xe_mmio_read32(&hwe->gt->mmio, RCU_MODE);
return snapshot;
}
@@ -1025,6 +1054,8 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
snapshot->logical_instance);
drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
snapshot->forcewake.domain, snapshot->forcewake.ref);
+ drm_printf(p, "\tReserved: %s\n",
+ str_yes_no(snapshot->kernel_reserved));
drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
@@ -1150,7 +1181,7 @@ const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe)
{
- return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base));
+ return xe_mmio_read64_2x32(&hwe->gt->mmio, RING_TIMESTAMP(hwe->mmio_base));
}
enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 8be6d420ece4..be60edb3e673 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -173,6 +173,8 @@ struct xe_hw_engine_snapshot {
} forcewake;
/** @mmio_base: MMIO base address of this hw engine*/
u32 mmio_base;
+ /** @kernel_reserved: Engine reserved, can't be used by userspace */
+ bool kernel_reserved;
/** @reg: Useful MMIO register snapshot */
struct {
/** @reg.ring_execlist_status: RING_EXECLIST_STATUS */
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index aa11728e7e79..fde56dad3ab7 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -149,7 +149,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *v
u64 reg_val, min, max;
struct xe_device *xe = hwmon->xe;
struct xe_reg rapl_limit, pkg_power_sku;
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
@@ -190,7 +190,7 @@ unlock:
static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value)
{
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
int ret = 0;
u64 reg_val;
struct xe_reg rapl_limit;
@@ -222,7 +222,7 @@ unlock:
static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value)
{
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
u64 reg_val;
@@ -259,7 +259,7 @@ static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, l
static void
xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy)
{
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
struct xe_hwmon_energy_info *ei = &hwmon->ei[channel];
u64 reg_val;
@@ -282,7 +282,7 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at
char *buf)
{
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
u32 x, y, x_w = 2; /* 2 bits */
u64 r, tau4, out;
int sensor_index = to_sensor_dev_attr(attr)->index;
@@ -323,7 +323,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
const char *buf, size_t count)
{
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
u32 x, y, rxy, x_w = 2; /* 2 bits */
u64 tau4, r, max_win;
unsigned long val;
@@ -498,7 +498,7 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel,
static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *value)
{
- struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
u64 reg_val;
reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel));
@@ -781,7 +781,7 @@ static const struct hwmon_chip_info hwmon_chip_info = {
static void
xe_hwmon_get_preregistration_info(struct xe_device *xe)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
struct xe_hwmon *hwmon = xe->hwmon;
long energy;
u64 val_sku_unit = 0;
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 5f2c368c35ad..b7995ebd54ab 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -10,8 +10,7 @@
#include <drm/drm_managed.h>
#include "display/xe_display.h"
-#include "regs/xe_gt_regs.h"
-#include "regs/xe_regs.h"
+#include "regs/xe_irq_regs.h"
#include "xe_device.h"
#include "xe_drv.h"
#include "xe_gsc_proxy.h"
@@ -30,14 +29,14 @@
#define IIR(offset) XE_REG(offset + 0x8)
#define IER(offset) XE_REG(offset + 0xc)
-static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg)
+static void assert_iir_is_zero(struct xe_mmio *mmio, struct xe_reg reg)
{
u32 val = xe_mmio_read32(mmio, reg);
if (val == 0)
return;
- drm_WARN(&gt_to_xe(mmio)->drm, 1,
+ drm_WARN(&mmio->tile->xe->drm, 1,
"Interrupt register 0x%x is not zero: 0x%08x\n",
reg.addr, val);
xe_mmio_write32(mmio, reg, 0xffffffff);
@@ -52,7 +51,7 @@ static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg)
*/
static void unmask_and_enable(struct xe_tile *tile, u32 irqregs, u32 bits)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
/*
* If we're just enabling an interrupt now, it shouldn't already
@@ -70,7 +69,7 @@ static void unmask_and_enable(struct xe_tile *tile, u32 irqregs, u32 bits)
/* Mask and disable all interrupts. */
static void mask_and_disable(struct xe_tile *tile, u32 irqregs)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
xe_mmio_write32(mmio, IMR(irqregs), ~0);
/* Posting read */
@@ -87,7 +86,7 @@ static void mask_and_disable(struct xe_tile *tile, u32 irqregs)
static u32 xelp_intr_disable(struct xe_device *xe)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
xe_mmio_write32(mmio, GFX_MSTR_IRQ, 0);
@@ -103,7 +102,7 @@ static u32 xelp_intr_disable(struct xe_device *xe)
static u32
gu_misc_irq_ack(struct xe_device *xe, const u32 master_ctl)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
u32 iir;
if (!(master_ctl & GU_MISC_IRQ))
@@ -118,7 +117,7 @@ gu_misc_irq_ack(struct xe_device *xe, const u32 master_ctl)
static inline void xelp_intr_enable(struct xe_device *xe, bool stall)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
xe_mmio_write32(mmio, GFX_MSTR_IRQ, MASTER_IRQ);
if (stall)
@@ -129,12 +128,13 @@ static inline void xelp_intr_enable(struct xe_device *xe, bool stall)
void xe_irq_enable_hwe(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ struct xe_mmio *mmio = &gt->mmio;
u32 ccs_mask, bcs_mask;
u32 irqs, dmask, smask;
u32 gsc_mask = 0;
u32 heci_mask = 0;
- if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe))
+ if (xe_device_uses_memirq(xe))
return;
if (xe_device_uc_enabled(xe)) {
@@ -155,35 +155,35 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
if (!xe_gt_is_media_type(gt)) {
/* Enable interrupts for each engine class */
- xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, dmask);
+ xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask);
if (ccs_mask)
- xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, smask);
+ xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, smask);
/* Unmask interrupts for each engine instance */
- xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~smask);
- xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~smask);
+ xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, ~smask);
+ xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, ~smask);
if (bcs_mask & (BIT(1)|BIT(2)))
- xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
if (bcs_mask & (BIT(3)|BIT(4)))
- xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
if (bcs_mask & (BIT(5)|BIT(6)))
- xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
if (bcs_mask & (BIT(7)|BIT(8)))
- xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
if (ccs_mask & (BIT(0)|BIT(1)))
- xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~dmask);
if (ccs_mask & (BIT(2)|BIT(3)))
- xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~dmask);
}
if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) {
/* Enable interrupts for each engine class */
- xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, dmask);
+ xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, dmask);
/* Unmask interrupts for each engine instance */
- xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask);
- xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask);
- xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, ~dmask);
+ xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, ~dmask);
/*
* the heci2 interrupt is enabled via the same register as the
@@ -197,17 +197,17 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
}
if (gsc_mask) {
- xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask | heci_mask);
- xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~gsc_mask);
+ xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, gsc_mask | heci_mask);
+ xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~gsc_mask);
}
if (heci_mask)
- xe_mmio_write32(gt, HECI2_RSVD_INTR_MASK, ~(heci_mask << 16));
+ xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~(heci_mask << 16));
}
}
static u32
gt_engine_identity(struct xe_device *xe,
- struct xe_gt *mmio,
+ struct xe_mmio *mmio,
const unsigned int bank,
const unsigned int bit)
{
@@ -279,7 +279,7 @@ static struct xe_gt *pick_engine_gt(struct xe_tile *tile,
return tile->media_gt;
default:
break;
- };
+ }
fallthrough;
default:
return tile->primary_gt;
@@ -291,7 +291,7 @@ static void gt_irq_handler(struct xe_tile *tile,
u32 *identity)
{
struct xe_device *xe = tile_to_xe(tile);
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
unsigned int bank, bit;
u16 instance, intr_vec;
enum xe_engine_class class;
@@ -376,7 +376,7 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
static u32 dg1_intr_disable(struct xe_device *xe)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
u32 val;
/* First disable interrupts */
@@ -394,7 +394,7 @@ static u32 dg1_intr_disable(struct xe_device *xe)
static void dg1_intr_enable(struct xe_device *xe, bool stall)
{
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ);
if (stall)
@@ -431,7 +431,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
}
for_each_tile(tile, xe, id) {
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
if ((master_tile_ctl & DG1_MSTR_TILE(tile->id)) == 0)
continue;
@@ -474,7 +474,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
static void gt_irq_reset(struct xe_tile *tile)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
XE_ENGINE_CLASS_COMPUTE);
@@ -504,7 +504,7 @@ static void gt_irq_reset(struct xe_tile *tile)
if (ccs_mask & (BIT(0)|BIT(1)))
xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~0);
if (ccs_mask & (BIT(2)|BIT(3)))
- xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0);
+ xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0);
if ((tile->media_gt &&
xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) ||
@@ -547,7 +547,7 @@ static void dg1_irq_reset(struct xe_tile *tile)
static void dg1_irq_reset_mstr(struct xe_tile *tile)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
xe_mmio_write32(mmio, GFX_MSTR_IRQ, ~0);
}
@@ -566,7 +566,7 @@ static void vf_irq_reset(struct xe_device *xe)
for_each_tile(tile, xe, id) {
if (xe_device_has_memirq(xe))
- xe_memirq_reset(&tile->sriov.vf.memirq);
+ xe_memirq_reset(&tile->memirq);
else
gt_irq_reset(tile);
}
@@ -609,7 +609,7 @@ static void vf_irq_postinstall(struct xe_device *xe)
for_each_tile(tile, xe, id)
if (xe_device_has_memirq(xe))
- xe_memirq_postinstall(&tile->sriov.vf.memirq);
+ xe_memirq_postinstall(&tile->memirq);
if (GRAPHICS_VERx100(xe) < 1210)
xelp_intr_enable(xe, true);
@@ -652,7 +652,7 @@ static irqreturn_t vf_mem_irq_handler(int irq, void *arg)
spin_unlock(&xe->irq.lock);
for_each_tile(tile, xe, id)
- xe_memirq_handler(&tile->sriov.vf.memirq);
+ xe_memirq_handler(&tile->memirq);
return IRQ_HANDLED;
}
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 8999ac511555..a60ceae4c6dd 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -193,7 +193,7 @@ static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt)
lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo));
lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K));
- xe_mmio_write32(tile->primary_gt,
+ xe_mmio_write32(&tile->mmio,
GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG,
LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K));
}
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index aec7db39c061..f0976230012a 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -599,10 +599,10 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
- struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
+ struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
struct xe_device *xe = gt_to_xe(hwe->gt);
- if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
+ if (!xe_device_uses_memirq(xe))
return;
regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
@@ -613,9 +613,9 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
- regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
+ regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
- regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
+ regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);
}
static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
index 95b6e9d7b7db..e3610cb90bb9 100644
--- a/drivers/gpu/drm/xe/xe_memirq.c
+++ b/drivers/gpu/drm/xe/xe_memirq.c
@@ -5,8 +5,8 @@
#include <drm/drm_managed.h>
-#include "regs/xe_gt_regs.h"
#include "regs/xe_guc_regs.h"
+#include "regs/xe_irq_regs.h"
#include "regs/xe_regs.h"
#include "xe_assert.h"
@@ -19,15 +19,25 @@
#include "xe_hw_engine.h"
#include "xe_map.h"
#include "xe_memirq.h"
-#include "xe_sriov.h"
-#include "xe_sriov_printk.h"
#define memirq_assert(m, condition) xe_tile_assert(memirq_to_tile(m), condition)
-#define memirq_debug(m, msg...) xe_sriov_dbg_verbose(memirq_to_xe(m), "MEMIRQ: " msg)
+#define memirq_printk(m, _level, _fmt, ...) \
+ drm_##_level(&memirq_to_xe(m)->drm, "MEMIRQ%u: " _fmt, \
+ memirq_to_tile(m)->id, ##__VA_ARGS__)
+
+#ifdef CONFIG_DRM_XE_DEBUG_MEMIRQ
+#define memirq_debug(m, _fmt, ...) memirq_printk(m, dbg, _fmt, ##__VA_ARGS__)
+#else
+#define memirq_debug(...)
+#endif
+
+#define memirq_err(m, _fmt, ...) memirq_printk(m, err, _fmt, ##__VA_ARGS__)
+#define memirq_err_ratelimited(m, _fmt, ...) \
+ memirq_printk(m, err_ratelimited, _fmt, ##__VA_ARGS__)
static struct xe_tile *memirq_to_tile(struct xe_memirq *memirq)
{
- return container_of(memirq, struct xe_tile, sriov.vf.memirq);
+ return container_of(memirq, struct xe_tile, memirq);
}
static struct xe_device *memirq_to_xe(struct xe_memirq *memirq)
@@ -105,6 +115,44 @@ static const char *guc_name(struct xe_guc *guc)
* | |
* | |
* +-----------+
+ *
+ *
+ * MSI-X use case
+ *
+ * When using MSI-X, hw engines report interrupt status and source to engine
+ * instance 0. For this scenario, in order to differentiate between the
+ * engines, we need to pass different status/source pointers in the LRC.
+ *
+ * The requirements on those pointers are:
+ * - Interrupt status should be 4KiB aligned
+ * - Interrupt source should be 64-byte aligned
+ *
+ * To accommodate this, we duplicate the memirq page layout above -
+ * allocating a page for each engine instance and passing that page in the LRC.
+ * Note that the same page can be reused for different engine types.
+ * For example, an LRC executing on CCS #x will have pointers to page #x,
+ * and an LRC executing on BCS #x will have the same pointers.
+ *
+ * ::
+ *
+ * 0x0000 +==============================+ <== page for instance 0 (BCS0, CCS0, etc.)
+ * | Interrupt Status Report Page |
+ * 0x0400 +==============================+
+ * | Interrupt Source Report Page |
+ * 0x0440 +==============================+
+ * | Interrupt Enable Mask |
+ * +==============================+
+ * | Not used |
+ * 0x1000 +==============================+ <== page for instance 1 (BCS1, CCS1, etc.)
+ * | Interrupt Status Report Page |
+ * 0x1400 +==============================+
+ * | Interrupt Source Report Page |
+ * 0x1440 +==============================+
+ * | Not used |
+ * 0x2000 +==============================+ <== page for instance 2 (BCS2, CCS2, etc.)
+ * | ... |
+ * +==============================+
+ *
*/
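For reference, the per-instance pointer math implied by this layout reduces to the following (a minimal sketch mirroring the XE_MEMIRQ_*_OFFSET macros changed later in this patch; the helper names are illustrative, not part of the driver):

	/* each engine instance owns one 4KiB page in the memirq bo */
	static u32 example_status_ggtt_addr(u32 bo_ggtt_base, u16 instance)
	{
		return bo_ggtt_base + instance * SZ_4K + 0x0;	/* status page start */
	}

	static u32 example_source_ggtt_addr(u32 bo_ggtt_base, u16 instance)
	{
		return bo_ggtt_base + instance * SZ_4K + 0x400;	/* source report */
	}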
static void __release_xe_bo(struct drm_device *drm, void *arg)
@@ -114,18 +162,30 @@ static void __release_xe_bo(struct drm_device *drm, void *arg)
xe_bo_unpin_map_no_vm(bo);
}
+static inline bool hw_reports_to_instance_zero(struct xe_memirq *memirq)
+{
+ /*
+ * When the HW engines are configured to use MSI-X,
+ * they report interrupt status and source to the offset of
+ * engine instance 0.
+ */
+ return xe_device_has_msix(memirq_to_xe(memirq));
+}
+
static int memirq_alloc_pages(struct xe_memirq *memirq)
{
struct xe_device *xe = memirq_to_xe(memirq);
struct xe_tile *tile = memirq_to_tile(memirq);
+ size_t bo_size = hw_reports_to_instance_zero(memirq) ?
+ XE_HW_ENGINE_MAX_INSTANCE * SZ_4K : SZ_4K;
struct xe_bo *bo;
int err;
- BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET, SZ_64));
- BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET, SZ_4K));
+ BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET(0), SZ_64));
+ BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET(0), SZ_4K));
/* XXX: convert to managed bo */
- bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
+ bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size,
ttm_bo_type_kernel,
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_GGTT |
@@ -140,25 +200,25 @@ static int memirq_alloc_pages(struct xe_memirq *memirq)
memirq_assert(memirq, !xe_bo_is_vram(bo));
memirq_assert(memirq, !memirq->bo);
- iosys_map_memset(&bo->vmap, 0, 0, SZ_4K);
+ iosys_map_memset(&bo->vmap, 0, 0, bo_size);
memirq->bo = bo;
- memirq->source = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_SOURCE_OFFSET);
- memirq->status = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_STATUS_OFFSET);
+ memirq->source = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_SOURCE_OFFSET(0));
+ memirq->status = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_STATUS_OFFSET(0));
memirq->mask = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_ENABLE_OFFSET);
memirq_assert(memirq, !memirq->source.is_iomem);
memirq_assert(memirq, !memirq->status.is_iomem);
memirq_assert(memirq, !memirq->mask.is_iomem);
- memirq_debug(memirq, "page offsets: source %#x status %#x\n",
- xe_memirq_source_ptr(memirq), xe_memirq_status_ptr(memirq));
+ memirq_debug(memirq, "page offsets: bo %#x bo_size %zu source %#x status %#x\n",
+ xe_bo_ggtt_addr(bo), bo_size, XE_MEMIRQ_SOURCE_OFFSET(0),
+ XE_MEMIRQ_STATUS_OFFSET(0));
return drmm_add_action_or_reset(&xe->drm, __release_xe_bo, memirq->bo);
out:
- xe_sriov_err(memirq_to_xe(memirq),
- "Failed to allocate memirq page (%pe)\n", ERR_PTR(err));
+ memirq_err(memirq, "Failed to allocate memirq page (%pe)\n", ERR_PTR(err));
return err;
}
@@ -178,9 +238,7 @@ static void memirq_set_enable(struct xe_memirq *memirq, bool enable)
*
* These allocations are managed and will be implicitly released on unload.
*
- * Note: This function shall be called only by the VF driver.
- *
- * If this function fails then VF driver won't be able to operate correctly.
+ * If this function fails then the driver won't be able to operate correctly.
* If `Memory Based Interrupts`_ are not used this function will return 0.
*
* Return: 0 on success or a negative error code on failure.
@@ -190,9 +248,7 @@ int xe_memirq_init(struct xe_memirq *memirq)
struct xe_device *xe = memirq_to_xe(memirq);
int err;
- memirq_assert(memirq, IS_SRIOV_VF(xe));
-
- if (!xe_device_has_memirq(xe))
+ if (!xe_device_uses_memirq(xe))
return 0;
err = memirq_alloc_pages(memirq);
@@ -208,52 +264,59 @@ int xe_memirq_init(struct xe_memirq *memirq)
/**
* xe_memirq_source_ptr - Get GGTT's offset of the `Interrupt Source Report Page`_.
* @memirq: the &xe_memirq to query
+ * @hwe: the hw engine for which we want the report page
*
- * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * Shall be called when `Memory Based Interrupts`_ are used
* and xe_memirq_init() didn't fail.
*
* Return: GGTT's offset of the `Interrupt Source Report Page`_.
*/
-u32 xe_memirq_source_ptr(struct xe_memirq *memirq)
+u32 xe_memirq_source_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ u16 instance;
+
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
memirq_assert(memirq, memirq->bo);
- return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_SOURCE_OFFSET;
+ instance = hw_reports_to_instance_zero(memirq) ? hwe->instance : 0;
+
+ return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_SOURCE_OFFSET(instance);
}
/**
* xe_memirq_status_ptr - Get GGTT's offset of the `Interrupt Status Report Page`_.
* @memirq: the &xe_memirq to query
+ * @hwe: the hw engine for which we want the report page
*
- * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * Shall be called when `Memory Based Interrupts`_ are used
* and xe_memirq_init() didn't fail.
*
* Return: GGTT's offset of the `Interrupt Status Report Page`_.
*/
-u32 xe_memirq_status_ptr(struct xe_memirq *memirq)
+u32 xe_memirq_status_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ u16 instance;
+
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
memirq_assert(memirq, memirq->bo);
- return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_STATUS_OFFSET;
+ instance = hw_reports_to_instance_zero(memirq) ? hwe->instance : 0;
+
+ return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_STATUS_OFFSET(instance);
}
/**
* xe_memirq_enable_ptr - Get GGTT's offset of the Interrupt Enable Mask.
* @memirq: the &xe_memirq to query
*
- * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * Shall be called when `Memory Based Interrupts`_ are used
* and xe_memirq_init() didn't fail.
*
* Return: GGTT's offset of the Interrupt Enable Mask.
*/
u32 xe_memirq_enable_ptr(struct xe_memirq *memirq)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
memirq_assert(memirq, memirq->bo);
return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_ENABLE_OFFSET;
@@ -267,7 +330,7 @@ u32 xe_memirq_enable_ptr(struct xe_memirq *memirq)
* Register `Interrupt Source Report Page`_ and `Interrupt Status Report Page`_
* to be used by the GuC when `Memory Based Interrupts`_ are required.
*
- * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * Shall be called when `Memory Based Interrupts`_ are used
* and xe_memirq_init() didn't fail.
*
* Return: 0 on success or a negative error code on failure.
@@ -279,12 +342,11 @@ int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc)
u32 source, status;
int err;
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
memirq_assert(memirq, memirq->bo);
- source = xe_memirq_source_ptr(memirq) + offset;
- status = xe_memirq_status_ptr(memirq) + offset * SZ_16;
+ source = xe_memirq_source_ptr(memirq, NULL) + offset;
+ status = xe_memirq_status_ptr(memirq, NULL) + offset * SZ_16;
err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY,
source);
@@ -299,9 +361,8 @@ int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc)
return 0;
failed:
- xe_sriov_err(memirq_to_xe(memirq),
- "Failed to setup report pages in %s (%pe)\n",
- guc_name(guc), ERR_PTR(err));
+ memirq_err(memirq, "Failed to setup report pages in %s (%pe)\n",
+ guc_name(guc), ERR_PTR(err));
return err;
}
@@ -311,13 +372,12 @@ failed:
*
* This is part of the driver IRQ setup flow.
*
- * This function shall only be used by the VF driver on platforms that use
+ * This function shall only be used on platforms that use
* `Memory Based Interrupts`_.
*/
void xe_memirq_reset(struct xe_memirq *memirq)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
if (memirq->bo)
memirq_set_enable(memirq, false);
@@ -329,13 +389,12 @@ void xe_memirq_reset(struct xe_memirq *memirq)
*
* This is part of the driver IRQ setup flow.
*
- * This function shall only be used by the VF driver on platforms that use
+ * This function shall only be used on platforms that use
* `Memory Based Interrupts`_.
*/
void xe_memirq_postinstall(struct xe_memirq *memirq)
{
- memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
- memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq)));
if (memirq->bo)
memirq_set_enable(memirq, true);
@@ -349,9 +408,9 @@ static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector,
value = iosys_map_rd(vector, offset, u8);
if (value) {
if (value != 0xff)
- xe_sriov_err_ratelimited(memirq_to_xe(memirq),
- "Unexpected memirq value %#x from %s at %u\n",
- value, name, offset);
+ memirq_err_ratelimited(memirq,
+ "Unexpected memirq value %#x from %s at %u\n",
+ value, name, offset);
iosys_map_wr(vector, offset, u8, 0x00);
}
@@ -379,6 +438,28 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat
}
/**
+ * xe_memirq_hwe_handler - Check and process interrupts for a specific HW engine.
+ * @memirq: the &xe_memirq
+ * @hwe: the hw engine to process
+ *
+ * This function reads and dispatches `Memory Based Interrupts`_ for the provided HW engine.
+ */
+void xe_memirq_hwe_handler(struct xe_memirq *memirq, struct xe_hw_engine *hwe)
+{
+ u16 offset = hwe->irq_offset;
+ u16 instance = hw_reports_to_instance_zero(memirq) ? hwe->instance : 0;
+ struct iosys_map src_offset = IOSYS_MAP_INIT_OFFSET(&memirq->bo->vmap,
+ XE_MEMIRQ_SOURCE_OFFSET(instance));
+
+ if (memirq_received(memirq, &src_offset, offset, "SRC")) {
+ struct iosys_map status_offset =
+ IOSYS_MAP_INIT_OFFSET(&memirq->bo->vmap,
+ XE_MEMIRQ_STATUS_OFFSET(instance) + offset * SZ_16);
+ memirq_dispatch_engine(memirq, &status_offset, hwe);
+ }
+}
+
+/**
* xe_memirq_handler - The `Memory Based Interrupts`_ Handler.
* @memirq: the &xe_memirq
*
@@ -405,13 +486,8 @@ void xe_memirq_handler(struct xe_memirq *memirq)
if (gt->tile != tile)
continue;
- for_each_hw_engine(hwe, gt, id) {
- if (memirq_received(memirq, &memirq->source, hwe->irq_offset, "SRC")) {
- map = IOSYS_MAP_INIT_OFFSET(&memirq->status,
- hwe->irq_offset * SZ_16);
- memirq_dispatch_engine(memirq, &map, hwe);
- }
- }
+ for_each_hw_engine(hwe, gt, id)
+ xe_memirq_hwe_handler(memirq, hwe);
}
/* GuC and media GuC (if present) must be checked separately */
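The vector handling above follows a simple contract: the hardware sets a byte in the source/status vector to 0xff to flag a pending interrupt, and the driver acknowledges it by clearing the byte. A rough model of memirq_received(), assuming a plain byte array instead of an iosys_map:

	static bool example_vector_received(u8 *vector, unsigned int offset)
	{
		u8 value = vector[offset];	/* hw wrote 0xff here if pending */

		if (!value)
			return false;

		/* values other than 0xff are unexpected but still consumed */
		vector[offset] = 0;		/* acknowledge by clearing */
		return true;
	}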
diff --git a/drivers/gpu/drm/xe/xe_memirq.h b/drivers/gpu/drm/xe/xe_memirq.h
index 2d40d03c3095..06130650e9d6 100644
--- a/drivers/gpu/drm/xe/xe_memirq.h
+++ b/drivers/gpu/drm/xe/xe_memirq.h
@@ -9,16 +9,18 @@
#include <linux/types.h>
struct xe_guc;
+struct xe_hw_engine;
struct xe_memirq;
int xe_memirq_init(struct xe_memirq *memirq);
-u32 xe_memirq_source_ptr(struct xe_memirq *memirq);
-u32 xe_memirq_status_ptr(struct xe_memirq *memirq);
+u32 xe_memirq_source_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe);
+u32 xe_memirq_status_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe);
u32 xe_memirq_enable_ptr(struct xe_memirq *memirq);
void xe_memirq_reset(struct xe_memirq *memirq);
void xe_memirq_postinstall(struct xe_memirq *memirq);
+void xe_memirq_hwe_handler(struct xe_memirq *memirq, struct xe_hw_engine *hwe);
void xe_memirq_handler(struct xe_memirq *memirq);
int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc);
diff --git a/drivers/gpu/drm/xe/xe_memirq_types.h b/drivers/gpu/drm/xe/xe_memirq_types.h
index 625b6b8736cc..9d0f6c1cdb9d 100644
--- a/drivers/gpu/drm/xe/xe_memirq_types.h
+++ b/drivers/gpu/drm/xe/xe_memirq_types.h
@@ -11,9 +11,9 @@
struct xe_bo;
/* ISR */
-#define XE_MEMIRQ_STATUS_OFFSET 0x0
+#define XE_MEMIRQ_STATUS_OFFSET(inst) ((inst) * SZ_4K + 0x0)
/* IIR */
-#define XE_MEMIRQ_SOURCE_OFFSET 0x400
+#define XE_MEMIRQ_SOURCE_OFFSET(inst) ((inst) * SZ_4K + 0x400)
/* IMR */
#define XE_MEMIRQ_ENABLE_OFFSET 0x440
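Worked example of the now-parameterized offsets in the MSI-X case, where each engine instance owns its own 4KiB page:

	XE_MEMIRQ_STATUS_OFFSET(2)	/* 2 * SZ_4K + 0x0   == 0x2000 */
	XE_MEMIRQ_SOURCE_OFFSET(2)	/* 2 * SZ_4K + 0x400 == 0x2400 */
	XE_MEMIRQ_ENABLE_OFFSET		/* still 0x440, in the instance-0 page only */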
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 3fd462fda625..a48f239cad1c 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -36,13 +36,19 @@ static void tiles_fini(void *arg)
/*
* On multi-tile devices, partition the BAR space for MMIO on each tile,
* possibly accounting for register override on the number of tiles available.
+ * tile_mmio_size covers both the tile's 4MB register space and additional
+ * space for the GTT and other (possibly unused) regions.
* Resulting memory layout is like below:
*
* .----------------------. <- tile_count * tile_mmio_size
* | .... |
* |----------------------| <- 2 * tile_mmio_size
+ * | tile1 GTT + other |
+ * |----------------------| <- 1 * tile_mmio_size + 4MB
* | tile1->mmio.regs |
* |----------------------| <- 1 * tile_mmio_size
+ * | tile0 GTT + other |
+ * |----------------------| <- 4MB
* | tile0->mmio.regs |
* '----------------------' <- 0MB
*/
@@ -61,16 +67,16 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size)
/* Possibly override number of tile based on configuration register */
if (!xe->info.skip_mtcfg) {
- struct xe_gt *gt = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
u8 tile_count;
u32 mtcfg;
/*
* Although the per-tile mmio regs are not yet initialized, this
- * is fine as it's going to the root gt, that's guaranteed to be
- * initialized earlier in xe_mmio_init()
+ * is fine as it's going to the root tile's mmio, that's
+ * guaranteed to be initialized earlier in xe_mmio_init()
*/
- mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
+ mtcfg = xe_mmio_read64_2x32(mmio, XEHP_MTCFG_ADDR);
tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
if (tile_count < xe->info.tile_count) {
@@ -90,8 +96,9 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size)
regs = xe->mmio.regs;
for_each_tile(tile, xe, id) {
- tile->mmio.size = tile_mmio_size;
+ tile->mmio.regs_size = SZ_4M;
tile->mmio.regs = regs;
+ tile->mmio.tile = tile;
regs += tile_mmio_size;
}
}
@@ -126,8 +133,9 @@ static void mmio_extension_setup(struct xe_device *xe, size_t tile_mmio_size,
regs = xe->mmio.regs + tile_mmio_size * xe->info.tile_count;
for_each_tile(tile, xe, id) {
- tile->mmio_ext.size = tile_mmio_ext_size;
+ tile->mmio_ext.regs_size = tile_mmio_ext_size;
tile->mmio_ext.regs = regs;
+ tile->mmio_ext.tile = tile;
regs += tile_mmio_ext_size;
}
}
@@ -157,137 +165,132 @@ int xe_mmio_init(struct xe_device *xe)
{
struct xe_tile *root_tile = xe_device_get_root_tile(xe);
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- const int mmio_bar = 0;
/*
* Map the entire BAR.
* The first 16MB of the BAR belong to the root tile and include:
* registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
*/
- xe->mmio.size = pci_resource_len(pdev, mmio_bar);
- xe->mmio.regs = pci_iomap(pdev, mmio_bar, GTTMMADR_BAR);
+ xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR);
+ xe->mmio.regs = pci_iomap(pdev, GTTMMADR_BAR, 0);
if (xe->mmio.regs == NULL) {
drm_err(&xe->drm, "failed to map registers\n");
return -EIO;
}
/* Setup first tile; other tiles (if present) will be setup later. */
- root_tile->mmio.size = SZ_16M;
+ root_tile->mmio.regs_size = SZ_4M;
root_tile->mmio.regs = xe->mmio.regs;
+ root_tile->mmio.tile = root_tile;
return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe);
}
-static void mmio_flush_pending_writes(struct xe_gt *gt)
+static void mmio_flush_pending_writes(struct xe_mmio *mmio)
{
#define DUMMY_REG_OFFSET 0x130030
- struct xe_tile *tile = gt_to_tile(gt);
int i;
- if (tile->xe->info.platform != XE_LUNARLAKE)
+ if (mmio->tile->xe->info.platform != XE_LUNARLAKE)
return;
/* 4 dummy writes */
for (i = 0; i < 4; i++)
- writel(0, tile->mmio.regs + DUMMY_REG_OFFSET);
+ writel(0, mmio->regs + DUMMY_REG_OFFSET);
}
-u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
+u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg)
{
- struct xe_tile *tile = gt_to_tile(gt);
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
u8 val;
/* Wa_15015404425 */
- mmio_flush_pending_writes(gt);
+ mmio_flush_pending_writes(mmio);
- val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
- trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
+ val = readb(mmio->regs + addr);
+ trace_xe_reg_rw(mmio, false, addr, val, sizeof(val));
return val;
}
-u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
+u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg)
{
- struct xe_tile *tile = gt_to_tile(gt);
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
u16 val;
/* Wa_15015404425 */
- mmio_flush_pending_writes(gt);
+ mmio_flush_pending_writes(mmio);
- val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
- trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
+ val = readw(mmio->regs + addr);
+ trace_xe_reg_rw(mmio, false, addr, val, sizeof(val));
return val;
}
-void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
+void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val)
{
- struct xe_tile *tile = gt_to_tile(gt);
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
- trace_xe_reg_rw(gt, true, addr, val, sizeof(val));
+ trace_xe_reg_rw(mmio, true, addr, val, sizeof(val));
- if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt)))
- xe_gt_sriov_vf_write32(gt, reg, val);
+ if (!reg.vf && mmio->sriov_vf_gt)
+ xe_gt_sriov_vf_write32(mmio->sriov_vf_gt, reg, val);
else
- writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
+ writel(val, mmio->regs + addr);
}
-u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
+u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg)
{
- struct xe_tile *tile = gt_to_tile(gt);
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
u32 val;
/* Wa_15015404425 */
- mmio_flush_pending_writes(gt);
+ mmio_flush_pending_writes(mmio);
- if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt)))
- val = xe_gt_sriov_vf_read32(gt, reg);
+ if (!reg.vf && mmio->sriov_vf_gt)
+ val = xe_gt_sriov_vf_read32(mmio->sriov_vf_gt, reg);
else
- val = readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
+ val = readl(mmio->regs + addr);
- trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
+ trace_xe_reg_rw(mmio, false, addr, val, sizeof(val));
return val;
}
-u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set)
+u32 xe_mmio_rmw32(struct xe_mmio *mmio, struct xe_reg reg, u32 clr, u32 set)
{
u32 old, reg_val;
- old = xe_mmio_read32(gt, reg);
+ old = xe_mmio_read32(mmio, reg);
reg_val = (old & ~clr) | set;
- xe_mmio_write32(gt, reg, reg_val);
+ xe_mmio_write32(mmio, reg, reg_val);
return old;
}
-int xe_mmio_write32_and_verify(struct xe_gt *gt,
+int xe_mmio_write32_and_verify(struct xe_mmio *mmio,
struct xe_reg reg, u32 val, u32 mask, u32 eval)
{
u32 reg_val;
- xe_mmio_write32(gt, reg, val);
- reg_val = xe_mmio_read32(gt, reg);
+ xe_mmio_write32(mmio, reg, val);
+ reg_val = xe_mmio_read32(mmio, reg);
return (reg_val & mask) != eval ? -EINVAL : 0;
}
-bool xe_mmio_in_range(const struct xe_gt *gt,
+bool xe_mmio_in_range(const struct xe_mmio *mmio,
const struct xe_mmio_range *range,
struct xe_reg reg)
{
- u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+ u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
return range && addr >= range->start && addr <= range->end;
}
/**
* xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
- * @gt: MMIO target GT
+ * @mmio: MMIO target
* @reg: register to read value from
*
* Although Intel GPUs have some 64-bit registers, the hardware officially
@@ -307,21 +310,21 @@ bool xe_mmio_in_range(const struct xe_gt *gt,
*
* Returns the value of the 64-bit register.
*/
-u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
+u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg)
{
struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
u32 ldw, udw, oldudw, retries;
- reg.addr = xe_mmio_adjusted_addr(gt, reg.addr);
- reg_udw.addr = xe_mmio_adjusted_addr(gt, reg_udw.addr);
+ reg.addr = xe_mmio_adjusted_addr(mmio, reg.addr);
+ reg_udw.addr = xe_mmio_adjusted_addr(mmio, reg_udw.addr);
/* we shouldn't adjust just one register address */
- xe_gt_assert(gt, reg_udw.addr == reg.addr + 0x4);
+ xe_tile_assert(mmio->tile, reg_udw.addr == reg.addr + 0x4);
- oldudw = xe_mmio_read32(gt, reg_udw);
+ oldudw = xe_mmio_read32(mmio, reg_udw);
for (retries = 5; retries; --retries) {
- ldw = xe_mmio_read32(gt, reg);
- udw = xe_mmio_read32(gt, reg_udw);
+ ldw = xe_mmio_read32(mmio, reg);
+ udw = xe_mmio_read32(mmio, reg_udw);
if (udw == oldudw)
break;
@@ -329,13 +332,13 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
oldudw = udw;
}
- xe_gt_WARN(gt, retries == 0,
- "64-bit read of %#x did not stabilize\n", reg.addr);
+ drm_WARN(&mmio->tile->xe->drm, retries == 0,
+ "64-bit read of %#x did not stabilize\n", reg.addr);
return (u64)udw << 32 | ldw;
}
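The retry loop is the generic split 64-bit read pattern: re-read the upper half until it is stable, so a carry between the two 32-bit reads cannot yield a torn value. Stripped of the mmio plumbing (the function pointers are illustrative):

	static u64 example_read64_2x32(u32 (*rd_lo)(void), u32 (*rd_hi)(void))
	{
		u32 hi, lo, old_hi;
		int retries = 5;

		hi = rd_hi();
		do {
			lo = rd_lo();
			old_hi = hi;
			hi = rd_hi();	/* changed? lo may straddle a carry */
		} while (hi != old_hi && --retries);

		return (u64)hi << 32 | lo;
	}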
-static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic, bool expect_match)
{
ktime_t cur = ktime_get_raw();
@@ -346,7 +349,7 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v
bool check;
for (;;) {
- read = xe_mmio_read32(gt, reg);
+ read = xe_mmio_read32(mmio, reg);
check = (read & mask) == val;
if (!expect_match)
@@ -372,7 +375,7 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v
}
if (ret != 0) {
- read = xe_mmio_read32(gt, reg);
+ read = xe_mmio_read32(mmio, reg);
check = (read & mask) == val;
if (!expect_match)
@@ -390,7 +393,7 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v
/**
* xe_mmio_wait32() - Wait for a register to match the desired masked value
- * @gt: MMIO target GT
+ * @mmio: MMIO target
* @reg: register to read value from
* @mask: mask to be applied to the value read from the register
* @val: desired value after applying the mask
@@ -407,15 +410,15 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v
* @timeout_us for different reasons, especially in non-atomic contexts. Thus,
* it is possible that this function succeeds even after @timeout_us has passed.
*/
-int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+int xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic)
{
- return __xe_mmio_wait32(gt, reg, mask, val, timeout_us, out_val, atomic, true);
+ return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, true);
}
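A typical caller after this conversion polls through the tile's or GT's &struct xe_mmio rather than the GT itself (SOME_REG is a placeholder; real conversions of this shape appear in the xe_oa.c hunks below):

	struct xe_mmio *mmio = &gt->mmio;

	/* wait up to 50ms for bit 0 of SOME_REG to read back as 0 */
	if (xe_mmio_wait32(mmio, SOME_REG, BIT(0), 0, 50000, NULL, false))
		drm_err(&gt_to_xe(gt)->drm, "SOME_REG bit 0 did not clear\n");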
/**
* xe_mmio_wait32_not() - Wait for a register to return anything other than the given masked value
- * @gt: MMIO target GT
+ * @mmio: MMIO target
* @reg: register to read value from
* @mask: mask to be applied to the value read from the register
* @val: value not to be matched after applying the mask
@@ -426,8 +429,8 @@ int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 t
* This function works exactly like xe_mmio_wait32() with the exception that
* @val is expected not to be matched.
*/
-int xe_mmio_wait32_not(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+int xe_mmio_wait32_not(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic)
{
- return __xe_mmio_wait32(gt, reg, mask, val, timeout_us, out_val, atomic, false);
+ return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, false);
}
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 26551410ecc8..8a46f4006a84 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -14,25 +14,30 @@ struct xe_reg;
int xe_mmio_init(struct xe_device *xe);
int xe_mmio_probe_tiles(struct xe_device *xe);
-u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg);
-u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg);
-void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val);
-u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg);
-u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set);
-int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask, u32 eval);
-bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg);
-
-u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
-int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
- u32 *out_val, bool atomic);
-int xe_mmio_wait32_not(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
- u32 *out_val, bool atomic);
-
-static inline u32 xe_mmio_adjusted_addr(const struct xe_gt *gt, u32 addr)
+u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg);
+u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg);
+void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val);
+u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg);
+u32 xe_mmio_rmw32(struct xe_mmio *mmio, struct xe_reg reg, u32 clr, u32 set);
+int xe_mmio_write32_and_verify(struct xe_mmio *mmio, struct xe_reg reg, u32 val, u32 mask, u32 eval);
+bool xe_mmio_in_range(const struct xe_mmio *mmio, const struct xe_mmio_range *range, struct xe_reg reg);
+
+u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg);
+int xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val,
+ u32 timeout_us, u32 *out_val, bool atomic);
+int xe_mmio_wait32_not(struct xe_mmio *mmio, struct xe_reg reg, u32 mask,
+ u32 val, u32 timeout_us, u32 *out_val, bool atomic);
+
+static inline u32 xe_mmio_adjusted_addr(const struct xe_mmio *mmio, u32 addr)
{
- if (addr < gt->mmio.adj_limit)
- addr += gt->mmio.adj_offset;
+ if (addr < mmio->adj_limit)
+ addr += mmio->adj_offset;
return addr;
}
+static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe)
+{
+ return &xe->tiles[0].mmio;
+}
+
#endif
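Early-probe code that previously needed a GT can now read through the root tile directly, e.g. (mirroring the test_is_vf() conversion below):

	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
	u32 value = xe_mmio_read32(mmio, VF_CAP_REG);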
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 7ff0ac5b799a..8df41cd12d51 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -278,7 +278,7 @@ static void xelp_lncf_dump(struct xe_mocs_info *info, struct xe_gt *gt, struct d
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_LNCFCMOCS(i));
drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n",
j++,
@@ -310,7 +310,7 @@ static void xelp_mocs_dump(struct xe_mocs_info *info, unsigned int flags,
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_GLOBAL_MOCS(i));
drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u ] (%#8x)\n",
i,
@@ -383,7 +383,7 @@ static void xehp_lncf_dump(struct xe_mocs_info *info, unsigned int flags,
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_LNCFCMOCS(i));
drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n",
j++,
@@ -428,7 +428,7 @@ static void pvc_mocs_dump(struct xe_mocs_info *info, unsigned int flags, struct
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_LNCFCMOCS(i));
drm_printf(p, "LNCFCMOCS[%2d] = [ %u ] (%#8x)\n",
j++,
@@ -510,7 +510,7 @@ static void mtl_mocs_dump(struct xe_mocs_info *info, unsigned int flags,
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_GLOBAL_MOCS(i));
drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u] (%#8x)\n",
i,
@@ -553,7 +553,7 @@ static void xe2_mocs_dump(struct xe_mocs_info *info, unsigned int flags,
if (regs_are_mcr(gt))
reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
else
- reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+ reg_val = xe_mmio_read32(&gt->mmio, XELP_GLOBAL_MOCS(i));
drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u] (%#8x)\n",
i,
@@ -690,7 +690,7 @@ static void __init_mocs_table(struct xe_gt *gt,
if (regs_are_mcr(gt))
xe_gt_mcr_multicast_write(gt, XEHP_GLOBAL_MOCS(i), mocs);
else
- xe_mmio_write32(gt, XELP_GLOBAL_MOCS(i), mocs);
+ xe_mmio_write32(&gt->mmio, XELP_GLOBAL_MOCS(i), mocs);
}
}
@@ -730,7 +730,7 @@ static void init_l3cc_table(struct xe_gt *gt,
if (regs_are_mcr(gt))
xe_gt_mcr_multicast_write(gt, XEHP_LNCFCMOCS(i), l3cc);
else
- xe_mmio_write32(gt, XELP_LNCFCMOCS(i), l3cc);
+ xe_mmio_write32(&gt->mmio, XELP_LNCFCMOCS(i), l3cc);
}
}
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index eae38a49ee8e..bbe03db0c401 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -176,7 +176,7 @@ static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream)
static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
{
- return xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_tail_ptr) &
+ return xe_mmio_read32(&stream->gt->mmio, __oa_regs(stream)->oa_tail_ptr) &
OAG_OATAILPTR_MASK;
}
@@ -366,7 +366,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- xe_mmio_write32(stream->gt, oaheadptr,
+ xe_mmio_write32(&stream->gt->mmio, oaheadptr,
(head + gtt_offset) & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = head;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -377,22 +377,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
{
+ struct xe_mmio *mmio = &stream->gt->mmio;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT;
unsigned long flags;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0);
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0);
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
*/
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_buffer, oa_buf);
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_tail_ptr,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_buffer, oa_buf);
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_tail_ptr,
gtt_offset & OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointer to read from */
@@ -444,21 +445,23 @@ static void xe_oa_enable(struct xe_oa_stream *stream)
stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG)
val |= OAG_OACONTROL_OA_PES_DISAG_EN;
- xe_mmio_write32(stream->gt, regs->oa_ctrl, val);
+ xe_mmio_write32(&stream->gt->mmio, regs->oa_ctrl, val);
}
static void xe_oa_disable(struct xe_oa_stream *stream)
{
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, 0);
- if (xe_mmio_wait32(stream->gt, __oa_regs(stream)->oa_ctrl,
+ struct xe_mmio *mmio = &stream->gt->mmio;
+
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctrl, 0);
+ if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl,
OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false))
drm_err(&stream->oa->xe->drm,
"wait for OA to be disabled timed out\n");
if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) {
/* <= XE_METEORLAKE except XE_PVC */
- xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1);
- if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false))
+ xe_mmio_write32(mmio, OA_TLB_INV_CR, 1);
+ if (xe_mmio_wait32(mmio, OA_TLB_INV_CR, 1, 0, 50000, NULL, false))
drm_err(&stream->oa->xe->drm,
"wait for OA tlb invalidate timed out\n");
}
@@ -481,7 +484,7 @@ static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf,
size_t count, size_t *offset)
{
/* Only clear our bits to avoid side-effects */
- stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status,
+ stream->oa_status = xe_mmio_rmw32(&stream->gt->mmio, __oa_regs(stream)->oa_status,
OASTATUS_RELEVANT_BITS, 0);
/*
* Signal to userspace that there is non-zero OA status to read via
@@ -709,8 +712,7 @@ static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable)
{
RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
regs_offset + CTX_CONTEXT_CONTROL,
- _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
- enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0)
+ _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE),
},
};
struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol };
@@ -742,17 +744,16 @@ static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
{
RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
regs_offset + CTX_CONTEXT_CONTROL,
- _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
- enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) |
- _MASKED_FIELD(CTX_CTRL_RUN_ALONE,
- enable ? CTX_CTRL_RUN_ALONE : 0),
+ _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) |
+ _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0),
},
};
struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol };
int err;
/* Set ccs select to enable programming of OAC_OACONTROL */
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream));
+ xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl,
+ __oa_ccs_select(stream));
/* Modify stream hwe context image with regs_context */
err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0],
@@ -788,6 +789,7 @@ static u32 oag_configure_mmio_trigger(const struct xe_oa_stream *stream, bool en
static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
{
+ struct xe_mmio *mmio = &stream->gt->mmio;
u32 sqcnt1;
/*
@@ -801,7 +803,7 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
_MASKED_BIT_DISABLE(DISABLE_DOP_GATING));
}
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
oag_configure_mmio_trigger(stream, false));
/* disable the context save/restore or OAR counters */
@@ -809,13 +811,13 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
xe_oa_configure_oa_context(stream, false);
/* Make sure we disable noa to save power. */
- xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0);
+ xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0);
sqcnt1 = SQCNT1_PMON_ENABLE |
(HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);
/* Reset PMON Enable to save power. */
- xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0);
+ xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0);
}
static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
@@ -943,6 +945,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream)
static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
{
+ struct xe_mmio *mmio = &stream->gt->mmio;
u32 oa_debug, sqcnt1;
int ret;
@@ -969,12 +972,12 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL |
OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL;
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
_MASKED_BIT_ENABLE(oa_debug) |
oag_report_ctx_switches(stream) |
oag_configure_mmio_trigger(stream, true));
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
(OAG_OAGLBCTXCTRL_COUNTER_RESUME |
OAG_OAGLBCTXCTRL_TIMER_ENABLE |
REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK,
@@ -988,7 +991,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
sqcnt1 = SQCNT1_PMON_ENABLE |
(HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);
- xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, 0, sqcnt1);
+ xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1);
/* Configure OAR/OAC */
if (stream->exec_q) {
@@ -1536,7 +1539,7 @@ u32 xe_oa_timestamp_frequency(struct xe_gt *gt)
case XE_PVC:
case XE_METEORLAKE:
xe_pm_runtime_get(gt_to_xe(gt));
- reg = xe_mmio_read32(gt, RPM_CONFIG0);
+ reg = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
xe_pm_runtime_put(gt_to_xe(gt));
shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
@@ -2352,7 +2355,7 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt)
}
/* Ensure MMIO trigger remains disabled till there is a stream */
- xe_mmio_write32(gt, u->regs.oa_debug,
+ xe_mmio_write32(&gt->mmio, u->regs.oa_debug,
oag_configure_mmio_trigger(NULL, false));
/* Set oa_unit_ids now to ensure ids remain contiguous */
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index f291a1730024..6bd6adfdfc74 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -100,6 +100,10 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = {
* Reserved entries should be programmed with the maximum caching, minimum
* coherency (which matches an all-0's encoding), so we can just omit them
* in the table.
+ *
+ * Note: There is an implicit assumption in the driver that compression and
+ * coh_1way+ are mutually exclusive. If this ever stops being true, then
+ * userptr and dma-buf imported from an external device will have uncleared
+ * ccs state.
*/
#define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \
{ \
@@ -109,7 +113,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = {
REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \
REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \
REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \
- .coh_mode = __coh_mode ? XE_COH_AT_LEAST_1WAY : XE_COH_NONE \
+ .coh_mode = (BUILD_BUG_ON_ZERO(__coh_mode && comp_en) || __coh_mode) ? \
+ XE_COH_AT_LEAST_1WAY : XE_COH_NONE \
}
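BUILD_BUG_ON_ZERO() makes that mutual-exclusion note enforceable at compile time: it evaluates to 0 when the condition is false and breaks the build otherwise, so it can be OR-ed into a constant initializer. A standalone sketch of the same trick (names are illustrative):

	#include <linux/build_bug.h>

	#define COH_FIELD(coh, comp) \
		((BUILD_BUG_ON_ZERO((coh) && (comp)) || (coh)) ? 1 : 0)

	/* COH_FIELD(1, 0) and COH_FIELD(0, 1) compile; COH_FIELD(1, 1) does not */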
static const struct xe_pat_table_entry xe2_pat_table[] = {
@@ -160,7 +165,7 @@ static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[
for (int i = 0; i < n_entries; i++) {
struct xe_reg reg = XE_REG(_PAT_INDEX(i));
- xe_mmio_write32(gt, reg, table[i].value);
+ xe_mmio_write32(&gt->mmio, reg, table[i].value);
}
}
@@ -186,7 +191,7 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
drm_printf(p, "PAT table:\n");
for (i = 0; i < xe->pat.n_entries; i++) {
- u32 pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+ u32 pat = xe_mmio_read32(&gt->mmio, XE_REG(_PAT_INDEX(i)));
u8 mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat);
drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i,
@@ -278,7 +283,7 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
u32 pat;
if (xe_gt_is_media_type(gt))
- pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+ pat = xe_mmio_read32(&gt->mmio, XE_REG(_PAT_INDEX(i)));
else
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
@@ -316,10 +321,10 @@ static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry
int n_entries)
{
program_pat(gt, table, n_entries);
- xe_mmio_write32(gt, XE_REG(_PAT_ATS), xe2_pat_ats.value);
+ xe_mmio_write32(&gt->mmio, XE_REG(_PAT_ATS), xe2_pat_ats.value);
if (IS_DGFX(gt_to_xe(gt)))
- xe_mmio_write32(gt, XE_REG(_PAT_PTA), xe2_pat_pta.value);
+ xe_mmio_write32(&gt->mmio, XE_REG(_PAT_PTA), xe2_pat_pta.value);
}
static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
@@ -336,7 +341,7 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
for (i = 0; i < xe->pat.n_entries; i++) {
if (xe_gt_is_media_type(gt))
- pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+ pat = xe_mmio_read32(&gt->mmio, XE_REG(_PAT_INDEX(i)));
else
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
@@ -355,7 +360,7 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
* PPGTT entries.
*/
if (xe_gt_is_media_type(gt))
- pat = xe_mmio_read32(gt, XE_REG(_PAT_PTA));
+ pat = xe_mmio_read32(&gt->mmio, XE_REG(_PAT_PTA));
else
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA));
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 937c3e064f0d..af7b2f2ff13a 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -103,7 +103,6 @@ static const struct xe_graphics_desc graphics_xelpp = {
#define XE_HP_FEATURES \
.has_range_tlb_invalidation = true, \
- .has_flat_ccs = true, \
.dma_mask_size = 46, \
.va_bits = 48, \
.vm_max_level = 3
@@ -120,6 +119,8 @@ static const struct xe_graphics_desc graphics_xehpg = {
XE_HP_FEATURES,
.vram_flags = XE_VRAM_FLAGS_NEED64K,
+
+ .has_flat_ccs = 1,
};
static const struct xe_graphics_desc graphics_xehpc = {
@@ -145,7 +146,6 @@ static const struct xe_graphics_desc graphics_xehpc = {
.has_asid = 1,
.has_atomic_enable_pte_bit = 1,
- .has_flat_ccs = 0,
.has_usm = 1,
};
@@ -156,7 +156,6 @@ static const struct xe_graphics_desc graphics_xelpg = {
BIT(XE_HW_ENGINE_CCS0),
XE_HP_FEATURES,
- .has_flat_ccs = 0,
};
#define XE2_GFX_FEATURES \
@@ -383,10 +382,12 @@ static const struct pci_device_id pciidlist[] = {
XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
XE_ADLN_IDS(INTEL_VGA_DEVICE, &adl_n_desc),
+ XE_RPLU_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
XE_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
XE_RPLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
+ XE_ARL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc),
@@ -467,13 +468,15 @@ enum xe_gmdid_type {
static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid)
{
- struct xe_gt *gt = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
struct xe_reg gmdid_reg = GMD_ID;
u32 val;
KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid);
if (IS_SRIOV_VF(xe)) {
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+
/*
* To get the value of the GMDID register, VFs must obtain it
* from the GuC using MMIO communication.
@@ -509,14 +512,17 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver,
gt->info.type = XE_GT_TYPE_UNINITIALIZED;
} else {
/*
- * We need to apply the GSI offset explicitly here as at this
- * point the xe_gt is not fully uninitialized and only basic
- * access to MMIO registers is possible.
+ * GMD_ID is a GT register, but at this point in the driver
+ * init we haven't fully initialized the GT yet, so we need to
+ * read the register with the tile's MMIO accessor. That means
+ * we need to apply the GSI offset manually since it won't get
+ * automatically added as it would if we were using a GT mmio
+ * accessor.
*/
if (type == GMDID_MEDIA)
gmdid_reg.addr += MEDIA_GT_GSI_OFFSET;
- val = xe_mmio_read32(gt, gmdid_reg);
+ val = xe_mmio_read32(mmio, gmdid_reg);
}
*ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 + REG_FIELD_GET(GMD_ID_RELEASE_MASK, val);
@@ -678,7 +684,10 @@ static int xe_info_init(struct xe_device *xe,
xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit;
if (xe->info.platform != XE_PVC)
xe->info.has_device_atomics_on_smem = 1;
+
+ /* Runtime detection may change this later */
xe->info.has_flat_ccs = graphics_desc->has_flat_ccs;
+
xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
xe->info.has_usm = graphics_desc->has_usm;
@@ -707,6 +716,7 @@ static int xe_info_init(struct xe_device *xe,
gt->info.type = XE_GT_TYPE_MAIN;
gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state;
gt->info.engine_mask = graphics_desc->hw_engine_mask;
+
if (MEDIA_VER(xe) < 13 && media_desc)
gt->info.engine_mask |= media_desc->hw_engine_mask;
@@ -725,8 +735,6 @@ static int xe_info_init(struct xe_device *xe,
gt->info.type = XE_GT_TYPE_MEDIA;
gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state;
gt->info.engine_mask = media_desc->hw_engine_mask;
- gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
- gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
/*
* FIXME: At the moment multi-tile and standalone media are
@@ -757,6 +765,25 @@ static void xe_pci_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
}
+/*
+ * Probe the PCI device, initialize various parts of the driver.
+ *
+ * Fault injection is used to test the error paths of some initialization
+ * functions called either directly from xe_pci_probe() or indirectly for
+ * example through xe_device_probe(). Those functions use the kernel's
+ * fault injection infrastructure; see
+ * Documentation/fault-injection/fault-injection.rst for details. The
+ * ALLOW_ERROR_INJECTION() macro conditionally skips a function's execution
+ * at runtime and returns a provided value instead. The first requirement
+ * for error-injectable functions is proper handling of the error code by
+ * the caller for recovery, which is always the case here. The second
+ * requirement is that no state is changed before the first error return.
+ * That is not strictly fulfilled for all initialization functions using
+ * the ALLOW_ERROR_INJECTION() macro, but it is acceptable because for
+ * those error cases at probe time the error code is simply propagated up
+ * by the caller, so there is no consequence for those specific callers
+ * when function error injection skips the whole function.
+ */
static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
const struct xe_device_desc *desc = (const void *)ent->driver_data;
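The functions annotated in this series (xe_pm_init_early(), xe_sriov_init(), xe_tile_init_early(), xe_uc_fw_init()) all follow the shape sketched below, which is what makes the injected early return safe (the example_* names are hypothetical):

	int example_init_early(struct xe_device *xe)
	{
		int err;

		err = example_alloc(xe);	/* hypothetical helper */
		if (err)
			return err;		/* nothing to unwind yet */

		return 0;
	}
	ALLOW_ERROR_INJECTION(example_init_early, ERRNO); /* See xe_pci_probe() */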
@@ -924,6 +951,8 @@ static int xe_pci_resume(struct device *dev)
if (err)
return err;
+ pci_restore_state(pdev);
+
err = pci_enable_device(pdev);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index 7397d556996a..d95d9835de42 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -44,7 +44,7 @@ static int pcode_mailbox_status(struct xe_tile *tile)
[PCODE_ERROR_MASK] = {-EPROTO, "Unknown"},
};
- err = xe_mmio_read32(tile->primary_gt, PCODE_MAILBOX) & PCODE_ERROR_MASK;
+ err = xe_mmio_read32(&tile->mmio, PCODE_MAILBOX) & PCODE_ERROR_MASK;
if (err) {
drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s", err,
err_decode[err].str ?: "Unknown");
@@ -58,7 +58,7 @@ static int __pcode_mailbox_rw(struct xe_tile *tile, u32 mbox, u32 *data0, u32 *d
unsigned int timeout_ms, bool return_data,
bool atomic)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
int err;
if (tile_to_xe(tile)->info.skip_pcode)
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 7cf2160fe040..40f7c844ed44 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -5,6 +5,7 @@
#include "xe_pm.h"
+#include <linux/fault-inject.h>
#include <linux/pm_runtime.h>
#include <drm/drm_managed.h>
@@ -123,7 +124,7 @@ int xe_pm_suspend(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_suspend_prepare(gt);
- xe_display_pm_suspend(xe, false);
+ xe_display_pm_suspend(xe);
/* FIXME: Super racey... */
err = xe_bo_evict_all(xe);
@@ -133,7 +134,7 @@ int xe_pm_suspend(struct xe_device *xe)
for_each_gt(gt, xe, id) {
err = xe_gt_suspend(gt);
if (err) {
- xe_display_pm_resume(xe, false);
+ xe_display_pm_resume(xe);
goto err;
}
}
@@ -187,7 +188,7 @@ int xe_pm_resume(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_resume(gt);
- xe_display_pm_resume(xe, false);
+ xe_display_pm_resume(xe);
err = xe_bo_restore_user(xe);
if (err)
@@ -263,6 +264,7 @@ int xe_pm_init_early(struct xe_device *xe)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */
/**
* xe_pm_init - Initialize Xe Power Management
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index d6353e8969f0..f27f579f4d85 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -2188,5 +2188,5 @@ void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops)
pt_op->num_entries);
}
- xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred);
+ xe_pt_update_ops_fini(tile, vops);
}
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 28d9bb3b825d..5246a4a2740e 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -91,16 +91,17 @@ __read_timestamps(struct xe_gt *gt,
u64 *cpu_delta,
__ktime_func_t cpu_clock)
{
+ struct xe_mmio *mmio = &gt->mmio;
u32 upper, lower, old_upper, loop = 0;
- upper = xe_mmio_read32(gt, upper_reg);
+ upper = xe_mmio_read32(mmio, upper_reg);
do {
*cpu_delta = local_clock();
*cpu_ts = cpu_clock();
- lower = xe_mmio_read32(gt, lower_reg);
+ lower = xe_mmio_read32(mmio, lower_reg);
*cpu_delta = local_clock() - *cpu_delta;
old_upper = upper;
- upper = xe_mmio_read32(gt, upper_reg);
+ upper = xe_mmio_read32(mmio, upper_reg);
} while (upper != old_upper && loop++ < 2);
*engine_ts = (u64)upper << 32 | lower;
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 440ac572f6e5..191cb4121acd 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -15,6 +15,7 @@
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
+#include "xe_device.h"
#include "xe_device_types.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
@@ -164,7 +165,7 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
else if (entry->clr_bits + 1)
val = (reg.mcr ?
xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
- xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+ xe_mmio_read32(&gt->mmio, reg)) & (~entry->clr_bits);
else
val = 0;
@@ -180,7 +181,7 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
if (entry->reg.mcr)
xe_gt_mcr_multicast_write(gt, reg_mcr, val);
else
- xe_mmio_write32(gt, reg, val);
+ xe_mmio_write32(&gt->mmio, reg, val);
}
void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
@@ -194,14 +195,14 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name);
- err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (err)
goto err_force_wake;
xa_for_each(&sr->xa, reg, entry)
apply_one_mmio(gt, entry);
- err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+ err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
XE_WARN_ON(err);
return;
@@ -227,7 +228,7 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name);
- err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (err)
goto err_force_wake;
@@ -241,7 +242,7 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
}
xe_reg_whitelist_print_entry(&p, 0, reg, entry);
- xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot),
+ xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot),
reg | entry->set_bits);
slot++;
}
@@ -250,10 +251,10 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) {
u32 addr = RING_NOPID(mmio_base).addr;
- xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
+ xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
}
- err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+ err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
XE_WARN_ON(err);
return;
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 86c705d18c0d..b13d4d62f0b1 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -196,7 +196,7 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx,
*gt = (*hwe)->gt;
*xe = gt_to_xe(*gt);
break;
- };
+ }
}
/**
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index fe2cb2a96f78..e055bed7ae55 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -53,7 +53,7 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
if (IS_ERR(bo)) {
drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n",
PTR_ERR(bo));
- return (struct xe_sa_manager *)bo;
+ return ERR_CAST(bo);
}
sa_manager->bo = bo;
sa_manager->is_iomem = bo->vmap.is_iomem;
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index 5a1d65e4f19f..ef10782af656 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -3,6 +3,8 @@
* Copyright © 2023 Intel Corporation
*/
+#include <linux/fault-inject.h>
+
#include <drm/drm_managed.h>
#include "regs/xe_regs.h"
@@ -35,7 +37,7 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode)
static bool test_is_vf(struct xe_device *xe)
{
- u32 value = xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG);
+ u32 value = xe_mmio_read32(xe_root_tile_mmio(xe), VF_CAP_REG);
return value & VF_CAP;
}
@@ -119,6 +121,7 @@ int xe_sriov_init(struct xe_device *xe)
return drmm_add_action_or_reset(&xe->drm, fini_sriov, xe);
}
+ALLOW_ERROR_INJECTION(xe_sriov_init, ERRNO); /* See xe_pci_probe() */
/**
* xe_sriov_print_info - Print basic SR-IOV information.
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index dda5268507d8..07cf7cfe4abd 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -3,6 +3,8 @@
* Copyright © 2023 Intel Corporation
*/
+#include <linux/fault-inject.h>
+
#include <drm/drm_managed.h>
#include "xe_device.h"
@@ -129,6 +131,7 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */
static int tile_ttm_mgr_init(struct xe_tile *tile)
{
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 8573d7a87d84..91130ad8999c 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -21,6 +21,7 @@
#include "xe_vm.h"
#define __dev_name_xe(xe) dev_name((xe)->drm.dev)
+#define __dev_name_tile(tile) __dev_name_xe(tile_to_xe((tile)))
#define __dev_name_gt(gt) __dev_name_xe(gt_to_xe((gt)))
#define __dev_name_eq(q) __dev_name_gt((q)->gt)
@@ -342,12 +343,12 @@ DEFINE_EVENT(xe_hw_fence, xe_hw_fence_try_signal,
);
TRACE_EVENT(xe_reg_rw,
- TP_PROTO(struct xe_gt *gt, bool write, u32 reg, u64 val, int len),
+ TP_PROTO(struct xe_mmio *mmio, bool write, u32 reg, u64 val, int len),
- TP_ARGS(gt, write, reg, val, len),
+ TP_ARGS(mmio, write, reg, val, len),
TP_STRUCT__entry(
- __string(dev, __dev_name_gt(gt))
+ __string(dev, __dev_name_tile(mmio->tile))
__field(u64, val)
__field(u32, reg)
__field(u16, write)
diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h
index 9b1a1d4304ae..30a3cfbaaa09 100644
--- a/drivers/gpu/drm/xe/xe_trace_bo.h
+++ b/drivers/gpu/drm/xe/xe_trace_bo.h
@@ -189,7 +189,7 @@ DECLARE_EVENT_CLASS(xe_vm,
),
TP_printk("dev=%s, vm=%p, asid=0x%05x", __get_str(dev),
- __entry->vm, __entry->asid)
+ __entry->vm, __entry->asid)
);
DEFINE_EVENT(xe_vm, xe_vm_kill,
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index f7113cf6109d..423856cc18d4 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -60,7 +60,7 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
struct xe_tile *tile = xe_device_get_root_tile(xe);
- struct xe_gt *mmio = xe_root_mmio_gt(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
u64 stolen_size;
u64 tile_offset;
@@ -94,7 +94,7 @@ static u32 get_wopcm_size(struct xe_device *xe)
u32 wopcm_size;
u64 val;
- val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED);
+ val = xe_mmio_read64_2x32(xe_root_tile_mmio(xe), STOLEN_RESERVED);
val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);
switch (val) {
@@ -119,7 +119,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
u32 stolen_size, wopcm_size;
u32 ggc, gms;
- ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);
+ ggc = xe_mmio_read32(xe_root_tile_mmio(xe), GGC);
/*
* Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
@@ -159,7 +159,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
stolen_size -= wopcm_size;
if (media_gt && XE_WA(media_gt, 14019821291)) {
- u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE)
+ u64 gscpsmi_base = xe_mmio_read64_2x32(&media_gt->mmio, GSCPSMI_BASE)
& ~GENMASK_ULL(5, 0);
/*
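
All of the stolen-memory detection above now reads through the root tile's MMIO view instead of the root GT. The xe_root_tile_mmio() helper this relies on plausibly reduces to handing back the root tile's embedded accessor; a sketch under that assumption:

	/* Sketch: assumed shape of the root-tile MMIO accessor. */
	static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe)
	{
		return &xe->tiles[0].mmio;
	}
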
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index faa1bf42e50e..d449de0fb6ec 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -33,7 +33,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
},
{ XE_RTP_NAME("Tuning: L3 cache - media"),
- XE_RTP_RULES(MEDIA_VERSION(2000)),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
},
@@ -42,20 +42,48 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX),
SET(CCCHKNREG1, L3CMPCTRL))
},
+ { XE_RTP_NAME("Tuning: Compression Overfetch - media"),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
+ XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX),
+ SET(XE2LPM_CCCHKNREG1, L3CMPCTRL))
+ },
{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
},
+ { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
+ XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN))
+ },
{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(SET(L3SQCREG2,
COMPMEMRD256BOVRFETCHEN))
},
+ { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
+ XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2,
+ COMPMEMRD256BOVRFETCHEN))
+ },
{ XE_RTP_NAME("Tuning: Stateless compression control"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
},
+ { XE_RTP_NAME("Tuning: Stateless compression control - media"),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, XE_RTP_END_VERSION_UNDEFINED)),
+ XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
+ REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
+ },
+ { XE_RTP_NAME("Tuning: L3 RW flush all Cache"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004)),
+ XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN))
+ },
+ { XE_RTP_NAME("Tuning: L3 RW flush all cache - media"),
+ XE_RTP_RULES(MEDIA_VERSION(2000)),
+ XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN))
+ },
+
{}
};
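
Each new "- media" tuning pairs an existing graphics-IP rule with its media-IP twin, and the MEDIA_VERSION(2000) rules become MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED) so they keep applying on later media IPs. Conceptually the rule match is an interval test with an open upper end; a sketch, where treating the sentinel as "no upper bound" is an assumption for illustration:

	/*
	 * Conceptual RTP version-range match. RTP_SENTINEL_UNDEFINED stands
	 * in for XE_RTP_END_VERSION_UNDEFINED; its value here is illustrative.
	 */
	#define RTP_SENTINEL_UNDEFINED	0

	static bool rtp_ver_in_range(unsigned int ver, unsigned int first,
				     unsigned int last)
	{
		if (ver < first)
			return false;
		return last == RTP_SENTINEL_UNDEFINED || ver <= last;
	}
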
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index d431d0031185..fb0eda3d5682 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -4,6 +4,7 @@
*/
#include <linux/bitfield.h>
+#include <linux/fault-inject.h>
#include <linux/firmware.h>
#include <drm/drm_managed.h>
@@ -796,6 +797,7 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
return err;
}
+ALLOW_ERROR_INJECTION(xe_uc_fw_init, ERRNO); /* See xe_pci_probe() */
static u32 uc_fw_ggtt_offset(struct xe_uc_fw *uc_fw)
{
@@ -806,6 +808,7 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
{
struct xe_device *xe = uc_fw_to_xe(uc_fw);
struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+ struct xe_mmio *mmio = &gt->mmio;
u64 src_offset;
u32 dma_ctrl;
int ret;
@@ -814,34 +817,34 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
/* Set the source address for the uCode */
src_offset = uc_fw_ggtt_offset(uc_fw) + uc_fw->css_offset;
- xe_mmio_write32(gt, DMA_ADDR_0_LOW, lower_32_bits(src_offset));
- xe_mmio_write32(gt, DMA_ADDR_0_HIGH,
+ xe_mmio_write32(mmio, DMA_ADDR_0_LOW, lower_32_bits(src_offset));
+ xe_mmio_write32(mmio, DMA_ADDR_0_HIGH,
upper_32_bits(src_offset) | DMA_ADDRESS_SPACE_GGTT);
/* Set the DMA destination */
- xe_mmio_write32(gt, DMA_ADDR_1_LOW, offset);
- xe_mmio_write32(gt, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
+ xe_mmio_write32(mmio, DMA_ADDR_1_LOW, offset);
+ xe_mmio_write32(mmio, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
/*
* Set the transfer size. The header plus uCode will be copied to WOPCM
* via DMA, excluding any other components
*/
- xe_mmio_write32(gt, DMA_COPY_SIZE,
+ xe_mmio_write32(mmio, DMA_COPY_SIZE,
sizeof(struct uc_css_header) + uc_fw->ucode_size);
/* Start the DMA */
- xe_mmio_write32(gt, DMA_CTRL,
+ xe_mmio_write32(mmio, DMA_CTRL,
_MASKED_BIT_ENABLE(dma_flags | START_DMA));
/* Wait for DMA to finish */
- ret = xe_mmio_wait32(gt, DMA_CTRL, START_DMA, 0, 100000, &dma_ctrl,
+ ret = xe_mmio_wait32(mmio, DMA_CTRL, START_DMA, 0, 100000, &dma_ctrl,
false);
if (ret)
drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
xe_uc_fw_type_repr(uc_fw->type), dma_ctrl);
/* Disable the bits once DMA is over */
- xe_mmio_write32(gt, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags));
+ xe_mmio_write32(mmio, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags));
return ret;
}
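
DMA_CTRL is a masked register: the top 16 bits of a write select which of the bottom 16 bits take effect, which is why the code toggles bits with _MASKED_BIT_ENABLE()/_MASKED_BIT_DISABLE() rather than a read-modify-write. Their definitions boil down to the following (a simplified sketch of the i915 originals):

	/* Masked registers: bits[31:16] = write-enable mask, bits[15:0] = value. */
	#define _MASKED_FIELD(mask, value)	(((mask) << 16) | (value))
	#define _MASKED_BIT_ENABLE(a)		_MASKED_FIELD((a), (a))
	#define _MASKED_BIT_DISABLE(a)		_MASKED_FIELD((a), 0)
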
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 7acd5fc9d032..ce9dca4d4e87 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1613,7 +1613,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
up_write(&vm->lock);
- mutex_lock(&xe->usm.lock);
+ down_write(&xe->usm.lock);
if (vm->usm.asid) {
void *lookup;
@@ -1623,7 +1623,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
xe_assert(xe, lookup == vm);
}
- mutex_unlock(&xe->usm.lock);
+ up_write(&xe->usm.lock);
for_each_tile(tile, xe, id)
xe_range_fence_tree_fini(&vm->rftree[id]);
@@ -1765,25 +1765,18 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
if (IS_ERR(vm))
return PTR_ERR(vm);
- mutex_lock(&xef->vm.lock);
- err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
- mutex_unlock(&xef->vm.lock);
- if (err)
- goto err_close_and_put;
-
if (xe->info.has_asid) {
- mutex_lock(&xe->usm.lock);
+ down_write(&xe->usm.lock);
err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
XA_LIMIT(1, XE_MAX_ASID - 1),
&xe->usm.next_asid, GFP_KERNEL);
- mutex_unlock(&xe->usm.lock);
+ up_write(&xe->usm.lock);
if (err < 0)
- goto err_free_id;
+ goto err_close_and_put;
vm->usm.asid = asid;
}
- args->vm_id = id;
vm->xef = xe_file_get(xef);
/* Record BO memory for VM pagetable created against client */
@@ -1796,12 +1789,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
#endif
+ /* user id alloc must always be last in ioctl to prevent UAF */
+ err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
+ if (err)
+ goto err_close_and_put;
+
+ args->vm_id = id;
+
return 0;
-err_free_id:
- mutex_lock(&xef->vm.lock);
- xa_erase(&xef->vm.xa, id);
- mutex_unlock(&xef->vm.lock);
err_close_and_put:
xe_vm_close_and_put(vm);
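
The reordering in xe_vm_create_ioctl() closes a use-after-free window: the id written to xef->vm.xa is userspace visible, so the moment it is published a concurrent destroy ioctl can free the VM while the create path is still touching it; hence the new comment that the user id allocation must come last. The idiom in miniature, as a kernel-context sketch with illustrative names rather than the driver's exact types:

	/* Sketch of the publish-last rule for userspace-visible handles. */
	static int create_and_publish(struct xarray *handles, void *obj,
				      u32 *id_out)
	{
		/* 1. Fully initialize 'obj' first ... */

		/*
		 * 2. Publish last: once this xa_alloc() succeeds, a
		 * concurrent destroy may free 'obj', so it must not be
		 * dereferenced again on this path.
		 */
		return xa_alloc(handles, id_out, obj, xa_limit_32b, GFP_KERNEL);
	}
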
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
index 80ba2fc78837..2a623bfcda7e 100644
--- a/drivers/gpu/drm/xe/xe_vram.c
+++ b/drivers/gpu/drm/xe/xe_vram.c
@@ -169,7 +169,7 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size)
u64 offset_hi, offset_lo;
u32 nodes, num_enabled;
- reg = xe_mmio_read32(gt, MIRROR_FUSE3);
+ reg = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
nodes = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, reg);
num_enabled = hweight32(nodes); /* Number of enabled l3 nodes */
@@ -185,7 +185,8 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size)
offset = round_up(offset, SZ_128K); /* SW must round up to nearest 128K */
/* We don't expect any holes */
- xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size),
+ xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(&gt_to_tile(gt)->mmio, GSMBASE) -
+ ccs_size),
"Hole between CCS and GSM.\n");
} else {
reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
@@ -257,7 +258,7 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size,
if (xe->info.has_flat_ccs) {
offset = get_flat_ccs_offset(gt, *tile_size);
} else {
- offset = xe_mmio_read64_2x32(gt, GSMBASE);
+ offset = xe_mmio_read64_2x32(&tile->mmio, GSMBASE);
}
/* remove the tile offset so we have just the available size */
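
GSMBASE is a 64-bit quantity read via xe_mmio_read64_2x32(), i.e. composed from two 32-bit accesses; the usual way to make that tear-free is to re-read the upper half until it is stable, so a carry between the two halves cannot be observed. A sketch of that pattern, with the field names assumed from this series:

	/* Sketch: tear-free 64-bit read composed from two 32-bit halves. */
	static u64 mmio_read64_2x32_sketch(struct xe_mmio *mmio, u32 addr)
	{
		u32 ldw, udw, oldudw;
		int retries;

		oldudw = readl(mmio->regs + addr + 4);
		for (retries = 5; retries; --retries) {
			ldw = readl(mmio->regs + addr);
			udw = readl(mmio->regs + addr + 4);
			if (udw == oldudw)	/* no carry crossed the reads */
				break;
			oldudw = udw;
		}
		return (u64)udw << 32 | ldw;
	}
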
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index d424992514a4..94ea76b098ed 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -8,6 +8,7 @@
#include <drm/drm_managed.h>
#include <kunit/visibility.h>
#include <linux/compiler_types.h>
+#include <linux/fault-inject.h>
#include <generated/xe_wa_oob.h>
@@ -850,6 +851,7 @@ int xe_wa_init(struct xe_gt *gt)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_wa_init, ERRNO); /* See xe_pci_probe() */
void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
{
@@ -887,11 +889,11 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
*/
void xe_wa_apply_tile_workarounds(struct xe_tile *tile)
{
- struct xe_gt *mmio = tile->primary_gt;
+ struct xe_mmio *mmio = &tile->mmio;
if (IS_SRIOV_VF(tile->xe))
return;
- if (XE_WA(mmio, 22010954014))
+ if (XE_WA(tile->primary_gt, 22010954014))
xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS);
}
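
Note the split in the fixed-up code: XE_WA() still keys off tile->primary_gt, since workaround applicability is tracked per GT, while the register access itself now goes through the tile's own MMIO handle. The read-modify-write helper it feeds is conventionally shaped like this sketch (clear bits, then set bits, returning the value read back first):

	/* Sketch: clear 'clr', set 'set', return the old register value. */
	static u32 mmio_rmw32_sketch(struct xe_mmio *mmio, struct xe_reg reg,
				     u32 clr, u32 set)
	{
		u32 old = readl(mmio->regs + reg.addr);

		writel((old & ~clr) | set, mmio->regs + reg.addr);
		return old;
	}
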
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
index d3a99157e523..ada0d0aa6b74 100644
--- a/drivers/gpu/drm/xe/xe_wopcm.c
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -5,6 +5,8 @@
#include "xe_wopcm.h"
+#include <linux/fault-inject.h>
+
#include "regs/xe_guc_regs.h"
#include "xe_device.h"
#include "xe_force_wake.h"
@@ -123,8 +125,8 @@ static bool __check_layout(struct xe_device *xe, u32 wopcm_size,
static bool __wopcm_regs_locked(struct xe_gt *gt,
u32 *guc_wopcm_base, u32 *guc_wopcm_size)
{
- u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET);
- u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE);
+ u32 reg_base = xe_mmio_read32(&gt->mmio, DMA_GUC_WOPCM_OFFSET);
+ u32 reg_size = xe_mmio_read32(&gt->mmio, GUC_WOPCM_SIZE);
if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) ||
!(reg_base & GUC_WOPCM_OFFSET_VALID))
@@ -150,13 +152,13 @@ static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt,
XE_WARN_ON(size & ~GUC_WOPCM_SIZE_MASK);
mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
- err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE, size, mask,
+ err = xe_mmio_write32_and_verify(&gt->mmio, GUC_WOPCM_SIZE, size, mask,
size | GUC_WOPCM_SIZE_LOCKED);
if (err)
goto err_out;
mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent;
- err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET,
+ err = xe_mmio_write32_and_verify(&gt->mmio, DMA_GUC_WOPCM_OFFSET,
base | huc_agent, mask,
base | huc_agent |
GUC_WOPCM_OFFSET_VALID);
@@ -169,10 +171,10 @@ err_out:
drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n");
drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
DMA_GUC_WOPCM_OFFSET.addr,
- xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET));
+ xe_mmio_read32(&gt->mmio, DMA_GUC_WOPCM_OFFSET));
drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
GUC_WOPCM_SIZE.addr,
- xe_mmio_read32(gt, GUC_WOPCM_SIZE));
+ xe_mmio_read32(&gt->mmio, GUC_WOPCM_SIZE));
return err;
}
@@ -268,3 +270,4 @@ check:
return ret;
}
+ALLOW_ERROR_INJECTION(xe_wopcm_init, ERRNO); /* See xe_pci_probe() */
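
xe_mmio_write32_and_verify(), used for the WOPCM registers above, pairs each write with a masked read-back so that a write the hardware ignores (for example to an already locked register) is reported instead of silently trusted. A minimal sketch of that contract, with the helper body assumed:

	/* Sketch: write, read back, and check (read & mask) == expected. */
	static int mmio_write32_and_verify_sketch(struct xe_mmio *mmio,
						  struct xe_reg reg,
						  u32 val, u32 mask,
						  u32 expected)
	{
		u32 read;

		writel(val, mmio->regs + reg.addr);
		read = readl(mmio->regs + reg.addr);
		return (read & mask) == expected ? 0 : -EINVAL;
	}
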