diff options
Diffstat (limited to 'drivers/gpu/drm/xe')
99 files changed, 2311 insertions, 741 deletions
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index bc177368af6c..2de0de41b8dd 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -40,9 +40,21 @@ config DRM_XE_DEBUG_VM If in doubt, say "N". +config DRM_XE_DEBUG_MEMIRQ + bool "Enable extra memirq debugging" + default n + help + Choose this option to enable additional debugging info for + memory based interrupts. + + Recommended for driver developers only. + + If in doubt, say "N". + config DRM_XE_DEBUG_SRIOV bool "Enable extra SR-IOV debugging" default n + select DRM_XE_DEBUG_MEMIRQ help Enable extra SR-IOV debugging info. diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index edfd812e0f41..8f1c5c329f79 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -129,6 +129,7 @@ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ xe_gt_sriov_pf_config.o \ xe_gt_sriov_pf_control.o \ + xe_gt_sriov_pf_migration.o \ xe_gt_sriov_pf_monitor.o \ xe_gt_sriov_pf_policy.o \ xe_gt_sriov_pf_service.o \ diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index 181180f5945c..b6a1852749dd 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -557,4 +557,65 @@ #define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_2_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn #define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_3_VALUE96 GUC_HXG_REQUEST_MSG_n_DATAn +/** + * DOC: PF2GUC_SAVE_RESTORE_VF + * + * This message is used by the PF to migrate VF info state maintained by the GuC. + * + * This message must be sent as `CTB HXG Message`_. + * + * Available since GuC version 70.25.0 + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = **OPCODE** - operation to take: | + * | | | | + * | | | - _`GUC_PF_OPCODE_VF_SAVE` = 0 | + * | | | - _`GUC_PF_OPCODE_VF_RESTORE` = 1 | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_PF2GUC_SAVE_RESTORE_VF` = 0x550B | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **VFID** - VF identifier | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **ADDR_LO** - lower 32-bits of GGTT offset to the buffer | + * | | | where the VF info will be save to or restored from. | + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | **ADDR_HI** - upper 32-bits of GGTT offset to the buffer | + * | | | where the VF info will be save to or restored from. | + * +---+-------+--------------------------------------------------------------+ + * | 4 | 27:0 | **SIZE** - size of the buffer (in dwords) | + * | +-------+--------------------------------------------------------------+ + * | | 31:28 | MBZ | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = **USED** - size of used buffer space (in dwords) | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_PF2GUC_SAVE_RESTORE_VF 0x550Bu + +#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 4u) +#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_0_OPCODE GUC_HXG_EVENT_MSG_0_DATA0 +#define GUC_PF_OPCODE_VF_SAVE 0u +#define GUC_PF_OPCODE_VF_RESTORE 1u +#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_1_VFID GUC_HXG_EVENT_MSG_n_DATAn +#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_2_ADDR_LO GUC_HXG_EVENT_MSG_n_DATAn +#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_3_ADDR_HI GUC_HXG_EVENT_MSG_n_DATAn +#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_SIZE (0xfffffffu << 0) +#define PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_MBZ (0xfu << 28) + +#define PF2GUC_SAVE_RESTORE_VF_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define PF2GUC_SAVE_RESTORE_VF_RESPONSE_MSG_0_USED GUC_HXG_RESPONSE_MSG_0_DATA0 + #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h deleted file mode 100644 index 98e9dd78f670..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _I915_GPU_ERROR_H_ -#define _I915_GPU_ERROR_H_ - -struct drm_i915_error_state_buf; - -__printf(2, 3) -static inline void -i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) -{ -} - -#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h index eb5b5f0e4bd9..ee3469d4ae73 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -10,11 +10,11 @@ #include "xe_device_types.h" #include "xe_mmio.h" -static inline struct xe_gt *__compat_uncore_to_gt(struct intel_uncore *uncore) +static inline struct xe_mmio *__compat_uncore_to_mmio(struct intel_uncore *uncore) { struct xe_device *xe = container_of(uncore, struct xe_device, uncore); - return xe_root_mmio_gt(xe); + return xe_root_tile_mmio(xe); } static inline struct xe_tile *__compat_uncore_to_tile(struct intel_uncore *uncore) @@ -29,7 +29,7 @@ static inline u32 intel_uncore_read(struct intel_uncore *uncore, { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg); + return xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg); } static inline u8 intel_uncore_read8(struct intel_uncore *uncore, @@ -37,7 +37,7 @@ static inline u8 intel_uncore_read8(struct intel_uncore *uncore, { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - return xe_mmio_read8(__compat_uncore_to_gt(uncore), reg); + return xe_mmio_read8(__compat_uncore_to_mmio(uncore), reg); } static inline u16 intel_uncore_read16(struct intel_uncore *uncore, @@ -45,7 +45,7 @@ static inline u16 intel_uncore_read16(struct intel_uncore *uncore, { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - return xe_mmio_read16(__compat_uncore_to_gt(uncore), reg); + return xe_mmio_read16(__compat_uncore_to_mmio(uncore), reg); } static inline u64 @@ -57,11 +57,11 @@ intel_uncore_read64_2x32(struct intel_uncore *uncore, u32 upper, lower, old_upper; int loop = 0; - upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg); + upper = xe_mmio_read32(__compat_uncore_to_mmio(uncore), upper_reg); do { old_upper = upper; - lower = xe_mmio_read32(__compat_uncore_to_gt(uncore), lower_reg); - upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg); + lower = xe_mmio_read32(__compat_uncore_to_mmio(uncore), lower_reg); + upper = xe_mmio_read32(__compat_uncore_to_mmio(uncore), upper_reg); } while (upper != old_upper && loop++ < 2); return (u64)upper << 32 | lower; @@ -72,7 +72,7 @@ static inline void intel_uncore_posting_read(struct intel_uncore *uncore, { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - xe_mmio_read32(__compat_uncore_to_gt(uncore), reg); + xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg); } static inline void intel_uncore_write(struct intel_uncore *uncore, @@ -80,7 +80,7 @@ static inline void intel_uncore_write(struct intel_uncore *uncore, { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val); + xe_mmio_write32(__compat_uncore_to_mmio(uncore), reg, val); } static inline u32 intel_uncore_rmw(struct intel_uncore *uncore, @@ -88,7 +88,7 @@ static inline u32 intel_uncore_rmw(struct intel_uncore *uncore, { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - return xe_mmio_rmw32(__compat_uncore_to_gt(uncore), reg, clear, set); + return xe_mmio_rmw32(__compat_uncore_to_mmio(uncore), reg, clear, set); } static inline int intel_wait_for_register(struct intel_uncore *uncore, @@ -97,7 +97,7 @@ static inline int intel_wait_for_register(struct intel_uncore *uncore, { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value, + return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value, timeout * USEC_PER_MSEC, NULL, false); } @@ -107,7 +107,7 @@ static inline int intel_wait_for_register_fw(struct intel_uncore *uncore, { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value, + return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value, timeout * USEC_PER_MSEC, NULL, false); } @@ -118,7 +118,7 @@ __intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg, { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value, + return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value, fast_timeout_us + 1000 * slow_timeout_ms, out_value, false); } @@ -128,7 +128,7 @@ static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore, { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg); + return xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg); } static inline void intel_uncore_write_fw(struct intel_uncore *uncore, @@ -136,7 +136,7 @@ static inline void intel_uncore_write_fw(struct intel_uncore *uncore, { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val); + xe_mmio_write32(__compat_uncore_to_mmio(uncore), reg, val); } static inline u32 intel_uncore_read_notrace(struct intel_uncore *uncore, @@ -144,7 +144,7 @@ static inline u32 intel_uncore_read_notrace(struct intel_uncore *uncore, { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg); + return xe_mmio_read32(__compat_uncore_to_mmio(uncore), reg); } static inline void intel_uncore_write_notrace(struct intel_uncore *uncore, @@ -152,7 +152,7 @@ static inline void intel_uncore_write_notrace(struct intel_uncore *uncore, { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val); + xe_mmio_write32(__compat_uncore_to_mmio(uncore), reg, val); } static inline void __iomem *intel_uncore_regs(struct intel_uncore *uncore) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 75736faf2a80..26b2cae11d46 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -4,7 +4,7 @@ */ #include "xe_display.h" -#include "regs/xe_regs.h" +#include "regs/xe_irq_regs.h" #include <linux/fb.h> @@ -13,7 +13,6 @@ #include <uapi/drm/xe_drm.h> #include "soc/intel_dram.h" -#include "i915_drv.h" /* FIXME: HAS_DISPLAY() depends on this */ #include "intel_acpi.h" #include "intel_audio.h" #include "intel_bw.h" @@ -34,7 +33,7 @@ static bool has_display(struct xe_device *xe) { - return HAS_DISPLAY(xe); + return HAS_DISPLAY(&xe->display); } /** @@ -309,18 +308,7 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe) } /* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */ -void xe_display_pm_runtime_suspend(struct xe_device *xe) -{ - if (!xe->info.probe_display) - return; - - if (xe->d3cold.allowed) - xe_display_pm_suspend(xe, true); - - intel_hpd_poll_enable(xe); -} - -void xe_display_pm_suspend(struct xe_device *xe, bool runtime) +static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime) { struct intel_display *display = &xe->display; bool s2idle = suspend_to_idle(); @@ -355,6 +343,52 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) intel_dmc_suspend(xe); } +void xe_display_pm_suspend(struct xe_device *xe) +{ + __xe_display_pm_suspend(xe, false); +} + +void xe_display_pm_shutdown(struct xe_device *xe) +{ + struct intel_display *display = &xe->display; + + if (!xe->info.probe_display) + return; + + intel_power_domains_disable(xe); + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); + if (has_display(xe)) { + drm_kms_helper_poll_disable(&xe->drm); + intel_display_driver_disable_user_access(xe); + intel_display_driver_suspend(xe); + } + + xe_display_flush_cleanup_work(xe); + intel_dp_mst_suspend(xe); + intel_hpd_cancel_work(xe); + + if (has_display(xe)) + intel_display_driver_suspend_access(xe); + + intel_encoder_suspend_all(display); + intel_encoder_shutdown_all(display); + + intel_opregion_suspend(display, PCI_D3cold); + + intel_dmc_suspend(xe); +} + +void xe_display_pm_runtime_suspend(struct xe_device *xe) +{ + if (!xe->info.probe_display) + return; + + if (xe->d3cold.allowed) + __xe_display_pm_suspend(xe, true); + + intel_hpd_poll_enable(xe); +} + void xe_display_pm_suspend_late(struct xe_device *xe) { bool s2idle = suspend_to_idle(); @@ -366,15 +400,17 @@ void xe_display_pm_suspend_late(struct xe_device *xe) intel_display_power_suspend_late(xe); } -void xe_display_pm_runtime_resume(struct xe_device *xe) +void xe_display_pm_shutdown_late(struct xe_device *xe) { if (!xe->info.probe_display) return; - intel_hpd_poll_disable(xe); - - if (xe->d3cold.allowed) - xe_display_pm_resume(xe, true); + /* + * The only requirement is to reboot with display DC states disabled, + * for now leaving all display power wells in the INIT power domain + * enabled. + */ + intel_power_domains_driver_remove(xe); } void xe_display_pm_resume_early(struct xe_device *xe) @@ -387,7 +423,7 @@ void xe_display_pm_resume_early(struct xe_device *xe) intel_power_domains_resume(xe); } -void xe_display_pm_resume(struct xe_device *xe, bool runtime) +static void __xe_display_pm_resume(struct xe_device *xe, bool runtime) { struct intel_display *display = &xe->display; @@ -421,6 +457,23 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) intel_power_domains_enable(xe); } +void xe_display_pm_resume(struct xe_device *xe) +{ + __xe_display_pm_resume(xe, false); +} + +void xe_display_pm_runtime_resume(struct xe_device *xe) +{ + if (!xe->info.probe_display) + return; + + intel_hpd_poll_disable(xe); + + if (xe->d3cold.allowed) + __xe_display_pm_resume(xe, true); +} + + static void display_device_remove(struct drm_device *dev, void *arg) { struct xe_device *xe = arg; diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index 53d727fd792b..17afa537aee5 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -34,10 +34,12 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir); void xe_display_irq_reset(struct xe_device *xe); void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt); -void xe_display_pm_suspend(struct xe_device *xe, bool runtime); +void xe_display_pm_suspend(struct xe_device *xe); +void xe_display_pm_shutdown(struct xe_device *xe); void xe_display_pm_suspend_late(struct xe_device *xe); +void xe_display_pm_shutdown_late(struct xe_device *xe); void xe_display_pm_resume_early(struct xe_device *xe); -void xe_display_pm_resume(struct xe_device *xe, bool runtime); +void xe_display_pm_resume(struct xe_device *xe); void xe_display_pm_runtime_suspend(struct xe_device *xe); void xe_display_pm_runtime_resume(struct xe_device *xe); @@ -65,10 +67,12 @@ static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) static inline void xe_display_irq_reset(struct xe_device *xe) {} static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {} -static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {} +static inline void xe_display_pm_suspend(struct xe_device *xe) {} +static inline void xe_display_pm_shutdown(struct xe_device *xe) {} static inline void xe_display_pm_suspend_late(struct xe_device *xe) {} +static inline void xe_display_pm_shutdown_late(struct xe_device *xe) {} static inline void xe_display_pm_resume_early(struct xe_device *xe) {} -static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {} +static inline void xe_display_pm_resume(struct xe_device *xe) {} static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {} static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {} diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index f99d901a3214..f95375451e2f 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -48,11 +48,12 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d if (!vma) return false; + /* Set scanout flag for WC mapping */ obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_ALIGN(size), ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_FLAG_GGTT); + XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT); if (IS_ERR(obj)) { kfree(vma); return false; @@ -73,5 +74,9 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) { - /* TODO: add xe specific flush_map() for dsb buffer object. */ + /* + * The memory barrier here is to ensure coherency of DSB vs MMIO, + * both for weak ordering archs and discrete cards. + */ + xe_device_wmb(dsb_buf->vma->bo->tile->xe); } diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 660ff42e45a6..fb80042cbe0d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -169,6 +169,8 @@ #define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) +#define XE2LPM_CCCHKNREG1 XE_REG(0x82a8) + #define VF_PREEMPTION XE_REG(0x83a4, XE_REG_OPTION_MASKED) #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) @@ -342,6 +344,14 @@ #define CTC_SOURCE_DIVIDE_LOGIC REG_BIT(0) #define FORCEWAKE_RENDER XE_REG(0xa278) + +#define POWERGATE_DOMAIN_STATUS XE_REG(0xa2a0) +#define MEDIA_SLICE3_AWAKE_STATUS REG_BIT(4) +#define MEDIA_SLICE2_AWAKE_STATUS REG_BIT(3) +#define MEDIA_SLICE1_AWAKE_STATUS REG_BIT(2) +#define RENDER_AWAKE_STATUS REG_BIT(1) +#define MEDIA_SLICE0_AWAKE_STATUS REG_BIT(0) + #define FORCEWAKE_MEDIA_VDBOX(n) XE_REG(0xa540 + (n) * 4) #define FORCEWAKE_MEDIA_VEBOX(n) XE_REG(0xa560 + (n) * 4) #define FORCEWAKE_GSC XE_REG(0xa618) @@ -378,6 +388,9 @@ #define L3SQCREG3 XE_REG_MCR(0xb108) #define COMPPWOVERFETCHEN REG_BIT(28) +#define SCRATCH3_LBCF XE_REG_MCR(0xb154) +#define RWFLUSHALLEN REG_BIT(17) + #define XEHP_L3SQCREG5 XE_REG_MCR(0xb158) #define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) @@ -391,6 +404,12 @@ #define SCRATCH1LPFC XE_REG(0xb474) #define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0) +#define XE2LPM_L3SQCREG2 XE_REG_MCR(0xb604) + +#define XE2LPM_L3SQCREG3 XE_REG_MCR(0xb608) + +#define XE2LPM_SCRATCH3_LBCF XE_REG_MCR(0xb654) + #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) #define XE2_TDF_CTRL XE_REG(0xb418) @@ -548,62 +567,4 @@ #define GT_PERF_STATUS XE_REG(0x1381b4) #define VOLTAGE_MASK REG_GENMASK(10, 0) -/* - * Note: Interrupt registers 1900xx are VF accessible only until version 12.50. - * On newer platforms, VFs are using memory-based interrupts instead. - * However, for simplicity we keep this XE_REG_OPTION_VF tag intact. - */ - -#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4), XE_REG_OPTION_VF) -#define INTR_GSC REG_BIT(31) -#define INTR_GUC REG_BIT(25) -#define INTR_MGUC REG_BIT(24) -#define INTR_BCS8 REG_BIT(23) -#define INTR_BCS(x) REG_BIT(15 - (x)) -#define INTR_CCS(x) REG_BIT(4 + (x)) -#define INTR_RCS0 REG_BIT(0) -#define INTR_VECS(x) REG_BIT(31 - (x)) -#define INTR_VCS(x) REG_BIT(x) - -#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030, XE_REG_OPTION_VF) -#define VCS_VECS_INTR_ENABLE XE_REG(0x190034, XE_REG_OPTION_VF) -#define GUC_SG_INTR_ENABLE XE_REG(0x190038, XE_REG_OPTION_VF) -#define ENGINE1_MASK REG_GENMASK(31, 16) -#define ENGINE0_MASK REG_GENMASK(15, 0) -#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c, XE_REG_OPTION_VF) -#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044, XE_REG_OPTION_VF) -#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048, XE_REG_OPTION_VF) - -#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4), XE_REG_OPTION_VF) -#define INTR_DATA_VALID REG_BIT(31) -#define INTR_ENGINE_INSTANCE(x) REG_FIELD_GET(GENMASK(25, 20), x) -#define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x) -#define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x) -#define OTHER_GUC_INSTANCE 0 -#define OTHER_GSC_HECI2_INSTANCE 3 -#define OTHER_GSC_INSTANCE 6 - -#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4), XE_REG_OPTION_VF) -#define RCS0_RSVD_INTR_MASK XE_REG(0x190090, XE_REG_OPTION_VF) -#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF) -#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF) -#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF) -#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF) -#define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4) -#define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF) -#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF) -#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4, XE_REG_OPTION_VF) -#define CCS0_CCS1_INTR_MASK XE_REG(0x190100) -#define CCS2_CCS3_INTR_MASK XE_REG(0x190104) -#define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110) -#define XEHPC_BCS3_BCS4_INTR_MASK XE_REG(0x190114) -#define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118) -#define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c) -#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) -#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) -#define GSC_ER_COMPLETE REG_BIT(5) -#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) -#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) -#define GT_RENDER_USER_INTERRUPT REG_BIT(0) - #endif diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h new file mode 100644 index 000000000000..1776b3f78ccb --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ +#ifndef _XE_IRQ_REGS_H_ +#define _XE_IRQ_REGS_H_ + +#include "regs/xe_reg_defs.h" + +#define PCU_IRQ_OFFSET 0x444e0 +#define GU_MISC_IRQ_OFFSET 0x444f0 +#define GU_MISC_GSE REG_BIT(27) + +#define DG1_MSTR_TILE_INTR XE_REG(0x190008) +#define DG1_MSTR_IRQ REG_BIT(31) +#define DG1_MSTR_TILE(t) REG_BIT(t) + +#define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF) +#define MASTER_IRQ REG_BIT(31) +#define GU_MISC_IRQ REG_BIT(29) +#define DISPLAY_IRQ REG_BIT(16) +#define GT_DW_IRQ(x) REG_BIT(x) + +/* + * Note: Interrupt registers 1900xx are VF accessible only until version 12.50. + * On newer platforms, VFs are using memory-based interrupts instead. + * However, for simplicity we keep this XE_REG_OPTION_VF tag intact. + */ + +#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4), XE_REG_OPTION_VF) +#define INTR_GSC REG_BIT(31) +#define INTR_GUC REG_BIT(25) +#define INTR_MGUC REG_BIT(24) +#define INTR_BCS8 REG_BIT(23) +#define INTR_BCS(x) REG_BIT(15 - (x)) +#define INTR_CCS(x) REG_BIT(4 + (x)) +#define INTR_RCS0 REG_BIT(0) +#define INTR_VECS(x) REG_BIT(31 - (x)) +#define INTR_VCS(x) REG_BIT(x) + +#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030, XE_REG_OPTION_VF) +#define VCS_VECS_INTR_ENABLE XE_REG(0x190034, XE_REG_OPTION_VF) +#define GUC_SG_INTR_ENABLE XE_REG(0x190038, XE_REG_OPTION_VF) +#define ENGINE1_MASK REG_GENMASK(31, 16) +#define ENGINE0_MASK REG_GENMASK(15, 0) +#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c, XE_REG_OPTION_VF) +#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044, XE_REG_OPTION_VF) +#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048, XE_REG_OPTION_VF) + +#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4), XE_REG_OPTION_VF) +#define INTR_DATA_VALID REG_BIT(31) +#define INTR_ENGINE_INSTANCE(x) REG_FIELD_GET(GENMASK(25, 20), x) +#define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x) +#define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x) +#define OTHER_GUC_INSTANCE 0 +#define OTHER_GSC_HECI2_INSTANCE 3 +#define OTHER_GSC_INSTANCE 6 + +#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4), XE_REG_OPTION_VF) +#define RCS0_RSVD_INTR_MASK XE_REG(0x190090, XE_REG_OPTION_VF) +#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF) +#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF) +#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF) +#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF) +#define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4) +#define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF) +#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF) +#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4, XE_REG_OPTION_VF) +#define CCS0_CCS1_INTR_MASK XE_REG(0x190100) +#define CCS2_CCS3_INTR_MASK XE_REG(0x190104) +#define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110) +#define XEHPC_BCS3_BCS4_INTR_MASK XE_REG(0x190114) +#define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118) +#define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c) +#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) +#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) +#define GSC_ER_COMPLETE REG_BIT(5) +#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) +#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) +#define GT_RENDER_USER_INTERRUPT REG_BIT(0) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index 23f7dc5bbe99..51fd40ffafcb 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -128,7 +128,7 @@ struct xe_reg_mcr { * options. */ #define XE_REG_MCR(r_, ...) ((const struct xe_reg_mcr){ \ - .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \ + .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \ }) static inline bool xe_reg_is_valid(struct xe_reg r) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index dfa869f0dddd..3293172b0128 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -11,10 +11,6 @@ #define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK REG_GENMASK(15, 12) #define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK REG_GENMASK(9, 0) -#define PCU_IRQ_OFFSET 0x444e0 -#define GU_MISC_IRQ_OFFSET 0x444f0 -#define GU_MISC_GSE REG_BIT(27) - #define GU_CNTL_PROTECTED XE_REG(0x10100C) #define DRIVERINT_FLR_DIS REG_BIT(31) @@ -57,16 +53,6 @@ #define MTL_MPE_FREQUENCY XE_REG(0x13802c) #define MTL_RPE_MASK REG_GENMASK(8, 0) -#define DG1_MSTR_TILE_INTR XE_REG(0x190008) -#define DG1_MSTR_IRQ REG_BIT(31) -#define DG1_MSTR_TILE(t) REG_BIT(t) - -#define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF) -#define MASTER_IRQ REG_BIT(31) -#define GU_MISC_IRQ REG_BIT(29) -#define DISPLAY_IRQ REG_BIT(16) -#define GT_DW_IRQ(x) REG_BIT(x) - #define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF) #define VF_CAP REG_BIT(0) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 8dac069483e8..7d3fd720478b 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -6,6 +6,12 @@ #include <kunit/test.h> #include <kunit/visibility.h> +#include <linux/iosys-map.h> +#include <linux/random.h> +#include <linux/swap.h> + +#include <uapi/linux/sysinfo.h> + #include "tests/xe_kunit_helpers.h" #include "tests/xe_pci_test.h" #include "tests/xe_test.h" @@ -358,9 +364,242 @@ static void xe_bo_evict_kunit(struct kunit *test) evict_test_run_device(xe); } +struct xe_bo_link { + struct list_head link; + struct xe_bo *bo; + u32 val; +}; + +#define XE_BO_SHRINK_SIZE ((unsigned long)SZ_64M) + +static int shrink_test_fill_random(struct xe_bo *bo, struct rnd_state *state, + struct xe_bo_link *link) +{ + struct iosys_map map; + int ret = ttm_bo_vmap(&bo->ttm, &map); + size_t __maybe_unused i; + + if (ret) + return ret; + + for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) { + u32 val = prandom_u32_state(state); + + iosys_map_wr(&map, i, u32, val); + if (i == 0) + link->val = val; + } + + ttm_bo_vunmap(&bo->ttm, &map); + return 0; +} + +static bool shrink_test_verify(struct kunit *test, struct xe_bo *bo, + unsigned int bo_nr, struct rnd_state *state, + struct xe_bo_link *link) +{ + struct iosys_map map; + int ret = ttm_bo_vmap(&bo->ttm, &map); + size_t i; + bool failed = false; + + if (ret) { + KUNIT_FAIL(test, "Error mapping bo %u for content check.\n", bo_nr); + return true; + } + + for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) { + u32 val = prandom_u32_state(state); + + if (iosys_map_rd(&map, i, u32) != val) { + KUNIT_FAIL(test, "Content not preserved, bo %u offset 0x%016llx", + bo_nr, (unsigned long long)i); + kunit_info(test, "Failed value is 0x%08x, recorded 0x%08x\n", + (unsigned int)iosys_map_rd(&map, i, u32), val); + if (i == 0 && val != link->val) + kunit_info(test, "Looks like PRNG is out of sync.\n"); + failed = true; + break; + } + } + + ttm_bo_vunmap(&bo->ttm, &map); + + return failed; +} + +/* + * Try to create system bos corresponding to twice the amount + * of available system memory to test shrinker functionality. + * If no swap space is available to accommodate the + * memory overcommit, mark bos purgeable. + */ +static int shrink_test_run_device(struct xe_device *xe) +{ + struct kunit *test = kunit_get_current_test(); + LIST_HEAD(bos); + struct xe_bo_link *link, *next; + struct sysinfo si; + size_t ram, ram_and_swap, purgeable, alloced, to_alloc, limit; + unsigned int interrupted = 0, successful = 0, count = 0; + struct rnd_state prng; + u64 rand_seed; + bool failed = false; + + rand_seed = get_random_u64(); + prandom_seed_state(&prng, rand_seed); + kunit_info(test, "Random seed is 0x%016llx.\n", + (unsigned long long)rand_seed); + + /* Skip if execution time is expected to be too long. */ + + limit = SZ_32G; + /* IGFX with flat CCS needs to copy when swapping / shrinking */ + if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe)) + limit = SZ_16G; + + si_meminfo(&si); + ram = (size_t)si.freeram * si.mem_unit; + if (ram > limit) { + kunit_skip(test, "Too long expected execution time.\n"); + return 0; + } + to_alloc = ram * 2; + + ram_and_swap = ram + get_nr_swap_pages() * PAGE_SIZE; + if (to_alloc > ram_and_swap) + purgeable = to_alloc - ram_and_swap; + purgeable += purgeable / 5; + + kunit_info(test, "Free ram is %lu bytes. Will allocate twice of that.\n", + (unsigned long)ram); + for (alloced = 0; alloced < to_alloc; alloced += XE_BO_SHRINK_SIZE) { + struct xe_bo *bo; + unsigned int mem_type; + struct xe_ttm_tt *xe_tt; + + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) { + KUNIT_FAIL(test, "Unexpected link allocation failure\n"); + failed = true; + break; + } + + INIT_LIST_HEAD(&link->link); + + /* We can create bos using WC caching here. But it is slower. */ + bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE, + DRM_XE_GEM_CPU_CACHING_WB, + XE_BO_FLAG_SYSTEM); + if (IS_ERR(bo)) { + if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) && + bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS)) + KUNIT_FAIL(test, "Error creating bo: %pe\n", bo); + kfree(link); + failed = true; + break; + } + xe_bo_lock(bo, false); + xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm); + + /* + * Allocate purgeable bos first, because if we do it the + * other way around, they may not be subject to swapping... + */ + if (alloced < purgeable) { + xe_tt->purgeable = true; + bo->ttm.priority = 0; + } else { + int ret = shrink_test_fill_random(bo, &prng, link); + + if (ret) { + xe_bo_unlock(bo); + xe_bo_put(bo); + KUNIT_FAIL(test, "Error filling bo with random data: %pe\n", + ERR_PTR(ret)); + kfree(link); + failed = true; + break; + } + } + + mem_type = bo->ttm.resource->mem_type; + xe_bo_unlock(bo); + link->bo = bo; + list_add_tail(&link->link, &bos); + + if (mem_type != XE_PL_TT) { + KUNIT_FAIL(test, "Bo in incorrect memory type: %u\n", + bo->ttm.resource->mem_type); + failed = true; + } + cond_resched(); + if (signal_pending(current)) + break; + } + + /* + * Read back and destroy bos. Reset the pseudo-random seed to get an + * identical pseudo-random number sequence for readback. + */ + prandom_seed_state(&prng, rand_seed); + list_for_each_entry_safe(link, next, &bos, link) { + static struct ttm_operation_ctx ctx = {.interruptible = true}; + struct xe_bo *bo = link->bo; + struct xe_ttm_tt *xe_tt; + int ret; + + count++; + if (!signal_pending(current) && !failed) { + bool purgeable, intr = false; + + xe_bo_lock(bo, NULL); + + /* xe_tt->purgeable is cleared on validate. */ + xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm); + purgeable = xe_tt->purgeable; + do { + ret = ttm_bo_validate(&bo->ttm, &tt_placement, &ctx); + if (ret == -EINTR) + intr = true; + } while (ret == -EINTR && !signal_pending(current)); + + if (!ret && !purgeable) + failed = shrink_test_verify(test, bo, count, &prng, link); + + xe_bo_unlock(bo); + if (ret) { + KUNIT_FAIL(test, "Validation failed: %pe\n", + ERR_PTR(ret)); + failed = true; + } else if (intr) { + interrupted++; + } else { + successful++; + } + } + xe_bo_put(link->bo); + list_del(&link->link); + kfree(link); + } + kunit_info(test, "Readbacks interrupted: %u successful: %u\n", + interrupted, successful); + + return 0; +} + +static void xe_bo_shrink_kunit(struct kunit *test) +{ + struct xe_device *xe = test->priv; + + shrink_test_run_device(xe); +} + static struct kunit_case xe_bo_tests[] = { KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param), KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param), + KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param, + {.speed = KUNIT_SPEED_SLOW}), {} }; diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index 79be73b4a02b..ea932c051cc7 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -55,7 +55,7 @@ static void read_l3cc_table(struct xe_gt *gt, if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i >> 1)); else - reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i >> 1)); + reg_val = xe_mmio_read32(>->mmio, XELP_LNCFCMOCS(i >> 1)); mocs_dbg(gt, "reg_val=0x%x\n", reg_val); } else { @@ -94,7 +94,7 @@ static void read_mocs_table(struct xe_gt *gt, if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); else - reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + reg_val = xe_mmio_read32(>->mmio, XELP_GLOBAL_MOCS(i)); mocs_expected = get_entry_control(info, i); mocs = reg_val; diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h index e22bbf57fca7..04d6b95c6d87 100644 --- a/drivers/gpu/drm/xe/xe_assert.h +++ b/drivers/gpu/drm/xe/xe_assert.h @@ -10,7 +10,7 @@ #include <drm/drm_print.h> -#include "xe_device_types.h" +#include "xe_gt_types.h" #include "xe_step.h" /** diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index f379df3a12bf..5e8f60a8d431 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -283,6 +283,8 @@ struct xe_ttm_tt { struct device *dev; struct sg_table sgt; struct sg_table *sg; + /** @purgeable: Whether the content of the pages of @ttm is purgeable. */ + bool purgeable; }; static int xe_tt_map_sg(struct ttm_tt *tt) @@ -468,7 +470,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, mem->bus.offset += vram->io_start; mem->bus.is_iomem = true; -#if !defined(CONFIG_X86) +#if !IS_ENABLED(CONFIG_X86) mem->bus.caching = ttm_write_combined; #endif return 0; @@ -680,8 +682,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, tt_has_data = ttm && (ttm_tt_is_populated(ttm) || (ttm->page_flags & TTM_TT_FLAG_SWAPPED)); - move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) : - (!mem_type_is_vram(old_mem_type) && !tt_has_data); + move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) : + (!mem_type_is_vram(old_mem_type) && !tt_has_data)); needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || (!ttm && ttm_bo->type == ttm_bo_type_device); @@ -761,7 +763,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, if (xe_rpm_reclaim_safe(xe)) { /* * We might be called through swapout in the validation path of - * another TTM device, so unconditionally acquire rpm here. + * another TTM device, so acquire rpm here. */ xe_pm_runtime_get(xe); } else { @@ -1082,6 +1084,33 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) } } +static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx) +{ + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + + if (ttm_bo->ttm) { + struct ttm_placement place = {}; + int ret = ttm_bo_validate(ttm_bo, &place, ctx); + + drm_WARN_ON(&xe->drm, ret); + } +} + +static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo) +{ + struct ttm_operation_ctx ctx = { + .interruptible = false + }; + + if (ttm_bo->ttm) { + struct xe_ttm_tt *xe_tt = + container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm); + + if (xe_tt->purgeable) + xe_ttm_bo_purge(ttm_bo, &ctx); + } +} + const struct ttm_device_funcs xe_ttm_funcs = { .ttm_tt_create = xe_ttm_tt_create, .ttm_tt_populate = xe_ttm_tt_populate, @@ -1094,6 +1123,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { .release_notify = xe_ttm_bo_release_notify, .eviction_valuable = ttm_bo_eviction_valuable, .delete_mem_notify = xe_ttm_bo_delete_mem_notify, + .swap_notify = xe_ttm_bo_swap_notify, }; static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 70d4e4d46c3c..1940cd3229f4 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -6,6 +6,7 @@ #include "xe_device.h" #include <linux/delay.h> +#include <linux/fault-inject.h> #include <linux/units.h> #include <drm/drm_aperture.h> @@ -171,10 +172,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xe_exec_queue_kill(q); xe_exec_queue_put(q); } - mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); - mutex_unlock(&xef->vm.lock); xe_file_put(xef); @@ -298,6 +297,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) if (xe->unordered_wq) destroy_workqueue(xe->unordered_wq); + if (xe->destroy_wq) + destroy_workqueue(xe->destroy_wq); + ttm_device_fini(&xe->ttm); } @@ -336,9 +338,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, init_waitqueue_head(&xe->ufence_wq); - err = drmm_mutex_init(&xe->drm, &xe->usm.lock); - if (err) - goto err; + init_rwsem(&xe->usm.lock); xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC); @@ -363,8 +363,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); + xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0); if (!xe->ordered_wq || !xe->unordered_wq || - !xe->preempt_fence_wq) { + !xe->preempt_fence_wq || !xe->destroy_wq) { /* * Cleanup done in xe_device_destroy via * drmm_add_action_or_reset register above @@ -383,6 +384,12 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, err: return ERR_PTR(err); } +ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */ + +static bool xe_driver_flr_disabled(struct xe_device *xe) +{ + return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS; +} /* * The driver-initiated FLR is the highest level of reset that we can trigger @@ -397,17 +404,12 @@ err: * if/when a new instance of i915 is bound to the device it will do a full * re-init anyway. */ -static void xe_driver_flr(struct xe_device *xe) +static void __xe_driver_flr(struct xe_device *xe) { const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */ - struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_mmio *mmio = xe_root_tile_mmio(xe); int ret; - if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) { - drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n"); - return; - } - drm_dbg(&xe->drm, "Triggering Driver-FLR\n"); /* @@ -419,25 +421,25 @@ static void xe_driver_flr(struct xe_device *xe) * is still pending (unless the HW is totally dead), but better to be * safe in case something unexpected happens */ - ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false); + ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false); if (ret) { drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret); return; } - xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS); + xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS); /* Trigger the actual Driver-FLR */ - xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR); + xe_mmio_rmw32(mmio, GU_CNTL, 0, DRIVERFLR); /* Wait for hardware teardown to complete */ - ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false); + ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false); if (ret) { drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret); return; } /* Wait for hardware/firmware re-init to complete */ - ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS, + ret = xe_mmio_wait32(mmio, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS, flr_timeout, NULL, false); if (ret) { drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret); @@ -445,7 +447,17 @@ static void xe_driver_flr(struct xe_device *xe) } /* Clear sticky completion status */ - xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS); + xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS); +} + +static void xe_driver_flr(struct xe_device *xe) +{ + if (xe_driver_flr_disabled(xe)) { + drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n"); + return; + } + + __xe_driver_flr(xe); } static void xe_driver_flr_fini(void *arg) @@ -488,16 +500,15 @@ mask_err: return err; } -static bool verify_lmem_ready(struct xe_gt *gt) +static bool verify_lmem_ready(struct xe_device *xe) { - u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT; + u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT; return !!val; } static int wait_for_lmem_ready(struct xe_device *xe) { - struct xe_gt *gt = xe_root_mmio_gt(xe); unsigned long timeout, start; if (!IS_DGFX(xe)) @@ -506,7 +517,7 @@ static int wait_for_lmem_ready(struct xe_device *xe) if (IS_SRIOV_VF(xe)) return 0; - if (verify_lmem_ready(gt)) + if (verify_lmem_ready(xe)) return 0; drm_dbg(&xe->drm, "Waiting for lmem initialization\n"); @@ -535,13 +546,14 @@ static int wait_for_lmem_ready(struct xe_device *xe) msleep(20); - } while (!verify_lmem_ready(gt)); + } while (!verify_lmem_ready(xe)); drm_dbg(&xe->drm, "lmem ready after %ums", jiffies_to_msecs(jiffies - start)); return 0; } +ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */ static void update_device_info(struct xe_device *xe) { @@ -589,15 +601,17 @@ int xe_device_probe_early(struct xe_device *xe) return 0; } -static int xe_device_set_has_flat_ccs(struct xe_device *xe) +static int probe_has_flat_ccs(struct xe_device *xe) { + struct xe_gt *gt; u32 reg; int err; + /* Always enabled/disabled, no runtime check to do */ if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs) return 0; - struct xe_gt *gt = xe_root_mmio_gt(xe); + gt = xe_root_mmio_gt(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) @@ -646,6 +660,13 @@ int xe_device_probe(struct xe_device *xe) err = xe_gt_init_early(gt); if (err) return err; + + /* + * Only after this point can GT-specific MMIO operations + * (including things like communication with the GuC) + * be performed. + */ + xe_gt_mmio_init(gt); } for_each_tile(tile, xe, id) { @@ -661,11 +682,9 @@ int xe_device_probe(struct xe_device *xe) err = xe_ggtt_init_early(tile->mem.ggtt); if (err) return err; - if (IS_SRIOV_VF(xe)) { - err = xe_memirq_init(&tile->sriov.vf.memirq); - if (err) - return err; - } + err = xe_memirq_init(&tile->memirq); + if (err) + return err; } for_each_gt(gt, xe, id) { @@ -689,7 +708,7 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err; - err = xe_device_set_has_flat_ccs(xe); + err = probe_has_flat_ccs(xe); if (err) goto err; @@ -799,6 +818,24 @@ void xe_device_remove(struct xe_device *xe) void xe_device_shutdown(struct xe_device *xe) { + struct xe_gt *gt; + u8 id; + + drm_dbg(&xe->drm, "Shutting down device\n"); + + if (xe_driver_flr_disabled(xe)) { + xe_display_pm_shutdown(xe); + + xe_irq_suspend(xe); + + for_each_gt(gt, xe, id) + xe_gt_shutdown(gt); + + xe_display_pm_shutdown_late(xe); + } else { + /* BOOM! */ + __xe_driver_flr(xe); + } } /** @@ -812,11 +849,9 @@ void xe_device_shutdown(struct xe_device *xe) */ void xe_device_wmb(struct xe_device *xe) { - struct xe_gt *gt = xe_root_mmio_gt(xe); - wmb(); if (IS_DGFX(xe)) - xe_mmio_write32(gt, VF_CAP_REG, 0); + xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0); } /** @@ -857,7 +892,7 @@ void xe_device_td_flush(struct xe_device *xe) if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)) return; - xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); + xe_mmio_write32(>->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); /* * FIXME: We can likely do better here with our choice of * timeout. Currently we just assume the worst case, i.e. 150us, @@ -865,7 +900,7 @@ void xe_device_td_flush(struct xe_device *xe) * scenario on current platforms if all cache entries are * transient and need to be flushed.. */ - if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0, + if (xe_mmio_wait32(>->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0, 150, NULL, false)) xe_gt_err_once(gt, "TD flush timeout\n"); @@ -888,9 +923,9 @@ void xe_device_l2_flush(struct xe_device *xe) return; spin_lock(>->global_invl_lock); - xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1); + xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); - if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true)) + if (xe_mmio_wait32(>->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); spin_unlock(>->global_invl_lock); diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 894f04770454..4c3f0ebe78a9 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -9,6 +9,8 @@ #include <drm/drm_util.h> #include "xe_device_types.h" +#include "xe_gt_types.h" +#include "xe_sriov.h" static inline struct xe_device *to_xe_device(const struct drm_device *dev) { @@ -138,7 +140,7 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe) static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) { - return >->mmio.fw; + return >->pm.fw; } void xe_device_assert_mem_access(struct xe_device *xe); @@ -153,11 +155,22 @@ static inline bool xe_device_has_sriov(struct xe_device *xe) return xe->info.has_sriov; } +static inline bool xe_device_has_msix(struct xe_device *xe) +{ + /* TODO: change this when MSI-X support is fully integrated */ + return false; +} + static inline bool xe_device_has_memirq(struct xe_device *xe) { return GRAPHICS_VERx100(xe) >= 1250; } +static inline bool xe_device_uses_memirq(struct xe_device *xe) +{ + return xe_device_has_memirq(xe) && (IS_SRIOV_VF(xe) || xe_device_has_msix(xe)); +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size); void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index ec7eb7811126..85bede4dd646 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -14,7 +14,6 @@ #include "xe_devcoredump_types.h" #include "xe_heci_gsc.h" -#include "xe_gt_types.h" #include "xe_lmtt_types.h" #include "xe_memirq_types.h" #include "xe_oa.h" @@ -108,6 +107,45 @@ struct xe_mem_region { }; /** + * struct xe_mmio - register mmio structure + * + * Represents an MMIO region that the CPU may use to access registers. A + * region may share its IO map with other regions (e.g., all GTs within a + * tile share the same map with their parent tile, but represent different + * subregions of the overall IO space). + */ +struct xe_mmio { + /** @tile: Backpointer to tile, used for tracing */ + struct xe_tile *tile; + + /** @regs: Map used to access registers. */ + void __iomem *regs; + + /** + * @sriov_vf_gt: Backpointer to GT. + * + * This pointer is only set for GT MMIO regions and only when running + * as an SRIOV VF structure + */ + struct xe_gt *sriov_vf_gt; + + /** + * @regs_size: Length of the register region within the map. + * + * The size of the iomap set in *regs is generally larger than the + * register mmio space since it includes unused regions and/or + * non-register regions such as the GGTT PTEs. + */ + size_t regs_size; + + /** @adj_limit: adjust MMIO address if address is below this value */ + u32 adj_limit; + + /** @adj_offset: offset to add to MMIO address when adjusting */ + u32 adj_offset; +}; + +/** * struct xe_tile - hardware tile structure * * From a driver perspective, a "tile" is effectively a complete GPU, containing @@ -148,26 +186,14 @@ struct xe_tile { * * 4MB-8MB: reserved * * 8MB-16MB: global GTT */ - struct { - /** @mmio.size: size of tile's MMIO space */ - size_t size; - - /** @mmio.regs: pointer to tile's MMIO space (starting with registers) */ - void __iomem *regs; - } mmio; + struct xe_mmio mmio; /** * @mmio_ext: MMIO-extension info for a tile. * * Each tile has its own additional 256MB (28-bit) MMIO-extension space. */ - struct { - /** @mmio_ext.size: size of tile's additional MMIO-extension space */ - size_t size; - - /** @mmio_ext.regs: pointer to tile's additional MMIO-extension space */ - void __iomem *regs; - } mmio_ext; + struct xe_mmio mmio_ext; /** @mem: memory management info for tile */ struct { @@ -200,14 +226,14 @@ struct xe_tile { struct xe_lmtt lmtt; } pf; struct { - /** @sriov.vf.memirq: Memory Based Interrupts. */ - struct xe_memirq memirq; - /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ struct xe_ggtt_node *ggtt_balloon[2]; } vf; } sriov; + /** @memirq: Memory Based Interrupts. */ + struct xe_memirq memirq; + /** @pcode: tile's PCODE */ struct { /** @pcode.lock: protecting tile's PCODE mailbox data */ @@ -369,7 +395,7 @@ struct xe_device { /** @usm.next_asid: next ASID, used to cyclical alloc asids */ u32 next_asid; /** @usm.lock: protects UM state */ - struct mutex lock; + struct rw_semaphore lock; } usm; /** @pinned: pinned BO state */ @@ -396,6 +422,9 @@ struct xe_device { /** @unordered_wq: used to serialize unordered work, mostly display */ struct workqueue_struct *unordered_wq; + /** @destroy_wq: used to serialize user destroy work, like queue */ + struct workqueue_struct *destroy_wq; + /** @tiles: device tiles */ struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE]; @@ -567,15 +596,23 @@ struct xe_file { struct { /** @vm.xe: xarray to store VMs */ struct xarray xa; - /** @vm.lock: protects file VM state */ + /** + * @vm.lock: Protects VM lookup + reference and removal a from + * file xarray. Not an intended to be an outer lock which does + * thing while being held. + */ struct mutex lock; } vm; /** @exec_queue: Submission exec queue state for file */ struct { - /** @exec_queue.xe: xarray to store engines */ + /** @exec_queue.xa: xarray to store exece queues */ struct xarray xa; - /** @exec_queue.lock: protects file engine state */ + /** + * @exec_queue.lock: Protects exec queue lookup + reference and + * removal a frommfile xarray. Not an intended to be an outer + * lock which does thing while being held. + */ struct mutex lock; } exec_queue; diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index c4add8b38bbd..fb52a23e28f8 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -283,8 +283,15 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) /* Accumulate all the exec queues from this client */ mutex_lock(&xef->exec_queue.lock); - xa_for_each(&xef->exec_queue.xa, i, q) + xa_for_each(&xef->exec_queue.xa, i, q) { + xe_exec_queue_get(q); + mutex_unlock(&xef->exec_queue.lock); + xe_exec_queue_update_run_ticks(q); + + mutex_lock(&xef->exec_queue.lock); + xe_exec_queue_put(q); + } mutex_unlock(&xef->exec_queue.lock); /* Get the total GPU cycles */ diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 7f28b7fc68d5..d098d2dd1b2d 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -635,14 +635,14 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } } - mutex_lock(&xef->exec_queue.lock); + q->xef = xe_file_get(xef); + + /* user id alloc must always be last in ioctl to prevent UAF */ err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->exec_queue.lock); if (err) goto kill_exec_queue; args->exec_queue_id = id; - q->xef = xe_file_get(xef); return 0; diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 6a59165b9569..f3b71fe7a96d 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -44,6 +44,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, u32 ctx_id) { struct xe_gt *gt = hwe->gt; + struct xe_mmio *mmio = >->mmio; struct xe_device *xe = gt_to_xe(gt); u64 lrc_desc; @@ -58,7 +59,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, } if (hwe->class == XE_ENGINE_CLASS_COMPUTE) - xe_mmio_write32(hwe->gt, RCU_MODE, + xe_mmio_write32(mmio, RCU_MODE, _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); @@ -76,17 +77,17 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, */ wmb(); - xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base), + xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base), xe_bo_ggtt_addr(hwe->hwsp)); - xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base)); - xe_mmio_write32(gt, RING_MODE(hwe->mmio_base), + xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base)); + xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); - xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base), + xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base), lower_32_bits(lrc_desc)); - xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base), + xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base), upper_32_bits(lrc_desc)); - xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base), + xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base), EL_CTRL_LOAD); } @@ -168,8 +169,8 @@ static u64 read_execlist_status(struct xe_hw_engine *hwe) struct xe_gt *gt = hwe->gt; u32 hi, lo; - lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base)); - hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base)); + lo = xe_mmio_read32(>->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base)); + hi = xe_mmio_read32(>->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base)); return lo | (u64)hi << 32; } diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index b263fff15273..a64c14757c84 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -100,7 +100,7 @@ static void __domain_ctl(struct xe_gt *gt, struct xe_force_wake_domain *domain, if (IS_SRIOV_VF(gt_to_xe(gt))) return; - xe_mmio_write32(gt, domain->reg_ctl, domain->mask | (wake ? domain->val : 0)); + xe_mmio_write32(>->mmio, domain->reg_ctl, domain->mask | (wake ? domain->val : 0)); } static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain, bool wake) @@ -111,7 +111,7 @@ static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain, if (IS_SRIOV_VF(gt_to_xe(gt))) return 0; - ret = xe_mmio_wait32(gt, domain->reg_ack, domain->val, wake ? domain->val : 0, + ret = xe_mmio_wait32(>->mmio, domain->reg_ack, domain->val, wake ? domain->val : 0, XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, &value, true); if (ret) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 2895f154654c..47bfd9d2635d 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -5,6 +5,7 @@ #include "xe_ggtt.h" +#include <linux/fault-inject.h> #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/sizes.h> @@ -107,8 +108,10 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev) static void ggtt_update_access_counter(struct xe_ggtt *ggtt) { - struct xe_gt *gt = XE_WA(ggtt->tile->primary_gt, 22019338487) ? ggtt->tile->primary_gt : - ggtt->tile->media_gt; + struct xe_tile *tile = ggtt->tile; + struct xe_gt *affected_gt = XE_WA(tile->primary_gt, 22019338487) ? + tile->primary_gt : tile->media_gt; + struct xe_mmio *mmio = &affected_gt->mmio; u32 max_gtt_writes = XE_WA(ggtt->tile->primary_gt, 22019338487) ? 1100 : 63; /* * Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit @@ -118,7 +121,7 @@ static void ggtt_update_access_counter(struct xe_ggtt *ggtt) lockdep_assert_held(&ggtt->lock); if ((++ggtt->access_count % max_gtt_writes) == 0) { - xe_mmio_write32(gt, GMD_ID, 0x0); + xe_mmio_write32(mmio, GMD_ID, 0x0); ggtt->access_count = 0; } } @@ -262,6 +265,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) return 0; } +ALLOW_ERROR_INJECTION(xe_ggtt_init_early, ERRNO); /* See xe_pci_probe() */ static void xe_ggtt_invalidate(struct xe_ggtt *ggtt); diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index c518d1d16d82..50361b4638f9 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -90,6 +90,11 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) cancel_work_sync(&sched->work_process_msg); } +void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched) +{ + drm_sched_resume_timeout(&sched->base, sched->base.timeout); +} + void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg) { diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index cee9c6809fc0..5ad5629a6c60 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -22,6 +22,8 @@ void xe_sched_fini(struct xe_gpu_scheduler *sched); void xe_sched_submission_start(struct xe_gpu_scheduler *sched); void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); +void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched); + void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 6fbea70d3d36..783b09bf3681 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -34,6 +34,7 @@ #include "instructions/xe_gsc_commands.h" #include "regs/xe_gsc_regs.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_irq_regs.h" static struct xe_gt * gsc_to_gt(struct xe_gsc *gsc) @@ -179,7 +180,7 @@ out_bo: static int gsc_fw_is_loaded(struct xe_gt *gt) { - return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) & + return xe_mmio_read32(>->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) & HECI1_FWSTS1_INIT_COMPLETE; } @@ -190,7 +191,7 @@ static int gsc_fw_wait(struct xe_gt *gt) * executed by the GSCCS. To account for possible submission delays or * other issues, we use a 500ms timeout in the wait here. */ - return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE), + return xe_mmio_wait32(>->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE), HECI1_FWSTS1_INIT_COMPLETE, HECI1_FWSTS1_INIT_COMPLETE, 500 * USEC_PER_MSEC, NULL, false); @@ -330,7 +331,7 @@ static int gsc_er_complete(struct xe_gt *gt) * so in that scenario we're always guaranteed to find the correct * value. */ - er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE; + er_status = xe_mmio_read32(>->mmio, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE; if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) { /* @@ -581,11 +582,11 @@ void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep) if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt)) return; - xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set); + xe_mmio_rmw32(>->mmio, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set); if (prep) { /* make sure the reset bit is clear when writing the CSR reg */ - xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), + xe_mmio_rmw32(>->mmio, HECI_H_CSR(MTL_GSC_HECI2_BASE), HECI_H_CSR_RST, HECI_H_CSR_IG); msleep(200); } @@ -599,6 +600,7 @@ void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep) void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p) { struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_mmio *mmio = >->mmio; int err; xe_uc_fw_print(&gsc->fw, p); @@ -613,12 +615,12 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p) return; drm_printf(p, "\nHECI1 FWSTS: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", - xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)), - xe_mmio_read32(gt, HECI_FWSTS2(MTL_GSC_HECI1_BASE)), - xe_mmio_read32(gt, HECI_FWSTS3(MTL_GSC_HECI1_BASE)), - xe_mmio_read32(gt, HECI_FWSTS4(MTL_GSC_HECI1_BASE)), - xe_mmio_read32(gt, HECI_FWSTS5(MTL_GSC_HECI1_BASE)), - xe_mmio_read32(gt, HECI_FWSTS6(MTL_GSC_HECI1_BASE))); + xe_mmio_read32(mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE)), + xe_mmio_read32(mmio, HECI_FWSTS2(MTL_GSC_HECI1_BASE)), + xe_mmio_read32(mmio, HECI_FWSTS3(MTL_GSC_HECI1_BASE)), + xe_mmio_read32(mmio, HECI_FWSTS4(MTL_GSC_HECI1_BASE)), + xe_mmio_read32(mmio, HECI_FWSTS5(MTL_GSC_HECI1_BASE)), + xe_mmio_read32(mmio, HECI_FWSTS6(MTL_GSC_HECI1_BASE))); xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); } diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 2d6ea8c01445..6d89c22ae811 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -65,7 +65,7 @@ gsc_to_gt(struct xe_gsc *gsc) bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); - u32 fwsts1 = xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)); + u32 fwsts1 = xe_mmio_read32(>->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE)); return REG_FIELD_GET(HECI1_FWSTS1_CURRENT_STATE, fwsts1) == HECI1_FWSTS1_PROXY_STATE_NORMAL; @@ -78,7 +78,7 @@ static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set) /* make sure we never accidentally write the RST bit */ clr |= HECI_H_CSR_RST; - xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), clr, set); + xe_mmio_rmw32(>->mmio, HECI_H_CSR(MTL_GSC_HECI2_BASE), clr, set); } static void gsc_proxy_irq_clear(struct xe_gsc *gsc) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index f0dc2bf24c7b..2d5e78311b76 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -108,7 +108,7 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) return; if (!xe_gt_is_media_type(gt)) { - xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); + xe_mmio_write32(>->mmio, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg |= CG_DIS_CNTLBUS; xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); @@ -245,7 +245,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) else if (entry->clr_bits + 1) val = (reg.mcr ? xe_gt_mcr_unicast_read_any(gt, reg_mcr) : - xe_mmio_read32(gt, reg)) & (~entry->clr_bits); + xe_mmio_read32(>->mmio, reg)) & (~entry->clr_bits); else val = 0; @@ -440,7 +440,7 @@ static int gt_fw_domain_init(struct xe_gt *gt) * Stash hardware-reported version. Since this register does not exist * on pre-MTL platforms, reading it there will (correctly) return 0. */ - gt->info.gmdid = xe_mmio_read32(gt, GMD_ID); + gt->info.gmdid = xe_mmio_read32(>->mmio, GMD_ID); err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); XE_WARN_ON(err); @@ -623,6 +623,30 @@ int xe_gt_init(struct xe_gt *gt) return 0; } +/** + * xe_gt_mmio_init() - Initialize GT's MMIO access + * @gt: the GT object + * + * Initialize GT's MMIO accessor, which will be used to access registers inside + * this GT. + */ +void xe_gt_mmio_init(struct xe_gt *gt) +{ + struct xe_tile *tile = gt_to_tile(gt); + + gt->mmio.regs = tile->mmio.regs; + gt->mmio.regs_size = tile->mmio.regs_size; + gt->mmio.tile = tile; + + if (gt->info.type == XE_GT_TYPE_MEDIA) { + gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; + gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; + } + + if (IS_SRIOV_VF(gt_to_xe(gt))) + gt->mmio.sriov_vf_gt = gt; +} + void xe_gt_record_user_engines(struct xe_gt *gt) { struct xe_hw_engine *hwe; @@ -650,8 +674,8 @@ static int do_gt_reset(struct xe_gt *gt) xe_gsc_wa_14015076503(gt, true); - xe_mmio_write32(gt, GDRST, GRDOM_FULL); - err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false); + xe_mmio_write32(>->mmio, GDRST, GRDOM_FULL); + err = xe_mmio_wait32(>->mmio, GDRST, GRDOM_FULL, 0, 5000, NULL, false); if (err) xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n", ERR_PTR(err)); @@ -862,6 +886,13 @@ err_msg: return err; } +void xe_gt_shutdown(struct xe_gt *gt) +{ + xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + do_gt_reset(gt); + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); +} + /** * xe_gt_sanitize_freq() - Restore saved frequencies if necessary. * @gt: the GT object @@ -874,7 +905,9 @@ int xe_gt_sanitize_freq(struct xe_gt *gt) int ret = 0; if ((!xe_uc_fw_is_available(>->uc.gsc.fw) || - xe_uc_fw_is_loaded(>->uc.gsc.fw)) && XE_WA(gt, 22019338487)) + xe_uc_fw_is_loaded(>->uc.gsc.fw) || + xe_uc_fw_is_in_error_state(>->uc.gsc.fw)) && + XE_WA(gt, 22019338487)) ret = xe_guc_pc_restore_stashed_freq(>->uc.guc.pc); return ret; diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index ee138e9768a2..82b9b7f82fca 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -31,6 +31,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile); int xe_gt_init_hwconfig(struct xe_gt *gt); int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); +void xe_gt_mmio_init(struct xe_gt *gt); void xe_gt_declare_wedged(struct xe_gt *gt); int xe_gt_record_default_lrcs(struct xe_gt *gt); @@ -48,6 +49,7 @@ void xe_gt_record_user_engines(struct xe_gt *gt); void xe_gt_suspend_prepare(struct xe_gt *gt); int xe_gt_suspend(struct xe_gt *gt); +void xe_gt_shutdown(struct xe_gt *gt); int xe_gt_resume(struct xe_gt *gt); void xe_gt_reset_async(struct xe_gt *gt); void xe_gt_sanitize(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index d2e4dc3aaf61..9360ac4de489 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -68,7 +68,7 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) } } - xe_mmio_write32(gt, CCS_MODE, mode); + xe_mmio_write32(>->mmio, CCS_MODE, mode); xe_gt_dbg(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n", mode, config, num_engines, num_slices); diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 86c2d62b4bdc..cc2ae159298e 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -17,7 +17,7 @@ static u32 read_reference_ts_freq(struct xe_gt *gt) { - u32 ts_override = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE); + u32 ts_override = xe_mmio_read32(>->mmio, TIMESTAMP_OVERRIDE); u32 base_freq, frac_freq; base_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK, @@ -57,7 +57,7 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg) int xe_gt_clock_init(struct xe_gt *gt) { - u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE); + u32 ctc_reg = xe_mmio_read32(>->mmio, CTC_MODE); u32 freq = 0; /* Assuming gen11+ so assert this assumption is correct */ @@ -66,7 +66,7 @@ int xe_gt_clock_init(struct xe_gt *gt) if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) { freq = read_reference_ts_freq(gt); } else { - u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0); + u32 c0 = xe_mmio_read32(>->mmio, RPM_CONFIG0); freq = get_crystal_clock_freq(c0); diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 8f95d3a5949b..cbc43973ff7e 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -15,6 +15,7 @@ #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_gt_mcr.h" +#include "xe_gt_idle.h" #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_vf_debugfs.h" #include "xe_gt_stats.h" @@ -109,6 +110,17 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p) return 0; } +static int powergate_info(struct xe_gt *gt, struct drm_printer *p) +{ + int ret; + + xe_pm_runtime_get(gt_to_xe(gt)); + ret = xe_gt_idle_pg_print(gt, p); + xe_pm_runtime_put(gt_to_xe(gt)); + + return ret; +} + static int force_reset(struct xe_gt *gt, struct drm_printer *p) { xe_pm_runtime_get(gt_to_xe(gt)); @@ -288,6 +300,7 @@ static const struct drm_info_list debugfs_list[] = { {"topology", .show = xe_gt_debugfs_simple_show, .data = topology}, {"steering", .show = xe_gt_debugfs_simple_show, .data = steering}, {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt}, + {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info}, {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds}, {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 68a5778b4319..6bd39b2c5003 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -11,9 +11,9 @@ #include <drm/drm_managed.h> #include <drm/drm_print.h> -#include "xe_device_types.h" #include "xe_gt_sysfs.h" #include "xe_gt_throttle.h" +#include "xe_gt_types.h" #include "xe_guc_pc.h" #include "xe_pm.h" @@ -237,11 +237,11 @@ int xe_gt_freq_init(struct xe_gt *gt) if (!gt->freq) return -ENOMEM; - err = devm_add_action(xe->drm.dev, freq_fini, gt->freq); + err = sysfs_create_files(gt->freq, freq_attrs); if (err) return err; - err = sysfs_create_files(gt->freq, freq_attrs); + err = devm_add_action_or_reset(xe->drm.dev, freq_fini, gt->freq); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 67aba4140510..746812aee8ff 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -98,7 +98,9 @@ static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency) void xe_gt_idle_enable_pg(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - u32 pg_enable; + struct xe_gt_idle *gtidle = >->gtidle; + struct xe_mmio *mmio = >->mmio; + u32 vcs_mask, vecs_mask; int i, j; if (IS_SRIOV_VF(xe)) @@ -110,12 +112,19 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) xe_device_assert_mem_access(gt_to_xe(gt)); - pg_enable = RENDER_POWERGATE_ENABLE | MEDIA_POWERGATE_ENABLE; + vcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_DECODE); + vecs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE); + + if (vcs_mask || vecs_mask) + gtidle->powergate_enable = MEDIA_POWERGATE_ENABLE; + + if (!xe_gt_is_media_type(gt)) + gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { if ((gt->info.engine_mask & BIT(i))) - pg_enable |= (VDN_HCP_POWERGATE_ENABLE(j) | - VDN_MFXVDENC_POWERGATE_ENABLE(j)); + gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) | + VDN_MFXVDENC_POWERGATE_ENABLE(j)); } XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); @@ -124,25 +133,113 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) * GuC sets the hysteresis value when GuC PC is enabled * else set it to 25 (25 * 1.28us) */ - xe_mmio_write32(gt, MEDIA_POWERGATE_IDLE_HYSTERESIS, 25); - xe_mmio_write32(gt, RENDER_POWERGATE_IDLE_HYSTERESIS, 25); + xe_mmio_write32(mmio, MEDIA_POWERGATE_IDLE_HYSTERESIS, 25); + xe_mmio_write32(mmio, RENDER_POWERGATE_IDLE_HYSTERESIS, 25); } - xe_mmio_write32(gt, POWERGATE_ENABLE, pg_enable); + xe_mmio_write32(mmio, POWERGATE_ENABLE, gtidle->powergate_enable); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); } void xe_gt_idle_disable_pg(struct xe_gt *gt) { + struct xe_gt_idle *gtidle = >->gtidle; + if (IS_SRIOV_VF(gt_to_xe(gt))) return; xe_device_assert_mem_access(gt_to_xe(gt)); + gtidle->powergate_enable = 0; + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); + xe_mmio_write32(>->mmio, POWERGATE_ENABLE, gtidle->powergate_enable); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); +} - xe_mmio_write32(gt, POWERGATE_ENABLE, 0); +/** + * xe_gt_idle_pg_print - Xe powergating info + * @gt: GT object + * @p: drm_printer. + * + * This function prints the powergating information + * + * Return: 0 on success, negative error code otherwise + */ +int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_gt_idle *gtidle = >->gtidle; + struct xe_device *xe = gt_to_xe(gt); + enum xe_gt_idle_state state; + u32 pg_enabled, pg_status = 0; + u32 vcs_mask, vecs_mask; + int err, n; + /* + * Media Slices + * + * Slice 0: VCS0, VCS1, VECS0 + * Slice 1: VCS2, VCS3, VECS1 + * Slice 2: VCS4, VCS5, VECS2 + * Slice 3: VCS6, VCS7, VECS3 + */ + static const struct { + u64 engines; + u32 status_bit; + } media_slices[] = { + {(BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS1) | + BIT(XE_HW_ENGINE_VECS0)), MEDIA_SLICE0_AWAKE_STATUS}, - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); + {(BIT(XE_HW_ENGINE_VCS2) | BIT(XE_HW_ENGINE_VCS3) | + BIT(XE_HW_ENGINE_VECS1)), MEDIA_SLICE1_AWAKE_STATUS}, + + {(BIT(XE_HW_ENGINE_VCS4) | BIT(XE_HW_ENGINE_VCS5) | + BIT(XE_HW_ENGINE_VECS2)), MEDIA_SLICE2_AWAKE_STATUS}, + + {(BIT(XE_HW_ENGINE_VCS6) | BIT(XE_HW_ENGINE_VCS7) | + BIT(XE_HW_ENGINE_VECS3)), MEDIA_SLICE3_AWAKE_STATUS}, + }; + + if (xe->info.platform == XE_PVC) { + drm_printf(p, "Power Gating not supported\n"); + return 0; + } + + state = gtidle->idle_status(gtidle_to_pc(gtidle)); + pg_enabled = gtidle->powergate_enable; + + /* Do not wake the GT to read powergating status */ + if (state != GT_IDLE_C6) { + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + + pg_enabled = xe_mmio_read32(>->mmio, POWERGATE_ENABLE); + pg_status = xe_mmio_read32(>->mmio, POWERGATE_DOMAIN_STATUS); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); + } + + if (gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK) { + drm_printf(p, "Render Power Gating Enabled: %s\n", + str_yes_no(pg_enabled & RENDER_POWERGATE_ENABLE)); + + drm_printf(p, "Render Power Gate Status: %s\n", + str_up_down(pg_status & RENDER_AWAKE_STATUS)); + } + + vcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_DECODE); + vecs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE); + + /* Print media CPG status only if media is present */ + if (vcs_mask || vecs_mask) { + drm_printf(p, "Media Power Gating Enabled: %s\n", + str_yes_no(pg_enabled & MEDIA_POWERGATE_ENABLE)); + + for (n = 0; n < ARRAY_SIZE(media_slices); n++) + if (gt->info.engine_mask & media_slices[n].engines) + drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n, + str_up_down(pg_status & media_slices[n].status_bit)); + } + return 0; } static ssize_t name_show(struct device *dev, @@ -260,9 +357,9 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt) return; /* Units of 1280 ns for a total of 5s */ - xe_mmio_write32(gt, RC_IDLE_HYSTERSIS, 0x3B9ACA); + xe_mmio_write32(>->mmio, RC_IDLE_HYSTERSIS, 0x3B9ACA); /* Enable RC6 */ - xe_mmio_write32(gt, RC_CONTROL, + xe_mmio_write32(>->mmio, RC_CONTROL, RC_CTL_HW_ENABLE | RC_CTL_TO_MODE | RC_CTL_RC6_ENABLE); } @@ -274,6 +371,6 @@ void xe_gt_idle_disable_c6(struct xe_gt *gt) if (IS_SRIOV_VF(gt_to_xe(gt))) return; - xe_mmio_write32(gt, RC_CONTROL, 0); - xe_mmio_write32(gt, RC_STATE, 0); + xe_mmio_write32(>->mmio, RC_CONTROL, 0); + xe_mmio_write32(>->mmio, RC_STATE, 0); } diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h index 554447b5d46d..4455a6501cb0 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.h +++ b/drivers/gpu/drm/xe/xe_gt_idle.h @@ -8,6 +8,7 @@ #include "xe_gt_idle_types.h" +struct drm_printer; struct xe_gt; int xe_gt_idle_init(struct xe_gt_idle *gtidle); @@ -15,5 +16,6 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt); void xe_gt_idle_disable_c6(struct xe_gt *gt); void xe_gt_idle_enable_pg(struct xe_gt *gt); void xe_gt_idle_disable_pg(struct xe_gt *gt); +int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p); #endif /* _XE_GT_IDLE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h index f99b447534f3..b8b297a3f884 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle_types.h +++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h @@ -23,6 +23,8 @@ enum xe_gt_idle_state { struct xe_gt_idle { /** @name: name */ char name[16]; + /** @powergate_enable: copy of powergate enable bits */ + u32 powergate_enable; /** @residency_multiplier: residency multiplier in ns */ u32 residency_multiplier; /** @cur_residency: raw driver copy of idle residency */ diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 7d7bd0be6233..4c0767403881 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -239,11 +239,13 @@ static const struct xe_mmio_range xe2lpm_instance0_steering_table[] = { static void init_steering_l3bank(struct xe_gt *gt) { + struct xe_mmio *mmio = >->mmio; + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK, - xe_mmio_read32(gt, MIRROR_FUSE3)); + xe_mmio_read32(mmio, MIRROR_FUSE3)); u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK, - xe_mmio_read32(gt, XEHP_FUSE4)); + xe_mmio_read32(mmio, XEHP_FUSE4)); /* * Group selects mslice, instance selects bank within mslice. @@ -254,7 +256,7 @@ static void init_steering_l3bank(struct xe_gt *gt) bank_mask & BIT(0) ? 0 : 2; } else if (gt_to_xe(gt)->info.platform == XE_DG2) { u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK, - xe_mmio_read32(gt, MIRROR_FUSE3)); + xe_mmio_read32(mmio, MIRROR_FUSE3)); u32 bank = __ffs(mslice_mask) * 8; /* @@ -266,7 +268,7 @@ static void init_steering_l3bank(struct xe_gt *gt) gt->steering[L3BANK].instance_target = bank & 0x3; } else { u32 fuse = REG_FIELD_GET(L3BANK_MASK, - ~xe_mmio_read32(gt, MIRROR_FUSE3)); + ~xe_mmio_read32(mmio, MIRROR_FUSE3)); gt->steering[L3BANK].group_target = 0; /* unused */ gt->steering[L3BANK].instance_target = __ffs(fuse); @@ -276,7 +278,7 @@ static void init_steering_l3bank(struct xe_gt *gt) static void init_steering_mslice(struct xe_gt *gt) { u32 mask = REG_FIELD_GET(MEML3_EN_MASK, - xe_mmio_read32(gt, MIRROR_FUSE3)); + xe_mmio_read32(>->mmio, MIRROR_FUSE3)); /* * mslice registers are valid (not terminated) if either the meml3 @@ -380,7 +382,7 @@ static void init_steering_oaddrm(struct xe_gt *gt) static void init_steering_sqidi_psmi(struct xe_gt *gt) { u32 mask = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, - xe_mmio_read32(gt, MIRROR_FUSE3)); + xe_mmio_read32(>->mmio, MIRROR_FUSE3)); u32 select = __ffs(mask); gt->steering[SQIDI_PSMI].group_target = select >> 1; @@ -439,7 +441,7 @@ void xe_gt_mcr_init(struct xe_gt *gt) if (gt->info.type == XE_GT_TYPE_MEDIA) { drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13); - if (MEDIA_VER(xe) >= 20) { + if (MEDIA_VERx100(xe) >= 1301) { gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table; gt->steering[INSTANCE0].ranges = xe2lpm_instance0_steering_table; } else { @@ -494,8 +496,8 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt) u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) | REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2); - xe_mmio_write32(gt, MCFG_MCR_SELECTOR, steer_val); - xe_mmio_write32(gt, SF_MCR_SELECTOR, steer_val); + xe_mmio_write32(>->mmio, MCFG_MCR_SELECTOR, steer_val); + xe_mmio_write32(>->mmio, SF_MCR_SELECTOR, steer_val); /* * For GAM registers, all reads should be directed to instance 1 * (unicast reads against other instances are not allowed), @@ -533,7 +535,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, continue; for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) { - if (xe_mmio_in_range(gt, >->steering[type].ranges[i], reg)) { + if (xe_mmio_in_range(>->mmio, >->steering[type].ranges[i], reg)) { *group = gt->steering[type].group_target; *instance = gt->steering[type].instance_target; return true; @@ -544,7 +546,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, implicit_ranges = gt->steering[IMPLICIT_STEERING].ranges; if (implicit_ranges) for (int i = 0; implicit_ranges[i].end > 0; i++) - if (xe_mmio_in_range(gt, &implicit_ranges[i], reg)) + if (xe_mmio_in_range(>->mmio, &implicit_ranges[i], reg)) return false; /* @@ -579,7 +581,7 @@ static void mcr_lock(struct xe_gt *gt) __acquires(>->mcr_lock) * when a read to the relevant register returns 1. */ if (GRAPHICS_VERx100(xe) >= 1270) - ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL, + ret = xe_mmio_wait32(>->mmio, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL, true); drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT); @@ -589,7 +591,7 @@ static void mcr_unlock(struct xe_gt *gt) __releases(>->mcr_lock) { /* Release hardware semaphore - this is done by writing 1 to the register */ if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) - xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1); + xe_mmio_write32(>->mmio, STEER_SEMAPHORE, 0x1); spin_unlock(>->mcr_lock); } @@ -603,6 +605,7 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, u8 rw_flag, int group, int instance, u32 value) { const struct xe_reg reg = to_xe_reg(reg_mcr); + struct xe_mmio *mmio = >->mmio; struct xe_reg steer_reg; u32 steer_val, val = 0; @@ -635,12 +638,12 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, if (rw_flag == MCR_OP_READ) steer_val |= MCR_MULTICAST; - xe_mmio_write32(gt, steer_reg, steer_val); + xe_mmio_write32(mmio, steer_reg, steer_val); if (rw_flag == MCR_OP_READ) - val = xe_mmio_read32(gt, reg); + val = xe_mmio_read32(mmio, reg); else - xe_mmio_write32(gt, reg, value); + xe_mmio_write32(mmio, reg, value); /* * If we turned off the multicast bit (during a write) we're required @@ -649,7 +652,7 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, * operation. */ if (rw_flag == MCR_OP_WRITE) - xe_mmio_write32(gt, steer_reg, MCR_MULTICAST); + xe_mmio_write32(mmio, steer_reg, MCR_MULTICAST); return val; } @@ -684,7 +687,7 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr reg_mcr) group, instance, 0); mcr_unlock(gt); } else { - val = xe_mmio_read32(gt, reg); + val = xe_mmio_read32(>->mmio, reg); } return val; @@ -757,7 +760,7 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, * to touch the steering register. */ mcr_lock(gt); - xe_mmio_write32(gt, reg, value); + xe_mmio_write32(>->mmio, reg, value); mcr_unlock(gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 00af059a8971..79c426dc2505 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -185,6 +185,21 @@ unlock_dma_resv: return err; } +static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid) +{ + struct xe_vm *vm; + + down_read(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, asid); + if (vm && xe_vm_in_fault_mode(vm)) + xe_vm_get(vm); + else + vm = ERR_PTR(-EINVAL); + up_read(&xe->usm.lock); + + return vm; +} + static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) { struct xe_device *xe = gt_to_xe(gt); @@ -197,16 +212,9 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) if (pf->trva_fault) return -EFAULT; - /* ASID to VM */ - mutex_lock(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, pf->asid); - if (vm && xe_vm_in_fault_mode(vm)) - xe_vm_get(vm); - else - vm = NULL; - mutex_unlock(&xe->usm.lock); - if (!vm) - return -EINVAL; + vm = asid_to_vm(xe, pf->asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); /* * TODO: Change to read lock? Using write lock for simplicity. @@ -548,14 +556,9 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) if (acc->access_type != ACC_TRIGGER) return -EINVAL; - /* ASID to VM */ - mutex_lock(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, acc->asid); - if (vm) - xe_vm_get(vm); - mutex_unlock(&xe->usm.lock); - if (!vm || !xe_vm_in_fault_mode(vm)) - return -EINVAL; + vm = asid_to_vm(xe, acc->asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); down_read(&vm->lock); diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h index d6228baaff1e..5dc71394372d 100644 --- a/drivers/gpu/drm/xe/xe_gt_printk.h +++ b/drivers/gpu/drm/xe/xe_gt_printk.h @@ -8,7 +8,7 @@ #include <drm/drm_print.h> -#include "xe_device_types.h" +#include "xe_gt_types.h" #define xe_gt_printk(_gt, _level, _fmt, ...) \ drm_##_level(>_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 905f409db74b..e71fc3d2bda2 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -5,12 +5,15 @@ #include <drm/drm_managed.h> +#include "regs/xe_guc_regs.h" #include "regs/xe_regs.h" +#include "xe_gt.h" #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_service.h" #include "xe_mmio.h" @@ -72,7 +75,7 @@ static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe) static void pf_enable_ggtt_guest_update(struct xe_gt *gt) { - xe_mmio_write32(gt, VIRTUAL_CTRL_REG, GUEST_GTT_UPDATE_EN); + xe_mmio_write32(>->mmio, VIRTUAL_CTRL_REG, GUEST_GTT_UPDATE_EN); } /** @@ -87,6 +90,57 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) pf_enable_ggtt_guest_update(gt); xe_gt_sriov_pf_service_update(gt); + xe_gt_sriov_pf_migration_init(gt); +} + +static u32 pf_get_vf_regs_stride(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000; +} + +static struct xe_reg xe_reg_vf_to_pf(struct xe_reg vf_reg, unsigned int vfid, u32 stride) +{ + struct xe_reg pf_reg = vf_reg; + + pf_reg.vf = 0; + pf_reg.addr += stride * vfid; + + return pf_reg; +} + +static void pf_clear_vf_scratch_regs(struct xe_gt *gt, unsigned int vfid) +{ + u32 stride = pf_get_vf_regs_stride(gt_to_xe(gt)); + struct xe_reg scratch; + int n, count; + + if (xe_gt_is_media_type(gt)) { + count = MED_VF_SW_FLAG_COUNT; + for (n = 0; n < count; n++) { + scratch = xe_reg_vf_to_pf(MED_VF_SW_FLAG(n), vfid, stride); + xe_mmio_write32(>->mmio, scratch, 0); + } + } else { + count = VF_SW_FLAG_COUNT; + for (n = 0; n < count; n++) { + scratch = xe_reg_vf_to_pf(VF_SW_FLAG(n), vfid, stride); + xe_mmio_write32(>->mmio, scratch, 0); + } + } +} + +/** + * xe_gt_sriov_pf_sanitize_hw() - Reset hardware state related to a VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + pf_clear_vf_scratch_regs(gt, vfid); } /** diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index f0cb726a6919..96fab779a906 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -11,6 +11,7 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); +void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); void xe_gt_sriov_pf_restart(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 02f7328bd6ce..1f50aec3a059 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -9,9 +9,11 @@ #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_monitor.h" #include "xe_gt_sriov_pf_service.h" #include "xe_gt_sriov_printk.h" @@ -176,6 +178,7 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit) CASE2STR(PAUSE_SEND_PAUSE); CASE2STR(PAUSE_WAIT_GUC); CASE2STR(PAUSE_GUC_DONE); + CASE2STR(PAUSE_SAVE_GUC); CASE2STR(PAUSE_FAILED); CASE2STR(PAUSED); CASE2STR(RESUME_WIP); @@ -415,6 +418,10 @@ static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid) * : | : / * : v : / * : PAUSE_GUC_DONE o-----restart + * : | : + * : | o---<--busy : + * : v / / : + * : PAUSE_SAVE_GUC : * : / : * : / : * :....o..............o...............o...........: @@ -434,6 +441,7 @@ static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid) pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE); pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC); pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE); + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC); } } @@ -464,12 +472,41 @@ static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid) pf_enter_vf_pause_failed(gt, vfid); } +static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC)) + pf_enter_vf_state_machine_bug(gt, vfid); +} + +static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid) +{ + int err; + + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC)) + return false; + + err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid); + if (err) { + /* retry if busy */ + if (err == -EBUSY) { + pf_enter_vf_pause_save_guc(gt, vfid); + return true; + } + /* give up on error */ + if (err == -EIO) + pf_enter_vf_mismatch(gt, vfid); + } + + pf_enter_vf_pause_completed(gt, vfid); + return true; +} + static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid) { if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE)) return false; - pf_enter_vf_pause_completed(gt, vfid); + pf_enter_vf_pause_save_guc(gt, vfid); return true; } @@ -1008,7 +1045,7 @@ static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid) if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO)) return false; - /* XXX: placeholder */ + xe_gt_sriov_pf_sanitize_hw(gt, vfid); pf_enter_vf_flr_send_finish(gt, vfid); return true; @@ -1338,6 +1375,9 @@ static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid) if (pf_exit_vf_pause_guc_done(gt, vfid)) return true; + if (pf_exit_vf_pause_save_guc(gt, vfid)) + return true; + if (pf_exit_vf_resume_send_resume(gt, vfid)) return true; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h index 11830aafea45..f02f941b4ad2 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h @@ -27,6 +27,7 @@ * @XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE: indicates that the PF is about to send a PAUSE command. * @XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC: indicates that the PF awaits for a response from the GuC. * @XE_GT_SRIOV_STATE_PAUSE_GUC_DONE: indicates that the PF has received a response from the GuC. + * @XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC: indicates that the PF needs to save the VF GuC state. * @XE_GT_SRIOV_STATE_PAUSE_FAILED: indicates that a VF pause operation has failed. * @XE_GT_SRIOV_STATE_PAUSED: indicates that the VF is paused. * @XE_GT_SRIOV_STATE_RESUME_WIP: indicates the a VF resume operation is in progress. @@ -56,6 +57,7 @@ enum xe_gt_sriov_control_bits { XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE, + XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC, XE_GT_SRIOV_STATE_PAUSE_FAILED, XE_GT_SRIOV_STATE_PAUSED, diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 2290ddaf9594..ccbcf6e572d0 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -17,6 +17,7 @@ #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_monitor.h" #include "xe_gt_sriov_pf_policy.h" #include "xe_gt_sriov_pf_service.h" @@ -312,6 +313,9 @@ static const struct { { "stop", xe_gt_sriov_pf_control_stop_vf }, { "pause", xe_gt_sriov_pf_control_pause_vf }, { "resume", xe_gt_sriov_pf_control_resume_vf }, +#ifdef CONFIG_DRM_XE_DEBUG_SRIOV + { "restore!", xe_gt_sriov_pf_migration_restore_guc_state }, +#endif }; static ssize_t control_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) @@ -375,6 +379,44 @@ static const struct file_operations control_ops = { .llseek = default_llseek, }; +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │ ├── vf1 + * │ │ ├── guc_state + */ +static ssize_t guc_state_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + struct dentry *dent = file_dentry(file); + struct dentry *parent = dent->d_parent; + struct xe_gt *gt = extract_gt(parent); + unsigned int vfid = extract_vfid(parent); + + return xe_gt_sriov_pf_migration_read_guc_state(gt, vfid, buf, count, pos); +} + +static ssize_t guc_state_write(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + struct dentry *dent = file_dentry(file); + struct dentry *parent = dent->d_parent; + struct xe_gt *gt = extract_gt(parent); + unsigned int vfid = extract_vfid(parent); + + if (*pos) + return -EINVAL; + + return xe_gt_sriov_pf_migration_write_guc_state(gt, vfid, buf, count); +} + +static const struct file_operations guc_state_ops = { + .owner = THIS_MODULE, + .read = guc_state_read, + .write = guc_state_write, + .llseek = default_llseek, +}; + /** * xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs. * @gt: the &xe_gt to register @@ -423,5 +465,12 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) pf_add_config_attrs(gt, vfdentry, VFID(n)); debugfs_create_file("control", 0600, vfdentry, NULL, &control_ops); + + /* for testing/debugging purposes only! */ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { + debugfs_create_file("guc_state", + IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400, + vfdentry, NULL, &guc_state_ops); + } } } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c new file mode 100644 index 000000000000..c712111aa30d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "abi/guc_actions_sriov_abi.h" +#include "xe_bo.h" +#include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_migration.h" +#include "xe_gt_sriov_printk.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_sriov.h" + +/* Return: number of dwords saved/restored/required or a negative error code on failure */ +static int guc_action_vf_save_restore(struct xe_guc *guc, u32 vfid, u32 opcode, + u64 addr, u32 ndwords) +{ + u32 request[PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_SAVE_RESTORE_VF) | + FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_0_OPCODE, opcode), + FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_1_VFID, vfid), + FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_2_ADDR_LO, lower_32_bits(addr)), + FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_3_ADDR_HI, upper_32_bits(addr)), + FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_SIZE, ndwords), + }; + + return xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request)); +} + +/* Return: size of the state in dwords or a negative error code on failure */ +static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid) +{ + int ret; + + ret = guc_action_vf_save_restore(>->uc.guc, vfid, GUC_PF_OPCODE_VF_SAVE, 0, 0); + return ret ?: -ENODATA; +} + +/* Return: number of state dwords saved or a negative error code on failure */ +static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid, + void *buff, size_t size) +{ + const int ndwords = size / sizeof(u32); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = tile_to_xe(tile); + struct xe_guc *guc = >->uc.guc; + struct xe_bo *bo; + int ret; + + xe_gt_assert(gt, size % sizeof(u32) == 0); + xe_gt_assert(gt, size == ndwords * sizeof(u32)); + + bo = xe_bo_create_pin_map(xe, tile, NULL, + ALIGN(size, PAGE_SIZE), + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_SAVE, + xe_bo_ggtt_addr(bo), ndwords); + if (!ret) + ret = -ENODATA; + else if (ret > ndwords) + ret = -EPROTO; + else if (ret > 0) + xe_map_memcpy_from(xe, buff, &bo->vmap, 0, ret * sizeof(u32)); + + xe_bo_unpin_map_no_vm(bo); + return ret; +} + +/* Return: number of state dwords restored or a negative error code on failure */ +static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid, + const void *buff, size_t size) +{ + const int ndwords = size / sizeof(u32); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = tile_to_xe(tile); + struct xe_guc *guc = >->uc.guc; + struct xe_bo *bo; + int ret; + + xe_gt_assert(gt, size % sizeof(u32) == 0); + xe_gt_assert(gt, size == ndwords * sizeof(u32)); + + bo = xe_bo_create_pin_map(xe, tile, NULL, + ALIGN(size, PAGE_SIZE), + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + xe_map_memcpy_to(xe, &bo->vmap, 0, buff, size); + + ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_RESTORE, + xe_bo_ggtt_addr(bo), ndwords); + if (!ret) + ret = -ENODATA; + else if (ret > ndwords) + ret = -EPROTO; + + xe_bo_unpin_map_no_vm(bo); + return ret; +} + +static bool pf_migration_supported(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + return gt->sriov.pf.migration.supported; +} + +static struct mutex *pf_migration_mutex(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + return >->sriov.pf.migration.snapshot_lock; +} + +static struct xe_gt_sriov_state_snapshot *pf_pick_vf_snapshot(struct xe_gt *gt, + unsigned int vfid) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + lockdep_assert_held(pf_migration_mutex(gt)); + + return >->sriov.pf.vfs[vfid].snapshot; +} + +static unsigned int pf_snapshot_index(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot) +{ + return container_of(snapshot, struct xe_gt_sriov_metadata, snapshot) - gt->sriov.pf.vfs; +} + +static void pf_free_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot) +{ + struct xe_device *xe = gt_to_xe(gt); + + drmm_kfree(&xe->drm, snapshot->guc.buff); + snapshot->guc.buff = NULL; + snapshot->guc.size = 0; +} + +static int pf_alloc_guc_state(struct xe_gt *gt, + struct xe_gt_sriov_state_snapshot *snapshot, + size_t size) +{ + struct xe_device *xe = gt_to_xe(gt); + void *p; + + pf_free_guc_state(gt, snapshot); + + if (!size) + return -ENODATA; + + if (size % sizeof(u32)) + return -EINVAL; + + if (size > SZ_2M) + return -EFBIG; + + p = drmm_kzalloc(&xe->drm, size, GFP_KERNEL); + if (!p) + return -ENOMEM; + + snapshot->guc.buff = p; + snapshot->guc.size = size; + return 0; +} + +static void pf_dump_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot) +{ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { + unsigned int vfid __maybe_unused = pf_snapshot_index(gt, snapshot); + + xe_gt_sriov_dbg_verbose(gt, "VF%u GuC state is %zu dwords:\n", + vfid, snapshot->guc.size / sizeof(u32)); + print_hex_dump_bytes("state: ", DUMP_PREFIX_OFFSET, + snapshot->guc.buff, min(SZ_64, snapshot->guc.size)); + } +} + +static int pf_save_vf_guc_state(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid); + size_t size; + int ret; + + ret = pf_send_guc_query_vf_state_size(gt, vfid); + if (ret < 0) + goto fail; + size = ret * sizeof(u32); + xe_gt_sriov_dbg_verbose(gt, "VF%u state size is %d dwords (%zu bytes)\n", vfid, ret, size); + + ret = pf_alloc_guc_state(gt, snapshot, size); + if (ret < 0) + goto fail; + + ret = pf_send_guc_save_vf_state(gt, vfid, snapshot->guc.buff, size); + if (ret < 0) + goto fail; + size = ret * sizeof(u32); + xe_gt_assert(gt, size); + xe_gt_assert(gt, size <= snapshot->guc.size); + snapshot->guc.size = size; + + pf_dump_guc_state(gt, snapshot); + return 0; + +fail: + xe_gt_sriov_dbg(gt, "Unable to save VF%u state (%pe)\n", vfid, ERR_PTR(ret)); + pf_free_guc_state(gt, snapshot); + return ret; +} + +/** + * xe_gt_sriov_pf_migration_save_guc_state() - Take a GuC VF state snapshot. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid) +{ + int err; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + if (!pf_migration_supported(gt)) + return -ENOPKG; + + mutex_lock(pf_migration_mutex(gt)); + err = pf_save_vf_guc_state(gt, vfid); + mutex_unlock(pf_migration_mutex(gt)); + + return err; +} + +static int pf_restore_vf_guc_state(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid); + int ret; + + if (!snapshot->guc.size) + return -ENODATA; + + xe_gt_sriov_dbg_verbose(gt, "restoring %zu dwords of VF%u GuC state\n", + snapshot->guc.size / sizeof(u32), vfid); + ret = pf_send_guc_restore_vf_state(gt, vfid, snapshot->guc.buff, snapshot->guc.size); + if (ret < 0) + goto fail; + + xe_gt_sriov_dbg_verbose(gt, "restored %d dwords of VF%u GuC state\n", ret, vfid); + return 0; + +fail: + xe_gt_sriov_dbg(gt, "Failed to restore VF%u GuC state (%pe)\n", vfid, ERR_PTR(ret)); + return ret; +} + +/** + * xe_gt_sriov_pf_migration_restore_guc_state() - Restore a GuC VF state. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid) +{ + int ret; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + if (!pf_migration_supported(gt)) + return -ENOPKG; + + mutex_lock(pf_migration_mutex(gt)); + ret = pf_restore_vf_guc_state(gt, vfid); + mutex_unlock(pf_migration_mutex(gt)); + + return ret; +} + +#ifdef CONFIG_DEBUG_FS +/** + * xe_gt_sriov_pf_migration_read_guc_state() - Read a GuC VF state. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @buf: the user space buffer to read to + * @count: the maximum number of bytes to read + * @pos: the current position in the buffer + * + * This function is for PF only. + * + * This function reads up to @count bytes from the saved VF GuC state buffer + * at offset @pos into the user space address starting at @buf. + * + * Return: the number of bytes read or a negative error code on failure. + */ +ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid, + char __user *buf, size_t count, loff_t *pos) +{ + struct xe_gt_sriov_state_snapshot *snapshot; + ssize_t ret; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + if (!pf_migration_supported(gt)) + return -ENOPKG; + + mutex_lock(pf_migration_mutex(gt)); + snapshot = pf_pick_vf_snapshot(gt, vfid); + if (snapshot->guc.size) + ret = simple_read_from_buffer(buf, count, pos, snapshot->guc.buff, + snapshot->guc.size); + else + ret = -ENODATA; + mutex_unlock(pf_migration_mutex(gt)); + + return ret; +} + +/** + * xe_gt_sriov_pf_migration_write_guc_state() - Write a GuC VF state. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @buf: the user space buffer with GuC VF state + * @size: the size of GuC VF state (in bytes) + * + * This function is for PF only. + * + * This function reads @size bytes of the VF GuC state stored at user space + * address @buf and writes it into a internal VF state buffer. + * + * Return: the number of bytes used or a negative error code on failure. + */ +ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid, + const char __user *buf, size_t size) +{ + struct xe_gt_sriov_state_snapshot *snapshot; + loff_t pos = 0; + ssize_t ret; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid != PFID); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + if (!pf_migration_supported(gt)) + return -ENOPKG; + + mutex_lock(pf_migration_mutex(gt)); + snapshot = pf_pick_vf_snapshot(gt, vfid); + ret = pf_alloc_guc_state(gt, snapshot, size); + if (!ret) { + ret = simple_write_to_buffer(snapshot->guc.buff, size, &pos, buf, size); + if (ret < 0) + pf_free_guc_state(gt, snapshot); + else + pf_dump_guc_state(gt, snapshot); + } + mutex_unlock(pf_migration_mutex(gt)); + + return ret; +} +#endif /* CONFIG_DEBUG_FS */ + +static bool pf_check_migration_support(struct xe_gt *gt) +{ + /* GuC 70.25 with save/restore v2 is required */ + xe_gt_assert(gt, GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 25, 0)); + + /* XXX: for now this is for feature enabling only */ + return IS_ENABLED(CONFIG_DRM_XE_DEBUG); +} + +/** + * xe_gt_sriov_pf_migration_init() - Initialize support for VF migration. + * @gt: the &xe_gt + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_migration_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + + gt->sriov.pf.migration.supported = pf_check_migration_support(gt); + + if (!pf_migration_supported(gt)) + return 0; + + err = drmm_mutex_init(&xe->drm, >->sriov.pf.migration.snapshot_lock); + if (err) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h new file mode 100644 index 000000000000..09faeae00ddb --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_MIGRATION_H_ +#define _XE_GT_SRIOV_PF_MIGRATION_H_ + +#include <linux/types.h> + +struct xe_gt; + +int xe_gt_sriov_pf_migration_init(struct xe_gt *gt); +int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid); + +#ifdef CONFIG_DEBUG_FS +ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid, + char __user *buf, size_t count, loff_t *pos); +ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid, + const char __user *buf, size_t count); +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h new file mode 100644 index 000000000000..1f3110b6d44f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_ +#define _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_ + +#include <linux/mutex.h> +#include <linux/types.h> + +/** + * struct xe_gt_sriov_state_snapshot - GT-level per-VF state snapshot data. + * + * Used by the PF driver to maintain per-VF migration data. + */ +struct xe_gt_sriov_state_snapshot { + /** @guc: GuC VF state snapshot */ + struct { + /** @guc.buff: buffer with the VF state */ + u32 *buff; + /** @guc.size: size of the buffer (must be dwords aligned) */ + u32 size; + } guc; +}; + +/** + * struct xe_gt_sriov_pf_migration - GT-level data. + * + * Used by the PF driver to maintain non-VF specific per-GT data. + */ +struct xe_gt_sriov_pf_migration { + /** @supported: indicates whether the feature is supported */ + bool supported; + + /** @snapshot_lock: protects all VFs snapshots */ + struct mutex snapshot_lock; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index 0e23b7ea4f3e..924e75b94aec 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -237,7 +237,7 @@ static void read_many(struct xe_gt *gt, unsigned int count, const struct xe_reg *regs, u32 *values) { while (count--) - *values++ = xe_mmio_read32(gt, *regs++); + *values++ = xe_mmio_read32(>->mmio, *regs++); } static void pf_prepare_runtime_info(struct xe_gt *gt) @@ -402,7 +402,7 @@ static int pf_service_runtime_query(struct xe_gt *gt, u32 start, u32 limit, for (i = 0; i < count; ++i, ++data) { addr = runtime->regs[start + i].addr; - data->offset = xe_mmio_adjusted_addr(gt, addr); + data->offset = xe_mmio_adjusted_addr(>->mmio, addr); data->value = runtime->values[start + i]; } @@ -513,7 +513,7 @@ int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p for (; size--; regs++, values++) { drm_printf(p, "reg[%#x] = %#x\n", - xe_mmio_adjusted_addr(gt, regs->addr), *values); + xe_mmio_adjusted_addr(>->mmio, regs->addr), *values); } return 0; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index 28e1b130bf87..0426b1a77069 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -10,6 +10,7 @@ #include "xe_gt_sriov_pf_config_types.h" #include "xe_gt_sriov_pf_control_types.h" +#include "xe_gt_sriov_pf_migration_types.h" #include "xe_gt_sriov_pf_monitor_types.h" #include "xe_gt_sriov_pf_policy_types.h" #include "xe_gt_sriov_pf_service_types.h" @@ -29,6 +30,9 @@ struct xe_gt_sriov_metadata { /** @version: negotiated VF/PF ABI version */ struct xe_gt_sriov_pf_service_version version; + + /** @snapshot: snapshot of the VF state data */ + struct xe_gt_sriov_state_snapshot snapshot; }; /** @@ -36,6 +40,7 @@ struct xe_gt_sriov_metadata { * @service: service data. * @control: control data. * @policy: policy data. + * @migration: migration data. * @spare: PF-only provisioning configuration. * @vfs: metadata for all VFs. */ @@ -43,6 +48,7 @@ struct xe_gt_sriov_pf { struct xe_gt_sriov_pf_service service; struct xe_gt_sriov_pf_control control; struct xe_gt_sriov_pf_policy policy; + struct xe_gt_sriov_pf_migration migration; struct xe_gt_sriov_spare_config spare; struct xe_gt_sriov_metadata *vfs; }; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 4ebc82e607af..d3baba50f085 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -881,7 +881,7 @@ static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr) */ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg) { - u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + u32 addr = xe_mmio_adjusted_addr(>->mmio, reg.addr); struct vf_runtime_reg *rr; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); @@ -917,7 +917,7 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg) */ void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) { - u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + u32 addr = xe_mmio_adjusted_addr(>->mmio, reg.addr); xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); xe_gt_assert(gt, !reg.vf); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c index f3ddcbefc6bc..2ed5b6780d30 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c @@ -33,7 +33,7 @@ static const struct drm_info_list vf_info[] = { .show = xe_gt_debugfs_simple_show, .data = xe_gt_sriov_vf_print_version, }, -#if defined(CONFIG_DRM_XE_DEBUG) || defined(CONFIG_DRM_XE_DEBUG_SRIOV) +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) || IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) { "runtime_regs", .show = xe_gt_debugfs_simple_show, diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c index a05c3699e8b9..ec2b8246204b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c @@ -51,5 +51,5 @@ int xe_gt_sysfs_init(struct xe_gt *gt) gt->sysfs = &kg->base; - return devm_add_action(xe->drm.dev, gt_sysfs_fini, gt); + return devm_add_action_or_reset(xe->drm.dev, gt_sysfs_fini, gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c index 25963e33a383..03b225364101 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle.c @@ -41,9 +41,9 @@ u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt) xe_pm_runtime_get(gt_to_xe(gt)); if (xe_gt_is_media_type(gt)) - reg = xe_mmio_read32(gt, MTL_MEDIA_PERF_LIMIT_REASONS); + reg = xe_mmio_read32(>->mmio, MTL_MEDIA_PERF_LIMIT_REASONS); else - reg = xe_mmio_read32(gt, GT0_PERF_LIMIT_REASONS); + reg = xe_mmio_read32(>->mmio, GT0_PERF_LIMIT_REASONS); xe_pm_runtime_put(gt_to_xe(gt)); return reg; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index cca9cf536f76..98616de0c5bb 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -274,17 +274,19 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) xe_gt_tlb_invalidation_fence_wait(&fence); } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { + struct xe_mmio *mmio = >->mmio; + if (IS_SRIOV_VF(xe)) return 0; xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { - xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, + xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1, PVC_GUC_TLB_INV_DESC1_INVALIDATE); - xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0, + xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0, PVC_GUC_TLB_INV_DESC0_VALID); } else { - xe_mmio_write32(gt, GUC_TLB_INV_CR, + xe_mmio_write32(mmio, GUC_TLB_INV_CR, GUC_TLB_INV_CR_INVALIDATE); } xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 0662f71c6ede..651ba53623e5 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -25,7 +25,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...) va_start(argp, numregs); for (i = 0; i < numregs; i++) - fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, struct xe_reg)); + fuse_val[i] = xe_mmio_read32(>->mmio, va_arg(argp, struct xe_reg)); va_end(argp); bitmap_from_arr32(mask, fuse_val, numregs * 32); @@ -35,7 +35,7 @@ static void load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type) { struct xe_device *xe = gt_to_xe(gt); - u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE); + u32 reg_val = xe_mmio_read32(>->mmio, XELP_EU_ENABLE); u32 val = 0; int i; @@ -127,7 +127,7 @@ static void load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) { struct xe_device *xe = gt_to_xe(gt); - u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3); + u32 fuse3 = xe_mmio_read32(>->mmio, MIRROR_FUSE3); if (GRAPHICS_VER(xe) >= 20) { xe_l3_bank_mask_t per_node = {}; @@ -141,7 +141,7 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) xe_l3_bank_mask_t per_node = {}; xe_l3_bank_mask_t per_mask_bit = {}; u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3); - u32 fuse4 = xe_mmio_read32(gt, XEHP_FUSE4); + u32 fuse4 = xe_mmio_read32(>->mmio, XEHP_FUSE4); u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4); bitmap_set_value8(per_mask_bit, 0x3, 0); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 3d1c51de0268..a287b98ee70b 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -6,6 +6,7 @@ #ifndef _XE_GT_TYPES_H_ #define _XE_GT_TYPES_H_ +#include "xe_device_types.h" #include "xe_force_wake_types.h" #include "xe_gt_idle_types.h" #include "xe_gt_sriov_pf_types.h" @@ -145,19 +146,20 @@ struct xe_gt { /** * @mmio: mmio info for GT. All GTs within a tile share the same * register space, but have their own copy of GSI registers at a - * specific offset, as well as their own forcewake handling. + * specific offset. + */ + struct xe_mmio mmio; + + /** + * @pm: power management info for GT. The driver uses the GT's + * "force wake" interface to wake up specific parts of the GT hardware + * from C6 sleep states and ensure the hardware remains awake while it + * is being actively used. */ struct { - /** @mmio.fw: force wake for GT */ + /** @pm.fw: force wake for GT */ struct xe_force_wake fw; - /** - * @mmio.adj_limit: adjust MMIO address if address is below this - * value - */ - u32 adj_limit; - /** @mmio.adj_offset: offect to add to MMIO address when adjusting */ - u32 adj_offset; - } mmio; + } pm; /** @sriov: virtualization data related to GT */ union { diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 52df28032a6f..c2ddf883702b 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -14,6 +14,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_gtt_defs.h" #include "regs/xe_guc_regs.h" +#include "regs/xe_irq_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_force_wake.h" @@ -236,10 +237,10 @@ static void guc_write_params(struct xe_guc *guc) xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - xe_mmio_write32(gt, SOFT_SCRATCH(0), 0); + xe_mmio_write32(>->mmio, SOFT_SCRATCH(0), 0); for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) - xe_mmio_write32(gt, SOFT_SCRATCH(1 + i), guc->params[i]); + xe_mmio_write32(>->mmio, SOFT_SCRATCH(1 + i), guc->params[i]); } static void guc_fini_hw(void *arg) @@ -425,6 +426,7 @@ int xe_guc_post_load_init(struct xe_guc *guc) int xe_guc_reset(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); + struct xe_mmio *mmio = >->mmio; u32 guc_status, gdrst; int ret; @@ -433,15 +435,15 @@ int xe_guc_reset(struct xe_guc *guc) if (IS_SRIOV_VF(gt_to_xe(gt))) return xe_gt_sriov_vf_bootstrap(gt); - xe_mmio_write32(gt, GDRST, GRDOM_GUC); + xe_mmio_write32(mmio, GDRST, GRDOM_GUC); - ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false); + ret = xe_mmio_wait32(mmio, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false); if (ret) { xe_gt_err(gt, "GuC reset timed out, GDRST=%#x\n", gdrst); goto err_out; } - guc_status = xe_mmio_read32(gt, GUC_STATUS); + guc_status = xe_mmio_read32(mmio, GUC_STATUS); if (!(guc_status & GS_MIA_IN_RESET)) { xe_gt_err(gt, "GuC status: %#x, MIA core expected to be in reset\n", guc_status); @@ -459,6 +461,7 @@ err_out: static void guc_prepare_xfer(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); + struct xe_mmio *mmio = >->mmio; struct xe_device *xe = guc_to_xe(guc); u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC | GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | @@ -473,12 +476,12 @@ static void guc_prepare_xfer(struct xe_guc *guc) shim_flags |= REG_FIELD_PREP(GUC_MOCS_INDEX_MASK, gt->mocs.uc_index); /* Must program this register before loading the ucode with DMA */ - xe_mmio_write32(gt, GUC_SHIM_CONTROL, shim_flags); + xe_mmio_write32(mmio, GUC_SHIM_CONTROL, shim_flags); - xe_mmio_write32(gt, GT_PM_CONFIG, GT_DOORBELL_ENABLE); + xe_mmio_write32(mmio, GT_PM_CONFIG, GT_DOORBELL_ENABLE); /* Make sure GuC receives ARAT interrupts */ - xe_mmio_rmw32(gt, PMINTRMSK, ARAT_EXPIRED_INTRMSK, 0); + xe_mmio_rmw32(mmio, PMINTRMSK, ARAT_EXPIRED_INTRMSK, 0); } /* @@ -494,7 +497,7 @@ static int guc_xfer_rsa(struct xe_guc *guc) if (guc->fw.rsa_size > 256) { u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) + xe_uc_fw_rsa_offset(&guc->fw); - xe_mmio_write32(gt, UOS_RSA_SCRATCH(0), rsa_ggtt_addr); + xe_mmio_write32(>->mmio, UOS_RSA_SCRATCH(0), rsa_ggtt_addr); return 0; } @@ -503,7 +506,7 @@ static int guc_xfer_rsa(struct xe_guc *guc) return -ENOMEM; for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) - xe_mmio_write32(gt, UOS_RSA_SCRATCH(i), rsa[i]); + xe_mmio_write32(>->mmio, UOS_RSA_SCRATCH(i), rsa[i]); return 0; } @@ -583,7 +586,7 @@ static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc) * extreme thermal throttling. And a system that is that hot during boot is probably * dead anyway! */ -#if defined(CONFIG_DRM_XE_DEBUG) +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) #define GUC_LOAD_RETRY_LIMIT 20 #else #define GUC_LOAD_RETRY_LIMIT 3 @@ -593,6 +596,7 @@ static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc) static void guc_wait_ucode(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); + struct xe_mmio *mmio = >->mmio; struct xe_guc_pc *guc_pc = >->uc.guc.pc; ktime_t before, after, delta; int load_done; @@ -619,7 +623,7 @@ static void guc_wait_ucode(struct xe_guc *guc) * timeouts rather than allowing a huge timeout each time. So basically, need * to treat a timeout no different to a value change. */ - ret = xe_mmio_wait32_not(gt, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK, + ret = xe_mmio_wait32_not(mmio, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK, last_status, 1000 * 1000, &status, false); if (ret < 0) count++; @@ -657,7 +661,7 @@ static void guc_wait_ucode(struct xe_guc *guc) switch (bootrom) { case XE_BOOTROM_STATUS_NO_KEY_FOUND: xe_gt_err(gt, "invalid key requested, header = 0x%08X\n", - xe_mmio_read32(gt, GUC_HEADER_INFO)); + xe_mmio_read32(mmio, GUC_HEADER_INFO)); break; case XE_BOOTROM_STATUS_RSA_FAILED: @@ -672,7 +676,7 @@ static void guc_wait_ucode(struct xe_guc *guc) switch (ukernel) { case XE_GUC_LOAD_STATUS_EXCEPTION: xe_gt_err(gt, "firmware exception. EIP: %#x\n", - xe_mmio_read32(gt, SOFT_SCRATCH(13))); + xe_mmio_read32(mmio, SOFT_SCRATCH(13))); break; case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: @@ -824,10 +828,10 @@ static void guc_handle_mmio_msg(struct xe_guc *guc) xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - msg = xe_mmio_read32(gt, SOFT_SCRATCH(15)); + msg = xe_mmio_read32(>->mmio, SOFT_SCRATCH(15)); msg &= XE_GUC_RECV_MSG_EXCEPTION | XE_GUC_RECV_MSG_CRASH_DUMP_POSTED; - xe_mmio_write32(gt, SOFT_SCRATCH(15), 0); + xe_mmio_write32(>->mmio, SOFT_SCRATCH(15), 0); if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED) xe_gt_err(gt, "Received early GuC crash dump notification!\n"); @@ -844,14 +848,14 @@ static void guc_enable_irq(struct xe_guc *guc) REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); /* Primary GuC and media GuC share a single enable bit */ - xe_mmio_write32(gt, GUC_SG_INTR_ENABLE, + xe_mmio_write32(>->mmio, GUC_SG_INTR_ENABLE, REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST)); /* * There are separate mask bits for primary and media GuCs, so use * a RMW operation to avoid clobbering the other GuC's setting. */ - xe_mmio_rmw32(gt, GUC_SG_INTR_MASK, events, 0); + xe_mmio_rmw32(>->mmio, GUC_SG_INTR_MASK, events, 0); } int xe_guc_enable_communication(struct xe_guc *guc) @@ -863,7 +867,7 @@ int xe_guc_enable_communication(struct xe_guc *guc) struct xe_gt *gt = guc_to_gt(guc); struct xe_tile *tile = gt_to_tile(gt); - err = xe_memirq_init_guc(&tile->sriov.vf.memirq, guc); + err = xe_memirq_init_guc(&tile->memirq, guc); if (err) return err; } else { @@ -907,7 +911,7 @@ void xe_guc_notify(struct xe_guc *guc) * additional payload data to the GuC but this capability is not * used by the firmware yet. Use default value in the meantime. */ - xe_mmio_write32(gt, guc->notify_reg, default_notify_data); + xe_mmio_write32(>->mmio, guc->notify_reg, default_notify_data); } int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr) @@ -925,6 +929,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, { struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); + struct xe_mmio *mmio = >->mmio; u32 header, reply; struct xe_reg reply_reg = xe_gt_is_media_type(gt) ? MED_VF_SW_FLAG(0) : VF_SW_FLAG(0); @@ -947,19 +952,19 @@ retry: /* Not in critical data-path, just do if else for GT type */ if (xe_gt_is_media_type(gt)) { for (i = 0; i < len; ++i) - xe_mmio_write32(gt, MED_VF_SW_FLAG(i), + xe_mmio_write32(mmio, MED_VF_SW_FLAG(i), request[i]); - xe_mmio_read32(gt, MED_VF_SW_FLAG(LAST_INDEX)); + xe_mmio_read32(mmio, MED_VF_SW_FLAG(LAST_INDEX)); } else { for (i = 0; i < len; ++i) - xe_mmio_write32(gt, VF_SW_FLAG(i), + xe_mmio_write32(mmio, VF_SW_FLAG(i), request[i]); - xe_mmio_read32(gt, VF_SW_FLAG(LAST_INDEX)); + xe_mmio_read32(mmio, VF_SW_FLAG(LAST_INDEX)); } xe_guc_notify(guc); - ret = xe_mmio_wait32(gt, reply_reg, GUC_HXG_MSG_0_ORIGIN, + ret = xe_mmio_wait32(mmio, reply_reg, GUC_HXG_MSG_0_ORIGIN, FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC), 50000, &reply, false); if (ret) { @@ -969,7 +974,7 @@ timeout: return ret; } - header = xe_mmio_read32(gt, reply_reg); + header = xe_mmio_read32(mmio, reply_reg); if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_NO_RESPONSE_BUSY) { /* @@ -985,7 +990,7 @@ timeout: BUILD_BUG_ON(FIELD_MAX(GUC_HXG_MSG_0_TYPE) != GUC_HXG_TYPE_RESPONSE_SUCCESS); BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1); - ret = xe_mmio_wait32(gt, reply_reg, resp_mask, resp_mask, + ret = xe_mmio_wait32(mmio, reply_reg, resp_mask, resp_mask, 1000000, &header, false); if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != @@ -1032,7 +1037,7 @@ proto: for (i = 1; i < VF_SW_FLAG_COUNT; i++) { reply_reg.addr += sizeof(u32); - response_buf[i] = xe_mmio_read32(gt, reply_reg); + response_buf[i] = xe_mmio_read32(mmio, reply_reg); } } @@ -1155,7 +1160,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) if (err) return; - status = xe_mmio_read32(gt, GUC_STATUS); + status = xe_mmio_read32(>->mmio, GUC_STATUS); drm_printf(p, "\nGuC status 0x%08x:\n", status); drm_printf(p, "\tBootrom status = 0x%x\n", @@ -1170,7 +1175,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) drm_puts(p, "\nScratch registers:\n"); for (i = 0; i < SOFT_SCRATCH_COUNT; i++) { drm_printf(p, "\t%2d: \t0x%x\n", - i, xe_mmio_read32(gt, SOFT_SCRATCH(i))); + i, xe_mmio_read32(>->mmio, SOFT_SCRATCH(i))); } xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index d1902a8581ca..04485461aa20 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -5,6 +5,8 @@ #include "xe_guc_ads.h" +#include <linux/fault-inject.h> + #include <drm/drm_managed.h> #include <generated/xe_wa_oob.h> @@ -418,6 +420,7 @@ int xe_guc_ads_init(struct xe_guc_ads *ads) return 0; } +ALLOW_ERROR_INJECTION(xe_guc_ads_init, ERRNO); /* See xe_pci_probe() */ /** * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load @@ -684,7 +687,7 @@ static void guc_doorbell_init(struct xe_guc_ads *ads) if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) { u32 distdbreg = - xe_mmio_read32(gt, DIST_DBS_POPULATED); + xe_mmio_read32(>->mmio, DIST_DBS_POPULATED); ads_blob_write(ads, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI], diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index f24dd5223926..5aef2be2d027 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -8,6 +8,7 @@ #include <linux/bitfield.h> #include <linux/circ_buf.h> #include <linux/delay.h> +#include <linux/fault-inject.h> #include <kunit/static_stub.h> @@ -182,7 +183,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) spin_lock_init(&ct->fast_lock); xa_init(&ct->fence_lookup); INIT_WORK(&ct->g2h_worker, g2h_worker_func); - INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func); + INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func); init_waitqueue_head(&ct->wq); init_waitqueue_head(&ct->g2h_fence_wq); @@ -209,6 +210,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) ct->state = XE_GUC_CT_STATE_DISABLED; return 0; } +ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */ #define desc_read(xe_, guc_ctb__, field_) \ xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \ @@ -667,16 +669,12 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, num_g2h = 1; if (g2h_fence_needs_alloc(g2h_fence)) { - void *ptr; - g2h_fence->seqno = next_ct_seqno(ct, true); - ptr = xa_store(&ct->fence_lookup, - g2h_fence->seqno, - g2h_fence, GFP_ATOMIC); - if (IS_ERR(ptr)) { - ret = PTR_ERR(ptr); + ret = xa_err(xa_store(&ct->fence_lookup, + g2h_fence->seqno, g2h_fence, + GFP_ATOMIC)); + if (ret) goto out; - } } seqno = g2h_fence->seqno; @@ -852,7 +850,7 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret) #define ct_alive(ct) \ (xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \ !ct->ctbs.g2h.info.broken) - if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5)) + if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5)) return false; #undef ct_alive @@ -879,14 +877,11 @@ retry: retry_same_fence: ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence); if (unlikely(ret == -ENOMEM)) { - void *ptr; - /* Retry allocation /w GFP_KERNEL */ - ptr = xa_store(&ct->fence_lookup, - g2h_fence.seqno, - &g2h_fence, GFP_KERNEL); - if (IS_ERR(ptr)) - return PTR_ERR(ptr); + ret = xa_err(xa_store(&ct->fence_lookup, g2h_fence.seqno, + &g2h_fence, GFP_KERNEL)); + if (ret) + return ret; goto retry_same_fence; } else if (unlikely(ret)) { @@ -897,22 +892,32 @@ retry_same_fence: goto retry_same_fence; if (!g2h_fence_needs_alloc(&g2h_fence)) - xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno); + xa_erase(&ct->fence_lookup, g2h_fence.seqno); return ret; } ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); + + /* + * Ensure we serialize with completion side to prevent UAF with fence going out of scope on + * the stack, since we have no clue if it will fire after the timeout before we can erase + * from the xa. Also we have some dependent loads and stores below for which we need the + * correct ordering, and we lack the needed barriers. + */ + mutex_lock(&ct->lock); if (!ret) { - xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x", - g2h_fence.seqno, action[0]); - xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno); + xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s", + g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done)); + xa_erase(&ct->fence_lookup, g2h_fence.seqno); + mutex_unlock(&ct->lock); return -ETIME; } if (g2h_fence.retry) { xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n", action[0], g2h_fence.reason); + mutex_unlock(&ct->lock); goto retry; } if (g2h_fence.fail) { @@ -921,7 +926,12 @@ retry_same_fence: ret = -EIO; } - return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret; + if (ret > 0) + ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data; + + mutex_unlock(&ct->lock); + + return ret; } /** diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index a37ee3419428..651543721ce5 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -5,6 +5,8 @@ #include "xe_guc_log.h" +#include <linux/fault-inject.h> + #include <drm/drm_managed.h> #include "xe_bo.h" @@ -96,3 +98,4 @@ int xe_guc_log_init(struct xe_guc_log *log) return 0; } +ALLOW_ERROR_INJECTION(xe_guc_log_init, ERRNO); /* See xe_pci_probe() */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 034b29984d5e..2b654f820ae2 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -262,7 +262,7 @@ static void pc_set_manual_rp_ctrl(struct xe_guc_pc *pc, bool enable) u32 state = enable ? RPSWCTL_ENABLE : RPSWCTL_DISABLE; /* Allow/Disallow punit to process software freq requests */ - xe_mmio_write32(gt, RP_CONTROL, state); + xe_mmio_write32(>->mmio, RP_CONTROL, state); } static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq) @@ -274,7 +274,7 @@ static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq) /* Req freq is in units of 16.66 Mhz */ rpnswreq = REG_FIELD_PREP(REQ_RATIO_MASK, encode_freq(freq)); - xe_mmio_write32(gt, RPNSWREQ, rpnswreq); + xe_mmio_write32(>->mmio, RPNSWREQ, rpnswreq); /* Sleep for a small time to allow pcode to respond */ usleep_range(100, 300); @@ -334,9 +334,9 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc) u32 reg; if (xe_gt_is_media_type(gt)) - reg = xe_mmio_read32(gt, MTL_MPE_FREQUENCY); + reg = xe_mmio_read32(>->mmio, MTL_MPE_FREQUENCY); else - reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY); + reg = xe_mmio_read32(>->mmio, MTL_GT_RPE_FREQUENCY); pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg)); } @@ -353,9 +353,9 @@ static void tgl_update_rpe_value(struct xe_guc_pc *pc) * PCODE at a different register */ if (xe->info.platform == XE_PVC) - reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP); + reg = xe_mmio_read32(>->mmio, PVC_RP_STATE_CAP); else - reg = xe_mmio_read32(gt, FREQ_INFO_REC); + reg = xe_mmio_read32(>->mmio, FREQ_INFO_REC); pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } @@ -392,10 +392,10 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc) /* When in RC6, actual frequency reported will be 0. */ if (GRAPHICS_VERx100(xe) >= 1270) { - freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1); + freq = xe_mmio_read32(>->mmio, MTL_MIRROR_TARGET_WP1); freq = REG_FIELD_GET(MTL_CAGF_MASK, freq); } else { - freq = xe_mmio_read32(gt, GT_PERF_STATUS); + freq = xe_mmio_read32(>->mmio, GT_PERF_STATUS); freq = REG_FIELD_GET(CAGF_MASK, freq); } @@ -425,7 +425,7 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) if (ret) return ret; - *freq = xe_mmio_read32(gt, RPNSWREQ); + *freq = xe_mmio_read32(>->mmio, RPNSWREQ); *freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq); *freq = decode_freq(*freq); @@ -612,10 +612,10 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc) u32 reg, gt_c_state; if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { - reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1); + reg = xe_mmio_read32(>->mmio, MTL_MIRROR_TARGET_WP1); gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg); } else { - reg = xe_mmio_read32(gt, GT_CORE_STATUS); + reg = xe_mmio_read32(>->mmio, GT_CORE_STATUS); gt_c_state = REG_FIELD_GET(RCN_MASK, reg); } @@ -638,7 +638,7 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); u32 reg; - reg = xe_mmio_read32(gt, GT_GFX_RC6); + reg = xe_mmio_read32(>->mmio, GT_GFX_RC6); return reg; } @@ -652,7 +652,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); u64 reg; - reg = xe_mmio_read32(gt, MTL_MEDIA_MC6); + reg = xe_mmio_read32(>->mmio, MTL_MEDIA_MC6); return reg; } @@ -665,9 +665,9 @@ static void mtl_init_fused_rp_values(struct xe_guc_pc *pc) xe_device_assert_mem_access(pc_to_xe(pc)); if (xe_gt_is_media_type(gt)) - reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP); + reg = xe_mmio_read32(>->mmio, MTL_MEDIAP_STATE_CAP); else - reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP); + reg = xe_mmio_read32(>->mmio, MTL_RP_STATE_CAP); pc->rp0_freq = decode_freq(REG_FIELD_GET(MTL_RP0_CAP_MASK, reg)); @@ -683,9 +683,9 @@ static void tgl_init_fused_rp_values(struct xe_guc_pc *pc) xe_device_assert_mem_access(pc_to_xe(pc)); if (xe->info.platform == XE_PVC) - reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP); + reg = xe_mmio_read32(>->mmio, PVC_RP_STATE_CAP); else - reg = xe_mmio_read32(gt, RP_STATE_CAP); + reg = xe_mmio_read32(>->mmio, RP_STATE_CAP); pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c index ade6162dc259..8f62de026724 100644 --- a/drivers/gpu/drm/xe/xe_guc_relay.c +++ b/drivers/gpu/drm/xe/xe_guc_relay.c @@ -5,6 +5,7 @@ #include <linux/bitfield.h> #include <linux/delay.h> +#include <linux/fault-inject.h> #include <drm/drm_managed.h> @@ -355,6 +356,7 @@ int xe_guc_relay_init(struct xe_guc_relay *relay) return drmm_add_action_or_reset(&xe->drm, __fini_relay, relay); } +ALLOW_ERROR_INJECTION(xe_guc_relay_init, ERRNO); /* See xe_pci_probe() */ static u32 to_relay_error(int err) { diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index fbbe6a487bbb..8a5c21a87977 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -276,10 +276,26 @@ static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) } #endif +static void xe_guc_submit_fini(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + ret = wait_event_timeout(guc->submission_state.fini_wq, + xa_empty(&guc->submission_state.exec_queue_lookup), + HZ * 5); + + drain_workqueue(xe->destroy_wq); + + xe_gt_assert(gt, ret); +} + static void guc_submit_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; + xe_guc_submit_fini(guc); xa_destroy(&guc->submission_state.exec_queue_lookup); free_submit_wq(guc); } @@ -290,9 +306,15 @@ static void guc_submit_wedged_fini(void *arg) struct xe_exec_queue *q; unsigned long index; - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (exec_queue_wedged(q)) + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + if (exec_queue_wedged(q)) { + mutex_unlock(&guc->submission_state.lock); xe_exec_queue_put(q); + mutex_lock(&guc->submission_state.lock); + } + } + mutex_unlock(&guc->submission_state.lock); } static const struct xe_exec_queue_ops guc_exec_queue_ops; @@ -345,6 +367,8 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) xa_init(&guc->submission_state.exec_queue_lookup); + init_waitqueue_head(&guc->submission_state.fini_wq); + primelockdep(guc); return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); @@ -361,12 +385,14 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa xe_guc_id_mgr_release_locked(&guc->submission_state.idm, q->guc->id, q->width); + + if (xa_empty(&guc->submission_state.exec_queue_lookup)) + wake_up(&guc->submission_state.fini_wq); } static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) { int ret; - void *ptr; int i; /* @@ -386,12 +412,10 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) q->guc->id = ret; for (i = 0; i < q->width; ++i) { - ptr = xa_store(&guc->submission_state.exec_queue_lookup, - q->guc->id + i, q, GFP_NOWAIT); - if (IS_ERR(ptr)) { - ret = PTR_ERR(ptr); + ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup, + q->guc->id + i, q, GFP_NOWAIT)); + if (ret) goto err_release; - } } return 0; @@ -1268,13 +1292,16 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) static void guc_exec_queue_fini_async(struct xe_exec_queue *q) { + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); /* We must block on kernel engines so slabs are empty on driver unload */ if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) __guc_exec_queue_fini_async(&q->guc->fini_async); else - queue_work(system_wq, &q->guc->fini_async); + queue_work(xe->destroy_wq, &q->guc->fini_async); } static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) @@ -1796,6 +1823,7 @@ static void guc_exec_queue_start(struct xe_exec_queue *q) } xe_sched_submission_start(sched); + xe_sched_submission_resume_tdr(sched); } int xe_guc_submit_start(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 546ac6350a31..69046f698271 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -81,6 +81,8 @@ struct xe_guc { #endif /** @submission_state.enabled: submission is enabled */ bool enabled; + /** @submission_state.fini_wq: submit fini wait queue */ + wait_queue_head_t fini_wq; } submission_state; /** @hwconfig: Hardware config state */ struct { diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index f5459f97af23..77c5830309cf 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -229,7 +229,7 @@ bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type) { struct xe_gt *gt = huc_to_gt(huc); - return xe_mmio_read32(gt, huc_auth_modes[type].reg) & huc_auth_modes[type].val; + return xe_mmio_read32(>->mmio, huc_auth_modes[type].reg) & huc_auth_modes[type].val; } int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type) @@ -268,7 +268,7 @@ int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type) goto fail; } - ret = xe_mmio_wait32(gt, huc_auth_modes[type].reg, huc_auth_modes[type].val, + ret = xe_mmio_wait32(>->mmio, huc_auth_modes[type].reg, huc_auth_modes[type].val, huc_auth_modes[type].val, 100000, NULL, false); if (ret) { xe_gt_err(gt, "HuC: firmware not verified: %pe\n", ERR_PTR(ret)); @@ -308,7 +308,7 @@ void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p) return; drm_printf(p, "\nHuC status: 0x%08x\n", - xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO)); + xe_mmio_read32(>->mmio, HUC_KERNEL_LOAD_INFO)); xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index c9c3beb3ce8d..ea6d9ef7fab6 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -12,6 +12,7 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_irq_regs.h" #include "xe_assert.h" #include "xe_bo.h" #include "xe_device.h" @@ -295,7 +296,7 @@ void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, reg.addr += hwe->mmio_base; - xe_mmio_write32(hwe->gt, reg, val); + xe_mmio_write32(&hwe->gt->mmio, reg, val); } /** @@ -315,7 +316,7 @@ u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) reg.addr += hwe->mmio_base; - return xe_mmio_read32(hwe->gt, reg); + return xe_mmio_read32(&hwe->gt->mmio, reg); } void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) @@ -324,7 +325,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE); if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask) - xe_mmio_write32(hwe->gt, RCU_MODE, + xe_mmio_write32(&hwe->gt->mmio, RCU_MODE, _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); @@ -354,7 +355,7 @@ static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt, hwe->class != XE_ENGINE_CLASS_RENDER) return false; - return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE; + return xe_mmio_read32(&hwe->gt->mmio, XEHP_FUSE4) & CFEG_WMTP_DISABLE; } void @@ -460,6 +461,30 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr); } +static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance) +{ + const struct engine_info *info; + enum xe_hw_engine_id id; + + for (id = 0; id < XE_NUM_HW_ENGINES; ++id) { + info = &engine_infos[id]; + if (info->class == class && info->instance == instance) + return info; + } + + return NULL; +} + +static u16 get_msix_irq_offset(struct xe_gt *gt, enum xe_engine_class class) +{ + /* For MSI-X, hw engines report to offset of engine instance zero */ + const struct engine_info *info = find_engine_info(class, 0); + + xe_gt_assert(gt, info); + + return info ? info->irq_offset : 0; +} + static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, enum xe_hw_engine_id id) { @@ -479,7 +504,9 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, hwe->class = info->class; hwe->instance = info->instance; hwe->mmio_base = info->mmio_base; - hwe->irq_offset = info->irq_offset; + hwe->irq_offset = xe_device_has_msix(gt_to_xe(gt)) ? + get_msix_irq_offset(gt, info->class) : + info->irq_offset; hwe->domain = info->domain; hwe->name = info->name; hwe->fence_irq = >->fence_irq[info->class]; @@ -612,7 +639,7 @@ static void read_media_fuses(struct xe_gt *gt) xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE); + media_fuse = xe_mmio_read32(>->mmio, GT_VEBOX_VDBOX_DISABLE); /* * Pre-Xe_HP platforms had register bits representing absent engines, @@ -657,7 +684,7 @@ static void read_copy_fuses(struct xe_gt *gt) xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3); + bcs_mask = xe_mmio_read32(>->mmio, MIRROR_FUSE3); bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask); /* BCS0 is always present; only BCS1-BCS8 may be fused off */ @@ -704,7 +731,7 @@ static void read_compute_fuses_from_reg(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); u32 ccs_mask; - ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4); + ccs_mask = xe_mmio_read32(>->mmio, XEHP_FUSE4); ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask); for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) { @@ -742,8 +769,8 @@ static void check_gsc_availability(struct xe_gt *gt) gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0); /* interrupts where previously enabled, so turn them off */ - xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0); - xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0); + xe_mmio_write32(>->mmio, GUNIT_GSC_INTR_ENABLE, 0); + xe_mmio_write32(>->mmio, GUNIT_GSC_INTR_MASK, ~0); drm_info(&xe->drm, "gsccs disabled due to lack of FW\n"); } @@ -809,6 +836,7 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot) { struct xe_gt *gt = hwe->gt; + struct xe_mmio *mmio = >->mmio; struct xe_device *xe = gt_to_xe(gt); unsigned int dss; u16 group, instance; @@ -820,11 +848,11 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe, if (is_slice_common_per_gslice(xe) == false) { snapshot->reg.instdone.slice_common[0] = - xe_mmio_read32(gt, SC_INSTDONE); + xe_mmio_read32(mmio, SC_INSTDONE); snapshot->reg.instdone.slice_common_extra[0] = - xe_mmio_read32(gt, SC_INSTDONE_EXTRA); + xe_mmio_read32(mmio, SC_INSTDONE_EXTRA); snapshot->reg.instdone.slice_common_extra2[0] = - xe_mmio_read32(gt, SC_INSTDONE_EXTRA2); + xe_mmio_read32(mmio, SC_INSTDONE_EXTRA2); } else { for_each_geometry_dss(dss, gt, group, instance) { snapshot->reg.instdone.slice_common[dss] = @@ -903,6 +931,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain); snapshot->mmio_base = hwe->mmio_base; + snapshot->kernel_reserved = xe_hw_engine_is_reserved(hwe); /* no more VF accessible data below this point */ if (IS_SRIOV_VF(gt_to_xe(hwe->gt))) @@ -959,7 +988,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) xe_hw_engine_snapshot_instdone_capture(hwe, snapshot); if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) - snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE); + snapshot->reg.rcu_mode = xe_mmio_read32(&hwe->gt->mmio, RCU_MODE); return snapshot; } @@ -1025,6 +1054,8 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, snapshot->logical_instance); drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n", snapshot->forcewake.domain, snapshot->forcewake.ref); + drm_printf(p, "\tReserved: %s\n", + str_yes_no(snapshot->kernel_reserved)); drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam); drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga); drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n", @@ -1150,7 +1181,7 @@ const char *xe_hw_engine_class_to_str(enum xe_engine_class class) u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe) { - return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base)); + return xe_mmio_read64_2x32(&hwe->gt->mmio, RING_TIMESTAMP(hwe->mmio_base)); } enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 8be6d420ece4..be60edb3e673 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -173,6 +173,8 @@ struct xe_hw_engine_snapshot { } forcewake; /** @mmio_base: MMIO base address of this hw engine*/ u32 mmio_base; + /** @kernel_reserved: Engine reserved, can't be used by userspace */ + bool kernel_reserved; /** @reg: Useful MMIO register snapshot */ struct { /** @reg.ring_execlist_status: RING_EXECLIST_STATUS */ diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index aa11728e7e79..fde56dad3ab7 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -149,7 +149,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *v u64 reg_val, min, max; struct xe_device *xe = hwmon->xe; struct xe_reg rapl_limit, pkg_power_sku; - struct xe_gt *mmio = xe_root_mmio_gt(xe); + struct xe_mmio *mmio = xe_root_tile_mmio(xe); rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); @@ -190,7 +190,7 @@ unlock: static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value) { - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); int ret = 0; u64 reg_val; struct xe_reg rapl_limit; @@ -222,7 +222,7 @@ unlock: static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value) { - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); u64 reg_val; @@ -259,7 +259,7 @@ static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, l static void xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy) { - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); struct xe_hwmon_energy_info *ei = &hwmon->ei[channel]; u64 reg_val; @@ -282,7 +282,7 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at char *buf) { struct xe_hwmon *hwmon = dev_get_drvdata(dev); - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); u32 x, y, x_w = 2; /* 2 bits */ u64 r, tau4, out; int sensor_index = to_sensor_dev_attr(attr)->index; @@ -323,7 +323,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a const char *buf, size_t count) { struct xe_hwmon *hwmon = dev_get_drvdata(dev); - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); u32 x, y, rxy, x_w = 2; /* 2 bits */ u64 tau4, r, max_win; unsigned long val; @@ -498,7 +498,7 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *value) { - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); u64 reg_val; reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel)); @@ -781,7 +781,7 @@ static const struct hwmon_chip_info hwmon_chip_info = { static void xe_hwmon_get_preregistration_info(struct xe_device *xe) { - struct xe_gt *mmio = xe_root_mmio_gt(xe); + struct xe_mmio *mmio = xe_root_tile_mmio(xe); struct xe_hwmon *hwmon = xe->hwmon; long energy; u64 val_sku_unit = 0; diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 5f2c368c35ad..b7995ebd54ab 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -10,8 +10,7 @@ #include <drm/drm_managed.h> #include "display/xe_display.h" -#include "regs/xe_gt_regs.h" -#include "regs/xe_regs.h" +#include "regs/xe_irq_regs.h" #include "xe_device.h" #include "xe_drv.h" #include "xe_gsc_proxy.h" @@ -30,14 +29,14 @@ #define IIR(offset) XE_REG(offset + 0x8) #define IER(offset) XE_REG(offset + 0xc) -static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg) +static void assert_iir_is_zero(struct xe_mmio *mmio, struct xe_reg reg) { u32 val = xe_mmio_read32(mmio, reg); if (val == 0) return; - drm_WARN(>_to_xe(mmio)->drm, 1, + drm_WARN(&mmio->tile->xe->drm, 1, "Interrupt register 0x%x is not zero: 0x%08x\n", reg.addr, val); xe_mmio_write32(mmio, reg, 0xffffffff); @@ -52,7 +51,7 @@ static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg) */ static void unmask_and_enable(struct xe_tile *tile, u32 irqregs, u32 bits) { - struct xe_gt *mmio = tile->primary_gt; + struct xe_mmio *mmio = &tile->mmio; /* * If we're just enabling an interrupt now, it shouldn't already @@ -70,7 +69,7 @@ static void unmask_and_enable(struct xe_tile *tile, u32 irqregs, u32 bits) /* Mask and disable all interrupts. */ static void mask_and_disable(struct xe_tile *tile, u32 irqregs) { - struct xe_gt *mmio = tile->primary_gt; + struct xe_mmio *mmio = &tile->mmio; xe_mmio_write32(mmio, IMR(irqregs), ~0); /* Posting read */ @@ -87,7 +86,7 @@ static void mask_and_disable(struct xe_tile *tile, u32 irqregs) static u32 xelp_intr_disable(struct xe_device *xe) { - struct xe_gt *mmio = xe_root_mmio_gt(xe); + struct xe_mmio *mmio = xe_root_tile_mmio(xe); xe_mmio_write32(mmio, GFX_MSTR_IRQ, 0); @@ -103,7 +102,7 @@ static u32 xelp_intr_disable(struct xe_device *xe) static u32 gu_misc_irq_ack(struct xe_device *xe, const u32 master_ctl) { - struct xe_gt *mmio = xe_root_mmio_gt(xe); + struct xe_mmio *mmio = xe_root_tile_mmio(xe); u32 iir; if (!(master_ctl & GU_MISC_IRQ)) @@ -118,7 +117,7 @@ gu_misc_irq_ack(struct xe_device *xe, const u32 master_ctl) static inline void xelp_intr_enable(struct xe_device *xe, bool stall) { - struct xe_gt *mmio = xe_root_mmio_gt(xe); + struct xe_mmio *mmio = xe_root_tile_mmio(xe); xe_mmio_write32(mmio, GFX_MSTR_IRQ, MASTER_IRQ); if (stall) @@ -129,12 +128,13 @@ static inline void xelp_intr_enable(struct xe_device *xe, bool stall) void xe_irq_enable_hwe(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + struct xe_mmio *mmio = >->mmio; u32 ccs_mask, bcs_mask; u32 irqs, dmask, smask; u32 gsc_mask = 0; u32 heci_mask = 0; - if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) + if (xe_device_uses_memirq(xe)) return; if (xe_device_uc_enabled(xe)) { @@ -155,35 +155,35 @@ void xe_irq_enable_hwe(struct xe_gt *gt) if (!xe_gt_is_media_type(gt)) { /* Enable interrupts for each engine class */ - xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, dmask); + xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask); if (ccs_mask) - xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, smask); + xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, smask); /* Unmask interrupts for each engine instance */ - xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~smask); - xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~smask); + xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, ~smask); + xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, ~smask); if (bcs_mask & (BIT(1)|BIT(2))) - xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask); if (bcs_mask & (BIT(3)|BIT(4))) - xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask); if (bcs_mask & (BIT(5)|BIT(6))) - xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask); if (bcs_mask & (BIT(7)|BIT(8))) - xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask); if (ccs_mask & (BIT(0)|BIT(1))) - xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~dmask); if (ccs_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~dmask); } if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) { /* Enable interrupts for each engine class */ - xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, dmask); + xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, dmask); /* Unmask interrupts for each engine instance */ - xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask); - xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask); - xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, ~dmask); /* * the heci2 interrupt is enabled via the same register as the @@ -197,17 +197,17 @@ void xe_irq_enable_hwe(struct xe_gt *gt) } if (gsc_mask) { - xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask | heci_mask); - xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~gsc_mask); + xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, gsc_mask | heci_mask); + xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~gsc_mask); } if (heci_mask) - xe_mmio_write32(gt, HECI2_RSVD_INTR_MASK, ~(heci_mask << 16)); + xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~(heci_mask << 16)); } } static u32 gt_engine_identity(struct xe_device *xe, - struct xe_gt *mmio, + struct xe_mmio *mmio, const unsigned int bank, const unsigned int bit) { @@ -279,7 +279,7 @@ static struct xe_gt *pick_engine_gt(struct xe_tile *tile, return tile->media_gt; default: break; - }; + } fallthrough; default: return tile->primary_gt; @@ -291,7 +291,7 @@ static void gt_irq_handler(struct xe_tile *tile, u32 *identity) { struct xe_device *xe = tile_to_xe(tile); - struct xe_gt *mmio = tile->primary_gt; + struct xe_mmio *mmio = &tile->mmio; unsigned int bank, bit; u16 instance, intr_vec; enum xe_engine_class class; @@ -376,7 +376,7 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg) static u32 dg1_intr_disable(struct xe_device *xe) { - struct xe_gt *mmio = xe_root_mmio_gt(xe); + struct xe_mmio *mmio = xe_root_tile_mmio(xe); u32 val; /* First disable interrupts */ @@ -394,7 +394,7 @@ static u32 dg1_intr_disable(struct xe_device *xe) static void dg1_intr_enable(struct xe_device *xe, bool stall) { - struct xe_gt *mmio = xe_root_mmio_gt(xe); + struct xe_mmio *mmio = xe_root_tile_mmio(xe); xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ); if (stall) @@ -431,7 +431,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) } for_each_tile(tile, xe, id) { - struct xe_gt *mmio = tile->primary_gt; + struct xe_mmio *mmio = &tile->mmio; if ((master_tile_ctl & DG1_MSTR_TILE(tile->id)) == 0) continue; @@ -474,7 +474,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) static void gt_irq_reset(struct xe_tile *tile) { - struct xe_gt *mmio = tile->primary_gt; + struct xe_mmio *mmio = &tile->mmio; u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, XE_ENGINE_CLASS_COMPUTE); @@ -504,7 +504,7 @@ static void gt_irq_reset(struct xe_tile *tile) if (ccs_mask & (BIT(0)|BIT(1))) xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~0); if (ccs_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0); + xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0); if ((tile->media_gt && xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) || @@ -547,7 +547,7 @@ static void dg1_irq_reset(struct xe_tile *tile) static void dg1_irq_reset_mstr(struct xe_tile *tile) { - struct xe_gt *mmio = tile->primary_gt; + struct xe_mmio *mmio = &tile->mmio; xe_mmio_write32(mmio, GFX_MSTR_IRQ, ~0); } @@ -566,7 +566,7 @@ static void vf_irq_reset(struct xe_device *xe) for_each_tile(tile, xe, id) { if (xe_device_has_memirq(xe)) - xe_memirq_reset(&tile->sriov.vf.memirq); + xe_memirq_reset(&tile->memirq); else gt_irq_reset(tile); } @@ -609,7 +609,7 @@ static void vf_irq_postinstall(struct xe_device *xe) for_each_tile(tile, xe, id) if (xe_device_has_memirq(xe)) - xe_memirq_postinstall(&tile->sriov.vf.memirq); + xe_memirq_postinstall(&tile->memirq); if (GRAPHICS_VERx100(xe) < 1210) xelp_intr_enable(xe, true); @@ -652,7 +652,7 @@ static irqreturn_t vf_mem_irq_handler(int irq, void *arg) spin_unlock(&xe->irq.lock); for_each_tile(tile, xe, id) - xe_memirq_handler(&tile->sriov.vf.memirq); + xe_memirq_handler(&tile->memirq); return IRQ_HANDLED; } diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 8999ac511555..a60ceae4c6dd 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -193,7 +193,7 @@ static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt) lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo)); lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K)); - xe_mmio_write32(tile->primary_gt, + xe_mmio_write32(&tile->mmio, GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG, LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K)); } diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index aec7db39c061..f0976230012a 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -599,10 +599,10 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) { - struct xe_memirq *memirq = >_to_tile(hwe->gt)->sriov.vf.memirq; + struct xe_memirq *memirq = >_to_tile(hwe->gt)->memirq; struct xe_device *xe = gt_to_xe(hwe->gt); - if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe)) + if (!xe_device_uses_memirq(xe)) return; regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM | @@ -613,9 +613,9 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; - regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq); + regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe); regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; - regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq); + regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe); } static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index 95b6e9d7b7db..e3610cb90bb9 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -5,8 +5,8 @@ #include <drm/drm_managed.h> -#include "regs/xe_gt_regs.h" #include "regs/xe_guc_regs.h" +#include "regs/xe_irq_regs.h" #include "regs/xe_regs.h" #include "xe_assert.h" @@ -19,15 +19,25 @@ #include "xe_hw_engine.h" #include "xe_map.h" #include "xe_memirq.h" -#include "xe_sriov.h" -#include "xe_sriov_printk.h" #define memirq_assert(m, condition) xe_tile_assert(memirq_to_tile(m), condition) -#define memirq_debug(m, msg...) xe_sriov_dbg_verbose(memirq_to_xe(m), "MEMIRQ: " msg) +#define memirq_printk(m, _level, _fmt, ...) \ + drm_##_level(&memirq_to_xe(m)->drm, "MEMIRQ%u: " _fmt, \ + memirq_to_tile(m)->id, ##__VA_ARGS__) + +#ifdef CONFIG_DRM_XE_DEBUG_MEMIRQ +#define memirq_debug(m, _fmt, ...) memirq_printk(m, dbg, _fmt, ##__VA_ARGS__) +#else +#define memirq_debug(...) +#endif + +#define memirq_err(m, _fmt, ...) memirq_printk(m, err, _fmt, ##__VA_ARGS__) +#define memirq_err_ratelimited(m, _fmt, ...) \ + memirq_printk(m, err_ratelimited, _fmt, ##__VA_ARGS__) static struct xe_tile *memirq_to_tile(struct xe_memirq *memirq) { - return container_of(memirq, struct xe_tile, sriov.vf.memirq); + return container_of(memirq, struct xe_tile, memirq); } static struct xe_device *memirq_to_xe(struct xe_memirq *memirq) @@ -105,6 +115,44 @@ static const char *guc_name(struct xe_guc *guc) * | | * | | * +-----------+ + * + * + * MSI-X use case + * + * When using MSI-X, hw engines report interrupt status and source to engine + * instance 0. For this scenario, in order to differentiate between the + * engines, we need to pass different status/source pointers in the LRC. + * + * The requirements on those pointers are: + * - Interrupt status should be 4KiB aligned + * - Interrupt source should be 64 bytes aligned + * + * To accommodate this, we duplicate the memirq page layout above - + * allocating a page for each engine instance and pass this page in the LRC. + * Note that the same page can be reused for different engine types. + * For example, an LRC executing on CCS #x will have pointers to page #x, + * and an LRC executing on BCS #x will have the same pointers. + * + * :: + * + * 0x0000 +==============================+ <== page for instance 0 (BCS0, CCS0, etc.) + * | Interrupt Status Report Page | + * 0x0400 +==============================+ + * | Interrupt Source Report Page | + * 0x0440 +==============================+ + * | Interrupt Enable Mask | + * +==============================+ + * | Not used | + * 0x1000 +==============================+ <== page for instance 1 (BCS1, CCS1, etc.) + * | Interrupt Status Report Page | + * 0x1400 +==============================+ + * | Interrupt Source Report Page | + * 0x1440 +==============================+ + * | Not used | + * 0x2000 +==============================+ <== page for instance 2 (BCS2, CCS2, etc.) + * | ... | + * +==============================+ + * */ static void __release_xe_bo(struct drm_device *drm, void *arg) @@ -114,18 +162,30 @@ static void __release_xe_bo(struct drm_device *drm, void *arg) xe_bo_unpin_map_no_vm(bo); } +static inline bool hw_reports_to_instance_zero(struct xe_memirq *memirq) +{ + /* + * When the HW engines are configured to use MSI-X, + * they report interrupt status and source to the offset of + * engine instance 0. + */ + return xe_device_has_msix(memirq_to_xe(memirq)); +} + static int memirq_alloc_pages(struct xe_memirq *memirq) { struct xe_device *xe = memirq_to_xe(memirq); struct xe_tile *tile = memirq_to_tile(memirq); + size_t bo_size = hw_reports_to_instance_zero(memirq) ? + XE_HW_ENGINE_MAX_INSTANCE * SZ_4K : SZ_4K; struct xe_bo *bo; int err; - BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET, SZ_64)); - BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET, SZ_4K)); + BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET(0), SZ_64)); + BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET(0), SZ_4K)); /* XXX: convert to managed bo */ - bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, + bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size, ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | @@ -140,25 +200,25 @@ static int memirq_alloc_pages(struct xe_memirq *memirq) memirq_assert(memirq, !xe_bo_is_vram(bo)); memirq_assert(memirq, !memirq->bo); - iosys_map_memset(&bo->vmap, 0, 0, SZ_4K); + iosys_map_memset(&bo->vmap, 0, 0, bo_size); memirq->bo = bo; - memirq->source = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_SOURCE_OFFSET); - memirq->status = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_STATUS_OFFSET); + memirq->source = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_SOURCE_OFFSET(0)); + memirq->status = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_STATUS_OFFSET(0)); memirq->mask = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_ENABLE_OFFSET); memirq_assert(memirq, !memirq->source.is_iomem); memirq_assert(memirq, !memirq->status.is_iomem); memirq_assert(memirq, !memirq->mask.is_iomem); - memirq_debug(memirq, "page offsets: source %#x status %#x\n", - xe_memirq_source_ptr(memirq), xe_memirq_status_ptr(memirq)); + memirq_debug(memirq, "page offsets: bo %#x bo_size %zu source %#x status %#x\n", + xe_bo_ggtt_addr(bo), bo_size, XE_MEMIRQ_SOURCE_OFFSET(0), + XE_MEMIRQ_STATUS_OFFSET(0)); return drmm_add_action_or_reset(&xe->drm, __release_xe_bo, memirq->bo); out: - xe_sriov_err(memirq_to_xe(memirq), - "Failed to allocate memirq page (%pe)\n", ERR_PTR(err)); + memirq_err(memirq, "Failed to allocate memirq page (%pe)\n", ERR_PTR(err)); return err; } @@ -178,9 +238,7 @@ static void memirq_set_enable(struct xe_memirq *memirq, bool enable) * * These allocations are managed and will be implicitly released on unload. * - * Note: This function shall be called only by the VF driver. - * - * If this function fails then VF driver won't be able to operate correctly. + * If this function fails then the driver won't be able to operate correctly. * If `Memory Based Interrupts`_ are not used this function will return 0. * * Return: 0 on success or a negative error code on failure. @@ -190,9 +248,7 @@ int xe_memirq_init(struct xe_memirq *memirq) struct xe_device *xe = memirq_to_xe(memirq); int err; - memirq_assert(memirq, IS_SRIOV_VF(xe)); - - if (!xe_device_has_memirq(xe)) + if (!xe_device_uses_memirq(xe)) return 0; err = memirq_alloc_pages(memirq); @@ -208,52 +264,59 @@ int xe_memirq_init(struct xe_memirq *memirq) /** * xe_memirq_source_ptr - Get GGTT's offset of the `Interrupt Source Report Page`_. * @memirq: the &xe_memirq to query + * @hwe: the hw engine for which we want the report page * - * Shall be called only on VF driver when `Memory Based Interrupts`_ are used + * Shall be called when `Memory Based Interrupts`_ are used * and xe_memirq_init() didn't fail. * * Return: GGTT's offset of the `Interrupt Source Report Page`_. */ -u32 xe_memirq_source_ptr(struct xe_memirq *memirq) +u32 xe_memirq_source_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe) { - memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); - memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + u16 instance; + + memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq))); memirq_assert(memirq, memirq->bo); - return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_SOURCE_OFFSET; + instance = hw_reports_to_instance_zero(memirq) ? hwe->instance : 0; + + return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_SOURCE_OFFSET(instance); } /** * xe_memirq_status_ptr - Get GGTT's offset of the `Interrupt Status Report Page`_. * @memirq: the &xe_memirq to query + * @hwe: the hw engine for which we want the report page * - * Shall be called only on VF driver when `Memory Based Interrupts`_ are used + * Shall be called when `Memory Based Interrupts`_ are used * and xe_memirq_init() didn't fail. * * Return: GGTT's offset of the `Interrupt Status Report Page`_. */ -u32 xe_memirq_status_ptr(struct xe_memirq *memirq) +u32 xe_memirq_status_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe) { - memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); - memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + u16 instance; + + memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq))); memirq_assert(memirq, memirq->bo); - return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_STATUS_OFFSET; + instance = hw_reports_to_instance_zero(memirq) ? hwe->instance : 0; + + return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_STATUS_OFFSET(instance); } /** * xe_memirq_enable_ptr - Get GGTT's offset of the Interrupt Enable Mask. * @memirq: the &xe_memirq to query * - * Shall be called only on VF driver when `Memory Based Interrupts`_ are used + * Shall be called when `Memory Based Interrupts`_ are used * and xe_memirq_init() didn't fail. * * Return: GGTT's offset of the Interrupt Enable Mask. */ u32 xe_memirq_enable_ptr(struct xe_memirq *memirq) { - memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); - memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq))); memirq_assert(memirq, memirq->bo); return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_ENABLE_OFFSET; @@ -267,7 +330,7 @@ u32 xe_memirq_enable_ptr(struct xe_memirq *memirq) * Register `Interrupt Source Report Page`_ and `Interrupt Status Report Page`_ * to be used by the GuC when `Memory Based Interrupts`_ are required. * - * Shall be called only on VF driver when `Memory Based Interrupts`_ are used + * Shall be called when `Memory Based Interrupts`_ are used * and xe_memirq_init() didn't fail. * * Return: 0 on success or a negative error code on failure. @@ -279,12 +342,11 @@ int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc) u32 source, status; int err; - memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); - memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq))); memirq_assert(memirq, memirq->bo); - source = xe_memirq_source_ptr(memirq) + offset; - status = xe_memirq_status_ptr(memirq) + offset * SZ_16; + source = xe_memirq_source_ptr(memirq, NULL) + offset; + status = xe_memirq_status_ptr(memirq, NULL) + offset * SZ_16; err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY, source); @@ -299,9 +361,8 @@ int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc) return 0; failed: - xe_sriov_err(memirq_to_xe(memirq), - "Failed to setup report pages in %s (%pe)\n", - guc_name(guc), ERR_PTR(err)); + memirq_err(memirq, "Failed to setup report pages in %s (%pe)\n", + guc_name(guc), ERR_PTR(err)); return err; } @@ -311,13 +372,12 @@ failed: * * This is part of the driver IRQ setup flow. * - * This function shall only be used by the VF driver on platforms that use + * This function shall only be used on platforms that use * `Memory Based Interrupts`_. */ void xe_memirq_reset(struct xe_memirq *memirq) { - memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); - memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq))); if (memirq->bo) memirq_set_enable(memirq, false); @@ -329,13 +389,12 @@ void xe_memirq_reset(struct xe_memirq *memirq) * * This is part of the driver IRQ setup flow. * - * This function shall only be used by the VF driver on platforms that use + * This function shall only be used on platforms that use * `Memory Based Interrupts`_. */ void xe_memirq_postinstall(struct xe_memirq *memirq) { - memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); - memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_uses_memirq(memirq_to_xe(memirq))); if (memirq->bo) memirq_set_enable(memirq, true); @@ -349,9 +408,9 @@ static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector, value = iosys_map_rd(vector, offset, u8); if (value) { if (value != 0xff) - xe_sriov_err_ratelimited(memirq_to_xe(memirq), - "Unexpected memirq value %#x from %s at %u\n", - value, name, offset); + memirq_err_ratelimited(memirq, + "Unexpected memirq value %#x from %s at %u\n", + value, name, offset); iosys_map_wr(vector, offset, u8, 0x00); } @@ -379,6 +438,28 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat } /** + * xe_memirq_hwe_handler - Check and process interrupts for a specific HW engine. + * @memirq: the &xe_memirq + * @hwe: the hw engine to process + * + * This function reads and dispatches `Memory Based Interrupts` for the provided HW engine. + */ +void xe_memirq_hwe_handler(struct xe_memirq *memirq, struct xe_hw_engine *hwe) +{ + u16 offset = hwe->irq_offset; + u16 instance = hw_reports_to_instance_zero(memirq) ? hwe->instance : 0; + struct iosys_map src_offset = IOSYS_MAP_INIT_OFFSET(&memirq->bo->vmap, + XE_MEMIRQ_SOURCE_OFFSET(instance)); + + if (memirq_received(memirq, &src_offset, offset, "SRC")) { + struct iosys_map status_offset = + IOSYS_MAP_INIT_OFFSET(&memirq->bo->vmap, + XE_MEMIRQ_STATUS_OFFSET(instance) + offset * SZ_16); + memirq_dispatch_engine(memirq, &status_offset, hwe); + } +} + +/** * xe_memirq_handler - The `Memory Based Interrupts`_ Handler. * @memirq: the &xe_memirq * @@ -405,13 +486,8 @@ void xe_memirq_handler(struct xe_memirq *memirq) if (gt->tile != tile) continue; - for_each_hw_engine(hwe, gt, id) { - if (memirq_received(memirq, &memirq->source, hwe->irq_offset, "SRC")) { - map = IOSYS_MAP_INIT_OFFSET(&memirq->status, - hwe->irq_offset * SZ_16); - memirq_dispatch_engine(memirq, &map, hwe); - } - } + for_each_hw_engine(hwe, gt, id) + xe_memirq_hwe_handler(memirq, hwe); } /* GuC and media GuC (if present) must be checked separately */ diff --git a/drivers/gpu/drm/xe/xe_memirq.h b/drivers/gpu/drm/xe/xe_memirq.h index 2d40d03c3095..06130650e9d6 100644 --- a/drivers/gpu/drm/xe/xe_memirq.h +++ b/drivers/gpu/drm/xe/xe_memirq.h @@ -9,16 +9,18 @@ #include <linux/types.h> struct xe_guc; +struct xe_hw_engine; struct xe_memirq; int xe_memirq_init(struct xe_memirq *memirq); -u32 xe_memirq_source_ptr(struct xe_memirq *memirq); -u32 xe_memirq_status_ptr(struct xe_memirq *memirq); +u32 xe_memirq_source_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe); +u32 xe_memirq_status_ptr(struct xe_memirq *memirq, struct xe_hw_engine *hwe); u32 xe_memirq_enable_ptr(struct xe_memirq *memirq); void xe_memirq_reset(struct xe_memirq *memirq); void xe_memirq_postinstall(struct xe_memirq *memirq); +void xe_memirq_hwe_handler(struct xe_memirq *memirq, struct xe_hw_engine *hwe); void xe_memirq_handler(struct xe_memirq *memirq); int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_memirq_types.h b/drivers/gpu/drm/xe/xe_memirq_types.h index 625b6b8736cc..9d0f6c1cdb9d 100644 --- a/drivers/gpu/drm/xe/xe_memirq_types.h +++ b/drivers/gpu/drm/xe/xe_memirq_types.h @@ -11,9 +11,9 @@ struct xe_bo; /* ISR */ -#define XE_MEMIRQ_STATUS_OFFSET 0x0 +#define XE_MEMIRQ_STATUS_OFFSET(inst) ((inst) * SZ_4K + 0x0) /* IIR */ -#define XE_MEMIRQ_SOURCE_OFFSET 0x400 +#define XE_MEMIRQ_SOURCE_OFFSET(inst) ((inst) * SZ_4K + 0x400) /* IMR */ #define XE_MEMIRQ_ENABLE_OFFSET 0x440 diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 3fd462fda625..a48f239cad1c 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -36,13 +36,19 @@ static void tiles_fini(void *arg) /* * On multi-tile devices, partition the BAR space for MMIO on each tile, * possibly accounting for register override on the number of tiles available. + * tile_mmio_size contains both the tile's 4MB register space, as well as + * additional space for the GTT and other (possibly unused) regions). * Resulting memory layout is like below: * * .----------------------. <- tile_count * tile_mmio_size * | .... | * |----------------------| <- 2 * tile_mmio_size + * | tile1 GTT + other | + * |----------------------| <- 1 * tile_mmio_size + 4MB * | tile1->mmio.regs | * |----------------------| <- 1 * tile_mmio_size + * | tile0 GTT + other | + * |----------------------| <- 4MB * | tile0->mmio.regs | * '----------------------' <- 0MB */ @@ -61,16 +67,16 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) /* Possibly override number of tile based on configuration register */ if (!xe->info.skip_mtcfg) { - struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_mmio *mmio = xe_root_tile_mmio(xe); u8 tile_count; u32 mtcfg; /* * Although the per-tile mmio regs are not yet initialized, this - * is fine as it's going to the root gt, that's guaranteed to be - * initialized earlier in xe_mmio_init() + * is fine as it's going to the root tile's mmio, that's + * guaranteed to be initialized earlier in xe_mmio_init() */ - mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR); + mtcfg = xe_mmio_read64_2x32(mmio, XEHP_MTCFG_ADDR); tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; if (tile_count < xe->info.tile_count) { @@ -90,8 +96,9 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) regs = xe->mmio.regs; for_each_tile(tile, xe, id) { - tile->mmio.size = tile_mmio_size; + tile->mmio.regs_size = SZ_4M; tile->mmio.regs = regs; + tile->mmio.tile = tile; regs += tile_mmio_size; } } @@ -126,8 +133,9 @@ static void mmio_extension_setup(struct xe_device *xe, size_t tile_mmio_size, regs = xe->mmio.regs + tile_mmio_size * xe->info.tile_count; for_each_tile(tile, xe, id) { - tile->mmio_ext.size = tile_mmio_ext_size; + tile->mmio_ext.regs_size = tile_mmio_ext_size; tile->mmio_ext.regs = regs; + tile->mmio_ext.tile = tile; regs += tile_mmio_ext_size; } } @@ -157,137 +165,132 @@ int xe_mmio_init(struct xe_device *xe) { struct xe_tile *root_tile = xe_device_get_root_tile(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - const int mmio_bar = 0; /* * Map the entire BAR. * The first 16MB of the BAR, belong to the root tile, and include: * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB). */ - xe->mmio.size = pci_resource_len(pdev, mmio_bar); - xe->mmio.regs = pci_iomap(pdev, mmio_bar, GTTMMADR_BAR); + xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR); + xe->mmio.regs = pci_iomap(pdev, GTTMMADR_BAR, 0); if (xe->mmio.regs == NULL) { drm_err(&xe->drm, "failed to map registers\n"); return -EIO; } /* Setup first tile; other tiles (if present) will be setup later. */ - root_tile->mmio.size = SZ_16M; + root_tile->mmio.regs_size = SZ_4M; root_tile->mmio.regs = xe->mmio.regs; + root_tile->mmio.tile = root_tile; return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe); } -static void mmio_flush_pending_writes(struct xe_gt *gt) +static void mmio_flush_pending_writes(struct xe_mmio *mmio) { #define DUMMY_REG_OFFSET 0x130030 - struct xe_tile *tile = gt_to_tile(gt); int i; - if (tile->xe->info.platform != XE_LUNARLAKE) + if (mmio->tile->xe->info.platform != XE_LUNARLAKE) return; /* 4 dummy writes */ for (i = 0; i < 4; i++) - writel(0, tile->mmio.regs + DUMMY_REG_OFFSET); + writel(0, mmio->regs + DUMMY_REG_OFFSET); } -u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) +u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg) { - struct xe_tile *tile = gt_to_tile(gt); - u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u8 val; /* Wa_15015404425 */ - mmio_flush_pending_writes(gt); + mmio_flush_pending_writes(mmio); - val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); - trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); + val = readb(mmio->regs + addr); + trace_xe_reg_rw(mmio, false, addr, val, sizeof(val)); return val; } -u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) +u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg) { - struct xe_tile *tile = gt_to_tile(gt); - u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u16 val; /* Wa_15015404425 */ - mmio_flush_pending_writes(gt); + mmio_flush_pending_writes(mmio); - val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); - trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); + val = readw(mmio->regs + addr); + trace_xe_reg_rw(mmio, false, addr, val, sizeof(val)); return val; } -void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) +void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val) { - struct xe_tile *tile = gt_to_tile(gt); - u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); - trace_xe_reg_rw(gt, true, addr, val, sizeof(val)); + trace_xe_reg_rw(mmio, true, addr, val, sizeof(val)); - if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt))) - xe_gt_sriov_vf_write32(gt, reg, val); + if (!reg.vf && mmio->sriov_vf_gt) + xe_gt_sriov_vf_write32(mmio->sriov_vf_gt, reg, val); else - writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); + writel(val, mmio->regs + addr); } -u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) +u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg) { - struct xe_tile *tile = gt_to_tile(gt); - u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u32 val; /* Wa_15015404425 */ - mmio_flush_pending_writes(gt); + mmio_flush_pending_writes(mmio); - if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt))) - val = xe_gt_sriov_vf_read32(gt, reg); + if (!reg.vf && mmio->sriov_vf_gt) + val = xe_gt_sriov_vf_read32(mmio->sriov_vf_gt, reg); else - val = readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); + val = readl(mmio->regs + addr); - trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); + trace_xe_reg_rw(mmio, false, addr, val, sizeof(val)); return val; } -u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set) +u32 xe_mmio_rmw32(struct xe_mmio *mmio, struct xe_reg reg, u32 clr, u32 set) { u32 old, reg_val; - old = xe_mmio_read32(gt, reg); + old = xe_mmio_read32(mmio, reg); reg_val = (old & ~clr) | set; - xe_mmio_write32(gt, reg, reg_val); + xe_mmio_write32(mmio, reg, reg_val); return old; } -int xe_mmio_write32_and_verify(struct xe_gt *gt, +int xe_mmio_write32_and_verify(struct xe_mmio *mmio, struct xe_reg reg, u32 val, u32 mask, u32 eval) { u32 reg_val; - xe_mmio_write32(gt, reg, val); - reg_val = xe_mmio_read32(gt, reg); + xe_mmio_write32(mmio, reg, val); + reg_val = xe_mmio_read32(mmio, reg); return (reg_val & mask) != eval ? -EINVAL : 0; } -bool xe_mmio_in_range(const struct xe_gt *gt, +bool xe_mmio_in_range(const struct xe_mmio *mmio, const struct xe_mmio_range *range, struct xe_reg reg) { - u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); + u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); return range && addr >= range->start && addr <= range->end; } /** * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads - * @gt: MMIO target GT + * @mmio: MMIO target * @reg: register to read value from * * Although Intel GPUs have some 64-bit registers, the hardware officially @@ -307,21 +310,21 @@ bool xe_mmio_in_range(const struct xe_gt *gt, * * Returns the value of the 64-bit register. */ -u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg) +u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg) { struct xe_reg reg_udw = { .addr = reg.addr + 0x4 }; u32 ldw, udw, oldudw, retries; - reg.addr = xe_mmio_adjusted_addr(gt, reg.addr); - reg_udw.addr = xe_mmio_adjusted_addr(gt, reg_udw.addr); + reg.addr = xe_mmio_adjusted_addr(mmio, reg.addr); + reg_udw.addr = xe_mmio_adjusted_addr(mmio, reg_udw.addr); /* we shouldn't adjust just one register address */ - xe_gt_assert(gt, reg_udw.addr == reg.addr + 0x4); + xe_tile_assert(mmio->tile, reg_udw.addr == reg.addr + 0x4); - oldudw = xe_mmio_read32(gt, reg_udw); + oldudw = xe_mmio_read32(mmio, reg_udw); for (retries = 5; retries; --retries) { - ldw = xe_mmio_read32(gt, reg); - udw = xe_mmio_read32(gt, reg_udw); + ldw = xe_mmio_read32(mmio, reg); + udw = xe_mmio_read32(mmio, reg_udw); if (udw == oldudw) break; @@ -329,13 +332,13 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg) oldudw = udw; } - xe_gt_WARN(gt, retries == 0, - "64-bit read of %#x did not stabilize\n", reg.addr); + drm_WARN(&mmio->tile->xe->drm, retries == 0, + "64-bit read of %#x did not stabilize\n", reg.addr); return (u64)udw << 32 | ldw; } -static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, +static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, u32 *out_val, bool atomic, bool expect_match) { ktime_t cur = ktime_get_raw(); @@ -346,7 +349,7 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v bool check; for (;;) { - read = xe_mmio_read32(gt, reg); + read = xe_mmio_read32(mmio, reg); check = (read & mask) == val; if (!expect_match) @@ -372,7 +375,7 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v } if (ret != 0) { - read = xe_mmio_read32(gt, reg); + read = xe_mmio_read32(mmio, reg); check = (read & mask) == val; if (!expect_match) @@ -390,7 +393,7 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v /** * xe_mmio_wait32() - Wait for a register to match the desired masked value - * @gt: MMIO target GT + * @mmio: MMIO target * @reg: register to read value from * @mask: mask to be applied to the value read from the register * @val: desired value after applying the mask @@ -407,15 +410,15 @@ static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 v * @timeout_us for different reasons, specially in non-atomic contexts. Thus, * it is possible that this function succeeds even after @timeout_us has passed. */ -int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, +int xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, u32 *out_val, bool atomic) { - return __xe_mmio_wait32(gt, reg, mask, val, timeout_us, out_val, atomic, true); + return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, true); } /** * xe_mmio_wait32_not() - Wait for a register to return anything other than the given masked value - * @gt: MMIO target GT + * @mmio: MMIO target * @reg: register to read value from * @mask: mask to be applied to the value read from the register * @val: value not to be matched after applying the mask @@ -426,8 +429,8 @@ int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 t * This function works exactly like xe_mmio_wait32() with the exception that * @val is expected not to be matched. */ -int xe_mmio_wait32_not(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, +int xe_mmio_wait32_not(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, u32 *out_val, bool atomic) { - return __xe_mmio_wait32(gt, reg, mask, val, timeout_us, out_val, atomic, false); + return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, false); } diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 26551410ecc8..8a46f4006a84 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -14,25 +14,30 @@ struct xe_reg; int xe_mmio_init(struct xe_device *xe); int xe_mmio_probe_tiles(struct xe_device *xe); -u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg); -u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg); -void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val); -u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg); -u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set); -int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask, u32 eval); -bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg); - -u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg); -int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, - u32 *out_val, bool atomic); -int xe_mmio_wait32_not(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, - u32 *out_val, bool atomic); - -static inline u32 xe_mmio_adjusted_addr(const struct xe_gt *gt, u32 addr) +u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg); +u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg); +void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val); +u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg); +u32 xe_mmio_rmw32(struct xe_mmio *mmio, struct xe_reg reg, u32 clr, u32 set); +int xe_mmio_write32_and_verify(struct xe_mmio *mmio, struct xe_reg reg, u32 val, u32 mask, u32 eval); +bool xe_mmio_in_range(const struct xe_mmio *mmio, const struct xe_mmio_range *range, struct xe_reg reg); + +u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg); +int xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, + u32 timeout_us, u32 *out_val, bool atomic); +int xe_mmio_wait32_not(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, + u32 val, u32 timeout_us, u32 *out_val, bool atomic); + +static inline u32 xe_mmio_adjusted_addr(const struct xe_mmio *mmio, u32 addr) { - if (addr < gt->mmio.adj_limit) - addr += gt->mmio.adj_offset; + if (addr < mmio->adj_limit) + addr += mmio->adj_offset; return addr; } +static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe) +{ + return &xe->tiles[0].mmio; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 7ff0ac5b799a..8df41cd12d51 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -278,7 +278,7 @@ static void xelp_lncf_dump(struct xe_mocs_info *info, struct xe_gt *gt, struct d if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); else - reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); + reg_val = xe_mmio_read32(>->mmio, XELP_LNCFCMOCS(i)); drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n", j++, @@ -310,7 +310,7 @@ static void xelp_mocs_dump(struct xe_mocs_info *info, unsigned int flags, if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); else - reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + reg_val = xe_mmio_read32(>->mmio, XELP_GLOBAL_MOCS(i)); drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u ] (%#8x)\n", i, @@ -383,7 +383,7 @@ static void xehp_lncf_dump(struct xe_mocs_info *info, unsigned int flags, if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); else - reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); + reg_val = xe_mmio_read32(>->mmio, XELP_LNCFCMOCS(i)); drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n", j++, @@ -428,7 +428,7 @@ static void pvc_mocs_dump(struct xe_mocs_info *info, unsigned int flags, struct if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); else - reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); + reg_val = xe_mmio_read32(>->mmio, XELP_LNCFCMOCS(i)); drm_printf(p, "LNCFCMOCS[%2d] = [ %u ] (%#8x)\n", j++, @@ -510,7 +510,7 @@ static void mtl_mocs_dump(struct xe_mocs_info *info, unsigned int flags, if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); else - reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + reg_val = xe_mmio_read32(>->mmio, XELP_GLOBAL_MOCS(i)); drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u] (%#8x)\n", i, @@ -553,7 +553,7 @@ static void xe2_mocs_dump(struct xe_mocs_info *info, unsigned int flags, if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); else - reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + reg_val = xe_mmio_read32(>->mmio, XELP_GLOBAL_MOCS(i)); drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u] (%#8x)\n", i, @@ -690,7 +690,7 @@ static void __init_mocs_table(struct xe_gt *gt, if (regs_are_mcr(gt)) xe_gt_mcr_multicast_write(gt, XEHP_GLOBAL_MOCS(i), mocs); else - xe_mmio_write32(gt, XELP_GLOBAL_MOCS(i), mocs); + xe_mmio_write32(>->mmio, XELP_GLOBAL_MOCS(i), mocs); } } @@ -730,7 +730,7 @@ static void init_l3cc_table(struct xe_gt *gt, if (regs_are_mcr(gt)) xe_gt_mcr_multicast_write(gt, XEHP_LNCFCMOCS(i), l3cc); else - xe_mmio_write32(gt, XELP_LNCFCMOCS(i), l3cc); + xe_mmio_write32(>->mmio, XELP_LNCFCMOCS(i), l3cc); } } diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index eae38a49ee8e..bbe03db0c401 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -176,7 +176,7 @@ static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) { - return xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_tail_ptr) & + return xe_mmio_read32(&stream->gt->mmio, __oa_regs(stream)->oa_tail_ptr) & OAG_OATAILPTR_MASK; } @@ -366,7 +366,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - xe_mmio_write32(stream->gt, oaheadptr, + xe_mmio_write32(&stream->gt->mmio, oaheadptr, (head + gtt_offset) & OAG_OAHEADPTR_MASK); stream->oa_buffer.head = head; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -377,22 +377,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { + struct xe_mmio *mmio = &stream->gt->mmio; u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; unsigned long flags; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0); - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr, + xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0); + xe_mmio_write32(mmio, __oa_regs(stream)->oa_head_ptr, gtt_offset & OAG_OAHEADPTR_MASK); stream->oa_buffer.head = 0; /* * PRM says: "This MMIO must be set before the OATAILPTR register and after the * OAHEADPTR register. This is to enable proper functionality of the overflow bit". */ - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_buffer, oa_buf); - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_tail_ptr, + xe_mmio_write32(mmio, __oa_regs(stream)->oa_buffer, oa_buf); + xe_mmio_write32(mmio, __oa_regs(stream)->oa_tail_ptr, gtt_offset & OAG_OATAILPTR_MASK); /* Mark that we need updated tail pointer to read from */ @@ -444,21 +445,23 @@ static void xe_oa_enable(struct xe_oa_stream *stream) stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) val |= OAG_OACONTROL_OA_PES_DISAG_EN; - xe_mmio_write32(stream->gt, regs->oa_ctrl, val); + xe_mmio_write32(&stream->gt->mmio, regs->oa_ctrl, val); } static void xe_oa_disable(struct xe_oa_stream *stream) { - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, 0); - if (xe_mmio_wait32(stream->gt, __oa_regs(stream)->oa_ctrl, + struct xe_mmio *mmio = &stream->gt->mmio; + + xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctrl, 0); + if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl, OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false)) drm_err(&stream->oa->xe->drm, "wait for OA to be disabled timed out\n"); if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) { /* <= XE_METEORLAKE except XE_PVC */ - xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1); - if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false)) + xe_mmio_write32(mmio, OA_TLB_INV_CR, 1); + if (xe_mmio_wait32(mmio, OA_TLB_INV_CR, 1, 0, 50000, NULL, false)) drm_err(&stream->oa->xe->drm, "wait for OA tlb invalidate timed out\n"); } @@ -481,7 +484,7 @@ static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf, size_t count, size_t *offset) { /* Only clear our bits to avoid side-effects */ - stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status, + stream->oa_status = xe_mmio_rmw32(&stream->gt->mmio, __oa_regs(stream)->oa_status, OASTATUS_RELEVANT_BITS, 0); /* * Signal to userspace that there is non-zero OA status to read via @@ -709,8 +712,7 @@ static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, - enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) + _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE), }, }; struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; @@ -742,17 +744,16 @@ static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, - enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | - _MASKED_FIELD(CTX_CTRL_RUN_ALONE, - enable ? CTX_CTRL_RUN_ALONE : 0), + _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) | + _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0), }, }; struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; int err; /* Set ccs select to enable programming of OAC_OACONTROL */ - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream)); + xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl, + __oa_ccs_select(stream)); /* Modify stream hwe context image with regs_context */ err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], @@ -788,6 +789,7 @@ static u32 oag_configure_mmio_trigger(const struct xe_oa_stream *stream, bool en static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) { + struct xe_mmio *mmio = &stream->gt->mmio; u32 sqcnt1; /* @@ -801,7 +803,7 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); } - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug, + xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug, oag_configure_mmio_trigger(stream, false)); /* disable the context save/restore or OAR counters */ @@ -809,13 +811,13 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) xe_oa_configure_oa_context(stream, false); /* Make sure we disable noa to save power. */ - xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0); + xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0); sqcnt1 = SQCNT1_PMON_ENABLE | (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); /* Reset PMON Enable to save power. */ - xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0); + xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0); } static void xe_oa_stream_destroy(struct xe_oa_stream *stream) @@ -943,6 +945,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) { + struct xe_mmio *mmio = &stream->gt->mmio; u32 oa_debug, sqcnt1; int ret; @@ -969,12 +972,12 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL | OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL; - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug, + xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug, _MASKED_BIT_ENABLE(oa_debug) | oag_report_ctx_switches(stream) | oag_configure_mmio_trigger(stream, true)); - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ? + xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ? (OAG_OAGLBCTXCTRL_COUNTER_RESUME | OAG_OAGLBCTXCTRL_TIMER_ENABLE | REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK, @@ -988,7 +991,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) sqcnt1 = SQCNT1_PMON_ENABLE | (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); - xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, 0, sqcnt1); + xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1); /* Configure OAR/OAC */ if (stream->exec_q) { @@ -1536,7 +1539,7 @@ u32 xe_oa_timestamp_frequency(struct xe_gt *gt) case XE_PVC: case XE_METEORLAKE: xe_pm_runtime_get(gt_to_xe(gt)); - reg = xe_mmio_read32(gt, RPM_CONFIG0); + reg = xe_mmio_read32(>->mmio, RPM_CONFIG0); xe_pm_runtime_put(gt_to_xe(gt)); shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); @@ -2352,7 +2355,7 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt) } /* Ensure MMIO trigger remains disabled till there is a stream */ - xe_mmio_write32(gt, u->regs.oa_debug, + xe_mmio_write32(>->mmio, u->regs.oa_debug, oag_configure_mmio_trigger(NULL, false)); /* Set oa_unit_ids now to ensure ids remain contiguous */ diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index f291a1730024..6bd6adfdfc74 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -100,6 +100,10 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { * Reserved entries should be programmed with the maximum caching, minimum * coherency (which matches an all-0's encoding), so we can just omit them * in the table. + * + * Note: There is an implicit assumption in the driver that compression and + * coh_1way+ are mutually exclusive. If this is ever not true then userptr + * and imported dma-buf from external device will have uncleared ccs state. */ #define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \ { \ @@ -109,7 +113,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \ REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \ REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \ - .coh_mode = __coh_mode ? XE_COH_AT_LEAST_1WAY : XE_COH_NONE \ + .coh_mode = (BUILD_BUG_ON_ZERO(__coh_mode && comp_en) || __coh_mode) ? \ + XE_COH_AT_LEAST_1WAY : XE_COH_NONE \ } static const struct xe_pat_table_entry xe2_pat_table[] = { @@ -160,7 +165,7 @@ static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[ for (int i = 0; i < n_entries; i++) { struct xe_reg reg = XE_REG(_PAT_INDEX(i)); - xe_mmio_write32(gt, reg, table[i].value); + xe_mmio_write32(>->mmio, reg, table[i].value); } } @@ -186,7 +191,7 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "PAT table:\n"); for (i = 0; i < xe->pat.n_entries; i++) { - u32 pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i))); + u32 pat = xe_mmio_read32(>->mmio, XE_REG(_PAT_INDEX(i))); u8 mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat); drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i, @@ -278,7 +283,7 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) u32 pat; if (xe_gt_is_media_type(gt)) - pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i))); + pat = xe_mmio_read32(>->mmio, XE_REG(_PAT_INDEX(i))); else pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); @@ -316,10 +321,10 @@ static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry int n_entries) { program_pat(gt, table, n_entries); - xe_mmio_write32(gt, XE_REG(_PAT_ATS), xe2_pat_ats.value); + xe_mmio_write32(>->mmio, XE_REG(_PAT_ATS), xe2_pat_ats.value); if (IS_DGFX(gt_to_xe(gt))) - xe_mmio_write32(gt, XE_REG(_PAT_PTA), xe2_pat_pta.value); + xe_mmio_write32(>->mmio, XE_REG(_PAT_PTA), xe2_pat_pta.value); } static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) @@ -336,7 +341,7 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) for (i = 0; i < xe->pat.n_entries; i++) { if (xe_gt_is_media_type(gt)) - pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i))); + pat = xe_mmio_read32(>->mmio, XE_REG(_PAT_INDEX(i))); else pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); @@ -355,7 +360,7 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) * PPGTT entries. */ if (xe_gt_is_media_type(gt)) - pat = xe_mmio_read32(gt, XE_REG(_PAT_PTA)); + pat = xe_mmio_read32(>->mmio, XE_REG(_PAT_PTA)); else pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA)); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 937c3e064f0d..af7b2f2ff13a 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -103,7 +103,6 @@ static const struct xe_graphics_desc graphics_xelpp = { #define XE_HP_FEATURES \ .has_range_tlb_invalidation = true, \ - .has_flat_ccs = true, \ .dma_mask_size = 46, \ .va_bits = 48, \ .vm_max_level = 3 @@ -120,6 +119,8 @@ static const struct xe_graphics_desc graphics_xehpg = { XE_HP_FEATURES, .vram_flags = XE_VRAM_FLAGS_NEED64K, + + .has_flat_ccs = 1, }; static const struct xe_graphics_desc graphics_xehpc = { @@ -145,7 +146,6 @@ static const struct xe_graphics_desc graphics_xehpc = { .has_asid = 1, .has_atomic_enable_pte_bit = 1, - .has_flat_ccs = 0, .has_usm = 1, }; @@ -156,7 +156,6 @@ static const struct xe_graphics_desc graphics_xelpg = { BIT(XE_HW_ENGINE_CCS0), XE_HP_FEATURES, - .has_flat_ccs = 0, }; #define XE2_GFX_FEATURES \ @@ -383,10 +382,12 @@ static const struct pci_device_id pciidlist[] = { XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), XE_ADLN_IDS(INTEL_VGA_DEVICE, &adl_n_desc), + XE_RPLU_IDS(INTEL_VGA_DEVICE, &adl_p_desc), XE_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), XE_RPLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc), XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), + XE_ARL_IDS(INTEL_VGA_DEVICE, &mtl_desc), XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), @@ -467,13 +468,15 @@ enum xe_gmdid_type { static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid) { - struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_mmio *mmio = xe_root_tile_mmio(xe); struct xe_reg gmdid_reg = GMD_ID; u32 val; KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid); if (IS_SRIOV_VF(xe)) { + struct xe_gt *gt = xe_root_mmio_gt(xe); + /* * To get the value of the GMDID register, VFs must obtain it * from the GuC using MMIO communication. @@ -509,14 +512,17 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, gt->info.type = XE_GT_TYPE_UNINITIALIZED; } else { /* - * We need to apply the GSI offset explicitly here as at this - * point the xe_gt is not fully uninitialized and only basic - * access to MMIO registers is possible. + * GMD_ID is a GT register, but at this point in the driver + * init we haven't fully initialized the GT yet so we need to + * read the register with the tile's MMIO accessor. That means + * we need to apply the GSI offset manually since it won't get + * automatically added as it would if we were using a GT mmio + * accessor. */ if (type == GMDID_MEDIA) gmdid_reg.addr += MEDIA_GT_GSI_OFFSET; - val = xe_mmio_read32(gt, gmdid_reg); + val = xe_mmio_read32(mmio, gmdid_reg); } *ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 + REG_FIELD_GET(GMD_ID_RELEASE_MASK, val); @@ -678,7 +684,10 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit; if (xe->info.platform != XE_PVC) xe->info.has_device_atomics_on_smem = 1; + + /* Runtime detection may change this later */ xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; + xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; xe->info.has_usm = graphics_desc->has_usm; @@ -707,6 +716,7 @@ static int xe_info_init(struct xe_device *xe, gt->info.type = XE_GT_TYPE_MAIN; gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; gt->info.engine_mask = graphics_desc->hw_engine_mask; + if (MEDIA_VER(xe) < 13 && media_desc) gt->info.engine_mask |= media_desc->hw_engine_mask; @@ -725,8 +735,6 @@ static int xe_info_init(struct xe_device *xe, gt->info.type = XE_GT_TYPE_MEDIA; gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; gt->info.engine_mask = media_desc->hw_engine_mask; - gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; - gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; /* * FIXME: At the moment multi-tile and standalone media are @@ -757,6 +765,25 @@ static void xe_pci_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); } +/* + * Probe the PCI device, initialize various parts of the driver. + * + * Fault injection is used to test the error paths of some initialization + * functions called either directly from xe_pci_probe() or indirectly for + * example through xe_device_probe(). Those functions use the kernel fault + * injection capabilities infrastructure, see + * Documentation/fault-injection/fault-injection.rst for details. The macro + * ALLOW_ERROR_INJECTION() is used to conditionally skip function execution + * at runtime and use a provided return value. The first requirement for + * error injectable functions is proper handling of the error code by the + * caller for recovery, which is always the case here. The second + * requirement is that no state is changed before the first error return. + * It is not strictly fullfilled for all initialization functions using the + * ALLOW_ERROR_INJECTION() macro but this is acceptable because for those + * error cases at probe time, the error code is simply propagated up by the + * caller. Therefore there is no consequence on those specific callers when + * function error injection skips the whole function. + */ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { const struct xe_device_desc *desc = (const void *)ent->driver_data; @@ -924,6 +951,8 @@ static int xe_pci_resume(struct device *dev) if (err) return err; + pci_restore_state(pdev); + err = pci_enable_device(pdev); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 7397d556996a..d95d9835de42 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -44,7 +44,7 @@ static int pcode_mailbox_status(struct xe_tile *tile) [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"}, }; - err = xe_mmio_read32(tile->primary_gt, PCODE_MAILBOX) & PCODE_ERROR_MASK; + err = xe_mmio_read32(&tile->mmio, PCODE_MAILBOX) & PCODE_ERROR_MASK; if (err) { drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s", err, err_decode[err].str ?: "Unknown"); @@ -58,7 +58,7 @@ static int __pcode_mailbox_rw(struct xe_tile *tile, u32 mbox, u32 *data0, u32 *d unsigned int timeout_ms, bool return_data, bool atomic) { - struct xe_gt *mmio = tile->primary_gt; + struct xe_mmio *mmio = &tile->mmio; int err; if (tile_to_xe(tile)->info.skip_pcode) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 7cf2160fe040..40f7c844ed44 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -5,6 +5,7 @@ #include "xe_pm.h" +#include <linux/fault-inject.h> #include <linux/pm_runtime.h> #include <drm/drm_managed.h> @@ -123,7 +124,7 @@ int xe_pm_suspend(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); - xe_display_pm_suspend(xe, false); + xe_display_pm_suspend(xe); /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); @@ -133,7 +134,7 @@ int xe_pm_suspend(struct xe_device *xe) for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); if (err) { - xe_display_pm_resume(xe, false); + xe_display_pm_resume(xe); goto err; } } @@ -187,7 +188,7 @@ int xe_pm_resume(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_resume(gt); - xe_display_pm_resume(xe, false); + xe_display_pm_resume(xe); err = xe_bo_restore_user(xe); if (err) @@ -263,6 +264,7 @@ int xe_pm_init_early(struct xe_device *xe) return 0; } +ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */ /** * xe_pm_init - Initialize Xe Power Management diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index d6353e8969f0..f27f579f4d85 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -2188,5 +2188,5 @@ void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) pt_op->num_entries); } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); + xe_pt_update_ops_fini(tile, vops); } diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 28d9bb3b825d..5246a4a2740e 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -91,16 +91,17 @@ __read_timestamps(struct xe_gt *gt, u64 *cpu_delta, __ktime_func_t cpu_clock) { + struct xe_mmio *mmio = >->mmio; u32 upper, lower, old_upper, loop = 0; - upper = xe_mmio_read32(gt, upper_reg); + upper = xe_mmio_read32(mmio, upper_reg); do { *cpu_delta = local_clock(); *cpu_ts = cpu_clock(); - lower = xe_mmio_read32(gt, lower_reg); + lower = xe_mmio_read32(mmio, lower_reg); *cpu_delta = local_clock() - *cpu_delta; old_upper = upper; - upper = xe_mmio_read32(gt, upper_reg); + upper = xe_mmio_read32(mmio, upper_reg); } while (upper != old_upper && loop++ < 2); *engine_ts = (u64)upper << 32 | lower; diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 440ac572f6e5..191cb4121acd 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -15,6 +15,7 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" +#include "xe_device.h" #include "xe_device_types.h" #include "xe_force_wake.h" #include "xe_gt.h" @@ -164,7 +165,7 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry) else if (entry->clr_bits + 1) val = (reg.mcr ? xe_gt_mcr_unicast_read_any(gt, reg_mcr) : - xe_mmio_read32(gt, reg)) & (~entry->clr_bits); + xe_mmio_read32(>->mmio, reg)) & (~entry->clr_bits); else val = 0; @@ -180,7 +181,7 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry) if (entry->reg.mcr) xe_gt_mcr_multicast_write(gt, reg_mcr, val); else - xe_mmio_write32(gt, reg, val); + xe_mmio_write32(>->mmio, reg, val); } void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) @@ -194,14 +195,14 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name); - err = xe_force_wake_get(>->mmio.fw, XE_FORCEWAKE_ALL); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) goto err_force_wake; xa_for_each(&sr->xa, reg, entry) apply_one_mmio(gt, entry); - err = xe_force_wake_put(>->mmio.fw, XE_FORCEWAKE_ALL); + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); XE_WARN_ON(err); return; @@ -227,7 +228,7 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe) drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name); - err = xe_force_wake_get(>->mmio.fw, XE_FORCEWAKE_ALL); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) goto err_force_wake; @@ -241,7 +242,7 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe) } xe_reg_whitelist_print_entry(&p, 0, reg, entry); - xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot), + xe_mmio_write32(>->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), reg | entry->set_bits); slot++; } @@ -250,10 +251,10 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe) for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) { u32 addr = RING_NOPID(mmio_base).addr; - xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr); + xe_mmio_write32(>->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr); } - err = xe_force_wake_put(>->mmio.fw, XE_FORCEWAKE_ALL); + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); XE_WARN_ON(err); return; diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 86c705d18c0d..b13d4d62f0b1 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -196,7 +196,7 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx, *gt = (*hwe)->gt; *xe = gt_to_xe(*gt); break; - }; + } } /** diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index fe2cb2a96f78..e055bed7ae55 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -53,7 +53,7 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 if (IS_ERR(bo)) { drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", PTR_ERR(bo)); - return (struct xe_sa_manager *)bo; + return ERR_CAST(bo); } sa_manager->bo = bo; sa_manager->is_iomem = bo->vmap.is_iomem; diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index 5a1d65e4f19f..ef10782af656 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -3,6 +3,8 @@ * Copyright © 2023 Intel Corporation */ +#include <linux/fault-inject.h> + #include <drm/drm_managed.h> #include "regs/xe_regs.h" @@ -35,7 +37,7 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode) static bool test_is_vf(struct xe_device *xe) { - u32 value = xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG); + u32 value = xe_mmio_read32(xe_root_tile_mmio(xe), VF_CAP_REG); return value & VF_CAP; } @@ -119,6 +121,7 @@ int xe_sriov_init(struct xe_device *xe) return drmm_add_action_or_reset(&xe->drm, fini_sriov, xe); } +ALLOW_ERROR_INJECTION(xe_sriov_init, ERRNO); /* See xe_pci_probe() */ /** * xe_sriov_print_info - Print basic SR-IOV information. diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index dda5268507d8..07cf7cfe4abd 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -3,6 +3,8 @@ * Copyright © 2023 Intel Corporation */ +#include <linux/fault-inject.h> + #include <drm/drm_managed.h> #include "xe_device.h" @@ -129,6 +131,7 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id) return 0; } +ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */ static int tile_ttm_mgr_init(struct xe_tile *tile) { diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 8573d7a87d84..91130ad8999c 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -21,6 +21,7 @@ #include "xe_vm.h" #define __dev_name_xe(xe) dev_name((xe)->drm.dev) +#define __dev_name_tile(tile) __dev_name_xe(tile_to_xe((tile))) #define __dev_name_gt(gt) __dev_name_xe(gt_to_xe((gt))) #define __dev_name_eq(q) __dev_name_gt((q)->gt) @@ -342,12 +343,12 @@ DEFINE_EVENT(xe_hw_fence, xe_hw_fence_try_signal, ); TRACE_EVENT(xe_reg_rw, - TP_PROTO(struct xe_gt *gt, bool write, u32 reg, u64 val, int len), + TP_PROTO(struct xe_mmio *mmio, bool write, u32 reg, u64 val, int len), - TP_ARGS(gt, write, reg, val, len), + TP_ARGS(mmio, write, reg, val, len), TP_STRUCT__entry( - __string(dev, __dev_name_gt(gt)) + __string(dev, __dev_name_tile(mmio->tile)) __field(u64, val) __field(u32, reg) __field(u16, write) diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index 9b1a1d4304ae..30a3cfbaaa09 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -189,7 +189,7 @@ DECLARE_EVENT_CLASS(xe_vm, ), TP_printk("dev=%s, vm=%p, asid=0x%05x", __get_str(dev), - __entry->vm, __entry->asid) + __entry->vm, __entry->asid) ); DEFINE_EVENT(xe_vm, xe_vm_kill, diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index f7113cf6109d..423856cc18d4 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -60,7 +60,7 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe) static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct xe_tile *tile = xe_device_get_root_tile(xe); - struct xe_gt *mmio = xe_root_mmio_gt(xe); + struct xe_mmio *mmio = xe_root_tile_mmio(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); u64 stolen_size; u64 tile_offset; @@ -94,7 +94,7 @@ static u32 get_wopcm_size(struct xe_device *xe) u32 wopcm_size; u64 val; - val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED); + val = xe_mmio_read64_2x32(xe_root_tile_mmio(xe), STOLEN_RESERVED); val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val); switch (val) { @@ -119,7 +119,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr u32 stolen_size, wopcm_size; u32 ggc, gms; - ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC); + ggc = xe_mmio_read32(xe_root_tile_mmio(xe), GGC); /* * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the @@ -159,7 +159,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr stolen_size -= wopcm_size; if (media_gt && XE_WA(media_gt, 14019821291)) { - u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE) + u64 gscpsmi_base = xe_mmio_read64_2x32(&media_gt->mmio, GSCPSMI_BASE) & ~GENMASK_ULL(5, 0); /* diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index faa1bf42e50e..d449de0fb6ec 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -33,7 +33,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, { XE_RTP_NAME("Tuning: L3 cache - media"), - XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, @@ -42,20 +42,48 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), SET(CCCHKNREG1, L3CMPCTRL)) }, + { XE_RTP_NAME("Tuning: Compression Overfetch - media"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX), + SET(XE2LPM_CCCHKNREG1, L3CMPCTRL)) + }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) }, + { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN)) + }, { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG2, COMPMEMRD256BOVRFETCHEN)) }, + { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2, + COMPMEMRD256BOVRFETCHEN)) + }, { XE_RTP_NAME("Tuning: Stateless compression control"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) }, + { XE_RTP_NAME("Tuning: Stateless compression control - media"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, + REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) + }, + { XE_RTP_NAME("Tuning: L3 RW flush all Cache"), + XE_RTP_RULES(GRAPHICS_VERSION(2004)), + XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN)) + }, + { XE_RTP_NAME("Tuning: L3 RW flush all cache - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN)) + }, + {} }; diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index d431d0031185..fb0eda3d5682 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -4,6 +4,7 @@ */ #include <linux/bitfield.h> +#include <linux/fault-inject.h> #include <linux/firmware.h> #include <drm/drm_managed.h> @@ -796,6 +797,7 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) return err; } +ALLOW_ERROR_INJECTION(xe_uc_fw_init, ERRNO); /* See xe_pci_probe() */ static u32 uc_fw_ggtt_offset(struct xe_uc_fw *uc_fw) { @@ -806,6 +808,7 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) { struct xe_device *xe = uc_fw_to_xe(uc_fw); struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct xe_mmio *mmio = >->mmio; u64 src_offset; u32 dma_ctrl; int ret; @@ -814,34 +817,34 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) /* Set the source address for the uCode */ src_offset = uc_fw_ggtt_offset(uc_fw) + uc_fw->css_offset; - xe_mmio_write32(gt, DMA_ADDR_0_LOW, lower_32_bits(src_offset)); - xe_mmio_write32(gt, DMA_ADDR_0_HIGH, + xe_mmio_write32(mmio, DMA_ADDR_0_LOW, lower_32_bits(src_offset)); + xe_mmio_write32(mmio, DMA_ADDR_0_HIGH, upper_32_bits(src_offset) | DMA_ADDRESS_SPACE_GGTT); /* Set the DMA destination */ - xe_mmio_write32(gt, DMA_ADDR_1_LOW, offset); - xe_mmio_write32(gt, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); + xe_mmio_write32(mmio, DMA_ADDR_1_LOW, offset); + xe_mmio_write32(mmio, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); /* * Set the transfer size. The header plus uCode will be copied to WOPCM * via DMA, excluding any other components */ - xe_mmio_write32(gt, DMA_COPY_SIZE, + xe_mmio_write32(mmio, DMA_COPY_SIZE, sizeof(struct uc_css_header) + uc_fw->ucode_size); /* Start the DMA */ - xe_mmio_write32(gt, DMA_CTRL, + xe_mmio_write32(mmio, DMA_CTRL, _MASKED_BIT_ENABLE(dma_flags | START_DMA)); /* Wait for DMA to finish */ - ret = xe_mmio_wait32(gt, DMA_CTRL, START_DMA, 0, 100000, &dma_ctrl, + ret = xe_mmio_wait32(mmio, DMA_CTRL, START_DMA, 0, 100000, &dma_ctrl, false); if (ret) drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n", xe_uc_fw_type_repr(uc_fw->type), dma_ctrl); /* Disable the bits once DMA is over */ - xe_mmio_write32(gt, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags)); + xe_mmio_write32(mmio, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags)); return ret; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7acd5fc9d032..ce9dca4d4e87 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1613,7 +1613,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) up_write(&vm->lock); - mutex_lock(&xe->usm.lock); + down_write(&xe->usm.lock); if (vm->usm.asid) { void *lookup; @@ -1623,7 +1623,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); xe_assert(xe, lookup == vm); } - mutex_unlock(&xe->usm.lock); + up_write(&xe->usm.lock); for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); @@ -1765,25 +1765,18 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(vm)) return PTR_ERR(vm); - mutex_lock(&xef->vm.lock); - err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->vm.lock); - if (err) - goto err_close_and_put; - if (xe->info.has_asid) { - mutex_lock(&xe->usm.lock); + down_write(&xe->usm.lock); err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, XA_LIMIT(1, XE_MAX_ASID - 1), &xe->usm.next_asid, GFP_KERNEL); - mutex_unlock(&xe->usm.lock); + up_write(&xe->usm.lock); if (err < 0) - goto err_free_id; + goto err_close_and_put; vm->usm.asid = asid; } - args->vm_id = id; vm->xef = xe_file_get(xef); /* Record BO memory for VM pagetable created against client */ @@ -1796,12 +1789,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); #endif + /* user id alloc must always be last in ioctl to prevent UAF */ + err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); + if (err) + goto err_close_and_put; + + args->vm_id = id; + return 0; -err_free_id: - mutex_lock(&xef->vm.lock); - xa_erase(&xef->vm.xa, id); - mutex_unlock(&xef->vm.lock); err_close_and_put: xe_vm_close_and_put(vm); diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index 80ba2fc78837..2a623bfcda7e 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -169,7 +169,7 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size) u64 offset_hi, offset_lo; u32 nodes, num_enabled; - reg = xe_mmio_read32(gt, MIRROR_FUSE3); + reg = xe_mmio_read32(>->mmio, MIRROR_FUSE3); nodes = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, reg); num_enabled = hweight32(nodes); /* Number of enabled l3 nodes */ @@ -185,7 +185,8 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size) offset = round_up(offset, SZ_128K); /* SW must round up to nearest 128K */ /* We don't expect any holes */ - xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size), + xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(>_to_tile(gt)->mmio, GSMBASE) - + ccs_size), "Hole between CCS and GSM.\n"); } else { reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); @@ -257,7 +258,7 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size, if (xe->info.has_flat_ccs) { offset = get_flat_ccs_offset(gt, *tile_size); } else { - offset = xe_mmio_read64_2x32(gt, GSMBASE); + offset = xe_mmio_read64_2x32(&tile->mmio, GSMBASE); } /* remove the tile offset so we have just the available size */ diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index d424992514a4..94ea76b098ed 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -8,6 +8,7 @@ #include <drm/drm_managed.h> #include <kunit/visibility.h> #include <linux/compiler_types.h> +#include <linux/fault-inject.h> #include <generated/xe_wa_oob.h> @@ -850,6 +851,7 @@ int xe_wa_init(struct xe_gt *gt) return 0; } +ALLOW_ERROR_INJECTION(xe_wa_init, ERRNO); /* See xe_pci_probe() */ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p) { @@ -887,11 +889,11 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p) */ void xe_wa_apply_tile_workarounds(struct xe_tile *tile) { - struct xe_gt *mmio = tile->primary_gt; + struct xe_mmio *mmio = &tile->mmio; if (IS_SRIOV_VF(tile->xe)) return; - if (XE_WA(mmio, 22010954014)) + if (XE_WA(tile->primary_gt, 22010954014)) xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS); } diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c index d3a99157e523..ada0d0aa6b74 100644 --- a/drivers/gpu/drm/xe/xe_wopcm.c +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -5,6 +5,8 @@ #include "xe_wopcm.h" +#include <linux/fault-inject.h> + #include "regs/xe_guc_regs.h" #include "xe_device.h" #include "xe_force_wake.h" @@ -123,8 +125,8 @@ static bool __check_layout(struct xe_device *xe, u32 wopcm_size, static bool __wopcm_regs_locked(struct xe_gt *gt, u32 *guc_wopcm_base, u32 *guc_wopcm_size) { - u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET); - u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE); + u32 reg_base = xe_mmio_read32(>->mmio, DMA_GUC_WOPCM_OFFSET); + u32 reg_size = xe_mmio_read32(>->mmio, GUC_WOPCM_SIZE); if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) || !(reg_base & GUC_WOPCM_OFFSET_VALID)) @@ -150,13 +152,13 @@ static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt, XE_WARN_ON(size & ~GUC_WOPCM_SIZE_MASK); mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED; - err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE, size, mask, + err = xe_mmio_write32_and_verify(>->mmio, GUC_WOPCM_SIZE, size, mask, size | GUC_WOPCM_SIZE_LOCKED); if (err) goto err_out; mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent; - err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET, + err = xe_mmio_write32_and_verify(>->mmio, DMA_GUC_WOPCM_OFFSET, base | huc_agent, mask, base | huc_agent | GUC_WOPCM_OFFSET_VALID); @@ -169,10 +171,10 @@ err_out: drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n"); drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET", DMA_GUC_WOPCM_OFFSET.addr, - xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET)); + xe_mmio_read32(>->mmio, DMA_GUC_WOPCM_OFFSET)); drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE", GUC_WOPCM_SIZE.addr, - xe_mmio_read32(gt, GUC_WOPCM_SIZE)); + xe_mmio_read32(>->mmio, GUC_WOPCM_SIZE)); return err; } @@ -268,3 +270,4 @@ check: return ret; } +ALLOW_ERROR_INJECTION(xe_wopcm_init, ERRNO); /* See xe_pci_probe() */ |