Diffstat (limited to 'drivers/gpu/drm/xe')
172 files changed, 6857 insertions, 2810 deletions
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index fcc2677a4229..2bb2bc052120 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config DRM_XE
-	tristate "Intel Xe Graphics"
-	depends on DRM && PCI && (m || (y && KUNIT=y))
+	tristate "Intel Xe2 Graphics"
+	depends on DRM && PCI
+	depends on KUNIT || !KUNIT
 	depends on INTEL_VSEC || !INTEL_VSEC
 	depends on X86_PLATFORM_DEVICES || !(X86 && ACPI)
 	select INTERVAL_TREE
@@ -31,7 +32,6 @@ config DRM_XE
 	select ACPI_VIDEO if X86 && ACPI
 	select ACPI_WMI if X86 && ACPI
 	select SYNC_FILE
-	select IOSF_MBI
 	select CRC32
 	select SND_HDA_I915 if SND_HDA_CORE
 	select CEC_CORE if CEC_NOTIFIER
@@ -45,8 +45,10 @@ config DRM_XE
 	select WANT_DEV_COREDUMP
 	select AUXILIARY_BUS
 	select HMM_MIRROR
+	select REGMAP if I2C
 	help
-	  Experimental driver for Intel Xe series GPUs
+	  Driver for Intel Xe2 series GPUs and later. Experimental support
+	  for Xe series is also available.
 
 	  If "M" is selected, the module will be called xe.
 
@@ -85,16 +87,18 @@ config DRM_XE_GPUSVM
 	  Enable this option if you want support for CPU to GPU address
 	  mirroring.
 
-	  If in doubut say "Y".
+	  If in doubt say "Y".
 
-config DRM_XE_DEVMEM_MIRROR
-	bool "Enable device memory mirror"
+config DRM_XE_PAGEMAP
+	bool "Enable device memory pool for SVM"
 	depends on DRM_XE_GPUSVM
 	select GET_FREE_REGION
 	default y
 	help
-	  Disable this option only if you want to compile out without device
-	  memory mirror. Will reduce KMD memory footprint when disabled.
+	  Disable this option only if you don't want to expose local device
+	  memory for SVM. Will reduce KMD memory footprint when disabled.
+
+	  If in doubt say "Y".
 
 config DRM_XE_FORCE_PROBE
 	string "Force probe xe for selected Intel hardware IDs"
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 0d749ed44878..01735c6ece8b 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -86,12 +86,17 @@ config DRM_XE_KUNIT_TEST
 
 	  If in doubt, say "N".
 
-config DRM_XE_LARGE_GUC_BUFFER
-	bool "Enable larger guc log buffer"
+config DRM_XE_DEBUG_GUC
+	bool "Enable extra GuC related debug options"
+	depends on DRM_XE_DEBUG
 	default n
+	select STACKDEPOT
 	help
 	  Choose this option when debugging guc issues.
-	  Buffer should be large enough for complex issues.
+	  The GuC log buffer is increased to the maximum allowed, which should
+	  be large enough for complex issues. The tracking of FAST_REQ messages
+	  is extended to include a record of the calling stack, which is then
+	  dumped on a FAST_REQ error notification.
 
 	  Recommended for driver developers only.
 
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index e4bf484d4121..07c71a29963d 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -21,6 +21,13 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
 	$(src)/xe_wa_oob.rules
 	$(call cmd,wa_oob)
 
+generated_device_oob := $(obj)/generated/xe_device_wa_oob.c $(obj)/generated/xe_device_wa_oob.h
+quiet_cmd_device_wa_oob = GEN $(notdir $(generated_device_oob))
+      cmd_device_wa_oob = mkdir -p $(@D); $^ $(generated_device_oob)
+$(obj)/generated/%_device_wa_oob.c $(obj)/generated/%_device_wa_oob.h: $(obj)/xe_gen_wa_oob \
+	$(src)/xe_device_wa_oob.rules
+	$(call cmd,device_wa_oob)
+
 # Please keep these build lists sorted!
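
The Makefile rule added above runs xe_gen_wa_oob over xe_device_wa_oob.rules to emit generated/xe_device_wa_oob.{c,h}, mirroring the existing per-GT xe_wa_oob pass, and the hdrtest dependency later in this Makefile is extended to cover the new header. A minimal sketch of how such a generated, device-scoped out-of-band workaround ID might be consumed is shown below; the XE_DEVICE_WA() macro (assumed to mirror the existing XE_WA() helper), the include path, and the workaround number are illustrative assumptions, not code from this series.

/*
 * Illustrative sketch only: guard device-level setup on an OOB workaround
 * entry from the generated header. The macro name and the number below are
 * assumptions that mirror the existing XE_WA(gt, ...) pattern.
 */
#include <drm/drm_print.h>

#include <generated/xe_device_wa_oob.h>	/* assumed include path */

#include "xe_device_types.h"
#include "xe_wa.h"

static void example_apply_device_tuning(struct xe_device *xe)
{
	if (XE_DEVICE_WA(xe, 1234567890))	/* placeholder OOB WA number */
		drm_dbg(&xe->drm, "applying hypothetical device workaround\n");
}
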
# core driver code @@ -80,6 +87,7 @@ xe-y += xe_bb.o \ xe_mmio.o \ xe_mocs.o \ xe_module.o \ + xe_nvm.o \ xe_oa.o \ xe_observation.o \ xe_pat.o \ @@ -124,6 +132,7 @@ xe-y += xe_bb.o \ xe_wait_user_fence.o \ xe_wopcm.o +xe-$(CONFIG_I2C) += xe_i2c.o xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o @@ -139,7 +148,8 @@ xe-y += \ xe_guc_relay.o \ xe_memirq.o \ xe_sriov.o \ - xe_sriov_vf.o + xe_sriov_vf.o \ + xe_tile_sriov_vf.o xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ @@ -153,7 +163,8 @@ xe-$(CONFIG_PCI_IOV) += \ xe_lmtt_2l.o \ xe_lmtt_ml.o \ xe_pci_sriov.o \ - xe_sriov_pf.o + xe_sriov_pf.o \ + xe_sriov_pf_service.o # include helpers for tests even when XE is built-in ifdef CONFIG_DRM_XE_KUNIT_TEST @@ -204,7 +215,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/icl_dsi.o \ i915-display/intel_alpm.o \ i915-display/intel_atomic.o \ - i915-display/intel_atomic_plane.o \ i915-display/intel_audio.o \ i915-display/intel_backlight.o \ i915-display/intel_bios.o \ @@ -254,6 +264,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_fbc.o \ i915-display/intel_fdi.o \ i915-display/intel_fifo_underrun.o \ + i915-display/intel_flipq.o \ i915-display/intel_frontbuffer.o \ i915-display/intel_global_state.o \ i915-display/intel_gmbus.o \ @@ -270,6 +281,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_modeset_verify.o \ i915-display/intel_panel.o \ i915-display/intel_pfit.o \ + i915-display/intel_plane.o \ i915-display/intel_pmdemand.o \ i915-display/intel_pch.o \ i915-display/intel_pps.o \ @@ -337,4 +349,4 @@ $(obj)/%.hdrtest: $(src)/%.h FORCE $(call if_changed_dep,hdrtest) uses_generated_oob := $(addprefix $(obj)/, $(xe-y)) -$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h +$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h $(obj)/generated/xe_device_wa_oob.h diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index 448afb86e05c..81eb046aeebf 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -142,6 +142,7 @@ enum xe_guc_action { XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C, XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER = 0x550D, + XE_GUC_ACTION_OPT_IN_FEATURE_KLV = 0x550E, XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000, XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002, XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003, @@ -161,6 +162,37 @@ enum xe_guc_preempt_options { XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, }; +enum xe_guc_register_context_param_offsets { + XE_GUC_REGISTER_CONTEXT_DATA_0_MBZ = 0, + XE_GUC_REGISTER_CONTEXT_DATA_1_FLAGS, + XE_GUC_REGISTER_CONTEXT_DATA_2_CONTEXT_INDEX, + XE_GUC_REGISTER_CONTEXT_DATA_3_ENGINE_CLASS, + XE_GUC_REGISTER_CONTEXT_DATA_4_ENGINE_SUBMIT_MASK, + XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER, + XE_GUC_REGISTER_CONTEXT_DATA_6_WQ_DESC_ADDR_UPPER, + XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER, + XE_GUC_REGISTER_CONTEXT_DATA_8_WQ_BUF_BASE_UPPER, + XE_GUC_REGISTER_CONTEXT_DATA_9_WQ_BUF_SIZE, + XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR, + XE_GUC_REGISTER_CONTEXT_MSG_LEN, +}; + +enum xe_guc_register_context_multi_lrc_param_offsets { + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_0_MBZ = 0, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_1_FLAGS, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_2_PARENT_CONTEXT, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_3_ENGINE_CLASS, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_4_ENGINE_SUBMIT_MASK, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER, + 
XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_6_WQ_DESC_ADDR_UPPER, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_8_WQ_BUF_BASE_UPPER, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_9_WQ_BUF_SIZE, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN = 11, +}; + enum xe_guc_report_status { XE_GUC_REPORT_STATUS_UNKNOWN = 0x0, XE_GUC_REPORT_STATUS_ACKED = 0x1, @@ -240,4 +272,7 @@ enum xe_guc_g2g_type { #define XE_G2G_DEREGISTER_TILE REG_GENMASK(15, 12) #define XE_G2G_DEREGISTER_TYPE REG_GENMASK(11, 8) +/* invalid type for XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR */ +#define XE_GUC_CAT_ERR_TYPE_INVALID 0xdeadbeef + #endif diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h index 2c627a21648f..ecf748fd87df 100644 --- a/drivers/gpu/drm/xe/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h @@ -6,8 +6,7 @@ #ifndef _ABI_GUC_ERRORS_ABI_H #define _ABI_GUC_ERRORS_ABI_H -enum xe_guc_response_status { - XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0, +enum xe_guc_response { XE_GUC_RESPONSE_ERROR_PROTOCOL = 0x04, XE_GUC_RESPONSE_INVALID_STATE = 0x0A, XE_GUC_RESPONSE_UNSUPPORTED_VERSION = 0x0B, @@ -21,12 +20,20 @@ enum xe_guc_response_status { XE_GUC_RESPONSE_CANNOT_COMPLETE_ACTION = 0x41, XE_GUC_RESPONSE_INVALID_KLV_DATA = 0x50, XE_GUC_RESPONSE_INVALID_PARAMS = 0x60, + XE_GUC_RESPONSE_INVALID_CONTEXT_INDEX = 0x61, + XE_GUC_RESPONSE_INVALID_CONTEXT_REGISTRATION = 0x62, + XE_GUC_RESPONSE_INVALID_DOORBELL_ID = 0x63, + XE_GUC_RESPONSE_INVALID_ENGINE_ID = 0x64, XE_GUC_RESPONSE_INVALID_BUFFER_RANGE = 0x70, XE_GUC_RESPONSE_INVALID_BUFFER = 0x71, + XE_GUC_RESPONSE_BUFFER_ALREADY_REGISTERED = 0x72, XE_GUC_RESPONSE_INVALID_GGTT_ADDRESS = 0x80, XE_GUC_RESPONSE_PENDING_ACTION = 0x90, + XE_GUC_RESPONSE_CONTEXT_NOT_REGISTERED = 0x100, + XE_GUC_RESPONSE_CONTEXT_ALREADY_REGISTERED = 0X101, XE_GUC_RESPONSE_INVALID_SIZE = 0x102, XE_GUC_RESPONSE_MALFORMED_KLV = 0x103, + XE_GUC_RESPONSE_INVALID_CONTEXT = 0x104, XE_GUC_RESPONSE_INVALID_KLV_KEY = 0x105, XE_GUC_RESPONSE_DATA_TOO_LARGE = 0x106, XE_GUC_RESPONSE_VF_MIGRATED = 0x107, @@ -40,10 +47,11 @@ enum xe_guc_response_status { XE_GUC_RESPONSE_CTB_NOT_REGISTERED = 0x304, XE_GUC_RESPONSE_CTB_IN_USE = 0x305, XE_GUC_RESPONSE_CTB_INVALID_DESC = 0x306, + XE_GUC_RESPONSE_HW_TIMEOUT = 0x30C, XE_GUC_RESPONSE_CTB_SOURCE_INVALID_DESCRIPTOR = 0x30D, XE_GUC_RESPONSE_CTB_DESTINATION_INVALID_DESCRIPTOR = 0x30E, XE_GUC_RESPONSE_INVALID_CONFIG_STATE = 0x30F, - XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, + XE_GUC_RESPONSE_GENERIC_FAIL = 0xF000, }; enum xe_guc_load_status { diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 7de8f827281f..0366a9da5977 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -16,6 +16,7 @@ * +===+=======+==============================================================+ * | 0 | 31:16 | **KEY** - KLV key identifier | * | | | - `GuC Self Config KLVs`_ | + * | | | - `GuC Opt In Feature KLVs`_ | * | | | - `GuC VGT Policy KLVs`_ | * | | | - `GuC VF Configuration KLVs`_ | * | | | | @@ -125,6 +126,33 @@ enum { }; /** + * DOC: GuC Opt In Feature KLVs + * + * `GuC KLV`_ keys available for use with OPT_IN_FEATURE_KLV + * + * _`GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE` : 0x4001 + * Adds an extra dword to the XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR G2H + * containing the type of the CAT error. 
On HW that does not support + * reporting the CAT error type, the extra dword is set to 0xdeadbeef. + * + * _`GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH` : 0x4003 + * This KLV enables the Dynamic Inhibit Context Switch optimization, which + * consists in the GuC setting the CTX_CTRL_INHIBIT_SYN_CTX_SWITCH bit to + * zero in the CTX_CONTEXT_CONTROL register of LRCs that are submitted + * to an oversubscribed engine. This will cause those contexts to be + * switched out immediately if they hit an unsatisfied semaphore wait + * (instead of waiting the full timeslice duration). The bit is instead set + * to one if a single context is queued on the engine, to avoid it being + * switched out if there isn't another context that can run in its place. + */ + +#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_KEY 0x4001 +#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_LEN 0u + +#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_KEY 0x4003 +#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN 0u + +/** * DOC: GuC VGT Policy KLVs * * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY. diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h index a473aa6697d0..4fcd3bf6b76f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h @@ -6,37 +6,6 @@ #ifndef __INTEL_PCODE_H__ #define __INTEL_PCODE_H__ -#include "intel_uncore.h" #include "xe_pcode.h" -static inline int -snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val, - int fast_timeout_us, int slow_timeout_ms) -{ - return xe_pcode_write_timeout(__compat_uncore_to_tile(uncore), mbox, val, - slow_timeout_ms ?: 1); -} - -static inline int -snb_pcode_write(struct intel_uncore *uncore, u32 mbox, u32 val) -{ - - return xe_pcode_write(__compat_uncore_to_tile(uncore), mbox, val); -} - -static inline int -snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1) -{ - return xe_pcode_read(__compat_uncore_to_tile(uncore), mbox, val, val1); -} - -static inline int -skl_pcode_request(struct intel_uncore *uncore, u32 mbox, - u32 request, u32 reply_mask, u32 reply, - int timeout_base_ms) -{ - return xe_pcode_request(__compat_uncore_to_tile(uncore), mbox, request, reply_mask, reply, - timeout_base_ms); -} - #endif /* __INTEL_PCODE_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h index 0c1e88e36a1e..d012f02bc84f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -24,13 +24,6 @@ static inline struct xe_mmio *__compat_uncore_to_mmio(struct intel_uncore *uncor return xe_root_tile_mmio(xe); } -static inline struct xe_tile *__compat_uncore_to_tile(struct intel_uncore *uncore) -{ - struct xe_device *xe = container_of(uncore, struct xe_device, uncore); - - return xe_device_get_root_tile(xe); -} - static inline u32 intel_uncore_read(struct intel_uncore *uncore, i915_reg_t i915_reg) { @@ -110,12 +103,13 @@ static inline int intel_wait_for_register(struct intel_uncore *uncore, static inline int intel_wait_for_register_fw(struct intel_uncore *uncore, i915_reg_t i915_reg, u32 mask, - u32 value, unsigned int timeout) + u32 value, unsigned int timeout, + u32 *out_value) { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value, - timeout * 
USEC_PER_MSEC, NULL, false); + timeout * USEC_PER_MSEC, out_value, false); } static inline int diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h new file mode 100644 index 000000000000..69e1935e9cdf --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2013-2021 Intel Corporation + */ + +#ifndef _VLV_IOSF_SB_H_ +#define _VLV_IOSF_SB_H_ + +#include <linux/types.h> + +#include "vlv_iosf_sb_reg.h" + +struct drm_device; + +enum vlv_iosf_sb_unit { + VLV_IOSF_SB_BUNIT, + VLV_IOSF_SB_CCK, + VLV_IOSF_SB_CCU, + VLV_IOSF_SB_DPIO, + VLV_IOSF_SB_DPIO_2, + VLV_IOSF_SB_FLISDSI, + VLV_IOSF_SB_GPIO, + VLV_IOSF_SB_NC, + VLV_IOSF_SB_PUNIT, +}; + +static inline void vlv_iosf_sb_get(struct drm_device *drm, unsigned long ports) +{ +} +static inline u32 vlv_iosf_sb_read(struct drm_device *drm, enum vlv_iosf_sb_unit unit, u32 addr) +{ + return 0; +} +static inline int vlv_iosf_sb_write(struct drm_device *drm, enum vlv_iosf_sb_unit unit, u32 addr, u32 val) +{ + return 0; +} +static inline void vlv_iosf_sb_put(struct drm_device *drm, unsigned long ports) +{ +} + +#endif /* _VLV_IOSF_SB_H_ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb_reg.h index 949f134ce3cf..cb7fa8e794a6 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb_reg.h @@ -3,4 +3,4 @@ * Copyright © 2023 Intel Corporation */ -#include "../../i915/vlv_sideband_reg.h" +#include "../../i915/vlv_iosf_sb_reg.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h deleted file mode 100644 index ec6f12de5727..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h +++ /dev/null @@ -1,132 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2013-2021 Intel Corporation - */ - -#ifndef _VLV_SIDEBAND_H_ -#define _VLV_SIDEBAND_H_ - -#include <linux/types.h> - -#include "vlv_sideband_reg.h" - -enum pipe; -struct drm_i915_private; - -enum { - VLV_IOSF_SB_BUNIT, - VLV_IOSF_SB_CCK, - VLV_IOSF_SB_CCU, - VLV_IOSF_SB_DPIO, - VLV_IOSF_SB_FLISDSI, - VLV_IOSF_SB_GPIO, - VLV_IOSF_SB_NC, - VLV_IOSF_SB_PUNIT, -}; - -static inline void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports) -{ -} -static inline u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg) -{ - return 0; -} -static inline void vlv_iosf_sb_write(struct drm_i915_private *i915, - u8 port, u32 reg, u32 val) -{ -} -static inline void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports) -{ -} -static inline void vlv_bunit_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg) -{ - return 0; -} -static inline void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val) -{ -} -static inline void vlv_bunit_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_cck_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg) -{ - return 0; -} -static inline void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val) -{ -} -static inline void vlv_cck_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_ccu_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg) 
-{ - return 0; -} -static inline void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val) -{ -} -static inline void vlv_ccu_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_dpio_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_dpio_read(struct drm_i915_private *i915, int pipe, int reg) -{ - return 0; -} -static inline void vlv_dpio_write(struct drm_i915_private *i915, - int pipe, int reg, u32 val) -{ -} -static inline void vlv_dpio_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_flisdsi_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg) -{ - return 0; -} -static inline void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val) -{ -} -static inline void vlv_flisdsi_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_nc_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr) -{ - return 0; -} -static inline void vlv_nc_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_punit_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) -{ - return 0; -} -static inline int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val) -{ - return 0; -} -static inline void vlv_punit_put(struct drm_i915_private *i915) -{ -} - -#endif /* _VLV_SIDEBAND_H_ */ diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c index 27437c22bd70..910632f57c3d 100644 --- a/drivers/gpu/drm/xe/display/intel_bo.c +++ b/drivers/gpu/drm/xe/display/intel_bo.c @@ -1,7 +1,12 @@ // SPDX-License-Identifier: MIT /* Copyright © 2024 Intel Corporation */ +#include <drm/drm_cache.h> #include <drm/drm_gem.h> +#include <drm/drm_panic.h> + +#include "intel_fb.h" +#include "intel_display_types.h" #include "xe_bo.h" #include "intel_bo.h" @@ -59,3 +64,89 @@ void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj) { /* FIXME */ } + +struct xe_panic_data { + struct page **pages; + int page; + void *vaddr; +}; + +struct xe_framebuffer { + struct intel_framebuffer base; + struct xe_panic_data panic; +}; + +static inline struct xe_panic_data *to_xe_panic_data(struct intel_framebuffer *fb) +{ + return &container_of_const(fb, struct xe_framebuffer, base)->panic; +} + +static void xe_panic_kunmap(struct xe_panic_data *panic) +{ + if (panic->vaddr) { + drm_clflush_virt_range(panic->vaddr, PAGE_SIZE); + kunmap_local(panic->vaddr); + panic->vaddr = NULL; + } +} + +/* + * The scanout buffer pages are not mapped, so for each pixel, + * use kmap_local_page_try_from_panic() to map the page, and write the pixel. + * Try to keep the map from the previous pixel, to avoid too much map/unmap. 
+ */ +static void xe_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int x, + unsigned int y, u32 color) +{ + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; + struct xe_panic_data *panic = to_xe_panic_data(fb); + struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base)); + unsigned int new_page; + unsigned int offset; + + if (fb->panic_tiling) + offset = fb->panic_tiling(sb->width, x, y); + else + offset = y * sb->pitch[0] + x * sb->format->cpp[0]; + + new_page = offset >> PAGE_SHIFT; + offset = offset % PAGE_SIZE; + if (new_page != panic->page) { + xe_panic_kunmap(panic); + panic->page = new_page; + panic->vaddr = ttm_bo_kmap_try_from_panic(&bo->ttm, + panic->page); + } + if (panic->vaddr) { + u32 *pix = panic->vaddr + offset; + *pix = color; + } +} + +struct intel_framebuffer *intel_bo_alloc_framebuffer(void) +{ + struct xe_framebuffer *xe_fb; + + xe_fb = kzalloc(sizeof(*xe_fb), GFP_KERNEL); + if (xe_fb) + return &xe_fb->base; + return NULL; +} + +int intel_bo_panic_setup(struct drm_scanout_buffer *sb) +{ + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; + struct xe_panic_data *panic = to_xe_panic_data(fb); + + panic->page = -1; + sb->set_pixel = xe_panic_page_set_pixel; + return 0; +} + +void intel_bo_panic_finish(struct intel_framebuffer *fb) +{ + struct xe_panic_data *panic = to_xe_panic_data(fb); + + xe_panic_kunmap(panic); + panic->page = -1; +} diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index e8191562d122..fba9617a75a5 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -5,6 +5,7 @@ #include <drm/drm_fb_helper.h> +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_fb.h" #include "intel_fbdev_fb.h" @@ -65,7 +66,11 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, goto err; } - fb = intel_framebuffer_create(&obj->ttm.base, &mode_cmd); + fb = intel_framebuffer_create(&obj->ttm.base, + drm_get_format_info(dev, + mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd); if (IS_ERR(fb)) { xe_bo_unpin_map_no_vm(obj); goto err; diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 68f064f33d4b..e2e0771cf274 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -20,6 +20,7 @@ #include "intel_audio.h" #include "intel_bw.h" #include "intel_display.h" +#include "intel_display_core.h" #include "intel_display_driver.h" #include "intel_display_irq.h" #include "intel_display_types.h" @@ -38,7 +39,9 @@ static bool has_display(struct xe_device *xe) { - return HAS_DISPLAY(&xe->display); + struct intel_display *display = xe->display; + + return HAS_DISPLAY(display); } /** @@ -46,6 +49,8 @@ static bool has_display(struct xe_device *xe) * early on * @pdev: PCI device * + * Note: This is called before xe or display device creation. + * * Returns: true if probe needs to be deferred, false otherwise */ bool xe_display_driver_probe_defer(struct pci_dev *pdev) @@ -63,6 +68,8 @@ bool xe_display_driver_probe_defer(struct pci_dev *pdev) * Set features and function hooks in @driver that are needed for driving the * display IP. This sets the driver's capability of driving display, regardless * if the device has it enabled + * + * Note: This is called before xe or display device creation. 
*/ void xe_display_driver_set_hooks(struct drm_driver *driver) { @@ -81,37 +88,10 @@ static void unset_display_features(struct xe_device *xe) xe->drm.driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); } -static void display_destroy(struct drm_device *dev, void *dummy) -{ - struct xe_device *xe = to_xe_device(dev); - - destroy_workqueue(xe->display.hotplug.dp_wq); -} - -/** - * xe_display_create - create display struct - * @xe: XE device instance - * - * Initialize all fields used by the display part. - * - * TODO: once everything can be inside a single struct, make the struct opaque - * to the rest of xe and return it to be xe->display. - * - * Returns: 0 on success - */ -int xe_display_create(struct xe_device *xe) -{ - spin_lock_init(&xe->display.fb_tracking.lock); - - xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0); - - return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); -} - static void xe_display_fini_early(void *arg) { struct xe_device *xe = arg; - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -124,7 +104,7 @@ static void xe_display_fini_early(void *arg) int xe_display_init_early(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; int err; if (!xe->info.probe_display) @@ -142,7 +122,9 @@ int xe_display_init_early(struct xe_device *xe) * Fill the dram structure to get the system dram info. This will be * used for memory latency calculation. */ - intel_dram_detect(xe); + err = intel_dram_detect(xe); + if (err) + goto err_opregion; intel_bw_init_hw(display); @@ -168,7 +150,7 @@ err_opregion: static void xe_display_fini(void *arg) { struct xe_device *xe = arg; - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; intel_hpd_poll_fini(display); intel_hdcp_component_fini(display); @@ -178,7 +160,7 @@ static void xe_display_fini(void *arg) int xe_display_init(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; int err; if (!xe->info.probe_display) @@ -193,7 +175,7 @@ int xe_display_init(struct xe_device *xe) void xe_display_register(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -204,7 +186,7 @@ void xe_display_register(struct xe_device *xe) void xe_display_unregister(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -217,7 +199,7 @@ void xe_display_unregister(struct xe_device *xe) void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -228,7 +210,7 @@ void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -239,7 +221,7 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) void xe_display_irq_reset(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -249,7 +231,7 @@ void xe_display_irq_reset(struct xe_device 
*xe) void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -290,7 +272,7 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe) static void xe_display_enable_d3cold(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -313,7 +295,7 @@ static void xe_display_enable_d3cold(struct xe_device *xe) static void xe_display_disable_d3cold(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -337,7 +319,7 @@ static void xe_display_disable_d3cold(struct xe_device *xe) void xe_display_pm_suspend(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; bool s2idle = suspend_to_idle(); if (!xe->info.probe_display) @@ -362,7 +344,7 @@ void xe_display_pm_suspend(struct xe_device *xe) if (has_display(xe)) { intel_display_driver_suspend_access(display); - intel_encoder_suspend_all(&xe->display); + intel_encoder_suspend_all(display); } intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold); @@ -372,7 +354,7 @@ void xe_display_pm_suspend(struct xe_device *xe) void xe_display_pm_shutdown(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -403,7 +385,7 @@ void xe_display_pm_shutdown(struct xe_device *xe) void xe_display_pm_runtime_suspend(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -418,7 +400,7 @@ void xe_display_pm_runtime_suspend(struct xe_device *xe) void xe_display_pm_suspend_late(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; bool s2idle = suspend_to_idle(); if (!xe->info.probe_display) @@ -429,7 +411,7 @@ void xe_display_pm_suspend_late(struct xe_device *xe) void xe_display_pm_runtime_suspend_late(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -447,7 +429,7 @@ void xe_display_pm_runtime_suspend_late(struct xe_device *xe) void xe_display_pm_shutdown_late(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -462,7 +444,7 @@ void xe_display_pm_shutdown_late(struct xe_device *xe) void xe_display_pm_resume_early(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -472,7 +454,7 @@ void xe_display_pm_resume_early(struct xe_device *xe) void xe_display_pm_resume(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -507,7 +489,7 @@ void xe_display_pm_resume(struct xe_device *xe) void xe_display_pm_runtime_resume(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -530,6 +512,17 @@ static void display_device_remove(struct drm_device *dev, void *arg) 
intel_display_device_remove(display); } +/** + * xe_display_probe - probe display and create display struct + * @xe: XE device instance + * + * Initialize all fields used by the display part. + * + * TODO: once everything can be inside a single struct, make the struct opaque + * to the rest of xe and return it to be xe->display. + * + * Returns: 0 on success + */ int xe_display_probe(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -540,11 +533,15 @@ int xe_display_probe(struct xe_device *xe) goto no_display; display = intel_display_device_probe(pdev); + if (IS_ERR(display)) + return PTR_ERR(display); err = drmm_add_action_or_reset(&xe->drm, display_device_remove, display); if (err) return err; + xe->display = display; + if (has_display(xe)) return 0; diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index 46e14f8dee28..e533aa4750bc 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -15,8 +15,6 @@ struct drm_driver; bool xe_display_driver_probe_defer(struct pci_dev *pdev); void xe_display_driver_set_hooks(struct drm_driver *driver); -int xe_display_create(struct xe_device *xe); - int xe_display_probe(struct xe_device *xe); int xe_display_init_early(struct xe_device *xe); @@ -46,8 +44,6 @@ static inline int xe_display_driver_probe_defer(struct pci_dev *pdev) { return 0 static inline void xe_display_driver_set_hooks(struct drm_driver *driver) { } static inline void xe_display_driver_remove(struct xe_device *xe) {} -static inline int xe_display_create(struct xe_device *xe) { return 0; } - static inline int xe_display_probe(struct xe_device *xe) { return 0; } static inline int xe_display_init_early(struct xe_device *xe) { return 0; } diff --git a/drivers/gpu/drm/xe/display/xe_display_rpm.c b/drivers/gpu/drm/xe/display/xe_display_rpm.c index 1955153aadba..3825376e98cc 100644 --- a/drivers/gpu/drm/xe/display/xe_display_rpm.c +++ b/drivers/gpu/drm/xe/display/xe_display_rpm.c @@ -1,13 +1,15 @@ // SPDX-License-Identifier: MIT /* Copyright © 2025 Intel Corporation */ +#include "intel_display_core.h" #include "intel_display_rpm.h" +#include "xe_device.h" #include "xe_device_types.h" #include "xe_pm.h" static struct xe_device *display_to_xe(struct intel_display *display) { - return container_of(display, struct xe_device, display); + return to_xe_device(display->drm); } struct ref_tracker *intel_display_rpm_get_raw(struct intel_display *display) diff --git a/drivers/gpu/drm/xe/display/xe_display_wa.c b/drivers/gpu/drm/xe/display/xe_display_wa.c index 2933ca97d673..68d1387d81a0 100644 --- a/drivers/gpu/drm/xe/display/xe_display_wa.c +++ b/drivers/gpu/drm/xe/display/xe_display_wa.c @@ -3,8 +3,8 @@ * Copyright © 2024 Intel Corporation */ +#include "intel_display_core.h" #include "intel_display_wa.h" - #include "xe_device.h" #include "xe_wa.h" diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index f95375451e2f..9f941fc2e36b 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); - xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -30,12 +27,9 @@ u32 
intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); - xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) @@ -74,9 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + /* * The memory barrier here is to ensure coherency of DSB vs MMIO, * both for weak ordering archs and discrete cards. */ - xe_device_wmb(dsb_buf->vma->bo->tile->xe); + xe_device_wmb(xe); + xe_device_l2_flush(xe); } diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index d918ae1c8061..c38fba18effe 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -6,6 +6,7 @@ #include <drm/ttm/ttm_bo.h> #include "i915_vma.h" +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_dpt.h" #include "intel_fb.h" @@ -23,6 +24,7 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ struct xe_device *xe = xe_bo_device(bo); struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; u32 column, row; + u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); /* TODO: Maybe rewrite so we can traverse the bo addresses sequentially, * by writing dpt/ggtt in a different order? @@ -32,10 +34,9 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ u32 src_idx = src_stride * (height - 1) + column + bo_ofs; for (row = 0; row < height; row++) { - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_NONE]); + u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE); - iosys_map_wr(map, *dpt_ofs, u64, pte); + iosys_map_wr(map, *dpt_ofs, u64, pte | addr); *dpt_ofs += 8; src_idx -= src_stride; } @@ -55,17 +56,15 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, { struct xe_device *xe = xe_bo_device(bo); struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; - u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index) - = ggtt->pt_ops->pte_encode_bo; u32 column, row; + u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); for (row = 0; row < height; row++) { u32 src_idx = src_stride * row + bo_ofs; for (column = 0; column < width; column++) { - iosys_map_wr(map, *dpt_ofs, u64, - pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_NONE])); + u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE); + iosys_map_wr(map, *dpt_ofs, u64, pte | addr); *dpt_ofs += 8; src_idx++; @@ -129,13 +128,13 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, return PTR_ERR(dpt); if (view->type == I915_GTT_VIEW_NORMAL) { + u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); u32 x; for (x = 0; x < size / XE_PAGE_SIZE; x++) { - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_NONE]); + u64 addr = xe_bo_addr(bo, x * XE_PAGE_SIZE, XE_PAGE_SIZE); - iosys_map_wr(&dpt->vmap, x * 8, u64, pte); + iosys_map_wr(&dpt->vmap, x * 8, u64, pte | addr); } } else if (view->type == 
I915_GTT_VIEW_REMAPPED) { const struct intel_remapped_info *remap_info = &view->remapped; @@ -164,6 +163,9 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, vma->dpt = dpt; vma->node = dpt->ggtt_node[tile0->id]; + + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return 0; } @@ -173,15 +175,15 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo { struct xe_device *xe = xe_bo_device(bo); u32 column, row; + u64 pte = ggtt->pt_ops->pte_encode_flags(bo, xe->pat.idx[XE_CACHE_NONE]); for (column = 0; column < width; column++) { u32 src_idx = src_stride * (height - 1) + column + bo_ofs; for (row = 0; row < height; row++) { - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_NONE]); + u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE); - ggtt->pt_ops->ggtt_set_pte(ggtt, *ggtt_ofs, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, *ggtt_ofs, pte | addr); *ggtt_ofs += XE_PAGE_SIZE; src_idx -= src_stride; } @@ -199,14 +201,15 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, struct drm_gem_object *obj = intel_fb_bo(&fb->base); struct xe_bo *bo = gem_to_xe_bo(obj); struct xe_device *xe = to_xe_device(fb->base.dev); - struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; + struct xe_tile *tile0 = xe_device_get_root_tile(xe); + struct xe_ggtt *ggtt = tile0->mem.ggtt; u32 align; int ret; /* TODO: Consider sharing framebuffer mapping? * embed i915_vma inside intel_framebuffer */ - xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); + xe_pm_runtime_get_noresume(xe); ret = mutex_lock_interruptible(&ggtt->lock); if (ret) goto out; @@ -215,29 +218,22 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) align = max_t(u32, align, SZ_64K); - if (bo->ggtt_node[ggtt->tile->id] && view->type == I915_GTT_VIEW_NORMAL) { - vma->node = bo->ggtt_node[ggtt->tile->id]; + if (bo->ggtt_node[tile0->id] && view->type == I915_GTT_VIEW_NORMAL) { + vma->node = bo->ggtt_node[tile0->id]; } else if (view->type == I915_GTT_VIEW_NORMAL) { - u32 x, size = bo->ttm.base.size; - vma->node = xe_ggtt_node_init(ggtt); if (IS_ERR(vma->node)) { ret = PTR_ERR(vma->node); goto out_unlock; } - ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); + ret = xe_ggtt_node_insert_locked(vma->node, xe_bo_size(bo), align, 0); if (ret) { xe_ggtt_node_fini(vma->node); goto out_unlock; } - for (x = 0; x < size; x += XE_PAGE_SIZE) { - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, - xe->pat.idx[XE_CACHE_NONE]); - - ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node->base.start + x, pte); - } + xe_ggtt_map_bo(ggtt, vma->node, bo, xe->pat.idx[XE_CACHE_NONE]); } else { u32 i, ggtt_ofs; const struct intel_rotation_info *rot_info = &view->rotated; @@ -271,7 +267,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, out_unlock: mutex_unlock(&ggtt->lock); out: - xe_pm_runtime_put(tile_to_xe(ggtt->tile)); + xe_pm_runtime_put(xe); return ret; } @@ -333,8 +329,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; - /* Ensure DPT writes are flushed */ - xe_device_l2_flush(xe); return vma; err_unpin: @@ -348,7 +342,7 @@ err: static void __xe_unpin_fb_vma(struct i915_vma *vma) { - u8 tile_id = vma->node->ggtt->tile->id; + u8 tile_id = xe_device_get_root_tile(xe_bo_device(vma->bo))->id; if (!refcount_dec_and_test(&vma->ref)) return; @@ -389,6 +383,7 @@ static bool reuse_vma(struct 
intel_plane_state *new_plane_state, { struct intel_framebuffer *fb = to_intel_framebuffer(new_plane_state->hw.fb); struct xe_device *xe = to_xe_device(fb->base.dev); + struct intel_display *display = xe->display; struct i915_vma *vma; if (old_plane_state->hw.fb == new_plane_state->hw.fb && @@ -399,8 +394,8 @@ static bool reuse_vma(struct intel_plane_state *new_plane_state, goto found; } - if (fb == intel_fbdev_framebuffer(xe->display.fbdev.fbdev)) { - vma = intel_fbdev_vma_pointer(xe->display.fbdev.fbdev); + if (fb == intel_fbdev_framebuffer(display->fbdev.fbdev)) { + vma = intel_fbdev_vma_pointer(display->fbdev.fbdev); if (vma) goto found; } @@ -463,3 +458,8 @@ u64 intel_dpt_offset(struct i915_vma *dpt_vma) { return 0; } + +void intel_fb_get_map(struct i915_vma *vma, struct iosys_map *map) +{ + *map = vma->bo->vmap; +} diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index b35a6f201d4a..30f1073141fc 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -85,7 +85,7 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe, cmd_in = xe_bo_ggtt_addr(bo); cmd_out = cmd_in + PAGE_SIZE; - xe_map_memset(xe, &bo->vmap, 0, 0, bo->size); + xe_map_memset(xe, &bo->vmap, 0, 0, xe_bo_size(bo)); gsc_context->hdcp_bo = bo; gsc_context->hdcp_cmd_in = cmd_in; diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 6502b8274173..dcbc4b2d3fd9 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -10,14 +10,15 @@ #include "xe_ggtt.h" #include "xe_mmio.h" -#include "i915_reg.h" -#include "intel_atomic_plane.h" #include "intel_crtc.h" #include "intel_display.h" +#include "intel_display_core.h" +#include "intel_display_regs.h" #include "intel_display_types.h" #include "intel_fb.h" #include "intel_fb_pin.h" #include "intel_frontbuffer.h" +#include "intel_plane.h" #include "intel_plane_initial.h" #include "xe_bo.h" #include "xe_wa.h" @@ -87,12 +88,8 @@ initial_plane_bo(struct xe_device *xe, base = round_down(plane_config->base, page_size); if (IS_DGFX(xe)) { - u64 __iomem *gte = tile0->mem.ggtt->gsm; - u64 pte; + u64 pte = xe_ggtt_read_pte(tile0->mem.ggtt, base); - gte += base / XE_PAGE_SIZE; - - pte = ioread64(gte); if (!(pte & XE_GGTT_PTE_DM)) { drm_err(&xe->drm, "Initial plane programming missing DM bit\n"); @@ -187,7 +184,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; if (intel_framebuffer_init(to_intel_framebuffer(fb), - &bo->ttm.base, &mode_cmd)) { + &bo->ttm.base, fb->format, &mode_cmd)) { drm_dbg_kms(&xe->drm, "intel fb init failed\n"); goto err_bo; } diff --git a/drivers/gpu/drm/xe/display/xe_tdf.c b/drivers/gpu/drm/xe/display/xe_tdf.c index 2a7fccbeb1d5..78bda4c47874 100644 --- a/drivers/gpu/drm/xe/display/xe_tdf.c +++ b/drivers/gpu/drm/xe/display/xe_tdf.c @@ -3,9 +3,9 @@ * Copyright © 2024 Intel Corporation */ -#include "xe_device.h" -#include "intel_display_types.h" +#include "intel_display_core.h" #include "intel_tdf.h" +#include "xe_device.h" void intel_td_flush(struct intel_display *display) { diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h index 7702364b65f1..9b66cc972a63 100644 --- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h @@ -16,6 +16,10 @@ #define MTL_GSC_HECI1_BASE 0x00116000 #define MTL_GSC_HECI2_BASE 0x00117000 +#define DG1_GSC_HECI2_BASE 0x00259000 +#define 
PVC_GSC_HECI2_BASE 0x00285000 +#define DG2_GSC_HECI2_BASE 0x00374000 + #define HECI_H_CSR(base) XE_REG((base) + 0x4) #define HECI_H_CSR_IE REG_BIT(0) #define HECI_H_CSR_IS REG_BIT(1) diff --git a/drivers/gpu/drm/xe/regs/xe_i2c_regs.h b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h new file mode 100644 index 000000000000..af781c8e4a80 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _XE_I2C_REGS_H_ +#define _XE_I2C_REGS_H_ + +#include <linux/pci_regs.h> + +#include "xe_reg_defs.h" +#include "xe_regs.h" + +#define I2C_BRIDGE_OFFSET (SOC_BASE + 0xd9000) +#define I2C_CONFIG_SPACE_OFFSET (SOC_BASE + 0xf6000) +#define I2C_MEM_SPACE_OFFSET (SOC_BASE + 0xf7400) + +#define REG_SG_REMAP_ADDR_PREFIX XE_REG(SOC_BASE + 0x0164) +#define REG_SG_REMAP_ADDR_POSTFIX XE_REG(SOC_BASE + 0x0168) + +#define I2C_CONFIG_CMD XE_REG(I2C_CONFIG_SPACE_OFFSET + PCI_COMMAND) +#define I2C_CONFIG_PMCSR XE_REG(I2C_CONFIG_SPACE_OFFSET + 0x84) + +#endif /* _XE_I2C_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h index f0ecfcac4003..13635e4331d4 100644 --- a/drivers/gpu/drm/xe/regs/xe_irq_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h @@ -19,6 +19,7 @@ #define MASTER_IRQ REG_BIT(31) #define GU_MISC_IRQ REG_BIT(29) #define DISPLAY_IRQ REG_BIT(16) +#define I2C_IRQ REG_BIT(12) #define GT_DW_IRQ(x) REG_BIT(x) /* diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 994af591a2e8..1b101edb838b 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -12,9 +12,13 @@ #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) #define CTX_BB_PER_CTX_PTR (0x12 + 1) +#define CTX_CS_INDIRECT_CTX (0x14 + 1) +#define CTX_CS_INDIRECT_CTX_OFFSET (0x16 + 1) #define CTX_TIMESTAMP (0x22 + 1) #define CTX_TIMESTAMP_UDW (0x24 + 1) #define CTX_INDIRECT_RING_STATE (0x26 + 1) +#define CTX_ACC_CTR_THOLD (0x2a + 1) +#define CTX_ASID (0x2e + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) @@ -36,4 +40,7 @@ #define INDIRECT_CTX_RING_START_UDW (0x08 + 1) #define INDIRECT_CTX_RING_CTL (0x0a + 1) +#define CTX_INDIRECT_CTX_OFFSET_MASK REG_GENMASK(15, 6) +#define CTX_INDIRECT_CTX_OFFSET_DEFAULT REG_FIELD_PREP(CTX_INDIRECT_CTX_OFFSET_MASK, 0xd) + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h index 5394a1373a6b..ef2bf984723f 100644 --- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h @@ -40,6 +40,7 @@ #define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) #define PWR_LIM_VAL REG_GENMASK(14, 0) #define PWR_LIM_EN REG_BIT(15) +#define PWR_LIM REG_GENMASK(15, 0) #define PWR_LIM_TIME REG_GENMASK(23, 17) #define PWR_LIM_TIME_X REG_GENMASK(23, 22) #define PWR_LIM_TIME_Y REG_GENMASK(21, 17) diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index a79ad2da070c..e693a50706f8 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -97,4 +97,7 @@ #define OAM_STATUS(base) XE_REG((base) + OAM_STATUS_OFFSET) #define OAM_MMIO_TRG(base) XE_REG((base) + OAM_MMIO_TRG_OFFSET) +#define OAM_COMPRESSION_T3_CONTROL XE_REG(0x1c2e00) +#define OAM_LAT_MEASURE_ENABLE REG_BIT(4) + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h index c556a04670ee..fb097607b86c 100644 --- 
a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h @@ -18,12 +18,10 @@ #define PVC_GT0_PLATFORM_ENERGY_STATUS XE_REG(0x28106c) #define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080) -#define BMG_PACKAGE_ENERGY_STATUS XE_REG(0x138120) #define BMG_FAN_1_SPEED XE_REG(0x138140) #define BMG_FAN_2_SPEED XE_REG(0x138170) #define BMG_FAN_3_SPEED XE_REG(0x1381a0) #define BMG_VRAM_TEMPERATURE XE_REG(0x1382c0) #define BMG_PACKAGE_TEMPERATURE XE_REG(0x138434) -#define BMG_PLATFORM_ENERGY_STATUS XE_REG(0x138458) #endif /* _XE_PCODE_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h index f45abcd96ba8..2995d72c3f78 100644 --- a/drivers/gpu/drm/xe/regs/xe_pmt.h +++ b/drivers/gpu/drm/xe/regs/xe_pmt.h @@ -5,11 +5,16 @@ #ifndef _XE_PMT_H_ #define _XE_PMT_H_ -#define SOC_BASE 0x280000 +#include "xe_regs.h" #define BMG_PMT_BASE_OFFSET 0xDB000 #define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET) +#define PUNIT_TELEMETRY_GUID XE_REG(BMG_DISCOVERY_OFFSET + 0x4) +#define BMG_ENERGY_STATUS_PMT_OFFSET (0x30) +#define ENERGY_PKG REG_GENMASK64(31, 0) +#define ENERGY_CARD REG_GENMASK64(63, 32) + #define BMG_TELEMETRY_BASE_OFFSET 0xE0000 #define BMG_TELEMETRY_OFFSET (SOC_BASE + BMG_TELEMETRY_BASE_OFFSET) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 3abb17d2ca33..1926b4044314 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -7,6 +7,8 @@ #include "regs/xe_reg_defs.h" +#define SOC_BASE 0x280000 + #define GU_CNTL_PROTECTED XE_REG(0x10100C) #define DRIVERINT_FLR_DIS REG_BIT(31) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 378dcd0fb414..bb469096d072 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -106,7 +106,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, } /* Check last CCS value, or at least last value in page. */ - offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size); + offset = xe_device_ccs_bytes(tile_to_xe(tile), xe_bo_size(bo)); offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1; if (cpu_map[offset] != get_val) { KUNIT_FAIL(test, @@ -514,9 +514,9 @@ static int shrink_test_run_device(struct xe_device *xe) * other way around, they may not be subject to swapping... 
*/ if (alloced < purgeable) { - xe_ttm_tt_account_subtract(&xe_tt->ttm); + xe_ttm_tt_account_subtract(xe, &xe_tt->ttm); xe_tt->purgeable = true; - xe_ttm_tt_account_add(&xe_tt->ttm); + xe_ttm_tt_account_add(xe, &xe_tt->ttm); bo->ttm.priority = 0; spin_lock(&bo->ttm.bdev->lru_lock); ttm_bo_move_to_lru_tail(&bo->ttm); diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c deleted file mode 100644 index b683585db852..000000000000 --- a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 AND MIT -/* - * Copyright © 2024 Intel Corporation - */ - -#include <kunit/test.h> - -#include "xe_device.h" -#include "xe_kunit_helpers.h" -#include "xe_pci_test.h" - -static int pf_service_test_init(struct kunit *test) -{ - struct xe_pci_fake_data fake = { - .sriov_mode = XE_SRIOV_MODE_PF, - .platform = XE_TIGERLAKE, /* some random platform */ - .subplatform = XE_SUBPLATFORM_NONE, - }; - struct xe_device *xe; - struct xe_gt *gt; - - test->priv = &fake; - xe_kunit_helper_xe_device_test_init(test); - - xe = test->priv; - KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); - - gt = xe_device_get_gt(xe, 0); - pf_init_versions(gt); - - /* - * sanity check: - * - all supported platforms VF/PF ABI versions must be defined - * - base version can't be newer than latest - */ - KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.latest.major); - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.minor, - gt->sriov.pf.service.version.latest.minor); - - test->priv = gt; - return 0; -} - -static void pf_negotiate_any(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, VF2PF_HANDSHAKE_MAJOR_ANY, - VF2PF_HANDSHAKE_MINOR_ANY, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_base_match(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.base.minor); -} - -static void pf_negotiate_base_newer(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_GE(test, minor, gt->sriov.pf.service.version.base.minor); - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); - else - KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); -} - -static void pf_negotiate_base_next(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - 
pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major + 1, 0, - &major, &minor)); - KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_LE(test, major, gt->sriov.pf.service.version.latest.major); - if (major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); - else - KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); -} - -static void pf_negotiate_base_older(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (!gt->sriov.pf.service.version.base.minor) - kunit_skip(test, "no older minor\n"); - - KUNIT_ASSERT_NE(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor - 1, - &major, &minor)); -} - -static void pf_negotiate_base_prev(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_NE(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major - 1, 1, - &major, &minor)); -} - -static void pf_negotiate_latest_match(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_newer(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_next(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major + 1, 0, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_older(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (!gt->sriov.pf.service.version.latest.minor) - kunit_skip(test, "no older minor\n"); - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor - 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor - 1); -} - -static void pf_negotiate_latest_prev(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - kunit_skip(test, "no prev major"); - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major - 1, - gt->sriov.pf.service.version.base.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major - 1); - KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); -} - -static struct kunit_case pf_service_test_cases[] = { - KUNIT_CASE(pf_negotiate_any), - 
KUNIT_CASE(pf_negotiate_base_match), - KUNIT_CASE(pf_negotiate_base_newer), - KUNIT_CASE(pf_negotiate_base_next), - KUNIT_CASE(pf_negotiate_base_older), - KUNIT_CASE(pf_negotiate_base_prev), - KUNIT_CASE(pf_negotiate_latest_match), - KUNIT_CASE(pf_negotiate_latest_newer), - KUNIT_CASE(pf_negotiate_latest_next), - KUNIT_CASE(pf_negotiate_latest_older), - KUNIT_CASE(pf_negotiate_latest_prev), - {} -}; - -static struct kunit_suite pf_service_suite = { - .name = "pf_service", - .test_cases = pf_service_test_cases, - .init = pf_service_test_init, -}; - -kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c index 6faffcd74869..d266882adc0e 100644 --- a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c +++ b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c @@ -32,7 +32,7 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device * bo->tile = tile; bo->ttm.bdev = &xe->ttm; - bo->size = size; + bo->ttm.base.size = size; iosys_map_set_vaddr(&bo->vmap, buf); if (flags & XE_BO_FLAG_GGTT) { @@ -42,10 +42,8 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device * KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo->ggtt_node[tile->id]); KUNIT_ASSERT_EQ(test, 0, - drm_mm_insert_node_in_range(&ggtt->mm, - &bo->ggtt_node[tile->id]->base, - bo->size, SZ_4K, - 0, 0, U64_MAX, 0)); + xe_ggtt_node_insert(bo->ggtt_node[tile->id], + xe_bo_size(bo), SZ_4K)); } return bo; @@ -67,8 +65,9 @@ static int guc_buf_test_init(struct kunit *test) ggtt = xe_device_get_root_tile(test->priv)->mem.ggtt; guc = &xe_device_get_gt(test->priv, 0)->uc.guc; - drm_mm_init(&ggtt->mm, DUT_GGTT_START, DUT_GGTT_SIZE); - mutex_init(&ggtt->lock); + KUNIT_ASSERT_EQ(test, 0, + xe_ggtt_init_kunit(ggtt, DUT_GGTT_START, + DUT_GGTT_START + DUT_GGTT_SIZE)); kunit_activate_static_stub(test, xe_managed_bo_create_pin_map, replacement_xe_managed_bo_create_pin_map); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 4a65e3103f77..edd1e701aa1c 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -74,13 +74,13 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, { struct xe_device *xe = tile_to_xe(m->tile); u64 retval, expected = 0; - bool big = bo->size >= SZ_2M; + bool big = xe_bo_size(bo) >= SZ_2M; struct dma_fence *fence; const char *str = big ? "Copying big bo" : "Copying small bo"; int err; struct xe_bo *remote = xe_bo_create_locked(xe, m->tile, NULL, - bo->size, + xe_bo_size(bo), ttm_bo_type_kernel, region | XE_BO_FLAG_NEEDS_CPU_ACCESS | @@ -105,7 +105,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, goto out_unlock; } - xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); + xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote)); fence = xe_migrate_clear(m, remote, remote->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); if (!sanity_fence_failed(xe, fence, big ? 
"Clearing remote big bo" : @@ -113,15 +113,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &remote->vmap, 0, u64); check(retval, expected, "remote first offset should be cleared", test); - retval = xe_map_rd(xe, &remote->vmap, remote->size - 8, u64); + retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(remote) - 8, u64); check(retval, expected, "remote last offset should be cleared", test); } dma_fence_put(fence); /* Try to copy 0xc0 from remote to vram with 2MB or 64KiB/4KiB pages */ - xe_map_memset(xe, &remote->vmap, 0, 0xc0, remote->size); - xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size); + xe_map_memset(xe, &remote->vmap, 0, 0xc0, xe_bo_size(remote)); + xe_map_memset(xe, &bo->vmap, 0, 0xd0, xe_bo_size(bo)); expected = 0xc0c0c0c0c0c0c0c0; fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource, @@ -131,15 +131,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &bo->vmap, 0, u64); check(retval, expected, "remote -> vram bo first offset should be copied", test); - retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64); + retval = xe_map_rd(xe, &bo->vmap, xe_bo_size(bo) - 8, u64); check(retval, expected, "remote -> vram bo offset should be copied", test); } dma_fence_put(fence); /* And other way around.. slightly hacky.. */ - xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); - xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size); + xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote)); + xe_map_memset(xe, &bo->vmap, 0, 0xc0, xe_bo_size(bo)); fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource, remote->ttm.resource, false); @@ -148,7 +148,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &remote->vmap, 0, u64); check(retval, expected, "vram -> remote bo first offset should be copied", test); - retval = xe_map_rd(xe, &remote->vmap, bo->size - 8, u64); + retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(bo) - 8, u64); check(retval, expected, "vram -> remote bo last offset should be copied", test); } @@ -245,9 +245,9 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) if (m->q->vm->flags & XE_VM_FLAG_64K) expected |= XE_PTE_PS64; if (xe_bo_is_vram(pt)) - xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); + xe_res_first(pt->ttm.resource, 0, xe_bo_size(pt), &src_it); else - xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it); + xe_res_first_sg(xe_bo_sg(pt), 0, xe_bo_size(pt), &src_it); emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false, &src_it, XE_PAGE_SIZE, pt->ttm.resource); @@ -276,7 +276,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* Clear a small bo */ kunit_info(test, "Clearing small buffer object\n"); - xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); + xe_map_memset(xe, &tiny->vmap, 0, 0x22, xe_bo_size(tiny)); expected = 0; fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); @@ -286,7 +286,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) dma_fence_put(fence); retval = xe_map_rd(xe, &tiny->vmap, 0, u32); check(retval, expected, "Command clear small first value", test); - retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32); + retval = xe_map_rd(xe, &tiny->vmap, xe_bo_size(tiny) - 4, u32); check(retval, expected, "Command clear small last value", test); kunit_info(test, "Copying small buffer object to system\n"); @@ -298,7 +298,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit 
*test) /* Clear a big bo */ kunit_info(test, "Clearing big buffer object\n"); - xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); + xe_map_memset(xe, &big->vmap, 0, 0x11, xe_bo_size(big)); expected = 0; fence = xe_migrate_clear(m, big, big->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); @@ -308,7 +308,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) dma_fence_put(fence); retval = xe_map_rd(xe, &big->vmap, 0, u32); check(retval, expected, "Command clear big first value", test); - retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32); + retval = xe_map_rd(xe, &big->vmap, xe_bo_size(big) - 4, u32); check(retval, expected, "Command clear big last value", test); kunit_info(test, "Copying big buffer object to system\n"); @@ -370,7 +370,7 @@ static struct dma_fence *blt_copy(struct xe_tile *tile, struct xe_migrate *m = tile->migrate; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; - u64 size = src_bo->size; + u64 size = xe_bo_size(src_bo); struct xe_res_cursor src_it, dst_it; struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource; u64 src_L0_ofs, dst_L0_ofs; @@ -498,7 +498,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, long ret; expected = 0xd0d0d0d0d0d0d0d0; - xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size); + xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo)); fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test); if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) { @@ -523,7 +523,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); check(retval, expected, "Clear evicted vram data first value", test); - retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64); + retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64); check(retval, expected, "Clear evicted vram data last value", test); fence = blt_copy(tile, vram_bo, ccs_bo, @@ -532,7 +532,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64); check(retval, 0, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64); + retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64); check(retval, 0, "Clear ccs data last value", test); } dma_fence_put(fence); @@ -562,7 +562,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); check(retval, expected, "Restored value must be equal to initial value", test); - retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64); + retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64); check(retval, expected, "Restored value must be equal to initial value", test); fence = blt_copy(tile, vram_bo, ccs_bo, @@ -570,7 +570,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) { retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64); check(retval, 0, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64); + retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64); check(retval, 0, "Clear ccs data last value", test); } dma_fence_put(fence); @@ -583,7 +583,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, u64 expected, retval; expected = 0xd0d0d0d0d0d0d0d0; - xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, 
sys_bo->size); + xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo)); fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test); if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) { @@ -597,7 +597,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Decompressed value must be equal to initial value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Decompressed value must be equal to initial value", test); } dma_fence_put(fence); @@ -615,7 +615,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Clear main buffer first value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Clear main buffer last value", test); } dma_fence_put(fence); @@ -625,7 +625,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Clear ccs data last value", test); } dma_fence_put(fence); diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 1d3e2e50c355..9c715e59f030 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -12,49 +12,79 @@ #include <kunit/test-bug.h> #include <kunit/visibility.h> +static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc) +{ + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%u.%02u %s", + param->verx100 / 100, param->verx100 % 100, param->name); +} + +KUNIT_ARRAY_PARAM(graphics_ip, graphics_ips, xe_ip_kunit_desc); +KUNIT_ARRAY_PARAM(media_ip, media_ips, xe_ip_kunit_desc); + +static void xe_pci_id_kunit_desc(const struct pci_device_id *param, char *desc) +{ + const struct xe_device_desc *dev_desc = + (const struct xe_device_desc *)param->driver_data; + + if (dev_desc) + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "0x%X (%s)", + param->device, dev_desc->platform_name); +} + +KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc); + /** - * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs - * @xe_fn: Function to call for each device. + * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * - * This function iterates over the descriptors for all graphics IPs recognized - * by the driver and calls @xe_fn: for each one of them. + * This function prepares struct xe_ip parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. 
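Aside, not part of the patch: a minimal sketch of how one of these generators is typically consumed. KUNIT_CASE_PARAM() binds the generator to a test function, and each invocation receives the current array element through test->param_value, the same pattern the converted xe_pci_test.c cases further below rely on. The test and case-array names here are invented.

	static void example_graphics_ip_case(struct kunit *test)
	{
		const struct xe_ip *ip = test->param_value;	/* element yielded by the generator */

		KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ip->desc);
		kunit_info(test, "IP %u.%02u (%s)\n",
			   ip->verx100 / 100, ip->verx100 % 100, ip->name);
	}

	static struct kunit_case example_cases[] = {
		KUNIT_CASE_PARAM(example_graphics_ip_case, xe_pci_graphics_ip_gen_param),
		{}
	};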
+ * + * Return: pointer to the next parameter or NULL if no more parameters */ -void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn) +const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc) { - const struct xe_graphics_desc *desc, *last = NULL; - - for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) { - desc = graphics_ips[i].desc; - if (desc == last) - continue; - - xe_fn(desc); - last = desc; - } + return graphics_ip_gen_params(prev, desc); } -EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_graphics_ip); +EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param); /** - * xe_call_for_each_media_ip - Iterate over all recognized media IPs - * @xe_fn: Function to call for each device. + * xe_pci_media_ip_gen_param - Generate media struct xe_ip parameters + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares struct xe_ip parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. * - * This function iterates over the descriptors for all media IPs recognized - * by the driver and calls @xe_fn: for each one of them. + * Return: pointer to the next parameter or NULL if no more parameters */ -void xe_call_for_each_media_ip(xe_media_fn xe_fn) +const void *xe_pci_media_ip_gen_param(const void *prev, char *desc) { - const struct xe_media_desc *desc, *last = NULL; + return media_ip_gen_params(prev, desc); +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param); - for (int i = 0; i < ARRAY_SIZE(media_ips); i++) { - desc = media_ips[i].desc; - if (desc == last) - continue; +/** + * xe_pci_id_gen_param - Generate struct pci_device_id parameters + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares struct pci_device_id parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. + * + * Return: pointer to the next parameter or NULL if no more parameters + */ +const void *xe_pci_id_gen_param(const void *prev, char *desc) +{ + const struct pci_device_id *pci = pci_id_gen_params(prev, desc); - xe_fn(desc); - last = desc; - } + return pci->driver_data ? 
pci : NULL; } -EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_media_ip); +EXPORT_SYMBOL_IF_KUNIT(xe_pci_id_gen_param); static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid) diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index 744a37583d2d..37b344df2dc3 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -14,9 +14,10 @@ #include "xe_pci_test.h" #include "xe_pci_types.h" -static void check_graphics_ip(const struct xe_graphics_desc *graphics) +static void check_graphics_ip(struct kunit *test) { - struct kunit *test = kunit_get_current_test(); + const struct xe_ip *param = test->param_value; + const struct xe_graphics_desc *graphics = param->desc; u64 mask = graphics->hw_engine_mask; /* RCS, CCS, and BCS engines are allowed on the graphics IP */ @@ -28,9 +29,10 @@ static void check_graphics_ip(const struct xe_graphics_desc *graphics) KUNIT_ASSERT_EQ(test, mask, 0); } -static void check_media_ip(const struct xe_media_desc *media) +static void check_media_ip(struct kunit *test) { - struct kunit *test = kunit_get_current_test(); + const struct xe_ip *param = test->param_value; + const struct xe_media_desc *media = param->desc; u64 mask = media->hw_engine_mask; /* VCS, VECS and GSCCS engines are allowed on the media IP */ @@ -42,19 +44,21 @@ static void check_media_ip(const struct xe_media_desc *media) KUNIT_ASSERT_EQ(test, mask, 0); } -static void xe_gmdid_graphics_ip(struct kunit *test) +static void check_platform_gt_count(struct kunit *test) { - xe_call_for_each_graphics_ip(check_graphics_ip); -} + const struct pci_device_id *pci = test->param_value; + const struct xe_device_desc *desc = + (const struct xe_device_desc *)pci->driver_data; + int max_gt = desc->max_gt_per_tile; -static void xe_gmdid_media_ip(struct kunit *test) -{ - xe_call_for_each_media_ip(check_media_ip); + KUNIT_ASSERT_GT(test, max_gt, 0); + KUNIT_ASSERT_LE(test, max_gt, XE_MAX_GT_PER_TILE); } static struct kunit_case xe_pci_tests[] = { - KUNIT_CASE(xe_gmdid_graphics_ip), - KUNIT_CASE(xe_gmdid_media_ip), + KUNIT_CASE_PARAM(check_graphics_ip, xe_pci_graphics_ip_gen_param), + KUNIT_CASE_PARAM(check_media_ip, xe_pci_media_ip_gen_param), + KUNIT_CASE_PARAM(check_platform_gt_count, xe_pci_id_gen_param), {} }; diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index ede46800aff1..ce4d2b86b778 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -12,15 +12,6 @@ #include "xe_sriov_types.h" struct xe_device; -struct xe_graphics_desc; -struct xe_media_desc; - -typedef int (*xe_device_fn)(struct xe_device *); -typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *); -typedef void (*xe_media_fn)(const struct xe_media_desc *); - -void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); -void xe_call_for_each_media_ip(xe_media_fn xe_fn); struct xe_pci_fake_data { enum xe_sriov_mode sriov_mode; @@ -34,6 +25,9 @@ struct xe_pci_fake_data { int xe_pci_fake_device_init(struct xe_device *xe); +const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc); +const void *xe_pci_media_ip_gen_param(const void *prev, char *desc); +const void *xe_pci_id_gen_param(const void *prev, char *desc); const void *xe_pci_live_device_gen_param(const void *prev, char *desc); #endif diff --git a/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c new file mode 100644 index 
000000000000..ba95e29b597d --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2024-2025 Intel Corporation + */ + +#include <kunit/test.h> + +#include "xe_device.h" +#include "xe_kunit_helpers.h" +#include "xe_pci_test.h" + +static int pf_service_test_init(struct kunit *test) +{ + struct xe_pci_fake_data fake = { + .sriov_mode = XE_SRIOV_MODE_PF, + .platform = XE_TIGERLAKE, /* some random platform */ + .subplatform = XE_SUBPLATFORM_NONE, + }; + struct xe_device *xe; + + test->priv = &fake; + xe_kunit_helper_xe_device_test_init(test); + + xe = test->priv; + KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); + + xe_sriov_pf_service_init(xe); + /* + * sanity check: + * - all supported platforms VF/PF ABI versions must be defined + * - base version can't be newer than latest + */ + KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.latest.major); + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.minor, + xe->sriov.pf.service.version.latest.minor); + return 0; +} + +static void pf_negotiate_any(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, VF2PF_HANDSHAKE_MAJOR_ANY, + VF2PF_HANDSHAKE_MINOR_ANY, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_base_match(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.base.minor); +} + +static void pf_negotiate_base_newer(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_GE(test, minor, xe->sriov.pf.service.version.base.minor); + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_next(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_LE(test, major, xe->sriov.pf.service.version.latest.major); + if (major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_older(struct kunit *test) +{ + struct xe_device 
*xe = test->priv; + u32 major, minor; + + if (!xe->sriov.pf.service.version.base.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor - 1, + &major, &minor)); +} + +static void pf_negotiate_base_prev(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major - 1, 1, + &major, &minor)); +} + +static void pf_negotiate_latest_match(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_newer(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_next(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_older(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (!xe->sriov.pf.service.version.latest.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor - 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor - 1); +} + +static void pf_negotiate_latest_prev(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + kunit_skip(test, "no prev major"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major - 1, + xe->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major - 1); + KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major); +} + +static struct kunit_case pf_service_test_cases[] = { + KUNIT_CASE(pf_negotiate_any), + KUNIT_CASE(pf_negotiate_base_match), + KUNIT_CASE(pf_negotiate_base_newer), + KUNIT_CASE(pf_negotiate_base_next), + KUNIT_CASE(pf_negotiate_base_older), + KUNIT_CASE(pf_negotiate_base_prev), + KUNIT_CASE(pf_negotiate_latest_match), + KUNIT_CASE(pf_negotiate_latest_newer), + KUNIT_CASE(pf_negotiate_latest_next), + KUNIT_CASE(pf_negotiate_latest_older), + KUNIT_CASE(pf_negotiate_latest_prev), + {} +}; + +static struct kunit_suite pf_service_suite = { + .name = "pf_service", + .test_cases = pf_service_test_cases, + 
.init = pf_service_test_init, +}; + +kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 9570672fce33..5ce0e26822f2 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -19,7 +19,7 @@ static int bb_prefetch(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt)) + if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt)) /* * RCS and CCS require 1K, although other engines would be * okay with 512. diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h index fafacd73dcc3..b5cc65506696 100644 --- a/drivers/gpu/drm/xe/xe_bb.h +++ b/drivers/gpu/drm/xe/xe_bb.h @@ -14,7 +14,7 @@ struct xe_gt; struct xe_exec_queue; struct xe_sched_job; -struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm); +struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm); struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb); struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 7aa2c17825da..18f27da47a36 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -19,6 +19,8 @@ #include <kunit/static_stub.h> +#include <trace/events/gpu_mem.h> + #include "xe_device.h" #include "xe_dma_buf.h" #include "xe_drm_client.h" @@ -336,15 +338,13 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, /* struct xe_ttm_tt - Subclassed ttm_tt for xe */ struct xe_ttm_tt { struct ttm_tt ttm; - /** @xe - The xe device */ - struct xe_device *xe; struct sg_table sgt; struct sg_table *sg; /** @purgeable: Whether the content of the pages of @ttm is purgeable. */ bool purgeable; }; -static int xe_tt_map_sg(struct ttm_tt *tt) +static int xe_tt_map_sg(struct xe_device *xe, struct ttm_tt *tt) { struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); unsigned long num_pages = tt->num_pages; @@ -359,13 +359,13 @@ static int xe_tt_map_sg(struct ttm_tt *tt) ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages, num_pages, 0, (u64)num_pages << PAGE_SHIFT, - xe_sg_segment_size(xe_tt->xe->drm.dev), + xe_sg_segment_size(xe->drm.dev), GFP_KERNEL); if (ret) return ret; xe_tt->sg = &xe_tt->sgt; - ret = dma_map_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL, + ret = dma_map_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); if (ret) { sg_free_table(xe_tt->sg); @@ -376,12 +376,12 @@ static int xe_tt_map_sg(struct ttm_tt *tt) return 0; } -static void xe_tt_unmap_sg(struct ttm_tt *tt) +static void xe_tt_unmap_sg(struct xe_device *xe, struct ttm_tt *tt) { struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); if (xe_tt->sg) { - dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, + dma_unmap_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL, 0); sg_free_table(xe_tt->sg); xe_tt->sg = NULL; @@ -400,24 +400,37 @@ struct sg_table *xe_bo_sg(struct xe_bo *bo) * Account ttm pages against the device shrinker's shrinkable and * purgeable counts. 
*/ -static void xe_ttm_tt_account_add(struct ttm_tt *tt) +static void xe_ttm_tt_account_add(struct xe_device *xe, struct ttm_tt *tt) { struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); if (xe_tt->purgeable) - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, tt->num_pages); + xe_shrinker_mod_pages(xe->mem.shrinker, 0, tt->num_pages); else - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, tt->num_pages, 0); + xe_shrinker_mod_pages(xe->mem.shrinker, tt->num_pages, 0); } -static void xe_ttm_tt_account_subtract(struct ttm_tt *tt) +static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt) { struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); if (xe_tt->purgeable) - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, -(long)tt->num_pages); + xe_shrinker_mod_pages(xe->mem.shrinker, 0, -(long)tt->num_pages); else - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, -(long)tt->num_pages, 0); + xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0); +} + +static void update_global_total_pages(struct ttm_device *ttm_dev, + long num_pages) +{ +#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) + struct xe_device *xe = ttm_to_xe_device(ttm_dev); + u64 global_total_pages = + atomic64_add_return(num_pages, &xe->global_total_pages); + + trace_gpu_mem_total(xe->drm.primary->index, 0, + global_total_pages << PAGE_SHIFT); +#endif } static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, @@ -436,11 +449,10 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, return NULL; tt = &xe_tt->ttm; - xe_tt->xe = xe; extra_pages = 0; if (xe_bo_needs_ccs_pages(bo)) - extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size), + extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)), PAGE_SIZE); /* @@ -527,21 +539,25 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, return err; xe_tt->purgeable = false; - xe_ttm_tt_account_add(tt); + xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt); + update_global_total_pages(ttm_dev, tt->num_pages); return 0; } static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt) { + struct xe_device *xe = ttm_to_xe_device(ttm_dev); + if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) && !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE)) return; - xe_tt_unmap_sg(tt); + xe_tt_unmap_sg(xe, tt); ttm_pool_free(&ttm_dev->pool, tt); - xe_ttm_tt_account_subtract(tt); + xe_ttm_tt_account_subtract(xe, tt); + update_global_total_pages(ttm_dev, -(long)tt->num_pages); } static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt) @@ -789,7 +805,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, /* Bo creation path, moving to system or TT. 
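Illustrative aside, helper name invented: with the device backpointer gone from struct xe_ttm_tt, these hunks recover the xe device from the owning TTM device and pass it into the accounting helpers explicitly, the same shape the pin/unpin paths later in this file switch to.

	static void example_account_populated_tt(struct ttm_buffer_object *ttm_bo)
	{
		/* ttm_to_xe_device() replaces the removed xe_tt->xe backpointer */
		struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);

		if (ttm_bo->ttm && ttm_tt_is_populated(ttm_bo->ttm))
			xe_ttm_tt_account_add(xe, ttm_bo->ttm);
	}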
*/ if ((!old_mem && ttm) && !handle_system_ccs) { if (new_mem->mem_type == XE_PL_TT) - ret = xe_tt_map_sg(ttm); + ret = xe_tt_map_sg(xe, ttm); if (!ret) ttm_bo_move_null(ttm_bo, new_mem); goto out; @@ -812,7 +828,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, (!ttm && ttm_bo->type == ttm_bo_type_device); if (new_mem->mem_type == XE_PL_TT) { - ret = xe_tt_map_sg(ttm); + ret = xe_tt_map_sg(xe, ttm); if (ret) goto out; } @@ -958,7 +974,7 @@ out: if (timeout < 0) ret = timeout; - xe_tt_unmap_sg(ttm_bo->ttm); + xe_tt_unmap_sg(xe, ttm_bo->ttm); } return ret; @@ -968,6 +984,7 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, unsigned long *scanned) { + struct xe_device *xe = ttm_to_xe_device(bo->bdev); long lret; /* Fake move to system, without copying data. */ @@ -982,7 +999,7 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx, if (lret) return lret; - xe_tt_unmap_sg(bo->ttm); + xe_tt_unmap_sg(xe, bo->ttm); ttm_bo_move_null(bo, new_resource); } @@ -993,7 +1010,7 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx, .allow_move = false}); if (lret > 0) - xe_ttm_tt_account_subtract(bo->ttm); + xe_ttm_tt_account_subtract(xe, bo->ttm); return lret; } @@ -1043,7 +1060,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); struct ttm_place place = {.mem_type = bo->resource->mem_type}; struct xe_bo *xe_bo = ttm_to_xe_bo(bo); - struct xe_device *xe = xe_tt->xe; + struct xe_device *xe = ttm_to_xe_device(bo->bdev); bool needs_rpm; long lret = 0L; @@ -1080,7 +1097,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, xe_pm_runtime_put(xe); if (lret > 0) - xe_ttm_tt_account_subtract(tt); + xe_ttm_tt_account_subtract(xe, tt); out_unref: xe_bo_put(xe_bo); @@ -1122,7 +1139,7 @@ int xe_bo_notifier_prepare_pinned(struct xe_bo *bo) if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) goto out_unlock_bo; - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, + backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo), DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED); @@ -1200,7 +1217,8 @@ int xe_bo_evict_pinned(struct xe_bo *bo) goto out_unlock_bo; if (!backup) { - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, + backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, + NULL, xe_bo_size(bo), DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED); @@ -1254,7 +1272,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo) } xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0, - bo->size); + xe_bo_size(bo)); } if (!bo->backup_obj) @@ -1347,7 +1365,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo) } xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr, - bo->size); + xe_bo_size(bo)); } bo->backup_obj = NULL; @@ -1381,7 +1399,8 @@ int xe_bo_dma_unmap_pinned(struct xe_bo *bo) ttm_bo->sg = NULL; xe_tt->sg = NULL; } else if (xe_tt->sg) { - dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, + dma_unmap_sgtable(ttm_to_xe_device(ttm_bo->bdev)->drm.dev, + xe_tt->sg, DMA_BIDIRECTIONAL, 0); sg_free_table(xe_tt->sg); xe_tt->sg = NULL; @@ -1557,7 +1576,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, vram = res_to_mem_region(ttm_bo->resource); xe_res_first(ttm_bo->resource, 
offset & PAGE_MASK, - bo->size - (offset & PAGE_MASK), &cursor); + xe_bo_size(bo) - (offset & PAGE_MASK), &cursor); do { unsigned long page_offset = (offset & ~PAGE_MASK); @@ -1857,7 +1876,6 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, bo->ccs_cleared = false; bo->tile = tile; - bo->size = size; bo->flags = flags; bo->cpu_caching = cpu_caching; bo->ttm.base.funcs = &xe_gem_object_funcs; @@ -2035,7 +2053,7 @@ __xe_bo_create_locked(struct xe_device *xe, if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo, - start + bo->size, U64_MAX); + start + xe_bo_size(bo), U64_MAX); } else { err = xe_ggtt_insert_bo(t->mem.ggtt, bo); } @@ -2156,21 +2174,6 @@ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags); } -struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, - const void *data, size_t size, - enum ttm_bo_type type, u32 flags) -{ - struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - type, flags); - if (IS_ERR(bo)) - return bo; - - xe_map_memcpy_to(xe, &bo->vmap, 0, data, size); - - return bo; -} - static void __xe_bo_unpin_map_no_vm(void *arg) { xe_bo_unpin_map_no_vm(arg); @@ -2233,7 +2236,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str xe_assert(xe, !(*src)->vmap.is_iomem); bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, - (*src)->size, dst_flags); + xe_bo_size(*src), dst_flags); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -2293,7 +2296,7 @@ int xe_bo_pin_external(struct xe_bo *bo) ttm_bo_pin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) - xe_ttm_tt_account_subtract(bo->ttm.ttm); + xe_ttm_tt_account_subtract(xe, bo->ttm.ttm); /* * FIXME: If we always use the reserve / unreserve functions for locking @@ -2341,7 +2344,7 @@ int xe_bo_pin(struct xe_bo *bo) ttm_bo_pin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) - xe_ttm_tt_account_subtract(bo->ttm.ttm); + xe_ttm_tt_account_subtract(xe, bo->ttm.ttm); /* * FIXME: If we always use the reserve / unreserve functions for locking @@ -2377,7 +2380,7 @@ void xe_bo_unpin_external(struct xe_bo *bo) ttm_bo_unpin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) - xe_ttm_tt_account_add(bo->ttm.ttm); + xe_ttm_tt_account_add(xe, bo->ttm.ttm); /* * FIXME: If we always use the reserve / unreserve functions for locking @@ -2409,7 +2412,7 @@ void xe_bo_unpin(struct xe_bo *bo) } ttm_bo_unpin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) - xe_ttm_tt_account_add(bo->ttm.ttm); + xe_ttm_tt_account_add(xe, bo->ttm.ttm); } /** @@ -2523,7 +2526,7 @@ int xe_bo_vmap(struct xe_bo *bo) * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap * to use struct iosys_map. */ - ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap); + ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap); if (ret) return ret; @@ -2992,6 +2995,14 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo) if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM)) return false; + /* + * Compression implies coh_none, therefore we know for sure that WB + * memory can't currently use compression, which is likely one of the + * common cases. 
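Rough sketch with a hypothetical helper: combined with the WB early return added just below, the CCS backing-page sizing from xe_ttm_tt_create() earlier in this patch works out as follows. WB-cached BOs reserve no extra pages; everything else still appends xe_device_ccs_bytes() worth.

	static unsigned long example_ccs_extra_pages(struct xe_device *xe, struct xe_bo *bo)
	{
		/* false for DRM_XE_GEM_CPU_CACHING_WB once the early return lands */
		if (!xe_bo_needs_ccs_pages(bo))
			return 0;

		return DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)), PAGE_SIZE);
	}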
+ */ + if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB) + return false; + return true; } @@ -3067,7 +3078,7 @@ void xe_bo_put(struct xe_bo *bo) #endif for_each_tile(tile, xe_bo_device(bo), id) if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt) - might_lock(&bo->ggtt_node[id]->ggtt->lock); + xe_ggtt_might_lock(bo->ggtt_node[id]->ggtt); drm_gem_object_put(&bo->ttm.base); } } diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 02ada1fb8a23..02e8cde4c6b2 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -118,9 +118,6 @@ struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, size_t size, u64 offset, enum ttm_bo_type type, u32 flags, u64 alignment); -struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, - const void *data, size_t size, - enum ttm_bo_type type, u32 flags); struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, size_t size, u32 flags); struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, @@ -238,6 +235,19 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) return xe_bo_addr(bo, 0, page_size); } +/** + * xe_bo_size() - Xe BO size + * @bo: The bo object. + * + * Simple helper to return Xe BO's size. + * + * Return: Xe BO's size + */ +static inline size_t xe_bo_size(struct xe_bo *bo) +{ + return bo->ttm.base.size; +} + static inline u32 __xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id) { @@ -246,7 +256,7 @@ __xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id) if (XE_WARN_ON(!ggtt_node)) return 0; - XE_WARN_ON(ggtt_node->base.size > bo->size); + XE_WARN_ON(ggtt_node->base.size > xe_bo_size(bo)); XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32)); return ggtt_node->base.start; } @@ -300,7 +310,7 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo); static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo) { - return PAGE_ALIGN(bo->ttm.base.size); + return PAGE_ALIGN(xe_bo_size(bo)); } static inline bool xe_bo_has_pages(struct xe_bo *bo) diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index ed3746d32b27..7484ce55a303 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -197,9 +197,7 @@ static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile))) continue; - mutex_lock(&tile->mem.ggtt->lock); - xe_ggtt_map_bo(tile->mem.ggtt, bo); - mutex_unlock(&tile->mem.ggtt->lock); + xe_ggtt_map_bo_unlocked(tile->mem.ggtt, bo); } } diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index eb5e83c5f233..ff560d82496f 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -32,8 +32,6 @@ struct xe_bo { struct xe_bo *backup_obj; /** @parent_obj: Ref to parent bo if this a backup_obj */ struct xe_bo *parent_obj; - /** @size: Size of this buffer object */ - size_t size; /** @flags: flags for this buffer object */ u32 flags; /** @vm: VM this BO is attached to, for extobj this will be NULL */ @@ -86,7 +84,7 @@ struct xe_bo { u16 cpu_caching; /** @devmem_allocation: SVM device memory allocation */ - struct drm_gpusvm_devmem devmem_allocation; + struct drm_pagemap_devmem devmem_allocation; /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ struct list_head vram_userfault_link; diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index cb9f175c89a1..8ec1ff1e4e80 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ 
b/drivers/gpu/drm/xe/xe_configfs.c @@ -3,14 +3,19 @@ * Copyright © 2025 Intel Corporation */ +#include <linux/bitops.h> #include <linux/configfs.h> +#include <linux/find.h> #include <linux/init.h> #include <linux/module.h> #include <linux/pci.h> +#include <linux/string.h> #include "xe_configfs.h" #include "xe_module.h" +#include "xe_hw_engine_types.h" + /** * DOC: Xe Configfs * @@ -48,6 +53,30 @@ * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind (Enters survivability mode if supported) * + * Allowed engines: + * ---------------- + * + * Allow only a set of engine(s) to be available, disabling the other engines + * even if they are available in hardware. This is applied after HW fuses are + * considered on each tile. Examples: + * + * Allow only one render and one copy engines, nothing else:: + * + * # echo 'rcs0,bcs0' > /sys/kernel/config/xe/0000:03:00.0/engines_allowed + * + * Allow only compute engines and first copy engine:: + * + * # echo 'ccs*,bcs0' > /sys/kernel/config/xe/0000:03:00.0/engines_allowed + * + * Note that the engine names are the per-GT hardware names. On multi-tile + * platforms, writing ``rcs0,bcs0`` to this file would allow the first render + * and copy engines on each tile. + * + * The requested configuration may not be supported by the platform and driver + * may fail to probe. For example: if at least one copy engine is expected to be + * available for migrations, but it's disabled. This is intended for debugging + * purposes only. + * * Remove devices * ============== * @@ -60,11 +89,30 @@ struct xe_config_device { struct config_group group; bool survivability_mode; + u64 engines_allowed; /* protects attributes */ struct mutex lock; }; +struct engine_info { + const char *cls; + u64 mask; +}; + +/* Some helpful macros to aid on the sizing of buffer allocation when parsing */ +#define MAX_ENGINE_CLASS_CHARS 5 +#define MAX_ENGINE_INSTANCE_CHARS 2 + +static const struct engine_info engine_info[] = { + { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK }, + { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK }, + { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK }, + { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK }, + { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK }, + { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK }, +}; + static struct xe_config_device *to_xe_config_device(struct config_item *item) { return container_of(to_config_group(item), struct xe_config_device, group); @@ -94,10 +142,96 @@ static ssize_t survivability_mode_store(struct config_item *item, const char *pa return len; } +static ssize_t engines_allowed_show(struct config_item *item, char *page) +{ + struct xe_config_device *dev = to_xe_config_device(item); + char *p = page; + + for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { + u64 mask = engine_info[i].mask; + + if ((dev->engines_allowed & mask) == mask) { + p += sprintf(p, "%s*\n", engine_info[i].cls); + } else if (mask & dev->engines_allowed) { + u16 bit0 = __ffs64(mask), bit; + + mask &= dev->engines_allowed; + + for_each_set_bit(bit, (const unsigned long *)&mask, 64) + p += sprintf(p, "%s%u\n", engine_info[i].cls, + bit - bit0); + } + } + + return p - page; +} + +static bool lookup_engine_mask(const char *pattern, u64 *mask) +{ + for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { + u8 instance; + u16 bit; + + if (!str_has_prefix(pattern, engine_info[i].cls)) + continue; + + pattern += strlen(engine_info[i].cls); + + if (!strcmp(pattern, "*")) { + *mask = engine_info[i].mask; + 
return true; + } + + if (kstrtou8(pattern, 10, &instance)) + return false; + + bit = __ffs64(engine_info[i].mask) + instance; + if (bit >= fls64(engine_info[i].mask)) + return false; + + *mask = BIT_ULL(bit); + return true; + } + + return false; +} + +static ssize_t engines_allowed_store(struct config_item *item, const char *page, + size_t len) +{ + struct xe_config_device *dev = to_xe_config_device(item); + size_t patternlen, p; + u64 mask, val = 0; + + for (p = 0; p < len; p += patternlen + 1) { + char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1]; + + patternlen = strcspn(page + p, ",\n"); + if (patternlen >= sizeof(buf)) + return -EINVAL; + + memcpy(buf, page + p, patternlen); + buf[patternlen] = '\0'; + + if (!lookup_engine_mask(buf, &mask)) + return -EINVAL; + + val |= mask; + } + + mutex_lock(&dev->lock); + dev->engines_allowed = val; + mutex_unlock(&dev->lock); + + return len; +} + CONFIGFS_ATTR(, survivability_mode); +CONFIGFS_ATTR(, engines_allowed); static struct configfs_attribute *xe_config_device_attrs[] = { &attr_survivability_mode, + &attr_engines_allowed, NULL, }; @@ -139,6 +273,9 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro if (!dev) return ERR_PTR(-ENOMEM); + /* Default values */ + dev->engines_allowed = U64_MAX; + config_group_init_type_name(&dev->group, name, &xe_config_device_type); mutex_init(&dev->lock); @@ -226,6 +363,29 @@ void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) config_item_put(&dev->group.cg_item); } +/** + * xe_configfs_get_engines_allowed - get engine allowed mask from configfs + * @pdev: pci device + * + * Find the configfs group that belongs to the pci device and return + * the mask of engines allowed to be used. + * + * Return: engine mask with allowed engines + */ +u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) +{ + struct xe_config_device *dev = configfs_find_group(pdev); + u64 engines_allowed; + + if (!dev) + return U64_MAX; + + engines_allowed = dev->engines_allowed; + config_item_put(&dev->group.cg_item); + + return engines_allowed; +} + int __init xe_configfs_init(void) { struct config_group *root = &xe_configfs.su_group; diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h index d7d041ec2611..fb8764008089 100644 --- a/drivers/gpu/drm/xe/xe_configfs.h +++ b/drivers/gpu/drm/xe/xe_configfs.h @@ -5,6 +5,7 @@ #ifndef _XE_CONFIGFS_H_ #define _XE_CONFIGFS_H_ +#include <linux/limits.h> #include <linux/types.h> struct pci_dev; @@ -14,11 +15,13 @@ int xe_configfs_init(void); void xe_configfs_exit(void); bool xe_configfs_get_survivability_mode(struct pci_dev *pdev); void xe_configfs_clear_survivability_mode(struct pci_dev *pdev); +u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev); #else -static inline int xe_configfs_init(void) { return 0; }; -static inline void xe_configfs_exit(void) {}; -static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; }; -static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) {}; +static inline int xe_configfs_init(void) { return 0; } +static inline void xe_configfs_exit(void) { } +static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; } +static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) { } +static inline u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { return U64_MAX; } #endif #endif diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 
d0503959a8ed..26e9d146ccbf 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -20,7 +20,9 @@ #include "xe_pm.h" #include "xe_pxp_debugfs.h" #include "xe_sriov.h" +#include "xe_sriov_pf.h" #include "xe_step.h" +#include "xe_wa.h" #ifdef CONFIG_DRM_XE_DEBUG #include "xe_bo_evict.h" @@ -82,9 +84,28 @@ static int sriov_info(struct seq_file *m, void *data) return 0; } +static int workarounds(struct xe_device *xe, struct drm_printer *p) +{ + xe_pm_runtime_get(xe); + xe_wa_device_dump(xe, p); + xe_pm_runtime_put(xe); + + return 0; +} + +static int workaround_info(struct seq_file *m, void *data) +{ + struct xe_device *xe = node_to_xe(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + workarounds(xe, &p); + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"info", info, 0}, { .name = "sriov_info", .show = sriov_info, }, + { .name = "workarounds", .show = workaround_info, }, }; static int forcewake_open(struct inode *inode, struct file *file) @@ -191,6 +212,41 @@ static const struct file_operations wedged_mode_fops = { .write = wedged_mode_set, }; +static ssize_t atomic_svm_timeslice_ms_show(struct file *f, char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + char buf[32]; + int len = 0; + + len = scnprintf(buf, sizeof(buf), "%d\n", xe->atomic_svm_timeslice_ms); + + return simple_read_from_buffer(ubuf, size, pos, buf, len); +} + +static ssize_t atomic_svm_timeslice_ms_set(struct file *f, + const char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + u32 atomic_svm_timeslice_ms; + ssize_t ret; + + ret = kstrtouint_from_user(ubuf, size, 0, &atomic_svm_timeslice_ms); + if (ret) + return ret; + + xe->atomic_svm_timeslice_ms = atomic_svm_timeslice_ms; + + return size; +} + +static const struct file_operations atomic_svm_timeslice_ms_fops = { + .owner = THIS_MODULE, + .read = atomic_svm_timeslice_ms_show, + .write = atomic_svm_timeslice_ms_set, +}; + void xe_debugfs_register(struct xe_device *xe) { struct ttm_device *bdev = &xe->ttm; @@ -211,6 +267,9 @@ void xe_debugfs_register(struct xe_device *xe) debugfs_create_file("wedged_mode", 0600, root, xe, &wedged_mode_fops); + debugfs_create_file("atomic_svm_timeslice_ms", 0600, root, xe, + &atomic_svm_timeslice_ms_fops); + for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { man = ttm_manager_type(bdev, mem_type); @@ -235,4 +294,7 @@ void xe_debugfs_register(struct xe_device *xe) xe_pxp_debugfs_register(xe->pxp); fault_create_debugfs_attr("fail_gt_reset", root, >_reset_failure); + + if (IS_SRIOV_PF(xe)) + xe_sriov_pf_debugfs_register(xe, root); } diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 7a8af2311318..203e3038cc81 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -171,14 +171,32 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) #define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) +/** + * xe_devcoredump_read() - Read data from the Xe device coredump snapshot + * @buffer: Destination buffer to copy the coredump data into + * @offset: Offset in the coredump data to start reading from + * @count: Number of bytes to read + * @data: Pointer to the xe_devcoredump structure + * @datalen: Length of the data (unused) + * + * Reads a chunk of the coredump snapshot data into the provided buffer. 
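Small illustrative aside before the kernel-doc continues, names invented: the chunk bookkeeping described below amounts to decomposing the linear read offset with div_u64_rem(), and a single read call is clamped so it never crosses a chunk boundary.

	#include <linux/math64.h>

	/* split a read offset into a chunk index plus an offset within that chunk */
	static u64 example_locate_chunk(loff_t offset, u32 *chunk_offset)
	{
		/* XE_DEVCOREDUMP_CHUNK_MAX is SZ_512M + SZ_1G */
		return div_u64_rem(offset, XE_DEVCOREDUMP_CHUNK_MAX, chunk_offset);
	}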
+ * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX), + * it is read directly from a pre-written buffer. For larger devcoredumps, + * the pre-written buffer must be periodically repopulated from the snapshot + * state due to kmalloc size limitations. + * + * Return: Number of bytes copied on success, or a negative error code on failure. + */ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { struct xe_devcoredump *coredump = data; struct xe_devcoredump_snapshot *ss; - ssize_t byte_copied; + ssize_t byte_copied = 0; u32 chunk_offset; ssize_t new_chunk_position; + bool pm_needed = false; + int ret = 0; if (!coredump) return -ENODEV; @@ -188,20 +206,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX; + if (pm_needed) xe_pm_runtime_get(gt_to_xe(ss->gt)); mutex_lock(&coredump->lock); if (!ss->read.buffer) { - mutex_unlock(&coredump->lock); - return -ENODEV; + ret = -ENODEV; + goto unlock; } - if (offset >= ss->read.size) { - mutex_unlock(&coredump->lock); - return 0; - } + if (offset >= ss->read.size) + goto unlock; new_chunk_position = div_u64_rem(offset, XE_DEVCOREDUMP_CHUNK_MAX, @@ -221,12 +238,13 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, ss->read.size - offset; memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied); +unlock: mutex_unlock(&coredump->lock); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + if (pm_needed) xe_pm_runtime_put(gt_to_xe(ss->gt)); - return byte_copied; + return byte_copied ? byte_copied : ret; } static void xe_devcoredump_free(void *data) @@ -313,13 +331,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; struct xe_guc *guc = exec_queue_to_guc(q); - u32 adj_logical_mask = q->logical_mask; - u32 width_mask = (0x1 << q->width) - 1; const char *process_name = "no process"; - unsigned int fw_ref; bool cookie; - int i; ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); @@ -335,14 +349,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); cookie = dma_fence_begin_signalling(); - for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { - if (adj_logical_mask & BIT(i)) { - adj_logical_mask |= width_mask << i; - i += q->width; - } else { - ++i; - } - } /* keep going if fw fails as we still want to save the memory and SW data */ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index c02c4c4e9412..6dc84e4ed281 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -40,12 +40,14 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_guc_pc.h" #include "xe_hw_engine_group.h" #include "xe_hwmon.h" +#include "xe_i2c.h" #include "xe_irq.h" -#include "xe_memirq.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_nvm.h" #include "xe_oa.h" #include "xe_observation.h" #include "xe_pat.h" @@ -66,6 +68,7 @@ #include "xe_wait_user_fence.h" #include "xe_wa.h" +#include <generated/xe_device_wa_oob.h> #include <generated/xe_wa_oob.h> static int xe_file_open(struct drm_device *dev, struct drm_file *file) @@ -402,9 +405,6 @@ static void 
xe_device_destroy(struct drm_device *dev, void *dummy) if (xe->unordered_wq) destroy_workqueue(xe->unordered_wq); - if (!IS_ERR_OR_NULL(xe->mem.shrinker)) - xe_shrinker_destroy(xe->mem.shrinker); - if (xe->destroy_wq) destroy_workqueue(xe->destroy_wq); @@ -438,13 +438,14 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, if (err) goto err; - xe->mem.shrinker = xe_shrinker_create(xe); - if (IS_ERR(xe->mem.shrinker)) - return ERR_CAST(xe->mem.shrinker); + err = xe_shrinker_create(xe); + if (err) + goto err; xe->info.devid = pdev->device; xe->info.revid = pdev->revision; xe->info.force_execlist = xe_modparam.force_execlist; + xe->atomic_svm_timeslice_ms = 5; err = xe_irq_init(xe); if (err) @@ -493,10 +494,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, if (err) goto err; - err = xe_display_create(xe); - if (WARN_ON(err)) - goto err; - return xe; err: @@ -704,6 +701,9 @@ int xe_device_probe_early(struct xe_device *xe) { int err; + xe_wa_device_init(xe); + xe_wa_process_device_oob(xe); + err = xe_mmio_probe_early(xe); if (err) return err; @@ -789,45 +789,16 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; - err = xe_ttm_sys_mgr_init(xe); - if (err) - return err; - for_each_gt(gt, xe, id) { err = xe_gt_init_early(gt); if (err) return err; - - /* - * Only after this point can GT-specific MMIO operations - * (including things like communication with the GuC) - * be performed. - */ - xe_gt_mmio_init(gt); } for_each_tile(tile, xe, id) { - if (IS_SRIOV_VF(xe)) { - xe_guc_comm_init_early(&tile->primary_gt->uc.guc); - err = xe_gt_sriov_vf_bootstrap(tile->primary_gt); - if (err) - return err; - err = xe_gt_sriov_vf_query_config(tile->primary_gt); - if (err) - return err; - } err = xe_ggtt_init_early(tile->mem.ggtt); if (err) return err; - err = xe_memirq_init(&tile->memirq); - if (err) - return err; - } - - for_each_gt(gt, xe, id) { - err = xe_gt_init_hwconfig(gt); - if (err) - return err; } err = xe_devcoredump_init(xe); @@ -855,6 +826,14 @@ int xe_device_probe(struct xe_device *xe) return err; } + /* + * Allow allocations only now to ensure xe_display_init_early() + * is the first to allocate, always. + */ + err = xe_ttm_sys_mgr_init(xe); + if (err) + return err; + /* Allocate and map stolen after potential VRAM resize */ err = xe_ttm_stolen_mgr_init(xe); if (err) @@ -886,6 +865,12 @@ int xe_device_probe(struct xe_device *xe) return err; } + if (xe->tiles->media_gt && + XE_WA(xe->tiles->media_gt, 15015404425_disable)) + XE_DEVICE_WA_DISABLE(xe, 15015404425); + + xe_nvm_init(xe); + err = xe_heci_gsc_init(xe); if (err) return err; @@ -926,6 +911,10 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err_unregister_display; + err = xe_i2c_probe(xe); + if (err) + goto err_unregister_display; + for_each_gt(gt, xe, id) xe_gt_sanitize_freq(gt); @@ -943,6 +932,8 @@ void xe_device_remove(struct xe_device *xe) { xe_display_unregister(xe); + xe_nvm_fini(xe); + drm_dev_unplug(&xe->drm); xe_bo_pci_dev_remove_all(xe); @@ -986,38 +977,15 @@ void xe_device_wmb(struct xe_device *xe) xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0); } -/** - * xe_device_td_flush() - Flush transient L3 cache entries - * @xe: The device - * - * Display engine has direct access to memory and is never coherent with L3/L4 - * caches (or CPU caches), however KMD is responsible for specifically flushing - * transient L3 GPU cache entries prior to the flip sequence to ensure scanout - * can happen from such a surface without seeing corruption. 
- * - * Display surfaces can be tagged as transient by mapping it using one of the - * various L3:XD PAT index modes on Xe2. - * - * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed - * at the end of each submission via PIPE_CONTROL for compute/render, since SA - * Media is not coherent with L3 and we want to support render-vs-media - * usescases. For other engines like copy/blt the HW internally forces uncached - * behaviour, hence why we can skip the TDF on such platforms. +/* + * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt. */ -void xe_device_td_flush(struct xe_device *xe) +static void tdf_request_sync(struct xe_device *xe) { - struct xe_gt *gt; unsigned int fw_ref; + struct xe_gt *gt; u8 id; - if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) - return; - - if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { - xe_device_l2_flush(xe); - return; - } - for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -1027,6 +995,7 @@ void xe_device_td_flush(struct xe_device *xe) return; xe_mmio_write32(>->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); + /* * FIXME: We can likely do better here with our choice of * timeout. Currently we just assume the worst case, i.e. 150us, @@ -1057,15 +1026,52 @@ void xe_device_l2_flush(struct xe_device *xe) return; spin_lock(>->global_invl_lock); - xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); + xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); if (xe_mmio_wait32(>->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(>->global_invl_lock); xe_force_wake_put(gt_to_fw(gt), fw_ref); } +/** + * xe_device_td_flush() - Flush transient L3 cache entries + * @xe: The device + * + * Display engine has direct access to memory and is never coherent with L3/L4 + * caches (or CPU caches), however KMD is responsible for specifically flushing + * transient L3 GPU cache entries prior to the flip sequence to ensure scanout + * can happen from such a surface without seeing corruption. + * + * Display surfaces can be tagged as transient by mapping it using one of the + * various L3:XD PAT index modes on Xe2. + * + * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed + * at the end of each submission via PIPE_CONTROL for compute/render, since SA + * Media is not coherent with L3 and we want to support render-vs-media + * usescases. For other engines like copy/blt the HW internally forces uncached + * behaviour, hence why we can skip the TDF on such platforms. + */ +void xe_device_td_flush(struct xe_device *xe) +{ + struct xe_gt *root_gt; + + if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) + return; + + root_gt = xe_root_mmio_gt(xe); + if (XE_WA(root_gt, 16023588340)) { + /* A transient flush is not sufficient: flush the L2 */ + xe_device_l2_flush(xe); + } else { + xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc); + tdf_request_sync(xe); + xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc); + } +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? 
@@ -1168,7 +1174,8 @@ void xe_device_declare_wedged(struct xe_device *xe) /* Notify userspace of wedged device */ drm_dev_wedged_event(&xe->drm, - DRM_WEDGE_RECOVERY_REBIND | DRM_WEDGE_RECOVERY_BUS_RESET); + DRM_WEDGE_RECOVERY_REBIND | DRM_WEDGE_RECOVERY_BUS_RESET, + NULL); } for_each_gt(gt, xe, id) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 0bc3bc8e6803..bc802e066a7d 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -60,35 +60,32 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe) return &xe->tiles[0]; } +/* + * Highest GT/tile count for any platform. Used only for memory allocation + * sizing. Any logic looping over GTs or mapping userspace GT IDs into GT + * structures should use the per-platform xe->info.max_gt_per_tile instead. + */ #define XE_MAX_GT_PER_TILE 2 -static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id) -{ - if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE)) - gt_id = 0; - - return gt_id ? tile->media_gt : tile->primary_gt; -} - static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) { - struct xe_tile *root_tile = xe_device_get_root_tile(xe); + struct xe_tile *tile; struct xe_gt *gt; - /* - * FIXME: This only works for now because multi-tile and standalone - * media are mutually exclusive on the platforms we have today. - * - * id => GT mapping may change once we settle on how we want to handle - * our UAPI. - */ - if (MEDIA_VER(xe) >= 13) { - gt = xe_tile_get_gt(root_tile, gt_id); - } else { - if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE)) - gt_id = 0; - - gt = xe->tiles[gt_id].primary_gt; + if (gt_id >= xe->info.tile_count * xe->info.max_gt_per_tile) + return NULL; + + tile = &xe->tiles[gt_id / xe->info.max_gt_per_tile]; + switch (gt_id % xe->info.max_gt_per_tile) { + default: + xe_assert(xe, false); + fallthrough; + case 0: + gt = tile->primary_gt; + break; + case 1: + gt = tile->media_gt; + break; } if (!gt) @@ -130,14 +127,14 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe) for ((id__) = 1; (id__) < (xe__)->info.tile_count; (id__)++) \ for_each_if((tile__) = &(xe__)->tiles[(id__)]) -/* - * FIXME: This only works for now since multi-tile and standalone media - * happen to be mutually exclusive. Future platforms may change this... - */ #define for_each_gt(gt__, xe__, id__) \ - for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \ + for ((id__) = 0; (id__) < (xe__)->info.tile_count * (xe__)->info.max_gt_per_tile; (id__)++) \ for_each_if((gt__) = xe_device_get_gt((xe__), (id__))) +#define for_each_gt_on_tile(gt__, tile__, id__) \ + for_each_gt((gt__), (tile__)->xe, (id__)) \ + for_each_if((gt__)->tile == (tile__)) + static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) { return >->pm.fw; @@ -195,6 +192,8 @@ void xe_device_declare_wedged(struct xe_device *xe); struct xe_file *xe_file_get(struct xe_file *xef); void xe_file_put(struct xe_file *xef); +int xe_is_injection_active(void); + /* * Occasionally it is seen that the G2H worker starts running after a delay of more than * a second even after being queued and activated by the Linux workqueue subsystem. 
This diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index b9440f8c781e..e5fd0cd537bc 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -24,6 +24,12 @@ * * vram_d3cold_threshold - Report/change vram used threshold(in MB) below * which vram save/restore is permissible during runtime D3cold entry/exit. + * + * lb_fan_control_version - Fan control version provisioned by late binding. + * Exposed only if supported by the device. + * + * lb_voltage_regulator_version - Voltage regulator version provisioned by late + * binding. Exposed only if supported by the device. */ static ssize_t @@ -65,6 +71,135 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RW(vram_d3cold_threshold); +static ssize_t +lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap, ver_low = FAN_TABLE, ver_high = FAN_TABLE; + u16 major = 0, minor = 0, hotfix = 0, build = 0; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(V1_FAN_PROVISIONED, cap)) { + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), + &ver_low, NULL); + if (ret) + goto out; + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), + &ver_high, NULL); + if (ret) + goto out; + + major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); + minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); + hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); + build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); + } +out: + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); +} +static DEVICE_ATTR_ADMIN_RO(lb_fan_control_version); + +static ssize_t +lb_voltage_regulator_version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap, ver_low = VR_CONFIG, ver_high = VR_CONFIG; + u16 major = 0, minor = 0, hotfix = 0, build = 0; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(VR_PARAMS_PROVISIONED, cap)) { + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), + &ver_low, NULL); + if (ret) + goto out; + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), + &ver_high, NULL); + if (ret) + goto out; + + major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); + minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); + hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); + build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); + } +out: + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); +} +static DEVICE_ATTR_ADMIN_RO(lb_voltage_regulator_version); + +static int late_bind_create_files(struct device *dev) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto 
out; + + if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) { + ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); + if (ret) + goto out; + } + + if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) + ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr); +out: + xe_pm_runtime_put(xe); + + return ret; +} + +static void late_bind_remove_files(struct device *dev) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) + sysfs_remove_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); + + if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) + sysfs_remove_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr); +out: + xe_pm_runtime_put(xe); +} + /** * DOC: PCIe Gen5 Limitations * @@ -151,8 +286,10 @@ static void xe_device_sysfs_fini(void *arg) if (xe->d3cold.capable) sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); - if (xe->info.platform == XE_BATTLEMAGE) + if (xe->info.platform == XE_BATTLEMAGE) { sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs); + late_bind_remove_files(xe->drm.dev); + } } int xe_device_sysfs_init(struct xe_device *xe) @@ -170,6 +307,10 @@ int xe_device_sysfs_init(struct xe_device *xe) ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); if (ret) return ret; + + ret = late_bind_create_files(dev); + if (ret) + return ret; } return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 6383a1c0d478..d4d2c6854790 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -21,7 +21,9 @@ #include "xe_platform_types.h" #include "xe_pmu_types.h" #include "xe_pt_types.h" +#include "xe_sriov_pf_types.h" #include "xe_sriov_types.h" +#include "xe_sriov_vf_types.h" #include "xe_step_types.h" #include "xe_survivability_mode_types.h" #include "xe_ttm_vram_mgr_types.h" @@ -30,12 +32,11 @@ #define TEST_VM_OPS_ERROR #endif -#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) -#include "intel_display_core.h" -#include "intel_display_device.h" -#endif - +struct dram_info; +struct intel_display; +struct intel_dg_nvm_dev; struct xe_ggtt; +struct xe_i2c; struct xe_pat_ops; struct xe_pxp; @@ -108,7 +109,7 @@ struct xe_vram_region { void __iomem *mapping; /** @ttm: VRAM TTM manager */ struct xe_ttm_vram_mgr ttm; -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) /** @pagemap: Used to remap device memory as ZONE_DEVICE */ struct dev_pagemap pagemap; /** @@ -296,6 +297,8 @@ struct xe_device { u8 vram_flags; /** @info.tile_count: Number of tiles */ u8 tile_count; + /** @info.max_gt_per_tile: Number of GT IDs allocated to each tile */ + u8 max_gt_per_tile; /** @info.gt_count: Total number of GTs for entire device */ u8 gt_count; /** @info.vm_max_level: Max VM level */ @@ -319,6 +322,8 @@ struct xe_device { u8 has_fan_control:1; /** @info.has_flat_ccs: Whether flat CCS metadata is used */ u8 has_flat_ccs:1; + /** @info.has_gsc_nvm: Device has gsc non-volatile memory */ + u8 has_gsc_nvm:1; /** @info.has_heci_cscfi: device has heci cscfi */ u8 has_heci_cscfi:1; /** @info.has_heci_gscfi: device has heci gscfi */ @@ -360,6 +365,19 @@ struct xe_device { u8 skip_pcode:1; } info; + /** 
@wa_active: keep track of active workarounds */ + struct { + /** @wa_active.oob: bitmap with active OOB workarounds */ + unsigned long *oob; + + /** + * @wa_active.oob_initialized: Mark oob as initialized to help detecting misuse + * of XE_DEVICE_WA() - it can only be called on initialization after + * Device OOB WAs have been processed. + */ + bool oob_initialized; + } wa_active; + /** @survivability: survivability information for device */ struct xe_survivability survivability; @@ -406,10 +424,12 @@ struct xe_device { /** @sriov.__mode: SR-IOV mode (Don't access directly!) */ enum xe_sriov_mode __mode; - /** @sriov.pf: PF specific data */ - struct xe_device_pf pf; - /** @sriov.vf: VF specific data */ - struct xe_device_vf vf; + union { + /** @sriov.pf: PF specific data */ + struct xe_device_pf pf; + /** @sriov.vf: VF specific data */ + struct xe_device_vf vf; + }; /** @sriov.wq: workqueue used by the virtualization workers */ struct workqueue_struct *wq; @@ -502,6 +522,10 @@ struct xe_device { const struct xe_pat_table_entry *table; /** @pat.n_entries: Number of PAT entries */ int n_entries; + /** @pat.ats_entry: PAT entry for PCIe ATS responses */ + const struct xe_pat_table_entry *pat_ats; + /** @pat.pta_entry: PAT entry for page table accesses */ + const struct xe_pat_table_entry *pat_pta; u32 idx[__XE_CACHE_LEVEL_COUNT]; } pat; @@ -548,6 +572,9 @@ struct xe_device { /** @heci_gsc: graphics security controller */ struct xe_heci_gsc heci_gsc; + /** @nvm: discrete graphics non-volatile memory */ + struct intel_dg_nvm_dev *nvm; + /** @oa: oa observation subsystem */ struct xe_oa oa; @@ -576,6 +603,12 @@ struct xe_device { /** @pmu: performance monitoring unit */ struct xe_pmu pmu; + /** @i2c: I2C host controller */ + struct xe_i2c *i2c; + + /** @atomic_svm_timeslice_ms: Atomic SVM fault timeslice MS */ + u32 atomic_svm_timeslice_ms; + #ifdef TEST_VM_OPS_ERROR /** * @vm_inject_error_position: inject errors at different places in VM @@ -584,6 +617,14 @@ struct xe_device { u8 vm_inject_error_position; #endif +#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) + /** + * @global_total_pages: global GPU page usage tracked for gpu_mem + * tracepoints + */ + atomic64_t global_total_pages; +#endif + /* private: */ #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) @@ -593,27 +634,9 @@ struct xe_device { * drm_i915_private during build. After cleanup these should go away, * migrating to the right sub-structs */ - struct intel_display display; - - struct dram_info { - bool wm_lv_0_adjust_needed; - u8 num_channels; - bool symmetric_memory; - enum intel_dram_type { - INTEL_DRAM_UNKNOWN, - INTEL_DRAM_DDR3, - INTEL_DRAM_DDR4, - INTEL_DRAM_LPDDR3, - INTEL_DRAM_LPDDR4, - INTEL_DRAM_DDR5, - INTEL_DRAM_LPDDR5, - INTEL_DRAM_GDDR, - INTEL_DRAM_GDDR_ECC, - __INTEL_DRAM_TYPE_MAX, - } type; - u8 num_qgv_points; - u8 num_psf_gv_points; - } dram_info; + struct intel_display *display; + + const struct dram_info *dram_info; /* * edram size in MB. 
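For reference, the wa_active.oob bitmap above is populated from the new xe_device_wa_oob.rules file just below by xe_wa_process_device_oob(), and is then queried through XE_DEVICE_WA() (or cleared via XE_DEVICE_WA_DISABLE(), as seen earlier in xe_device_probe()). A minimal hedged sketch of a hypothetical check site, assuming only the macros referenced in this patch:

	/* Hypothetical consumer; only valid after xe_wa_process_device_oob(xe) */
	if (XE_DEVICE_WA(xe, 15015404425))
		drm_dbg(&xe->drm, "device OOB WA 15015404425 active\n");
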
diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules new file mode 100644 index 000000000000..3a0c4ccc4224 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules @@ -0,0 +1,2 @@ +15015404425 PLATFORM(LUNARLAKE) + PLATFORM(PANTHERLAKE) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 31f688e953d7..f931ff9b1ec0 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -167,7 +167,7 @@ void xe_drm_client_remove_bo(struct xe_bo *bo) static void bo_meminfo(struct xe_bo *bo, struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) { - u64 sz = bo->size; + u64 sz = xe_bo_size(bo); u32 mem_type = bo->ttm.resource->mem_type; xe_bo_assert_held(bo); diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h index d61650d4aa0b..95242a375e54 100644 --- a/drivers/gpu/drm/xe/xe_drv.h +++ b/drivers/gpu/drm/xe/xe_drv.h @@ -9,7 +9,7 @@ #include <drm/drm_drv.h> #define DRIVER_NAME "xe" -#define DRIVER_DESC "Intel Xe Graphics" +#define DRIVER_DESC "Intel Xe2 Graphics" /* Interface history: * diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index 96732613b4b7..af7916315ac6 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -258,11 +258,13 @@ static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value, static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value, struct eu_stall_open_properties *props) { - if (value >= xe->info.gt_count) { + struct xe_gt *gt = xe_device_get_gt(xe, value); + + if (!gt) { drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value); return -EINVAL; } - props->gt = xe_device_get_gt(xe, value); + props->gt = gt; return 0; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index fee22358cc09..8991b4aed440 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -610,7 +610,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, err)) return -EFAULT; - if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) + if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id))) return -EINVAL; if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 8a5cba22b586..c59a9b330697 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -64,7 +64,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) { int i, j; - if (!xe_gt_is_media_type(gt)) + if (xe_gt_is_main_type(gt)) init_domain(fw, XE_FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER, FORCEWAKE_ACK_RENDER); diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c index ed9183599e31..6581cb0f0e59 100644 --- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -18,8 +18,8 @@ " *\n" \ " * This file was generated from rules: %s\n" \ " */\n" \ - "#ifndef _GENERATED_XE_WA_OOB_\n" \ - "#define _GENERATED_XE_WA_OOB_\n" \ + "#ifndef _GENERATED_%s_\n" \ + "#define _GENERATED_%s_\n" \ "\n" \ "enum {\n" @@ -52,7 +52,7 @@ static char *strip(char *line, size_t linelen) } #define MAX_LINE_LEN 4096 -static int parse(FILE *input, FILE *csource, FILE *cheader) +static int parse(FILE *input, FILE *csource, FILE *cheader, char *prefix) { char line[MAX_LINE_LEN + 1]; char *name, *prev_name = NULL, *rules; @@ -96,7 +96,7 @@ static int 
parse(FILE *input, FILE *csource, FILE *cheader) } if (name) { - fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx); + fprintf(cheader, "\t%s_%s = %u,\n", prefix, name, idx); /* Close previous entry before starting a new one */ if (idx) @@ -118,7 +118,33 @@ static int parse(FILE *input, FILE *csource, FILE *cheader) if (idx) fprintf(csource, ") },\n"); - fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx); + fprintf(cheader, "\t_%s_COUNT = %u\n", prefix, idx); + + return 0; +} + +static int fn_to_prefix(const char *fn, char *prefix, size_t size) +{ + size_t len; + + fn = basename(fn); + len = strlen(fn); + + if (len > size - 1) + return -ENAMETOOLONG; + + memcpy(prefix, fn, len + 1); + + for (char *p = prefix; *p; p++) { + switch (*p) { + case '.': + *p = '\0'; + return 0; + default: + *p = toupper(*p); + break; + } + } return 0; } @@ -141,6 +167,7 @@ int main(int argc, const char *argv[]) [ARGS_CHEADER] = { .fn = argv[3], .mode = "w" }, }; int ret = 1; + char prefix[128]; if (argc < 3) { fprintf(stderr, "ERROR: wrong arguments\n"); @@ -148,6 +175,9 @@ int main(int argc, const char *argv[]) return 1; } + if (fn_to_prefix(args[ARGS_CHEADER].fn, prefix, sizeof(prefix)) < 0) + return 1; + for (int i = 0; i < _ARGS_COUNT; i++) { args[i].f = fopen(args[i].fn, args[i].mode); if (!args[i].f) { @@ -157,9 +187,10 @@ int main(int argc, const char *argv[]) } } - fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn); + fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn, prefix, prefix); + ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f, - args[ARGS_CHEADER].f); + args[ARGS_CHEADER].f, prefix); if (!ret) fprintf(args[ARGS_CHEADER].f, FOOTER); diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7062115909f2..29d4d3f51da1 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -5,6 +5,7 @@ #include "xe_ggtt.h" +#include <kunit/visibility.h> #include <linux/fault-inject.h> #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/sizes.h> @@ -22,12 +23,13 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_printk.h" -#include "xe_gt_sriov_vf.h" #include "xe_gt_tlb_invalidation.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_pm.h" +#include "xe_res_cursor.h" #include "xe_sriov.h" +#include "xe_tile_sriov_vf.h" #include "xe_wa.h" #include "xe_wopcm.h" @@ -64,13 +66,9 @@ * give us the correct placement for free. 
*/ -static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, - u16 pat_index) +static u64 xelp_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index) { - u64 pte; - - pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); - pte |= XE_PAGE_PRESENT; + u64 pte = XE_PAGE_PRESENT; if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) pte |= XE_GGTT_PTE_DM; @@ -78,13 +76,12 @@ static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, return pte; } -static u64 xelpg_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, - u16 pat_index) +static u64 xelpg_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index) { struct xe_device *xe = xe_bo_device(bo); u64 pte; - pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, pat_index); + pte = xelp_ggtt_pte_flags(bo, pat_index); xe_assert(xe, pat_index <= 3); @@ -149,8 +146,9 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) xe_tile_assert(ggtt->tile, start < end); if (ggtt->scratch) - scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, - pat_index); + scratch_pte = xe_bo_addr(ggtt->scratch, 0, XE_PAGE_SIZE) | + ggtt->pt_ops->pte_encode_flags(ggtt->scratch, + pat_index); else scratch_pte = 0; @@ -160,6 +158,22 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) } } +/** + * xe_ggtt_alloc - Allocate a GGTT for a given &xe_tile + * @tile: &xe_tile + * + * Allocates a &xe_ggtt for a given tile. + * + * Return: &xe_ggtt on success, or NULL when out of memory. + */ +struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile) +{ + struct xe_ggtt *ggtt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*ggtt), GFP_KERNEL); + if (ggtt) + ggtt->tile = tile; + return ggtt; +} + static void ggtt_fini_early(struct drm_device *drm, void *arg) { struct xe_ggtt *ggtt = arg; @@ -176,6 +190,13 @@ static void ggtt_fini(void *arg) ggtt->scratch = NULL; } +#ifdef CONFIG_LOCKDEP +void xe_ggtt_might_lock(struct xe_ggtt *ggtt) +{ + might_lock(&ggtt->lock); +} +#endif + static void primelockdep(struct xe_ggtt *ggtt) { if (!IS_ENABLED(CONFIG_LOCKDEP)) @@ -187,20 +208,43 @@ static void primelockdep(struct xe_ggtt *ggtt) } static const struct xe_ggtt_pt_ops xelp_pt_ops = { - .pte_encode_bo = xelp_ggtt_pte_encode_bo, + .pte_encode_flags = xelp_ggtt_pte_flags, .ggtt_set_pte = xe_ggtt_set_pte, }; static const struct xe_ggtt_pt_ops xelpg_pt_ops = { - .pte_encode_bo = xelpg_ggtt_pte_encode_bo, + .pte_encode_flags = xelpg_ggtt_pte_flags, .ggtt_set_pte = xe_ggtt_set_pte, }; static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { - .pte_encode_bo = xelpg_ggtt_pte_encode_bo, + .pte_encode_flags = xelpg_ggtt_pte_flags, .ggtt_set_pte = xe_ggtt_set_pte_and_flush, }; +static void __xe_ggtt_init_early(struct xe_ggtt *ggtt, u32 reserved) +{ + drm_mm_init(&ggtt->mm, reserved, + ggtt->size - reserved); + mutex_init(&ggtt->lock); + primelockdep(ggtt); +} + +int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size) +{ + ggtt->size = size; + __xe_ggtt_init_early(ggtt, reserved); + return 0; +} +EXPORT_SYMBOL_IF_KUNIT(xe_ggtt_init_kunit); + +static void dev_fini_ggtt(void *arg) +{ + struct xe_ggtt *ggtt = arg; + + drain_workqueue(ggtt->wq); +} + /** * xe_ggtt_init_early - Early GGTT initialization * @ggtt: the &xe_ggtt to be initialized @@ -219,7 +263,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) unsigned int gsm_size; int err; - if (IS_SRIOV_VF(xe)) + if (IS_SRIOV_VF(xe) || GRAPHICS_VERx100(xe) >= 1250) gsm_size = SZ_8M; /* GGTT is expected to be 4GiB */ else gsm_size = probe_gsm_size(pdev); @@ -247,18 +291,18 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) 
ggtt->pt_ops = &xelp_pt_ops; ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM); - - drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), - ggtt->size - xe_wopcm_size(xe)); - mutex_init(&ggtt->lock); - primelockdep(ggtt); + __xe_ggtt_init_early(ggtt, xe_wopcm_size(xe)); err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); if (err) return err; + err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); + if (err) + return err; + if (IS_SRIOV_VF(xe)) { - err = xe_gt_sriov_vf_prepare_ggtt(xe_tile_get_gt(ggtt->tile, 0)); + err = xe_tile_sriov_vf_prepare_ggtt(ggtt->tile); if (err) return err; } @@ -377,7 +421,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) goto err; } - xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size); + xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, xe_bo_size(ggtt->scratch)); xe_ggtt_initial_clear(ggtt); @@ -429,16 +473,17 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, } /** - * xe_ggtt_node_insert_balloon - prevent allocation of specified GGTT addresses + * xe_ggtt_node_insert_balloon_locked - prevent allocation of specified GGTT addresses * @node: the &xe_ggtt_node to hold reserved GGTT node * @start: the starting GGTT address of the reserved region * @end: then end GGTT address of the reserved region * - * Use xe_ggtt_node_remove_balloon() to release a reserved GGTT node. + * To be used in cases where ggtt->lock is already taken. + * Use xe_ggtt_node_remove_balloon_locked() to release a reserved GGTT node. * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) +int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, u64 start, u64 end) { struct xe_ggtt *ggtt = node->ggtt; int err; @@ -447,14 +492,13 @@ int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base)); + lockdep_assert_held(&ggtt->lock); node->base.color = 0; node->base.start = start; node->base.size = end - start; - mutex_lock(&ggtt->lock); err = drm_mm_reserve_node(&ggtt->mm, &node->base); - mutex_unlock(&ggtt->lock); if (xe_gt_WARN(ggtt->tile->primary_gt, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n", @@ -466,27 +510,72 @@ int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) } /** - * xe_ggtt_node_remove_balloon - release a reserved GGTT region + * xe_ggtt_node_remove_balloon_locked - release a reserved GGTT region * @node: the &xe_ggtt_node with reserved GGTT region * - * See xe_ggtt_node_insert_balloon() for details. + * To be used in cases where ggtt->lock is already taken. + * See xe_ggtt_node_insert_balloon_locked() for details. 
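+ *
+ * A hypothetical caller (illustrative only; @node, @start and @end come from
+ * the caller) is expected to hold ggtt->lock around both calls:
+ *   mutex_lock(&ggtt->lock);
+ *   xe_ggtt_node_insert_balloon_locked(node, start, end);
+ *   ...
+ *   xe_ggtt_node_remove_balloon_locked(node);
+ *   mutex_unlock(&ggtt->lock);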
*/ -void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node) +void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node) { - if (!node || !node->ggtt) + if (!xe_ggtt_node_allocated(node)) return; - if (!drm_mm_node_allocated(&node->base)) - goto free_node; + lockdep_assert_held(&node->ggtt->lock); xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon"); - mutex_lock(&node->ggtt->lock); drm_mm_remove_node(&node->base); - mutex_unlock(&node->ggtt->lock); +} -free_node: - xe_ggtt_node_fini(node); +static void xe_ggtt_assert_fit(struct xe_ggtt *ggtt, u64 start, u64 size) +{ + struct xe_tile *tile = ggtt->tile; + struct xe_device *xe = tile_to_xe(tile); + u64 __maybe_unused wopcm = xe_wopcm_size(xe); + + xe_tile_assert(tile, start >= wopcm); + xe_tile_assert(tile, start + size < ggtt->size - wopcm); +} + +/** + * xe_ggtt_shift_nodes_locked - Shift GGTT nodes to adjust for a change in usable address range. + * @ggtt: the &xe_ggtt struct instance + * @shift: change to the location of area provisioned for current VF + * + * This function moves all nodes from the GGTT VM, to a temp list. These nodes are expected + * to represent allocations in range formerly assigned to current VF, before the range changed. + * When the GGTT VM is completely clear of any nodes, they are re-added with shifted offsets. + * + * The function has no ability of failing - because it shifts existing nodes, without + * any additional processing. If the nodes were successfully existing at the old address, + * they will do the same at the new one. A fail inside this function would indicate that + * the list of nodes was either already damaged, or that the shift brings the address range + * outside of valid bounds. Both cases justify an assert rather than error code. + */ +void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift) +{ + struct xe_tile *tile __maybe_unused = ggtt->tile; + struct drm_mm_node *node, *tmpn; + LIST_HEAD(temp_list_head); + + lockdep_assert_held(&ggtt->lock); + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) + drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) + xe_ggtt_assert_fit(ggtt, node->start + shift, node->size); + + drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) { + drm_mm_remove_node(node); + list_add(&node->node_list, &temp_list_head); + } + + list_for_each_entry_safe(node, tmpn, &temp_list_head, node_list) { + list_del(&node->node_list); + node->start += shift; + drm_mm_reserve_node(&ggtt->mm, node); + xe_tile_assert(tile, drm_mm_node_allocated(node)); + } } /** @@ -537,12 +626,12 @@ int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align) * xe_ggtt_node_init - Initialize %xe_ggtt_node struct * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved. * - * This function will allocated the struct %xe_ggtt_node and return it's pointer. + * This function will allocate the struct %xe_ggtt_node and return its pointer. * This struct will then be freed after the node removal upon xe_ggtt_node_remove() - * or xe_ggtt_node_remove_balloon(). + * or xe_ggtt_node_remove_balloon_locked(). * Having %xe_ggtt_node struct allocated doesn't mean that the node is already allocated * in GGTT. Only the xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), - * xe_ggtt_node_insert_balloon() will ensure the node is inserted or reserved in GGTT. + * xe_ggtt_node_insert_balloon_locked() will ensure the node is inserted or reserved in GGTT. * * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. 
**/ @@ -564,7 +653,7 @@ struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) * @node: the &xe_ggtt_node to be freed * * If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), - * or xe_ggtt_node_insert_balloon(); and this @node is not going to be reused, then, + * or xe_ggtt_node_insert_balloon_locked(); and this @node is not going to be reused, then, * this function needs to be called to free the %xe_ggtt_node struct **/ void xe_ggtt_node_fini(struct xe_ggtt_node *node) @@ -589,26 +678,59 @@ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node) /** * xe_ggtt_map_bo - Map the BO into GGTT * @ggtt: the &xe_ggtt where node will be mapped + * @node: the &xe_ggtt_node where this BO is mapped * @bo: the &xe_bo to be mapped + * @pat_index: Which pat_index to use. */ -void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + struct xe_bo *bo, u16 pat_index) { - u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; - u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; - u64 start; - u64 offset, pte; - if (XE_WARN_ON(!bo->ggtt_node[ggtt->tile->id])) + u64 start, pte, end; + struct xe_res_cursor cur; + + if (XE_WARN_ON(!node)) return; - start = bo->ggtt_node[ggtt->tile->id]->base.start; + start = node->base.start; + end = start + xe_bo_size(bo); - for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { - pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); - ggtt->pt_ops->ggtt_set_pte(ggtt, start + offset, pte); + pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index); + if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { + xe_assert(xe_bo_device(bo), bo->ttm.ttm); + + for (xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &cur); + cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) + ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, + pte | xe_res_dma(&cur)); + } else { + /* Prepend GPU offset */ + pte |= vram_region_gpu_offset(bo->ttm.resource); + + for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); + cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) + ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, + pte + cur.start); } } +/** + * xe_ggtt_map_bo_unlocked - Restore a mapping of a BO into GGTT + * @ggtt: the &xe_ggtt where node will be mapped + * @bo: the &xe_bo to be mapped + * + * This is used to restore a GGTT mapping after suspend. + */ +void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo) +{ + u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? 
XE_CACHE_NONE : XE_CACHE_WB; + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; + + mutex_lock(&ggtt->lock); + xe_ggtt_map_bo(ggtt, bo->ggtt_node[ggtt->tile->id], bo, pat_index); + mutex_unlock(&ggtt->lock); +} + static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end) { @@ -621,7 +743,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (XE_WARN_ON(bo->ggtt_node[tile_id])) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); return 0; } @@ -640,12 +762,15 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, mutex_lock(&ggtt->lock); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, - bo->size, alignment, 0, start, end, 0); + xe_bo_size(bo), alignment, 0, start, end, 0); if (err) { xe_ggtt_node_fini(bo->ggtt_node[tile_id]); bo->ggtt_node[tile_id] = NULL; } else { - xe_ggtt_map_bo(ggtt, bo); + u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; + + xe_ggtt_map_bo(ggtt, bo->ggtt_node[tile_id], bo, pat_index); } mutex_unlock(&ggtt->lock); @@ -698,7 +823,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) return; /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); xe_ggtt_node_remove(bo->ggtt_node[tile_id], bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); @@ -841,3 +966,30 @@ u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer return total; } + +/** + * xe_ggtt_encode_pte_flags - Get PTE encoding flags for BO + * @ggtt: &xe_ggtt + * @bo: &xe_bo + * @pat_index: The pat_index for the PTE. + * + * This function returns the pte_flags for a given BO, without address. + * It's used for DPT to fill a GGTT mapped BO with a linear lookup table. + */ +u64 xe_ggtt_encode_pte_flags(struct xe_ggtt *ggtt, + struct xe_bo *bo, u16 pat_index) +{ + return ggtt->pt_ops->pte_encode_flags(bo, pat_index); +} + +/** + * xe_ggtt_read_pte - Read a PTE from the GGTT + * @ggtt: &xe_ggtt + * @offset: the offset for which the mapping should be read. + * + * Used by testcases, and by display reading out an inherited bios FB. 
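+ *
+ * Hypothetical usage sketch (the node-based caller is illustrative only):
+ *   u64 pte = xe_ggtt_read_pte(ggtt, node->base.start);
+ *   bool mapped = pte & XE_PAGE_PRESENT;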
+ */ +u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset) +{ + return ioread64(ggtt->gsm + (offset / XE_PAGE_SIZE)); +} diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 27e7d67de004..fbe1e397d05d 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -9,22 +9,28 @@ #include "xe_ggtt_types.h" struct drm_printer; +struct xe_tile; +struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile); int xe_ggtt_init_early(struct xe_ggtt *ggtt); +int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size); int xe_ggtt_init(struct xe_ggtt *ggtt); struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt); void xe_ggtt_node_fini(struct xe_ggtt_node *node); -int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, - u64 start, u64 size); -void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node); +int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, + u64 start, u64 size); +void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node); +void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift); int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align); int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, u32 size, u32 align, u32 mm_flags); void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate); bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); -void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + struct xe_bo *bo, u16 pat_index); +void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end); @@ -38,4 +44,14 @@ u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid); #endif +#ifndef CONFIG_LOCKDEP +static inline void xe_ggtt_might_lock(struct xe_ggtt *ggtt) +{ } +#else +void xe_ggtt_might_lock(struct xe_ggtt *ggtt); +#endif + +u64 xe_ggtt_encode_pte_flags(struct xe_ggtt *ggtt, struct xe_bo *bo, u16 pat_index); +u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset); + #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index cb02b7994a9a..c5e999d58ff2 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -74,8 +74,8 @@ struct xe_ggtt_node { * Which can vary from platform to platform. 
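 *
 * With the pte_encode_bo() -> pte_encode_flags() change below, the full PTE is
 * composed at the call site, roughly (illustrative):
 *   pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index) |
 *         xe_bo_addr(bo, offset, XE_PAGE_SIZE);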
*/ struct xe_ggtt_pt_ops { - /** @pte_encode_bo: Encode PTE address for a given BO */ - u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); + /** @pte_encode_flags: Encode PTE flags for a given BO */ + u64 (*pte_encode_flags)(struct xe_bo *bo, u16 pat_index); /** @ggtt_set_pte: Directly write into GGTT's PTE */ void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); }; diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 0bcf97063ff6..1d84bf2f2cef 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -59,7 +59,8 @@ static int memcpy_fw(struct xe_gsc *gsc) xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size); xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size); - xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size); + xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, + xe_bo_size(gsc->private) - fw_size); kfree(storage); @@ -82,7 +83,8 @@ static int emit_gsc_upload(struct xe_gsc *gsc) bb->cs[bb->len++] = GSC_FW_LOAD; bb->cs[bb->len++] = lower_32_bits(offset); bb->cs[bb->len++] = upper_32_bits(offset); - bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID; + bb->cs[bb->len++] = (xe_bo_size(gsc->private) / SZ_4K) | + GSC_FW_LOAD_LIMIT_VALID; job = xe_bb_create_job(gsc->q, bb); if (IS_ERR(job)) { diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index d0519cd6704a..464282a89eef 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -23,6 +23,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_pm.h" +#include "xe_tile.h" /* * GSC proxy: @@ -483,7 +484,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc) } /* no multi-tile devices with this feature yet */ - if (tile->id > 0) { + if (!xe_tile_is_root(tile)) { xe_gt_err(gt, "unexpected GSC proxy init on tile %u\n", tile->id); return -EINVAL; } diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 0e5d243c9451..c8eda36546d3 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -112,13 +112,13 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) if (!fw_ref) return; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg |= CG_DIS_CNTLBUS; xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); } - xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF); xe_force_wake_put(gt_to_fw(gt), fw_ref); } @@ -146,30 +146,23 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) static void gt_reset_worker(struct work_struct *w); -static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) +static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb, + long timeout_jiffies) { struct xe_sched_job *job; - struct xe_bb *bb; struct dma_fence *fence; long timeout; - bb = xe_bb_new(gt, 4, false); - if (IS_ERR(bb)) - return PTR_ERR(bb); - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); + if (IS_ERR(job)) return PTR_ERR(job); - } xe_sched_job_arm(job); fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); - timeout = dma_fence_wait_timeout(fence, false, HZ); + timeout = dma_fence_wait_timeout(fence, false, timeout_jiffies); dma_fence_put(fence); - xe_bb_free(bb, NULL); if (timeout < 0) return timeout; else if (!timeout) @@ -178,27 +171,30 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) 
return 0; } +static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) +{ + struct xe_bb *bb; + int ret; + + bb = xe_bb_new(gt, 4, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + ret = emit_job_sync(q, bb, HZ); + xe_bb_free(bb, NULL); + + return ret; +} + static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { struct xe_reg_sr *sr = &q->hwe->reg_lrc; struct xe_reg_sr_entry *entry; + int count_rmw = 0, count = 0, ret; unsigned long idx; - struct xe_sched_job *job; struct xe_bb *bb; - struct dma_fence *fence; - long timeout; - int count_rmw = 0; - int count = 0; - - if (q->hwe->class == XE_ENGINE_CLASS_RENDER) - /* Big enough to emit all of the context's 3DSTATE */ - bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false); - else - /* Just pick a large BB size */ - bb = xe_bb_new(gt, SZ_4K, false); - - if (IS_ERR(bb)) - return PTR_ERR(bb); + size_t bb_len = 0; + u32 *cs; /* count RMW registers as those will be handled separately */ xa_for_each(&sr->xa, idx, entry) { @@ -208,13 +204,34 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) ++count_rmw; } - if (count || count_rmw) - xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name); + if (count) + bb_len += count * 2 + 1; + + if (count_rmw) + bb_len += count_rmw * 20 + 7; + + if (q->hwe->class == XE_ENGINE_CLASS_RENDER) + /* + * Big enough to emit all of the context's 3DSTATE via + * xe_lrc_emit_hwe_state_instructions() + */ + bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32); + + xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len); + + bb = xe_bb_new(gt, bb_len, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + cs = bb->cs; if (count) { - /* emit single LRI with all non RMW regs */ + /* + * Emit single LRI with all non RMW regs: 1 leading dw + 2dw per + * reg + 1 + */ - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); xa_for_each(&sr->xa, idx, entry) { struct xe_reg reg = entry->reg; @@ -229,79 +246,68 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) val |= entry->set_bits; - bb->cs[bb->len++] = reg.addr; - bb->cs[bb->len++] = val; + *cs++ = reg.addr; + *cs++ = val; xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val); } } if (count_rmw) { - /* emit MI_MATH for each RMW reg */ + /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */ xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) continue; - bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; - bb->cs[bb->len++] = entry->reg.addr; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; - - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | - MI_LRI_LRM_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr; - bb->cs[bb->len++] = entry->clr_bits; - bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr; - bb->cs[bb->len++] = entry->set_bits; - - bb->cs[bb->len++] = MI_MATH(8); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0); - bb->cs[bb->len++] = CS_ALU_INSTR_LOADINV(SRCB, REG1); - bb->cs[bb->len++] = CS_ALU_INSTR_AND; - bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCB, REG2); - bb->cs[bb->len++] = CS_ALU_INSTR_OR; - bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU); - - bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; - bb->cs[bb->len++] = entry->reg.addr; + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; + *cs++ = 
entry->reg.addr; + *cs++ = CS_GPR_REG(0, 0).addr; + + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | + MI_LRI_LRM_CS_MMIO; + *cs++ = CS_GPR_REG(0, 1).addr; + *cs++ = entry->clr_bits; + *cs++ = CS_GPR_REG(0, 2).addr; + *cs++ = entry->set_bits; + + *cs++ = MI_MATH(8); + *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0); + *cs++ = CS_ALU_INSTR_LOADINV(SRCB, REG1); + *cs++ = CS_ALU_INSTR_AND; + *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU); + *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0); + *cs++ = CS_ALU_INSTR_LOAD(SRCB, REG2); + *cs++ = CS_ALU_INSTR_OR; + *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU); + + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO; + *cs++ = CS_GPR_REG(0, 0).addr; + *cs++ = entry->reg.addr; xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", entry->reg.addr, entry->clr_bits, entry->set_bits); } /* reset used GPR */ - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | MI_LRI_LRM_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr; - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr; - bb->cs[bb->len++] = 0; + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | + MI_LRI_LRM_CS_MMIO; + *cs++ = CS_GPR_REG(0, 0).addr; + *cs++ = 0; + *cs++ = CS_GPR_REG(0, 1).addr; + *cs++ = 0; + *cs++ = CS_GPR_REG(0, 2).addr; + *cs++ = 0; } - xe_lrc_emit_hwe_state_instructions(q, bb); + cs = xe_lrc_emit_hwe_state_instructions(q, cs); - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); - return PTR_ERR(job); - } + bb->len = cs - bb->cs; - xe_sched_job_arm(job); - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); + ret = emit_job_sync(q, bb, HZ); - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); xe_bb_free(bb, NULL); - if (timeout < 0) - return timeout; - else if (!timeout) - return -ETIME; - return 0; + return ret; } int xe_gt_record_default_lrcs(struct xe_gt *gt) @@ -363,14 +369,6 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) goto put_nop_q; } - /* Reload golden LRC to record the effect of any indirect W/A */ - err = emit_nop_job(gt, q); - if (err) { - xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), q->guc->id); - goto put_nop_q; - } - xe_map_memcpy_from(xe, default_lrc, &q->lrc[0]->bo->vmap, xe_lrc_pphwsp_offset(q->lrc[0]), @@ -390,6 +388,7 @@ put_exec_queue: int xe_gt_init_early(struct xe_gt *gt) { + unsigned int fw_ref; int err; if (IS_SRIOV_PF(gt_to_xe(gt))) { @@ -417,6 +416,27 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; + xe_mocs_init_early(gt); + + /* + * Only after this point can GT-specific MMIO operations + * (including things like communication with the GuC) + * be performed. 
+ */ + xe_gt_mmio_init(gt); + + err = xe_uc_init_noalloc(>->uc); + if (err) + return err; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; + + xe_gt_mcr_init_early(gt); + xe_pat_init(gt); + xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } @@ -431,7 +451,7 @@ static void dump_pat_on_error(struct xe_gt *gt) xe_pat_dump(gt, &p); } -static int gt_fw_domain_init(struct xe_gt *gt) +static int gt_init_with_gt_forcewake(struct xe_gt *gt) { unsigned int fw_ref; int err; @@ -440,7 +460,15 @@ static int gt_fw_domain_init(struct xe_gt *gt) if (!fw_ref) return -ETIMEDOUT; - if (!xe_gt_is_media_type(gt)) { + err = xe_uc_init(>->uc); + if (err) + goto err_force_wake; + + xe_gt_topology_init(gt); + xe_gt_mcr_init(gt); + xe_gt_enable_host_l2_vram(gt); + + if (xe_gt_is_main_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); if (err) goto err_force_wake; @@ -455,8 +483,10 @@ static int gt_fw_domain_init(struct xe_gt *gt) xe_gt_mcr_init(gt); err = xe_hw_engines_init_early(gt); - if (err) + if (err) { + dump_pat_on_error(gt); goto err_force_wake; + } err = xe_hw_engine_class_sysfs_init(gt); if (err) @@ -477,13 +507,12 @@ static int gt_fw_domain_init(struct xe_gt *gt) return 0; err_force_wake: - dump_pat_on_error(gt); xe_force_wake_put(gt_to_fw(gt), fw_ref); return err; } -static int all_fw_domain_init(struct xe_gt *gt) +static int gt_init_with_all_forcewake(struct xe_gt *gt) { unsigned int fw_ref; int err; @@ -516,7 +545,7 @@ static int all_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { /* * USM has its only SA pool to non-block behind user operations */ @@ -532,7 +561,7 @@ static int all_fw_domain_init(struct xe_gt *gt) } } - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { struct xe_tile *tile = gt_to_tile(gt); tile->migrate = xe_migrate_init(tile); @@ -542,7 +571,7 @@ static int all_fw_domain_init(struct xe_gt *gt) } } - err = xe_uc_init_hw(>->uc); + err = xe_uc_load_hw(>->uc); if (err) goto err_force_wake; @@ -552,7 +581,7 @@ static int all_fw_domain_init(struct xe_gt *gt) xe_gt_apply_ccs_mode(gt); } - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); if (IS_SRIOV_PF(gt_to_xe(gt))) { @@ -570,39 +599,6 @@ err_force_wake: return err; } -/* - * Initialize enough GT to be able to load GuC in order to obtain hwconfig and - * enable CTB communication. 
- */ -int xe_gt_init_hwconfig(struct xe_gt *gt) -{ - unsigned int fw_ref; - int err; - - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) - return -ETIMEDOUT; - - xe_gt_mcr_init_early(gt); - xe_pat_init(gt); - - err = xe_uc_init(>->uc); - if (err) - goto out_fw; - - err = xe_uc_init_hwconfig(>->uc); - if (err) - goto out_fw; - - xe_gt_topology_init(gt); - xe_gt_mcr_init(gt); - xe_gt_enable_host_l2_vram(gt); - -out_fw: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return err; -} - static void xe_gt_fini(void *arg) { struct xe_gt *gt = arg; @@ -630,17 +626,15 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; - err = xe_gt_pagefault_init(gt); + err = xe_gt_sysfs_init(gt); if (err) return err; - xe_mocs_init_early(gt); - - err = xe_gt_sysfs_init(gt); + err = gt_init_with_gt_forcewake(gt); if (err) return err; - err = gt_fw_domain_init(gt); + err = xe_gt_pagefault_init(gt); if (err) return err; @@ -654,7 +648,7 @@ int xe_gt_init(struct xe_gt *gt) xe_force_wake_init_engines(gt, gt_to_fw(gt)); - err = all_fw_domain_init(gt); + err = gt_init_with_all_forcewake(gt); if (err) return err; @@ -742,7 +736,7 @@ static int vf_gt_restart(struct xe_gt *gt) if (err) return err; - err = xe_uc_init_hw(>->uc); + err = xe_uc_load_hw(>->uc); if (err) return err; @@ -780,11 +774,11 @@ static int do_gt_restart(struct xe_gt *gt) if (err) return err; - err = xe_uc_init_hw(>->uc); + err = xe_uc_load_hw(>->uc); if (err) return err; - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); if (IS_SRIOV_PF(gt_to_xe(gt))) @@ -839,6 +833,9 @@ static int gt_reset(struct xe_gt *gt) goto err_out; } + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_stop_prepare(gt); + xe_uc_gucrc_disable(>->uc); xe_uc_stop_prepare(>->uc); xe_gt_pagefault_reset(gt); diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 187fa6490eaf..41880979f4de 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -24,11 +24,10 @@ extern struct fault_attr gt_reset_failure; static inline bool xe_fault_inject_gt_reset(void) { - return should_fail(>_reset_failure, 1); + return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(>_reset_failure, 1); } struct xe_gt *xe_gt_alloc(struct xe_tile *tile); -int xe_gt_init_hwconfig(struct xe_gt *gt); int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); void xe_gt_mmio_init(struct xe_gt *gt); @@ -107,6 +106,11 @@ static inline bool xe_gt_has_indirect_ring_state(struct xe_gt *gt) xe_device_uc_enabled(gt_to_xe(gt)); } +static inline bool xe_gt_is_main_type(struct xe_gt *gt) +{ + return gt->info.type == XE_GT_TYPE_MAIN; +} + static inline bool xe_gt_is_media_type(struct xe_gt *gt) { return gt->info.type == XE_GT_TYPE_MEDIA; diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 119a55bb7580..848618acdca8 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -122,24 +122,6 @@ static int powergate_info(struct xe_gt *gt, struct drm_printer *p) return ret; } -static int force_reset(struct xe_gt *gt, struct drm_printer *p) -{ - xe_pm_runtime_get(gt_to_xe(gt)); - xe_gt_reset_async(gt); - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - -static int force_reset_sync(struct xe_gt *gt, struct drm_printer *p) -{ - xe_pm_runtime_get(gt_to_xe(gt)); - xe_gt_reset(gt); - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - static int sa_info(struct xe_gt *gt, struct drm_printer 
*p) { struct xe_tile *tile = gt_to_tile(gt); @@ -306,8 +288,6 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p) * - without access to the PF specific data */ static const struct drm_info_list vf_safe_debugfs_list[] = { - {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset}, - {"force_reset_sync", .show = xe_gt_debugfs_simple_show, .data = force_reset_sync}, {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info}, {"topology", .show = xe_gt_debugfs_simple_show, .data = topology}, {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt}, @@ -332,6 +312,78 @@ static const struct drm_info_list pf_only_debugfs_list[] = { {"steering", .show = xe_gt_debugfs_simple_show, .data = steering}, }; +static ssize_t write_to_gt_call(const char __user *userbuf, size_t count, loff_t *ppos, + void (*call)(struct xe_gt *), struct xe_gt *gt) +{ + bool yes; + int ret; + + if (*ppos) + return -EINVAL; + ret = kstrtobool_from_user(userbuf, count, &yes); + if (ret < 0) + return ret; + if (yes) + call(gt); + return count; +} + +static void force_reset(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_pm_runtime_get(xe); + xe_gt_reset_async(gt); + xe_pm_runtime_put(xe); +} + +static ssize_t force_reset_write(struct file *file, + const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct seq_file *s = file->private_data; + struct xe_gt *gt = s->private; + + return write_to_gt_call(userbuf, count, ppos, force_reset, gt); +} + +static int force_reset_show(struct seq_file *s, void *unused) +{ + struct xe_gt *gt = s->private; + + force_reset(gt); /* to be deprecated! */ + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(force_reset); + +static void force_reset_sync(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_pm_runtime_get(xe); + xe_gt_reset(gt); + xe_pm_runtime_put(xe); +} + +static ssize_t force_reset_sync_write(struct file *file, + const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct seq_file *s = file->private_data; + struct xe_gt *gt = s->private; + + return write_to_gt_call(userbuf, count, ppos, force_reset_sync, gt); +} + +static int force_reset_sync_show(struct seq_file *s, void *unused) +{ + struct xe_gt *gt = s->private; + + force_reset_sync(gt); /* to be deprecated! 
*/ + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(force_reset_sync); + void xe_gt_debugfs_register(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -355,6 +407,10 @@ void xe_gt_debugfs_register(struct xe_gt *gt) */ root->d_inode->i_private = gt; + /* VF safe */ + debugfs_create_file("force_reset", 0600, root, gt, &force_reset_fops); + debugfs_create_file("force_reset_sync", 0600, root, gt, &force_reset_sync_fops); + drm_debugfs_create_files(vf_safe_debugfs_list, ARRAY_SIZE(vf_safe_debugfs_list), root, minor); diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index c11206410a4d..ffb210216aa9 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -121,7 +121,7 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) if (vcs_mask || vecs_mask) gtidle->powergate_enable = MEDIA_POWERGATE_ENABLE; - if (!xe_gt_is_media_type(gt)) + if (xe_gt_is_main_type(gt)) gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; if (xe->info.platform != XE_DG1) { diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index d4d9730f0d2c..64a2f0d6aaf9 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -420,12 +420,6 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt) gt->steering[SQIDI_PSMI].instance_target = select & 0x1; } -static void init_steering_inst0(struct xe_gt *gt) -{ - gt->steering[INSTANCE0].group_target = 0; /* unused */ - gt->steering[INSTANCE0].instance_target = 0; /* unused */ -} - static const struct { const char *name; void (*init)(struct xe_gt *gt); @@ -436,7 +430,7 @@ static const struct { [DSS] = { "DSS", init_steering_dss }, [OADDRM] = { "OADDRM / GPMXMT", init_steering_oaddrm }, [SQIDI_PSMI] = { "SQIDI_PSMI", init_steering_sqidi_psmi }, - [INSTANCE0] = { "INSTANCE 0", init_steering_inst0 }, + [INSTANCE0] = { "INSTANCE 0", NULL }, [IMPLICIT_STEERING] = { "IMPLICIT", NULL }, }; @@ -446,25 +440,17 @@ static const struct { * * Perform early software only initialization of the MCR lock to allow * the synchronization on accessing the STEER_SEMAPHORE register and - * use the xe_gt_mcr_multicast_write() function. + * use the xe_gt_mcr_multicast_write() function, plus the minimum + * safe MCR registers required for VRAM/CCS probing. */ void xe_gt_mcr_init_early(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); + BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES); BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES); spin_lock_init(>->mcr_lock); -} - -/** - * xe_gt_mcr_init - Normal initialization of the MCR support - * @gt: GT structure - * - * Perform normal initialization of the MCR for all usages. - */ -void xe_gt_mcr_init(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); if (IS_SRIOV_VF(xe)) return; @@ -505,10 +491,27 @@ void xe_gt_mcr_init(struct xe_gt *gt) } } + /* Mark instance 0 as initialized, we need this early for VRAM and CCS probe. */ + gt->steering[INSTANCE0].initialized = true; +} + +/** + * xe_gt_mcr_init - Normal initialization of the MCR support + * @gt: GT structure + * + * Perform normal initialization of the MCR for all usages. 
+ */ +void xe_gt_mcr_init(struct xe_gt *gt) +{ + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + /* Select non-terminated steering target for each type */ - for (int i = 0; i < NUM_STEERING_TYPES; i++) + for (int i = 0; i < NUM_STEERING_TYPES; i++) { + gt->steering[i].initialized = true; if (gt->steering[i].ranges && xe_steering_types[i].init) xe_steering_types[i].init(gt); + } } /** @@ -570,6 +573,10 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) { if (xe_mmio_in_range(>->mmio, >->steering[type].ranges[i], reg)) { + drm_WARN(>_to_xe(gt)->drm, !gt->steering[type].initialized, + "Uninitialized usage of MCR register %s/%#x\n", + xe_steering_types[type].name, reg.addr); + *group = gt->steering[type].group_target; *instance = gt->steering[type].instance_target; return true; diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 10622ca471a2..5a75d56d8558 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -14,6 +14,7 @@ #include "abi/guc_actions_abi.h" #include "xe_bo.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_gt_stats.h" #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" @@ -68,31 +69,8 @@ static bool access_is_atomic(enum access_type access_type) static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) { - return BIT(tile->id) & vma->tile_present && - !(BIT(tile->id) & vma->tile_invalidated); -} - -static bool vma_matches(struct xe_vma *vma, u64 page_addr) -{ - if (page_addr > xe_vma_end(vma) - 1 || - page_addr + SZ_4K - 1 < xe_vma_start(vma)) - return false; - - return true; -} - -static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr) -{ - struct xe_vma *vma = NULL; - - if (vm->usm.last_fault_vma) { /* Fast lookup */ - if (vma_matches(vm->usm.last_fault_vma, page_addr)) - vma = vm->usm.last_fault_vma; - } - if (!vma) - vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); - - return vma; + return xe_vm_has_valid_gpu_mapping(tile, vma->tile_present, + vma->tile_invalidated); } static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, @@ -143,7 +121,7 @@ static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, trace_xe_vma_pagefault(vma); - /* Check if VMA is valid */ + /* Check if VMA is valid, opportunistic check only */ if (vma_is_valid(tile, vma) && !atomic) return 0; @@ -180,7 +158,6 @@ retry_userptr: dma_fence_wait(fence, false); dma_fence_put(fence); - vma->tile_invalidated &= ~BIT(tile->id); unlock_dma_resv: drm_exec_fini(&exec); @@ -231,7 +208,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) goto unlock_vm; } - vma = lookup_vma(vm, pf->page_addr); + vma = xe_vm_find_vma_by_addr(vm, pf->page_addr); if (!vma) { err = -EINVAL; goto unlock_vm; @@ -266,22 +243,22 @@ static int send_pagefault_reply(struct xe_guc *guc, return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); } -static void print_pagefault(struct xe_device *xe, struct pagefault *pf) +static void print_pagefault(struct xe_gt *gt, struct pagefault *pf) { - drm_dbg(&xe->drm, "\n\tASID: %d\n" - "\tVFID: %d\n" - "\tPDATA: 0x%04x\n" - "\tFaulted Address: 0x%08x%08x\n" - "\tFaultType: %d\n" - "\tAccessType: %d\n" - "\tFaultLevel: %d\n" - "\tEngineClass: %d %s\n" - "\tEngineInstance: %d\n", - pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), - lower_32_bits(pf->page_addr), - pf->fault_type, pf->access_type, pf->fault_level, - pf->engine_class, 
xe_hw_engine_class_to_str(pf->engine_class), - pf->engine_instance); + xe_gt_dbg(gt, "\n\tASID: %d\n" + "\tVFID: %d\n" + "\tPDATA: 0x%04x\n" + "\tFaulted Address: 0x%08x%08x\n" + "\tFaultType: %d\n" + "\tAccessType: %d\n" + "\tFaultLevel: %d\n" + "\tEngineClass: %d %s\n" + "\tEngineInstance: %d\n", + pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), + lower_32_bits(pf->page_addr), + pf->fault_type, pf->access_type, pf->fault_level, + pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class), + pf->engine_instance); } #define PF_MSG_LEN_DW 4 @@ -333,7 +310,6 @@ static bool pf_queue_full(struct pf_queue *pf_queue) int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = gt_to_xe(gt); struct pf_queue *pf_queue; unsigned long flags; u32 asid; @@ -358,7 +334,7 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) pf_queue->num_dw; queue_work(gt->usm.pf_wq, &pf_queue->worker); } else { - drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); + xe_gt_warn(gt, "PageFault Queue full, shouldn't be possible\n"); } spin_unlock_irqrestore(&pf_queue->lock, flags); @@ -371,7 +347,6 @@ static void pf_queue_work_func(struct work_struct *w) { struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker); struct xe_gt *gt = pf_queue->gt; - struct xe_device *xe = gt_to_xe(gt); struct xe_guc_pagefault_reply reply = {}; struct pagefault pf = {}; unsigned long threshold; @@ -382,9 +357,9 @@ static void pf_queue_work_func(struct work_struct *w) while (get_pagefault(pf_queue, &pf)) { ret = handle_pagefault(gt, &pf); if (unlikely(ret)) { - print_pagefault(xe, &pf); + print_pagefault(gt, &pf); pf.fault_unsuccessful = 1; - drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret); + xe_gt_dbg(gt, "Fault response: Unsuccessful %pe\n", ERR_PTR(ret)); } reply.dw0 = FIELD_PREP(PFR_VALID, 1) | @@ -444,6 +419,7 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) #define PF_MULTIPLIER 8 pf_queue->num_dw = (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; + pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw); #undef PF_MULTIPLIER pf_queue->gt = gt; @@ -537,21 +513,21 @@ static int sub_granularity_in_byte(int val) return (granularity_in_byte(val) / 32); } -static void print_acc(struct xe_device *xe, struct acc *acc) +static void print_acc(struct xe_gt *gt, struct acc *acc) { - drm_warn(&xe->drm, "Access counter request:\n" - "\tType: %s\n" - "\tASID: %d\n" - "\tVFID: %d\n" - "\tEngine: %d:%d\n" - "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" - "\tSub_Granularity Vector: 0x%08x\n" - "\tVA Range base: 0x%016llx\n", - acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL", - acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, - granularity_in_byte(acc->granularity) / SZ_1K, - sub_granularity_in_byte(acc->granularity) / SZ_1K, - acc->sub_granularity, acc->va_range_base); + xe_gt_warn(gt, "Access counter request:\n" + "\tType: %s\n" + "\tASID: %d\n" + "\tVFID: %d\n" + "\tEngine: %d:%d\n" + "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" + "\tSub_Granularity Vector: 0x%08x\n" + "\tVA Range base: 0x%016llx\n", + acc->access_type ? 
"AC_NTFY_VAL" : "AC_TRIG_VAL", + acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, + granularity_in_byte(acc->granularity) / SZ_1K, + sub_granularity_in_byte(acc->granularity) / SZ_1K, + acc->sub_granularity, acc->va_range_base); } static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) @@ -649,7 +625,6 @@ static void acc_queue_work_func(struct work_struct *w) { struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker); struct xe_gt *gt = acc_queue->gt; - struct xe_device *xe = gt_to_xe(gt); struct acc acc = {}; unsigned long threshold; int ret; @@ -659,8 +634,8 @@ static void acc_queue_work_func(struct work_struct *w) while (get_acc(acc_queue, &acc)) { ret = handle_acc(gt, &acc); if (unlikely(ret)) { - print_acc(xe, &acc); - drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret); + print_acc(gt, &acc); + xe_gt_warn(gt, "ACC: Unsuccessful %pe\n", ERR_PTR(ret)); } if (time_after(jiffies, threshold) && @@ -705,7 +680,7 @@ int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len) acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW; queue_work(gt->usm.acc_wq, &acc_queue->worker); } else { - drm_warn(>_to_xe(gt)->drm, "ACC Queue full, dropping ACC"); + xe_gt_warn(gt, "ACC Queue full, dropping ACC\n"); } spin_unlock(&acc_queue->lock); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index c08efca6420e..35489fa81825 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -172,6 +172,25 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) pf_clear_vf_scratch_regs(gt, vfid); } +static void pf_cancel_restart(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + if (cancel_work_sync(>->sriov.pf.workers.restart)) + xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n"); +} + +/** + * xe_gt_sriov_pf_stop_prepare() - Prepare to stop SR-IOV support. + * @gt: the &xe_gt + * + * This function can only be called on the PF. 
+ */ +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ + pf_cancel_restart(gt); +} + static void pf_restart(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index f474509411c0..e2b2ff8132dc 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -13,6 +13,7 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt); int xe_gt_sriov_pf_init(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt); void xe_gt_sriov_pf_restart(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) @@ -29,6 +30,10 @@ static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { } +static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ +} + static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt) { } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 2420a548cacc..494909f74eb2 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -104,13 +104,13 @@ static int pf_push_vf_buf_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs } if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); void *klvs = xe_guc_buf_cpu_ptr(buf); char name[8]; - xe_gt_sriov_info(gt, "pushed %s config with %u KLV%s:\n", - xe_sriov_function_name(vfid, name, sizeof(name)), - num_klvs, str_plural(num_klvs)); + xe_gt_sriov_dbg(gt, "pushed %s config with %u KLV%s:\n", + xe_sriov_function_name(vfid, name, sizeof(name)), + num_klvs, str_plural(num_klvs)); xe_guc_klv_print(klvs, num_dwords, &p); } @@ -238,26 +238,35 @@ static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned i } /* Return: number of configuration dwords written */ -static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) +static u32 encode_ggtt(u32 *cfg, u64 start, u64 size, bool details) { u32 n = 0; - if (xe_ggtt_node_allocated(config->ggtt_region)) { - if (details) { - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); - cfg[n++] = lower_32_bits(config->ggtt_region->base.start); - cfg[n++] = upper_32_bits(config->ggtt_region->base.start); - } - - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); - cfg[n++] = lower_32_bits(config->ggtt_region->base.size); - cfg[n++] = upper_32_bits(config->ggtt_region->base.size); + if (details) { + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); + cfg[n++] = lower_32_bits(start); + cfg[n++] = upper_32_bits(start); } + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); + cfg[n++] = lower_32_bits(size); + cfg[n++] = upper_32_bits(size); + return n; } /* Return: number of configuration dwords written */ +static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) +{ + struct xe_ggtt_node *node = config->ggtt_region; + + if (!xe_ggtt_node_allocated(node)) + return 0; + + return encode_ggtt(cfg, node->base.start, node->base.size, details); +} + +/* Return: number of configuration dwords written */ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) { u32 n = 0; @@ -282,8 +291,8 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool if (config->lmem_obj) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_LMEM_SIZE); 
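[Annotation] The encode_ggtt()/encode_config() refactor above follows the same KLV layout for every 64-bit field: one tag dword, then the lower and upper 32-bit halves of the value. A minimal, self-contained sketch of that packing; DEMO_KLV_TAG and demo_encode_u64_klv are illustrative names, not driver API, and the real tag layout comes from PREP_GUC_KLV_TAG() in the GuC KLV ABI headers.

#include <stdint.h>

/*
 * Simplified stand-in for the driver's PREP_GUC_KLV_TAG(): KLV key in the
 * upper 16 bits of the tag dword, payload length in dwords in the lower 16.
 */
#define DEMO_KLV_TAG(key, len_dw) (((uint32_t)(key) << 16) | (uint32_t)(len_dw))

/*
 * Pack one 64-bit value as a three-dword KLV: tag, then lower and upper
 * 32-bit halves, mirroring what encode_ggtt() does with lower_32_bits()
 * and upper_32_bits(). Returns the number of dwords written.
 */
unsigned int demo_encode_u64_klv(uint32_t *cfg, uint16_t key, uint64_t value)
{
	unsigned int n = 0;

	cfg[n++] = DEMO_KLV_TAG(key, 2);
	cfg[n++] = (uint32_t)(value & 0xffffffffu);	/* lower 32 bits */
	cfg[n++] = (uint32_t)(value >> 32);		/* upper 32 bits */

	return n;
}

The encode_config() caller simply accumulates the dword counts returned by each such helper, which is why the refactor can bolt a "fake" PF GGTT region onto the buffer before counting KLVs.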
- cfg[n++] = lower_32_bits(config->lmem_obj->size); - cfg[n++] = upper_32_bits(config->lmem_obj->size); + cfg[n++] = lower_32_bits(xe_bo_size(config->lmem_obj)); + cfg[n++] = upper_32_bits(xe_bo_size(config->lmem_obj)); } cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM); @@ -332,6 +341,17 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) } xe_gt_assert(gt, num_dwords <= max_cfg_dwords); + if (vfid == PFID) { + u64 ggtt_start = xe_wopcm_size(gt_to_xe(gt)); + u64 ggtt_size = gt_to_tile(gt)->mem.ggtt->size - ggtt_start; + + /* plain PF config data will never include a real GGTT region */ + xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config, true)); + + /* fake PF GGTT config covers full GGTT range except reserved WOPCM */ + num_dwords += encode_ggtt(cfg + num_dwords, ggtt_start, ggtt_size, true); + } + num_klvs = xe_guc_klv_count(cfg, num_dwords); err = pf_push_vf_buf_klvs(gt, vfid, num_klvs, buf, num_dwords); @@ -376,7 +396,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt) { u64 spare; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -388,7 +408,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt) static int pf_set_spare_ggtt(struct xe_gt *gt, u64 size) { - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -443,7 +463,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) int err; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); size = round_up(size, alignment); @@ -492,7 +512,7 @@ static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); struct xe_ggtt_node *node = config->ggtt_region; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); return xe_ggtt_node_allocated(node) ? node->base.size : 0; } @@ -560,7 +580,7 @@ int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size { int err; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); if (vfid) @@ -622,7 +642,7 @@ int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid, int err = 0; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!num_vfs) return 0; @@ -693,7 +713,7 @@ int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); fair = pf_estimate_fair_ggtt(gt, num_vfs); @@ -1299,7 +1319,7 @@ static u64 pf_get_vf_config_lmem(struct xe_gt *gt, unsigned int vfid) struct xe_bo *bo; bo = config->lmem_obj; - return bo ? bo->size : 0; + return bo ? 
xe_bo_size(bo) : 0; } static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) @@ -1327,7 +1347,17 @@ static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 si static void pf_force_lmtt_invalidate(struct xe_device *xe) { - /* TODO */ + struct xe_lmtt *lmtt; + struct xe_tile *tile; + unsigned int tid; + + xe_assert(xe, xe_device_has_lmtt(xe)); + xe_assert(xe, IS_SRIOV_PF(xe)); + + for_each_tile(tile, xe, tid) { + lmtt = &tile->sriov.pf.lmtt; + xe_lmtt_invalidate_hw(lmtt); + } } static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid) @@ -1388,7 +1418,7 @@ static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid) err = xe_lmtt_populate_pages(lmtt, vfid, bo, offset); if (err) goto fail; - offset += bo->size; + offset += xe_bo_size(bo); } } @@ -1406,7 +1436,7 @@ fail: static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) { xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt))); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); if (config->lmem_obj) { @@ -1425,7 +1455,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) xe_gt_assert(gt, vfid); xe_gt_assert(gt, IS_DGFX(xe)); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); size = round_up(size, pf_get_lmem_alignment(gt)); @@ -1469,12 +1499,12 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) goto release; } - err = pf_push_vf_cfg_lmem(gt, vfid, bo->size); + err = pf_push_vf_cfg_lmem(gt, vfid, xe_bo_size(bo)); if (unlikely(err)) goto reset_lmtt; xe_gt_sriov_dbg_verbose(gt, "VF%u LMEM %zu (%zuM)\n", - vfid, bo->size, bo->size / SZ_1M); + vfid, xe_bo_size(bo), xe_bo_size(bo) / SZ_1M); return 0; reset_lmtt: @@ -1520,6 +1550,8 @@ int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size { int err; + xe_gt_assert(gt, xe_device_has_lmtt(gt_to_xe(gt))); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); if (vfid) err = pf_provision_vf_lmem(gt, vfid, size); @@ -1550,7 +1582,7 @@ int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, int err = 0; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!num_vfs) return 0; @@ -1627,9 +1659,9 @@ int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); - if (!IS_DGFX(gt_to_xe(gt))) + if (!xe_device_has_lmtt(gt_to_xe(gt))) return 0; mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); @@ -1661,7 +1693,7 @@ int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { err = xe_gt_sriov_pf_config_set_fair_ggtt(gt, vfid, num_vfs); result = result ?: err; err = xe_gt_sriov_pf_config_set_fair_lmem(gt, vfid, num_vfs); @@ -1989,7 +2021,7 @@ static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); struct xe_device *xe = gt_to_xe(gt); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { pf_release_vf_config_ggtt(gt, config); if (IS_DGFX(xe)) { pf_release_vf_config_lmem(gt, config); @@ -2080,7 +2112,7 @@ static int pf_sanitize_vf_resources(struct xe_gt *gt, u32 
vfid, long timeout) * Only GGTT and LMEM requires to be cleared by the PF. * GuC doorbell IDs and context IDs do not need any clearing. */ - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { pf_sanitize_ggtt(config->ggtt_region, vfid); if (IS_DGFX(xe)) err = pf_sanitize_lmem(tile, config->lmem_obj, timeout); @@ -2147,7 +2179,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); - bool is_primary = !xe_gt_is_media_type(gt); + bool is_primary = xe_gt_is_main_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; @@ -2163,7 +2195,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_all = valid_all && valid_ggtt; valid_any = valid_any || (valid_ggtt && is_primary); - if (IS_DGFX(xe)) { + if (xe_device_has_lmtt(xe)) { bool valid_lmem = pf_get_vf_config_lmem(primary_gt, vfid); valid_any = valid_any || (valid_lmem && is_primary); @@ -2347,7 +2379,7 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return -EINVAL; if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); drm_printf(&p, "restoring VF%u config:\n", vfid); xe_guc_klv_print(buf, size / sizeof(u32), &p); @@ -2364,6 +2396,35 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return err; } +static void pf_prepare_self_config(struct xe_gt *gt) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, PFID); + + /* + * We want PF to be allowed to use all of context ID, doorbells IDs + * and whole usable GGTT area. While we can store ctxs/dbs numbers + * directly in the config structure, can't do the same with the GGTT + * configuration, so let it be prepared on demand while pushing KLVs. 
+ */ + config->num_ctxs = GUC_ID_MAX; + config->num_dbs = GUC_NUM_DOORBELLS; +} + +static int pf_push_self_config(struct xe_gt *gt) +{ + int err; + + err = pf_push_full_vf_config(gt, PFID); + if (err) { + xe_gt_sriov_err(gt, "Failed to push self configuration (%pe)\n", + ERR_PTR(err)); + return err; + } + + xe_gt_sriov_dbg_verbose(gt, "self configuration completed\n"); + return 0; +} + static void fini_config(void *arg) { struct xe_gt *gt = arg; @@ -2387,9 +2448,18 @@ static void fini_config(void *arg) int xe_gt_sriov_pf_config_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + int err; xe_gt_assert(gt, IS_SRIOV_PF(xe)); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_prepare_self_config(gt); + err = pf_push_self_config(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (err) + return err; + return devm_add_action_or_reset(xe->drm.dev, fini_config, gt); } @@ -2407,6 +2477,10 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt) unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); unsigned int fail = 0, skip = 0; + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_push_self_config(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = 1; n <= total_vfs; n++) { if (xe_gt_sriov_pf_config_is_empty(gt, n)) skip++; @@ -2550,10 +2624,10 @@ int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p) if (!config->lmem_obj) continue; - string_get_size(config->lmem_obj->size, 1, STRING_UNITS_2, + string_get_size(xe_bo_size(config->lmem_obj), 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "VF%u:\t%zu\t(%s)\n", - n, config->lmem_obj->size, buf); + n, xe_bo_size(config->lmem_obj), buf); } mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 1f50aec3a059..4f7fff892bc0 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -15,10 +15,11 @@ #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_monitor.h" -#include "xe_gt_sriov_pf_service.h" #include "xe_gt_sriov_printk.h" #include "xe_guc_ct.h" #include "xe_sriov.h" +#include "xe_sriov_pf_service.h" +#include "xe_tile.h" static const char *control_cmd_to_string(u32 cmd) { @@ -1064,7 +1065,9 @@ static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid) if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA)) return false; - xe_gt_sriov_pf_service_reset(gt, vfid); + if (xe_tile_is_root(gt->tile) && xe_gt_is_main_type(gt)) + xe_sriov_pf_service_reset_vf(gt_to_xe(gt), vfid); + xe_gt_sriov_pf_monitor_flr(gt, vfid); pf_enter_vf_flr_reset_mmio(gt, vfid); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 0fe47f41b63c..bf679b21f485 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -78,11 +78,6 @@ static const struct drm_info_list pf_info[] = { .data = xe_gt_sriov_pf_service_print_runtime, }, { - "negotiated_versions", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_service_print_version, - }, - { "adverse_events", .show = xe_gt_debugfs_simple_show, .data = xe_gt_sriov_pf_monitor_print_events, @@ -305,10 +300,10 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne xe_gt_assert(gt, gt == extract_gt(parent)); xe_gt_assert(gt, vfid == extract_vfid(parent)); - if (!xe_gt_is_media_type(gt)) { + if 
(xe_gt_is_main_type(gt)) { debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare", 0644, parent, parent, &ggtt_fops); - if (IS_DGFX(gt_to_xe(gt))) + if (xe_device_has_lmtt(gt_to_xe(gt))) debugfs_create_file_unsafe(vfid ? "lmem_quota" : "lmem_spare", 0644, parent, parent, &lmem_fops); } @@ -554,11 +549,11 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) pfdentry->d_inode->i_private = gt; drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { drm_debugfs_create_files(pf_ggtt_info, ARRAY_SIZE(pf_ggtt_info), pfdentry, minor); - if (IS_DGFX(gt_to_xe(gt))) + if (xe_device_has_lmtt(gt_to_xe(gt))) drm_debugfs_create_files(pf_lmem_info, ARRAY_SIZE(pf_lmem_info), pfdentry, minor); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index 821cfcc34e6b..76dd9233ef9f 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -19,91 +19,7 @@ #include "xe_gt_sriov_pf_service_types.h" #include "xe_guc_ct.h" #include "xe_guc_hxg_helpers.h" - -static void pf_init_versions(struct xe_gt *gt) -{ - BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); - BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); - - /* base versions may differ between platforms */ - gt->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; - gt->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; - - /* latest version is same for all platforms */ - gt->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; - gt->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; -} - -/* Return: 0 on success or a negative error code on failure. 
*/ -static int pf_negotiate_version(struct xe_gt *gt, - u32 wanted_major, u32 wanted_minor, - u32 *major, u32 *minor) -{ - struct xe_gt_sriov_pf_service_version base = gt->sriov.pf.service.version.base; - struct xe_gt_sriov_pf_service_version latest = gt->sriov.pf.service.version.latest; - - xe_gt_assert(gt, base.major); - xe_gt_assert(gt, base.major <= latest.major); - xe_gt_assert(gt, (base.major < latest.major) || (base.minor <= latest.minor)); - - /* VF doesn't care - return our latest */ - if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && - wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { - *major = latest.major; - *minor = latest.minor; - return 0; - } - - /* VF wants newer than our - return our latest */ - if (wanted_major > latest.major) { - *major = latest.major; - *minor = latest.minor; - return 0; - } - - /* VF wants older than min required - reject */ - if (wanted_major < base.major || - (wanted_major == base.major && wanted_minor < base.minor)) { - return -EPERM; - } - - /* previous major - return wanted, as we should still support it */ - if (wanted_major < latest.major) { - /* XXX: we are not prepared for multi-versions yet */ - xe_gt_assert(gt, base.major == latest.major); - return -ENOPKG; - } - - /* same major - return common minor */ - *major = wanted_major; - *minor = min_t(u32, latest.minor, wanted_minor); - return 0; -} - -static void pf_connect(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - xe_gt_assert(gt, major || minor); - - gt->sriov.pf.vfs[vfid].version.major = major; - gt->sriov.pf.vfs[vfid].version.minor = minor; -} - -static void pf_disconnect(struct xe_gt *gt, u32 vfid) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - - gt->sriov.pf.vfs[vfid].version.major = 0; - gt->sriov.pf.vfs[vfid].version.minor = 0; -} - -static bool pf_is_negotiated(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - - return major == gt->sriov.pf.vfs[vfid].version.major && - minor <= gt->sriov.pf.vfs[vfid].version.minor; -} +#include "xe_sriov_pf_service.h" static const struct xe_reg tgl_runtime_regs[] = { RPM_CONFIG0, /* _MMIO(0x0d00) */ @@ -266,7 +182,7 @@ static void pf_prepare_runtime_info(struct xe_gt *gt) read_many(gt, size, regs, values); if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); xe_gt_sriov_pf_service_print_runtime(gt, &p); } @@ -285,8 +201,6 @@ int xe_gt_sriov_pf_service_init(struct xe_gt *gt) { int err; - pf_init_versions(gt); - err = pf_alloc_runtime_info(gt); if (unlikely(err)) goto failed; @@ -311,47 +225,6 @@ void xe_gt_sriov_pf_service_update(struct xe_gt *gt) pf_prepare_runtime_info(gt); } -/** - * xe_gt_sriov_pf_service_reset - Reset a connection with the VF. - * @gt: the &xe_gt - * @vfid: the VF identifier - * - * Reset a VF driver negotiated VF/PF ABI version. - * After that point, the VF driver will have to perform new version handshake - * to continue use of the PF services again. - * - * This function can only be called on PF. - */ -void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid) -{ - pf_disconnect(gt, vfid); -} - -/* Return: 0 on success or a negative error code on failure. 
*/ -static int pf_process_handshake(struct xe_gt *gt, u32 vfid, - u32 wanted_major, u32 wanted_minor, - u32 *major, u32 *minor) -{ - int err; - - xe_gt_sriov_dbg_verbose(gt, "VF%u wants ABI version %u.%u\n", - vfid, wanted_major, wanted_minor); - - err = pf_negotiate_version(gt, wanted_major, wanted_minor, major, minor); - - if (err < 0) { - xe_gt_sriov_notice(gt, "VF%u failed to negotiate ABI %u.%u (%pe)\n", - vfid, wanted_major, wanted_minor, ERR_PTR(err)); - pf_disconnect(gt, vfid); - } else { - xe_gt_sriov_dbg(gt, "VF%u negotiated ABI version %u.%u\n", - vfid, *major, *minor); - pf_connect(gt, vfid, *major, *minor); - } - - return 0; -} - /* Return: length of the response message or a negative error code on failure. */ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, const u32 *request, u32 len, u32 *response, u32 size) @@ -371,7 +244,8 @@ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, wanted_major = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, request[1]); wanted_minor = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, request[1]); - err = pf_process_handshake(gt, origin, wanted_major, wanted_minor, &major, &minor); + err = xe_sriov_pf_service_handshake_vf(gt_to_xe(gt), origin, wanted_major, wanted_minor, + &major, &minor); if (err < 0) return err; @@ -430,8 +304,10 @@ static int pf_process_runtime_query_msg(struct xe_gt *gt, u32 origin, u32 remaining = 0; int ret; - if (!pf_is_negotiated(gt, origin, 1, 0)) + /* this action is available from ABI 1.0 */ + if (!xe_sriov_pf_service_is_negotiated(gt_to_xe(gt), origin, 1, 0)) return -EACCES; + if (unlikely(msg_len > VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) return -EMSGSIZE; if (unlikely(msg_len < VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) @@ -528,33 +404,3 @@ int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p return 0; } - -/** - * xe_gt_sriov_pf_service_print_version - Print ABI versions negotiated with VFs. - * @gt: the &xe_gt - * @p: the &drm_printer - * - * This function is for PF use only. 
- */ -int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p) -{ - struct xe_device *xe = gt_to_xe(gt); - unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); - struct xe_gt_sriov_pf_service_version *version; - - xe_gt_assert(gt, IS_SRIOV_PF(xe)); - - for (n = 1; n <= total_vfs; n++) { - version = >->sriov.pf.vfs[n].version; - if (!version->major && !version->minor) - continue; - - drm_printf(p, "VF%u:\t%u.%u\n", n, version->major, version->minor); - } - - return 0; -} - -#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) -#include "tests/xe_gt_sriov_pf_service_test.c" -#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h index 56aaadf0360d..10b02c9b651c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h @@ -14,9 +14,7 @@ struct xe_gt; int xe_gt_sriov_pf_service_init(struct xe_gt *gt); void xe_gt_sriov_pf_service_update(struct xe_gt *gt); -void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid); -int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p); #ifdef CONFIG_PCI_IOV diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index a439261bf4d7..b282838d59e6 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -82,17 +82,17 @@ int xe_gt_sriov_vf_reset(struct xe_gt *gt) } static int guc_action_match_version(struct xe_guc *guc, - u32 wanted_branch, u32 wanted_major, u32 wanted_minor, - u32 *branch, u32 *major, u32 *minor, u32 *patch) + struct xe_uc_fw_version *wanted, + struct xe_uc_fw_version *found) { u32 request[VF2GUC_MATCH_VERSION_REQUEST_MSG_LEN] = { FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_MATCH_VERSION), - FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted_branch) | - FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted_major) | - FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted_minor), + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted->branch) | + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted->major) | + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted->minor), }; u32 response[GUC_MAX_MMIO_MSG_LEN]; int ret; @@ -106,120 +106,138 @@ static int guc_action_match_version(struct xe_guc *guc, if (unlikely(FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_0_MBZ, response[0]))) return -EPROTO; - *branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]); - *major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]); - *minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]); - *patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]); + memset(found, 0, sizeof(struct xe_uc_fw_version)); + found->branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]); + found->major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]); + found->minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]); + found->patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]); return 0; } -static void vf_minimum_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor) +static int guc_action_match_version_any(struct xe_guc *guc, + struct xe_uc_fw_version *found) 
+{ + struct xe_uc_fw_version wanted = { + .branch = GUC_VERSION_BRANCH_ANY, + .major = GUC_VERSION_MAJOR_ANY, + .minor = GUC_VERSION_MINOR_ANY, + .patch = 0 + }; + + return guc_action_match_version(guc, &wanted, found); +} + +static void vf_minimum_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver) { struct xe_device *xe = gt_to_xe(gt); + memset(ver, 0, sizeof(struct xe_uc_fw_version)); + switch (xe->info.platform) { case XE_TIGERLAKE ... XE_PVC: /* 1.1 this is current baseline for Xe driver */ - *branch = 0; - *major = 1; - *minor = 1; + ver->branch = 0; + ver->major = 1; + ver->minor = 1; break; default: /* 1.2 has support for the GMD_ID KLV */ - *branch = 0; - *major = 1; - *minor = 2; + ver->branch = 0; + ver->major = 1; + ver->minor = 2; break; } } -static void vf_wanted_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor) +static void vf_wanted_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver) { /* for now it's the same as minimum */ - return vf_minimum_guc_version(gt, branch, major, minor); + return vf_minimum_guc_version(gt, ver); } static int vf_handshake_with_guc(struct xe_gt *gt) { - struct xe_gt_sriov_vf_guc_version *guc_version = >->sriov.vf.guc_version; + struct xe_uc_fw_version *guc_version = >->sriov.vf.guc_version; + struct xe_uc_fw_version wanted = {0}; struct xe_guc *guc = >->uc.guc; - u32 wanted_branch, wanted_major, wanted_minor; - u32 branch, major, minor, patch; + bool old = false; int err; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); /* select wanted version - prefer previous (if any) */ if (guc_version->major || guc_version->minor) { - wanted_branch = guc_version->branch; - wanted_major = guc_version->major; - wanted_minor = guc_version->minor; + wanted = *guc_version; + old = true; } else { - vf_wanted_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor); - xe_gt_assert(gt, wanted_major != GUC_VERSION_MAJOR_ANY); + vf_wanted_guc_version(gt, &wanted); + xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY); + + /* First time we handshake, so record the minimum wanted */ + gt->sriov.vf.wanted_guc_version = wanted; } - err = guc_action_match_version(guc, wanted_branch, wanted_major, wanted_minor, - &branch, &major, &minor, &patch); + err = guc_action_match_version(guc, &wanted, guc_version); if (unlikely(err)) goto fail; - /* we don't support interface version change */ - if ((guc_version->major || guc_version->minor) && - (guc_version->branch != branch || guc_version->major != major || - guc_version->minor != minor)) { - xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n", - branch, major, minor, patch); - xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n", - guc_version->branch, guc_version->major, - guc_version->minor, guc_version->patch); - err = -EREMCHG; - goto fail; + if (old) { + /* we don't support interface version change */ + if (MAKE_GUC_VER_STRUCT(*guc_version) != MAKE_GUC_VER_STRUCT(wanted)) { + xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n", + guc_version->branch, guc_version->major, + guc_version->minor, guc_version->patch); + xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n", + wanted.branch, wanted.major, + wanted.minor, wanted.patch); + err = -EREMCHG; + goto fail; + } else { + /* version is unchanged, no need to re-verify it */ + return 0; + } } /* illegal */ - if (major > wanted_major) { + if (guc_version->major > wanted.major) { err = -EPROTO; goto unsupported; } /* there's no fallback on major version. 
*/ - if (major != wanted_major) { + if (guc_version->major != wanted.major) { err = -ENOPKG; goto unsupported; } /* check against minimum version supported by us */ - vf_minimum_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor); - xe_gt_assert(gt, major != GUC_VERSION_MAJOR_ANY); - if (major < wanted_major || (major == wanted_major && minor < wanted_minor)) { + vf_minimum_guc_version(gt, &wanted); + xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY); + if (MAKE_GUC_VER_STRUCT(*guc_version) < MAKE_GUC_VER_STRUCT(wanted)) { err = -ENOKEY; goto unsupported; } xe_gt_sriov_dbg(gt, "using GuC interface version %u.%u.%u.%u\n", - branch, major, minor, patch); + guc_version->branch, guc_version->major, + guc_version->minor, guc_version->patch); - guc_version->branch = branch; - guc_version->major = major; - guc_version->minor = minor; - guc_version->patch = patch; return 0; unsupported: xe_gt_sriov_err(gt, "Unsupported GuC version %u.%u.%u.%u (%pe)\n", - branch, major, minor, patch, ERR_PTR(err)); + guc_version->branch, guc_version->major, + guc_version->minor, guc_version->patch, + ERR_PTR(err)); fail: xe_gt_sriov_err(gt, "Unable to confirm GuC version %u.%u (%pe)\n", - wanted_major, wanted_minor, ERR_PTR(err)); + wanted.major, wanted.minor, ERR_PTR(err)); /* try again with *any* just to query which version is supported */ - if (!guc_action_match_version(guc, GUC_VERSION_BRANCH_ANY, - GUC_VERSION_MAJOR_ANY, GUC_VERSION_MINOR_ANY, - &branch, &major, &minor, &patch)) + if (!guc_action_match_version_any(guc, &wanted)) xe_gt_sriov_notice(gt, "GuC reports interface version %u.%u.%u.%u\n", - branch, major, minor, patch); + wanted.branch, wanted.major, wanted.minor, wanted.patch); return err; } @@ -250,6 +268,29 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt) return 0; } +/** + * xe_gt_sriov_vf_guc_versions - Minimum required and found GuC ABI versions + * @gt: the &xe_gt + * @wanted: pointer to the xe_uc_fw_version to be filled with the wanted version + * @found: pointer to the xe_uc_fw_version to be filled with the found version + * + * This function is for VF use only and it can only be used after successful + * version handshake with the GuC. 
+ */ +void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt, + struct xe_uc_fw_version *wanted, + struct xe_uc_fw_version *found) +{ + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_gt_assert(gt, gt->sriov.vf.guc_version.major); + + if (wanted) + *wanted = gt->sriov.vf.wanted_guc_version; + + if (found) + *found = gt->sriov.vf.guc_version; +} + static int guc_action_vf_notify_resfix_done(struct xe_guc *guc) { u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { @@ -415,6 +456,7 @@ static int vf_get_ggtt_info(struct xe_gt *gt) xe_gt_sriov_dbg_verbose(gt, "GGTT %#llx-%#llx = %lluK\n", start, start + size - 1, size / SZ_1K); + config->ggtt_shift = start - (s64)config->ggtt_base; config->ggtt_base = start; config->ggtt_size = size; @@ -510,7 +552,7 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt) if (unlikely(err)) return err; - if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) { err = vf_get_lmem_info(gt); if (unlikely(err)) return err; @@ -560,106 +602,56 @@ u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt) return gt->sriov.vf.self_config.lmem_size; } -static struct xe_ggtt_node * -vf_balloon_ggtt_node(struct xe_ggtt *ggtt, u64 start, u64 end) -{ - struct xe_ggtt_node *node; - int err; - - node = xe_ggtt_node_init(ggtt); - if (IS_ERR(node)) - return node; - - err = xe_ggtt_node_insert_balloon(node, start, end); - if (err) { - xe_ggtt_node_fini(node); - return ERR_PTR(err); - } - - return node; -} - -static int vf_balloon_ggtt(struct xe_gt *gt) +/** + * xe_gt_sriov_vf_ggtt - VF GGTT configuration. + * @gt: the &xe_gt + * + * This function is for VF use only. + * + * Return: size of the GGTT assigned to VF. + */ +u64 xe_gt_sriov_vf_ggtt(struct xe_gt *gt) { - struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; - struct xe_tile *tile = gt_to_tile(gt); - struct xe_ggtt *ggtt = tile->mem.ggtt; - struct xe_device *xe = gt_to_xe(gt); - u64 start, end; - - xe_gt_assert(gt, IS_SRIOV_VF(xe)); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); - - if (!config->ggtt_size) - return -ENODATA; - - /* - * VF can only use part of the GGTT as allocated by the PF: - * - * WOPCM GUC_GGTT_TOP - * |<------------ Total GGTT size ------------------>| - * - * VF GGTT base -->|<- size ->| - * - * +--------------------+----------+-----------------+ - * |////////////////////| block |\\\\\\\\\\\\\\\\\| - * +--------------------+----------+-----------------+ - * - * |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->| - */ - - start = xe_wopcm_size(xe); - end = config->ggtt_base; - if (end != start) { - tile->sriov.vf.ggtt_balloon[0] = vf_balloon_ggtt_node(ggtt, start, end); - if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) - return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); - } - - start = config->ggtt_base + config->ggtt_size; - end = GUC_GGTT_TOP; - if (end != start) { - tile->sriov.vf.ggtt_balloon[1] = vf_balloon_ggtt_node(ggtt, start, end); - if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { - xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); - return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); - } - } + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_gt_assert(gt, gt->sriov.vf.guc_version.major); + xe_gt_assert(gt, gt->sriov.vf.self_config.ggtt_size); - return 0; + return gt->sriov.vf.self_config.ggtt_size; } -static void deballoon_ggtt(struct drm_device *drm, void *arg) +/** + * xe_gt_sriov_vf_ggtt_base - VF GGTT base offset. + * @gt: the &xe_gt + * + * This function is for VF use only. + * + * Return: base offset of the GGTT assigned to VF. 
+ */ +u64 xe_gt_sriov_vf_ggtt_base(struct xe_gt *gt) { - struct xe_tile *tile = arg; + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_gt_assert(gt, gt->sriov.vf.guc_version.major); + xe_gt_assert(gt, gt->sriov.vf.self_config.ggtt_size); - xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[1]); - xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); + return gt->sriov.vf.self_config.ggtt_base; } /** - * xe_gt_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration. - * @gt: the &xe_gt + * xe_gt_sriov_vf_ggtt_shift - Return shift in GGTT range due to VF migration + * @gt: the &xe_gt struct instance * * This function is for VF use only. * - * Return: 0 on success or a negative error code on failure. + * Return: The shift value; could be negative */ -int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt) +s64 xe_gt_sriov_vf_ggtt_shift(struct xe_gt *gt) { - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = tile_to_xe(tile); - int err; - - if (xe_gt_is_media_type(gt)) - return 0; + struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; - err = vf_balloon_ggtt(gt); - if (err) - return err; + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); - return drmm_add_action_or_reset(&xe->drm, deballoon_ggtt, tile); + return config->ggtt_shift; } static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor) @@ -694,21 +686,22 @@ static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor) return 0; } -static void vf_connect_pf(struct xe_gt *gt, u16 major, u16 minor) +static void vf_connect_pf(struct xe_device *xe, u16 major, u16 minor) { - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_assert(xe, IS_SRIOV_VF(xe)); - gt->sriov.vf.pf_version.major = major; - gt->sriov.vf.pf_version.minor = minor; + xe->sriov.vf.pf_version.major = major; + xe->sriov.vf.pf_version.minor = minor; } -static void vf_disconnect_pf(struct xe_gt *gt) +static void vf_disconnect_pf(struct xe_device *xe) { - vf_connect_pf(gt, 0, 0); + vf_connect_pf(xe, 0, 0); } static int vf_handshake_with_pf(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); u32 major_wanted = GUC_RELAY_VERSION_LATEST_MAJOR; u32 minor_wanted = GUC_RELAY_VERSION_LATEST_MINOR; u32 major = major_wanted, minor = minor_wanted; @@ -724,13 +717,13 @@ static int vf_handshake_with_pf(struct xe_gt *gt) } xe_gt_sriov_dbg(gt, "using VF/PF ABI %u.%u\n", major, minor); - vf_connect_pf(gt, major, minor); + vf_connect_pf(xe, major, minor); return 0; failed: xe_gt_sriov_err(gt, "Unable to confirm VF/PF ABI version %u.%u (%pe)\n", major, minor, ERR_PTR(err)); - vf_disconnect_pf(gt); + vf_disconnect_pf(xe); return err; } @@ -783,10 +776,12 @@ void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt) static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor) { - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + struct xe_device *xe = gt_to_xe(gt); - return major == gt->sriov.vf.pf_version.major && - minor <= gt->sriov.vf.pf_version.minor; + xe_gt_assert(gt, IS_SRIOV_VF(xe)); + + return major == xe->sriov.vf.pf_version.major && + minor <= xe->sriov.vf.pf_version.minor; } static int vf_prepare_runtime_info(struct xe_gt *gt, unsigned int num_regs) @@ -974,7 +969,6 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg) struct vf_runtime_reg *rr; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - xe_gt_assert(gt, gt->sriov.vf.pf_version.major); xe_gt_assert(gt, !reg.vf); if (reg.addr == GMD_ID.addr) { @@ 
-1043,7 +1037,9 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) string_get_size(config->ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "GGTT size:\t%llu (%s)\n", config->ggtt_size, buf); - if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + drm_printf(p, "GGTT shift on last restore:\t%lld\n", config->ggtt_shift); + + if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) { string_get_size(config->lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "LMEM size:\t%llu (%s)\n", config->lmem_size, buf); } @@ -1079,19 +1075,21 @@ void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) */ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt_sriov_vf_guc_version *guc_version = >->sriov.vf.guc_version; - struct xe_gt_sriov_vf_relay_version *pf_version = >->sriov.vf.pf_version; - u32 branch, major, minor; + struct xe_device *xe = gt_to_xe(gt); + struct xe_uc_fw_version *guc_version = >->sriov.vf.guc_version; + struct xe_uc_fw_version *wanted = >->sriov.vf.wanted_guc_version; + struct xe_sriov_vf_relay_version *pf_version = &xe->sriov.vf.pf_version; + struct xe_uc_fw_version ver; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); drm_printf(p, "GuC ABI:\n"); - vf_minimum_guc_version(gt, &branch, &major, &minor); - drm_printf(p, "\tbase:\t%u.%u.%u.*\n", branch, major, minor); + vf_minimum_guc_version(gt, &ver); + drm_printf(p, "\tbase:\t%u.%u.%u.*\n", ver.branch, ver.major, ver.minor); - vf_wanted_guc_version(gt, &branch, &major, &minor); - drm_printf(p, "\twanted:\t%u.%u.%u.*\n", branch, major, minor); + drm_printf(p, "\twanted:\t%u.%u.%u.*\n", + wanted->branch, wanted->major, wanted->minor); drm_printf(p, "\thandshake:\t%u.%u.%u.%u\n", guc_version->branch, guc_version->major, diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index ba6c5d74e326..e0357f341a2d 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -11,19 +11,26 @@ struct drm_printer; struct xe_gt; struct xe_reg; +struct xe_uc_fw_version; int xe_gt_sriov_vf_reset(struct xe_gt *gt); int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt); +void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt, + struct xe_uc_fw_version *wanted, + struct xe_uc_fw_version *found); int xe_gt_sriov_vf_query_config(struct xe_gt *gt); int xe_gt_sriov_vf_connect(struct xe_gt *gt); int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); -int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt); int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt); void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt); u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt); +u64 xe_gt_sriov_vf_ggtt(struct xe_gt *gt); +u64 xe_gt_sriov_vf_ggtt_base(struct xe_gt *gt); +s64 xe_gt_sriov_vf_ggtt_shift(struct xe_gt *gt); + u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg); void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index a57f13b5afcd..298dedf4b009 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -7,30 +7,7 @@ #define _XE_GT_SRIOV_VF_TYPES_H_ #include <linux/types.h> - -/** - * struct xe_gt_sriov_vf_guc_version - GuC ABI version details. - */ -struct xe_gt_sriov_vf_guc_version { - /** @branch: branch version. */ - u8 branch; - /** @major: major version. 
*/ - u8 major; - /** @minor: minor version. */ - u8 minor; - /** @patch: patch version. */ - u8 patch; -}; - -/** - * struct xe_gt_sriov_vf_relay_version - PF ABI version details. - */ -struct xe_gt_sriov_vf_relay_version { - /** @major: major version. */ - u16 major; - /** @minor: minor version. */ - u16 minor; -}; +#include "xe_uc_fw_types.h" /** * struct xe_gt_sriov_vf_selfconfig - VF configuration data. @@ -40,6 +17,8 @@ struct xe_gt_sriov_vf_selfconfig { u64 ggtt_base; /** @ggtt_size: assigned size of the GGTT region. */ u64 ggtt_size; + /** @ggtt_shift: difference in ggtt_base on last migration */ + s64 ggtt_shift; /** @lmem_size: assigned size of the LMEM. */ u64 lmem_size; /** @num_ctxs: assigned number of GuC submission context IDs. */ @@ -71,12 +50,12 @@ struct xe_gt_sriov_vf_runtime { * struct xe_gt_sriov_vf - GT level VF virtualization data. */ struct xe_gt_sriov_vf { + /** @wanted_guc_version: minimum wanted GuC ABI version. */ + struct xe_uc_fw_version wanted_guc_version; /** @guc_version: negotiated GuC ABI version. */ - struct xe_gt_sriov_vf_guc_version guc_version; + struct xe_uc_fw_version guc_version; /** @self_config: resource configurations. */ struct xe_gt_sriov_vf_selfconfig self_config; - /** @pf_version: negotiated VF/PF ABI version. */ - struct xe_gt_sriov_vf_relay_version pf_version; /** @runtime: runtime data retrieved from the PF. */ struct xe_gt_sriov_vf_runtime runtime; }; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 084cbdeba8ea..086c12ee3d9d 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -138,6 +138,14 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) int pending_seqno; /* + * we can get here before the CTs are even initialized if we're wedging + * very early, in which case there are not going to be any pending + * fences so we can bail immediately. + */ + if (!xe_guc_ct_initialized(>->uc.guc.ct)) + return; + + /* * CT channel is already disabled at this point. No new TLB requests can * appear. */ @@ -322,6 +330,40 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) return 0; } +static int send_tlb_invalidation_all(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence) +{ + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION_ALL, + 0, /* seqno, replaced in send_tlb_invalidation */ + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL), + }; + + return send_tlb_invalidation(>->uc.guc, fence, action, ARRAY_SIZE(action)); +} + +/** + * xe_gt_tlb_invalidation_all - Invalidate all TLBs across PF and all VFs. + * @gt: the &xe_gt structure + * @fence: the &xe_gt_tlb_invalidation_fence to be signaled on completion + * + * Send a request to invalidate all TLBs across PF and all VFs. + * + * Return: 0 on success, negative error code on error + */ +int xe_gt_tlb_invalidation_all(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence) +{ + int err; + + xe_gt_assert(gt, gt == fence->gt); + + err = send_tlb_invalidation_all(gt, fence); + if (err) + xe_gt_err(gt, "TLB invalidation request failed (%pe)", ERR_PTR(err)); + + return err; +} + /* * Ensure that roundup_pow_of_two(length) doesn't overflow. 
* Note that roundup_pow_of_two() operates on unsigned long, @@ -441,30 +483,6 @@ void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm) } /** - * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA - * @gt: GT structure - * @fence: invalidation fence which will be signal on TLB invalidation - * completion, can be NULL - * @vma: VMA to invalidate - * - * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can use - * the invalidation fence to wait for completion. - * - * Return: Negative error code on error, 0 on success - */ -int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - struct xe_vma *vma) -{ - xe_gt_assert(gt, vma); - - return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); -} - -/** * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler * @guc: guc * @msg: message indicating TLB invalidation done diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index abe9b03d543e..f7f0f2eaf4b5 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -19,10 +19,8 @@ int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt); void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); -int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - struct xe_vma *vma); void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm); +int xe_gt_tlb_invalidation_all(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence); int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, u64 start, u64 end, u32 asid); diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 516c81e3b8dd..8c63e3263643 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -12,23 +12,20 @@ #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_mmio.h" #include "xe_wa.h" -static void -load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...) 
+static void load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, + const struct xe_reg regs[]) { - va_list argp; u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {}; int i; - if (drm_WARN_ON(>_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS)) - numregs = XE_MAX_DSS_FUSE_REGS; + xe_gt_assert(gt, numregs <= ARRAY_SIZE(fuse_val)); - va_start(argp, numregs); for (i = 0; i < numregs; i++) - fuse_val[i] = xe_mmio_read32(>->mmio, va_arg(argp, struct xe_reg)); - va_end(argp); + fuse_val[i] = xe_mmio_read32(>->mmio, regs[i]); bitmap_from_arr32(mask, fuse_val, numregs * 32); } @@ -218,9 +215,19 @@ get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs) void xe_gt_topology_init(struct xe_gt *gt) { + static const struct xe_reg geometry_regs[] = { + XELP_GT_GEOMETRY_DSS_ENABLE, + XE2_GT_GEOMETRY_DSS_1, + XE2_GT_GEOMETRY_DSS_2, + }; + static const struct xe_reg compute_regs[] = { + XEHP_GT_COMPUTE_DSS_ENABLE, + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, + XE2_GT_COMPUTE_DSS_2, + }; + int num_geometry_regs, num_compute_regs; struct xe_device *xe = gt_to_xe(gt); struct drm_printer p; - int num_geometry_regs, num_compute_regs; get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs); @@ -228,23 +235,18 @@ xe_gt_topology_init(struct xe_gt *gt) * Register counts returned shouldn't exceed the number of registers * passed as parameters below. */ - drm_WARN_ON(&xe->drm, num_geometry_regs > 3); - drm_WARN_ON(&xe->drm, num_compute_regs > 3); + xe_gt_assert(gt, num_geometry_regs <= ARRAY_SIZE(geometry_regs)); + xe_gt_assert(gt, num_compute_regs <= ARRAY_SIZE(compute_regs)); load_dss_mask(gt, gt->fuse_topo.g_dss_mask, - num_geometry_regs, - XELP_GT_GEOMETRY_DSS_ENABLE, - XE2_GT_GEOMETRY_DSS_1, - XE2_GT_GEOMETRY_DSS_2); - load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs, - XEHP_GT_COMPUTE_DSS_ENABLE, - XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, - XE2_GT_COMPUTE_DSS_2); + num_geometry_regs, geometry_regs); + load_dss_mask(gt, gt->fuse_topo.c_dss_mask, + num_compute_regs, compute_regs); + load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, >->fuse_topo.eu_type); load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask); - p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology"); - + p = xe_gt_dbg_printer(gt); xe_gt_topology_dump(gt, &p); } @@ -288,11 +290,6 @@ xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum) return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); } -bool xe_dss_mask_empty(const xe_dss_mask_t mask) -{ - return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS); -} - /** * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant * @gt: GT to check diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index a72d26ba0653..c8140704ad4c 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -41,8 +41,6 @@ xe_gt_topology_mask_last_dss(const xe_dss_mask_t mask) unsigned int xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum); -bool xe_dss_mask_empty(const xe_dss_mask_t mask); - bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 7def0959da35..96344c604726 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -377,6 +377,8 @@ struct xe_gt { u16 group_target; /** @steering.instance_target: instance to steer accesses to */ u16 instance_target; + /** @steering.initialized: Whether this steering range is initialized */ 
+ bool initialized; } steering[NUM_STEERING_TYPES]; /** diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index bac5471a1a78..b1d1d6da3758 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -29,6 +29,7 @@ #include "xe_guc_db_mgr.h" #include "xe_guc_engine_activity.h" #include "xe_guc_hwconfig.h" +#include "xe_guc_klv_helpers.h" #include "xe_guc_log.h" #include "xe_guc_pc.h" #include "xe_guc_relay.h" @@ -59,7 +60,7 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc, /* GuC addresses above GUC_GGTT_TOP don't map through the GTT */ xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc))); xe_assert(xe, addr < GUC_GGTT_TOP); - xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr); + xe_assert(xe, xe_bo_size(bo) <= GUC_GGTT_TOP - addr); return addr; } @@ -420,7 +421,7 @@ static int guc_g2g_register(struct xe_guc *near_guc, struct xe_gt *far_gt, u32 t buf = base + G2G_DESC_AREA_SIZE + slot * G2G_BUFFER_SIZE; xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); - xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= g2g_bo->size); + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(g2g_bo)); return guc_action_register_g2g_buffer(near_guc, type, far_tile, far_dev, desc, buf, G2G_BUFFER_SIZE); @@ -570,6 +571,86 @@ err_deregister: return err; } +static int __guc_opt_in_features_enable(struct xe_guc *guc, u64 addr, u32 num_dwords) +{ + u32 action[] = { + XE_GUC_ACTION_OPT_IN_FEATURE_KLV, + lower_32_bits(addr), + upper_32_bits(addr), + num_dwords + }; + + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +static bool supports_dynamic_ics(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + /* Dynamic ICS is available for PVC and Xe2 and newer platforms. */ + if (xe->info.platform != XE_PVC && GRAPHICS_VER(xe) < 20) + return false; + + /* + * The feature is currently not compatible with multi-lrc, so the GuC + * does not support it at all on the media engines (which are the main + * users of mlrc). On the primary GT side, to avoid it being used in + * conjunction with mlrc, we only enable it if we are in single CCS + * mode. + */ + if (xe_gt_is_media_type(gt) || gt->ccs_mode > 1) + return false; + + /* + * Dynamic ICS requires GuC v70.40.1, which maps to compatibility + * version v1.18.4. + */ + return GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 18, 4); +} + +#define OPT_IN_MAX_DWORDS 16 +int xe_guc_opt_in_features_enable(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + CLASS(xe_guc_buf, buf)(&guc->buf, OPT_IN_MAX_DWORDS); + u32 count = 0; + u32 *klvs; + int ret; + + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + klvs = xe_guc_buf_cpu_ptr(buf); + + /* + * The extra CAT error type opt-in was added in GuC v70.17.0, which maps + * to compatibility version v1.7.0. + * Note that the GuC allows enabling this KLV even on platforms that do + * not support the extra type; in such case the returned type variable + * will be set to a known invalid value which we can check against. 
+ */ + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 7, 0)) + klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_EXT_CAT_ERR_TYPE); + + if (supports_dynamic_ics(guc)) + klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH); + + if (count) { + xe_assert(xe, count <= OPT_IN_MAX_DWORDS); + + ret = __guc_opt_in_features_enable(guc, xe_guc_buf_flush(buf), count); + if (ret < 0) { + xe_gt_err(guc_to_gt(guc), + "failed to enable GuC opt-in features: %pe\n", + ERR_PTR(ret)); + return ret; + } + } + + return 0; +} + static void guc_fini_hw(void *arg) { struct xe_guc *guc = arg; @@ -577,7 +658,7 @@ static void guc_fini_hw(void *arg) unsigned int fw_ref; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_uc_fini_hw(&guc_to_gt(guc)->uc); + xe_uc_sanitize_reset(&guc_to_gt(guc)->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); guc_g2g_fini(guc); @@ -627,23 +708,51 @@ static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc) return 0; } -static int vf_guc_init(struct xe_guc *guc) +static int vf_guc_init_noalloc(struct xe_guc *guc) { + struct xe_gt *gt = guc_to_gt(guc); int err; - xe_guc_comm_init_early(guc); - - err = xe_guc_ct_init(&guc->ct); + err = xe_gt_sriov_vf_bootstrap(gt); if (err) return err; - err = xe_guc_relay_init(&guc->relay); + err = xe_gt_sriov_vf_query_config(gt); if (err) return err; return 0; } +int xe_guc_init_noalloc(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + xe_guc_comm_init_early(guc); + + ret = xe_guc_ct_init_noalloc(&guc->ct); + if (ret) + goto out; + + ret = xe_guc_relay_init(&guc->relay); + if (ret) + goto out; + + if (IS_SRIOV_VF(xe)) { + ret = vf_guc_init_noalloc(guc); + if (ret) + goto out; + } + + return 0; + +out: + xe_gt_err(gt, "GuC init failed with %pe\n", ERR_PTR(ret)); + return ret; +} + int xe_guc_init(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); @@ -653,13 +762,13 @@ int xe_guc_init(struct xe_guc *guc) guc->fw.type = XE_UC_FW_TYPE_GUC; ret = xe_uc_fw_init(&guc->fw); if (ret) - goto out; + return ret; if (!xe_uc_fw_is_enabled(&guc->fw)) return 0; if (IS_SRIOV_VF(xe)) { - ret = vf_guc_init(guc); + ret = xe_guc_ct_init(&guc->ct); if (ret) goto out; return 0; @@ -681,10 +790,6 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; - ret = xe_guc_relay_init(&guc->relay); - if (ret) - goto out; - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc); @@ -693,8 +798,6 @@ int xe_guc_init(struct xe_guc *guc) guc_init_params(guc); - xe_guc_comm_init_early(guc); - return 0; out: @@ -710,6 +813,10 @@ static int vf_guc_init_post_hwconfig(struct xe_guc *guc) if (err) return err; + err = xe_guc_buf_cache_init(&guc->buf); + if (err) + return err; + /* XXX xe_guc_db_mgr_init not needed for now */ return 0; @@ -763,6 +870,10 @@ int xe_guc_post_load_init(struct xe_guc *guc) xe_guc_ads_populate_post_load(&guc->ads); + ret = xe_guc_opt_in_features_enable(guc); + if (ret) + return ret; + if (xe_guc_g2g_wanted(guc_to_xe(guc))) { ret = guc_g2g_start(guc); if (ret) @@ -1098,14 +1209,6 @@ static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) struct xe_gt *gt = guc_to_gt(guc); int ret; - ret = xe_gt_sriov_vf_bootstrap(gt); - if (ret) - return ret; - - ret = xe_gt_sriov_vf_query_config(gt); - if (ret) - return ret; - ret = xe_guc_hwconfig_init(guc); if (ret) return ret; @@ -1116,13 +1219,17 @@ static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) ret = xe_gt_sriov_vf_connect(gt); if (ret) - 
return ret; + goto err_out; ret = xe_gt_sriov_vf_query_runtime(gt); if (ret) - return ret; + goto err_out; return 0; + +err_out: + xe_guc_sanitize(guc); + return ret; } /** @@ -1285,6 +1392,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, struct xe_reg reply_reg = xe_gt_is_media_type(gt) ? MED_VF_SW_FLAG(0) : VF_SW_FLAG(0); const u32 LAST_INDEX = VF_SW_FLAG_COUNT - 1; + bool lost = false; int ret; int i; @@ -1318,6 +1426,12 @@ retry: FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC), 50000, &reply, false); if (ret) { + /* scratch registers might be cleared during FLR, try once more */ + if (!reply && !lost) { + xe_gt_dbg(gt, "GuC mmio request %#x: lost, trying again\n", request[0]); + lost = true; + goto retry; + } timeout: xe_gt_err(gt, "GuC mmio request %#x: no reply %#x\n", request[0], reply); diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 58338be44558..22cf019a11bf 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -26,6 +26,7 @@ struct drm_printer; void xe_guc_comm_init_early(struct xe_guc *guc); +int xe_guc_init_noalloc(struct xe_guc *guc); int xe_guc_init(struct xe_guc *guc); int xe_guc_init_post_hwconfig(struct xe_guc *guc); int xe_guc_post_load_init(struct xe_guc *guc); @@ -33,6 +34,7 @@ int xe_guc_reset(struct xe_guc *guc); int xe_guc_upload(struct xe_guc *guc); int xe_guc_min_load_for_hwconfig(struct xe_guc *guc); int xe_guc_enable_communication(struct xe_guc *guc); +int xe_guc_opt_in_features_enable(struct xe_guc *guc); int xe_guc_suspend(struct xe_guc *guc); void xe_guc_notify(struct xe_guc *guc); int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 44c1fa2fe7c8..131cfc56be00 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -20,6 +20,7 @@ #include "xe_gt_ccs_mode.h" #include "xe_gt_printk.h" #include "xe_guc.h" +#include "xe_guc_buf.h" #include "xe_guc_capture.h" #include "xe_guc_ct.h" #include "xe_hw_engine.h" @@ -889,7 +890,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) xe_gt_assert(gt, ads->bo); - xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); guc_policies_init(ads); guc_golden_lrc_init(ads); guc_mapping_table_init_invalid(gt, &info_map); @@ -913,7 +914,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) xe_gt_assert(gt, ads->bo); - xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); guc_policies_init(ads); fill_engine_enable_masks(gt, &info_map); guc_mmio_reg_state_init(ads); @@ -1004,16 +1005,16 @@ static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_off */ int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) { - struct xe_device *xe = ads_to_xe(ads); - struct xe_gt *gt = ads_to_gt(ads); - struct xe_tile *tile = gt_to_tile(gt); struct guc_policies *policies; - struct xe_bo *bo; - int ret = 0; + struct xe_guc *guc = ads_to_guc(ads); + struct xe_device *xe = ads_to_xe(ads); + CLASS(xe_guc_buf, buf)(&guc->buf, sizeof(*policies)); + + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; - policies = kmalloc(sizeof(*policies), GFP_KERNEL); - if (!policies) - return -ENOMEM; + policies = xe_guc_buf_cpu_ptr(buf); + memset(policies, 0, sizeof(*policies)); policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time); 
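Aside, for orientation only (illustrative, not part of the patch): the opt-in list built above from PREP_GUC_KLV_TAG() entries is an array of KLV header dwords with no payload. A minimal sketch of how such a zero-length header could be composed, assuming the usual GuC KLV layout of key in bits 31:16 and value length (in dwords) in bits 15:0; the EXAMPLE_* names and the key value are made up and do not correspond to real GuC keys.

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define EXAMPLE_KLV_0_KEY	GENMASK(31, 16)
#define EXAMPLE_KLV_0_LEN	GENMASK(15, 0)
#define EXAMPLE_OPT_IN_KEY	0x1234	/* hypothetical key, for illustration only */

static u32 example_opt_in_klv_header(void)
{
	/* a feature opt-in carries no value, so the length field stays 0 */
	return FIELD_PREP(EXAMPLE_KLV_0_KEY, EXAMPLE_OPT_IN_KEY) |
	       FIELD_PREP(EXAMPLE_KLV_0_LEN, 0);
}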
policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items); @@ -1023,16 +1024,5 @@ int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) else policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET; - bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies), - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT); - if (IS_ERR(bo)) { - ret = PTR_ERR(bo); - goto out; - } - - ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo)); -out: - kfree(policies); - return ret; + return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf)); } diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c index 0193c94dd6a0..14a07dca48e7 100644 --- a/drivers/gpu/drm/xe/xe_guc_buf.c +++ b/drivers/gpu/drm/xe/xe_guc_buf.c @@ -37,10 +37,6 @@ int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache) struct xe_gt *gt = cache_to_gt(cache); struct xe_sa_manager *sam; - /* XXX: currently it's useful only for the PF actions */ - if (!IS_SRIOV_PF(gt_to_xe(gt))) - return 0; - sam = __xe_sa_bo_manager_init(gt_to_tile(gt), SZ_8K, 0, sizeof(u32)); if (IS_ERR(sam)) return PTR_ERR(sam); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 2447de0ebedf..b6acccfcd351 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -25,6 +25,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_monitor.h" +#include "xe_gt_sriov_printk.h" #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_log.h" @@ -34,6 +35,11 @@ #include "xe_pm.h" #include "xe_trace_guc.h" +static void receive_g2h(struct xe_guc_ct *ct); +static void g2h_worker_func(struct work_struct *w); +static void safe_mode_worker_func(struct work_struct *w); +static void ct_exit_safe_mode(struct xe_guc_ct *ct); + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) enum { /* Internal states, not error conditions */ @@ -79,11 +85,14 @@ struct g2h_fence { u16 error; u16 hint; u16 reason; + bool cancel; bool retry; bool fail; bool done; }; +#define make_u64(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) + static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) { g2h_fence->response_buffer = response_buffer; @@ -95,6 +104,13 @@ static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) g2h_fence->seqno = ~0x0; } +static void g2h_fence_cancel(struct g2h_fence *g2h_fence) +{ + g2h_fence->cancel = true; + g2h_fence->fail = true; + g2h_fence->done = true; +} + static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) { return g2h_fence->seqno == ~0x0; @@ -186,14 +202,11 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; + ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); } -static void receive_g2h(struct xe_guc_ct *ct); -static void g2h_worker_func(struct work_struct *w); -static void safe_mode_worker_func(struct work_struct *w); - static void primelockdep(struct xe_guc_ct *ct) { if (!IS_ENABLED(CONFIG_LOCKDEP)) @@ -204,12 +217,10 @@ static void primelockdep(struct xe_guc_ct *ct) fs_reclaim_release(GFP_KERNEL); } -int xe_guc_ct_init(struct xe_guc_ct *ct) +int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct) { struct xe_device *xe = ct_to_xe(ct); struct xe_gt *gt = ct_to_gt(ct); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_bo *bo; int err; xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); @@ -235,6 +246,23 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) primelockdep(ct); 
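Aside (illustrative sketch, not driver code): the xe_guc_buf conversions above lean on the scope-based cleanup helpers from <linux/cleanup.h>, which is why the kmalloc()/kfree() unwind paths disappear. A self-contained sketch of that idiom with a hypothetical example_buf type; the real driver wraps a GuC sub-allocation rather than kzalloc().

#include <linux/cleanup.h>
#include <linux/sizes.h>
#include <linux/slab.h>

struct example_buf { void *ptr; };

static inline struct example_buf example_buf_alloc(size_t sz)
{
	return (struct example_buf){ .ptr = kzalloc(sz, GFP_KERNEL) };
}

static inline void example_buf_free(struct example_buf b)
{
	kfree(b.ptr);
}

DEFINE_CLASS(example_buf, struct example_buf,
	     example_buf_free(_T), example_buf_alloc(sz), size_t sz);

static int example_use_scoped_buf(void)
{
	CLASS(example_buf, buf)(SZ_4K);	/* released automatically on any return */

	if (!buf.ptr)
		return -ENOMEM;

	/* ... fill and use buf.ptr; no explicit free needed on error paths ... */
	return 0;
}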
+ err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); + if (err) + return err; + + xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); + ct->state = XE_GUC_CT_STATE_DISABLED; + return 0; +} +ALLOW_ERROR_INJECTION(xe_guc_ct_init_noalloc, ERRNO); /* See xe_pci_probe() */ + +int xe_guc_ct_init(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *bo; + bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | @@ -244,13 +272,6 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) return PTR_ERR(bo); ct->bo = bo; - - err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); - if (err) - return err; - - xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); - ct->state = XE_GUC_CT_STATE_DISABLED; return 0; } ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */ @@ -371,9 +392,13 @@ static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable) return ret > 0 ? -EPROTO : ret; } -static void xe_guc_ct_set_state(struct xe_guc_ct *ct, +static void guc_ct_change_state(struct xe_guc_ct *ct, enum xe_guc_ct_state state) { + struct xe_gt *gt = ct_to_gt(ct); + struct g2h_fence *g2h_fence; + unsigned long idx; + mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */ spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */ @@ -385,8 +410,20 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, ct->g2h_outstanding = 0; ct->state = state; + xe_gt_dbg(gt, "GuC CT communication channel %s\n", + state == XE_GUC_CT_STATE_STOPPED ? "stopped" : + str_enabled_disabled(state == XE_GUC_CT_STATE_ENABLED)); + spin_unlock_irq(&ct->fast_lock); + /* cancel all in-flight send-recv requests */ + xa_for_each(&ct->fence_lookup, idx, g2h_fence) + g2h_fence_cancel(g2h_fence); + + /* make sure guc_ct_send_recv() will see g2h_fence changes */ + smp_mb(); + wake_up_all(&ct->g2h_fence_wq); + /* * Lockdep doesn't like this under the fast lock and he destroy only * needs to be serialized with the send path which ct lock provides. 
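Aside (illustrative sketch, not driver code): guc_ct_change_state() above cancels every in-flight send-recv request and wakes the waiters so they return -ECANCELED instead of waiting for a reply that will never arrive. A stripped-down version of that mark-then-wake pattern with hypothetical types; the real code additionally serialises against the CT fast path under its own locks.

#include <linux/errno.h>
#include <linux/wait.h>
#include <linux/xarray.h>

struct example_req {
	bool cancel;
	bool done;
};

static void example_cancel_all(struct xarray *pending, wait_queue_head_t *wq)
{
	struct example_req *req;
	unsigned long idx;

	xa_for_each(pending, idx, req) {
		req->cancel = true;
		req->done = true;
	}

	/* make the flag updates visible before waking any sleeping sender */
	smp_mb();
	wake_up_all(wq);
}

static int example_wait(struct example_req *req, wait_queue_head_t *wq)
{
	wait_event(*wq, req->done);

	return req->cancel ? -ECANCELED : 0;
}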
@@ -440,7 +477,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) xe_gt_assert(gt, !xe_guc_ct_enabled(ct)); - xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size); + xe_map_memset(xe, &ct->bo->vmap, 0, 0, xe_bo_size(ct->bo)); guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); @@ -456,11 +493,10 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) if (err) goto err_out; - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_ENABLED); smp_mb(); wake_up_all(&ct->wq); - xe_gt_dbg(gt, "GuC CT communication channel enabled\n"); if (ct_needs_safe_mode(ct)) ct_enter_safe_mode(ct); @@ -501,7 +537,7 @@ static void stop_g2h_handler(struct xe_guc_ct *ct) */ void xe_guc_ct_disable(struct xe_guc_ct *ct) { - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED); ct_exit_safe_mode(ct); stop_g2h_handler(ct); } @@ -514,7 +550,10 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct) */ void xe_guc_ct_stop(struct xe_guc_ct *ct) { - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); + if (!xe_guc_ct_initialized(ct)) + return; + + guc_ct_change_state(ct, XE_GUC_CT_STATE_STOPPED); stop_g2h_handler(ct); } @@ -625,6 +664,47 @@ static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) spin_unlock_irq(&ct->fast_lock); } +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) +{ + unsigned int slot = fence % ARRAY_SIZE(ct->fast_req); +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + unsigned long entries[SZ_32]; + unsigned int n; + + n = stack_trace_save(entries, ARRAY_SIZE(entries), 1); + + /* May be called under spinlock, so avoid sleeping */ + ct->fast_req[slot].stack = stack_depot_save(entries, n, GFP_NOWAIT); +#endif + ct->fast_req[slot].fence = fence; + ct->fast_req[slot].action = action; +} +#else +static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) +{ +} +#endif + +/* + * The CT protocol accepts a 16 bits fence. This field is fully owned by the + * driver, the GuC will just copy it to the reply message. Since we need to + * be able to distinguish between replies to REQUEST and FAST_REQUEST messages, + * we use one bit of the seqno as an indicator for that and a rolling counter + * for the remaining 15 bits. + */ +#define CT_SEQNO_MASK GENMASK(14, 0) +#define CT_SEQNO_UNTRACKED BIT(15) +static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence) +{ + u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK; + + if (!is_g2h_fence) + seqno |= CT_SEQNO_UNTRACKED; + + return seqno; +} + #define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, @@ -701,6 +781,9 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | GUC_HXG_EVENT_MSG_0_DATA0, action[0]); } else { + fast_req_track(ct, ct_fence_value, + FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0])); + cmd[1] = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) | FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | @@ -733,25 +816,6 @@ corrupted: return -EPIPE; } -/* - * The CT protocol accepts a 16 bits fence. This field is fully owned by the - * driver, the GuC will just copy it to the reply message. Since we need to - * be able to distinguish between replies to REQUEST and FAST_REQUEST messages, - * we use one bit of the seqno as an indicator for that and a rolling counter - * for the remaining 15 bits. 
- */ -#define CT_SEQNO_MASK GENMASK(14, 0) -#define CT_SEQNO_UNTRACKED BIT(15) -static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence) -{ - u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK; - - if (!is_g2h_fence) - seqno |= CT_SEQNO_UNTRACKED; - - return seqno; -} - static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) @@ -760,7 +824,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u16 seqno; int ret; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); xe_gt_assert(gt, !g2h_len || !g2h_fence); xe_gt_assert(gt, !num_g2h || !g2h_fence); xe_gt_assert(gt, !g2h_len || num_g2h); @@ -1052,6 +1116,11 @@ retry_same_fence: goto retry; } if (g2h_fence.fail) { + if (g2h_fence.cancel) { + xe_gt_dbg(gt, "H2G request %#x canceled!\n", action[0]); + ret = -ECANCELED; + goto unlock; + } xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n", action[0], g2h_fence.error, g2h_fence.hint); ret = -EIO; @@ -1060,6 +1129,7 @@ retry_same_fence: if (ret > 0) ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data; +unlock: mutex_unlock(&ct->lock); return ret; @@ -1143,6 +1213,55 @@ static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action) return 0; } +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +static void fast_req_report(struct xe_guc_ct *ct, u16 fence) +{ + u16 fence_min = U16_MAX, fence_max = 0; + struct xe_gt *gt = ct_to_gt(ct); + bool found = false; + unsigned int n; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + char *buf; +#endif + + lockdep_assert_held(&ct->lock); + + for (n = 0; n < ARRAY_SIZE(ct->fast_req); n++) { + if (ct->fast_req[n].fence < fence_min) + fence_min = ct->fast_req[n].fence; + if (ct->fast_req[n].fence > fence_max) + fence_max = ct->fast_req[n].fence; + + if (ct->fast_req[n].fence != fence) + continue; + found = true; + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + buf = kmalloc(SZ_4K, GFP_NOWAIT); + if (buf && stack_depot_snprint(ct->fast_req[n].stack, buf, SZ_4K, 0)) + xe_gt_err(gt, "Fence 0x%x was used by action %#04x sent at:\n%s", + fence, ct->fast_req[n].action, buf); + else + xe_gt_err(gt, "Fence 0x%x was used by action %#04x [failed to retrieve stack]\n", + fence, ct->fast_req[n].action); + kfree(buf); +#else + xe_gt_err(gt, "Fence 0x%x was used by action %#04x\n", + fence, ct->fast_req[n].action); +#endif + break; + } + + if (!found) + xe_gt_warn(gt, "Fence 0x%x not found - tracking buffer wrapped? 
[range = 0x%x -> 0x%x, next = 0x%X]\n", + fence, fence_min, fence_max, ct->fence_seqno); +} +#else +static void fast_req_report(struct xe_guc_ct *ct, u16 fence) +{ +} +#endif + static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_gt *gt = ct_to_gt(ct); @@ -1171,6 +1290,9 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) else xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n", type, fence); + + fast_req_report(ct, fence); + CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE); return -EPROTO; @@ -1344,7 +1466,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) u32 action; u32 *hxg; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); lockdep_assert_held(&ct->fast_lock); if (ct->state == XE_GUC_CT_STATE_DISABLED) @@ -1624,6 +1746,186 @@ static void g2h_worker_func(struct work_struct *w) receive_g2h(ct); } +static void xe_fixup_u64_in_cmds(struct xe_device *xe, struct iosys_map *cmds, + u32 size, u32 idx, s64 shift) +{ + u32 hi, lo; + u64 offset; + + lo = xe_map_rd_ring_u32(xe, cmds, idx, size); + hi = xe_map_rd_ring_u32(xe, cmds, idx + 1, size); + offset = make_u64(hi, lo); + offset += shift; + lo = lower_32_bits(offset); + hi = upper_32_bits(offset); + xe_map_wr_ring_u32(xe, cmds, idx, size, lo); + xe_map_wr_ring_u32(xe, cmds, idx + 1, size, hi); +} + +/* + * Shift any GGTT addresses within a single message left within CTB from + * before post-migration recovery. + * @ct: pointer to CT struct of the target GuC + * @cmds: iomap buffer containing CT messages + * @head: start of the target message within the buffer + * @len: length of the target message + * @size: size of the commands buffer + * @shift: the address shift to be added to each GGTT reference + * Return: true if the message was fixed or needed no fixups, false on failure + */ +static bool ct_fixup_ggtt_in_message(struct xe_guc_ct *ct, + struct iosys_map *cmds, u32 head, + u32 len, u32 size, s64 shift) +{ + struct xe_gt *gt = ct_to_gt(ct); + struct xe_device *xe = ct_to_xe(ct); + u32 msg[GUC_HXG_MSG_MIN_LEN]; + u32 action, i, n; + + xe_gt_assert(gt, len >= GUC_HXG_MSG_MIN_LEN); + + msg[0] = xe_map_rd_ring_u32(xe, cmds, head, size); + action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]); + + xe_gt_sriov_dbg_verbose(gt, "fixing H2G %#x\n", action); + + switch (action) { + case XE_GUC_ACTION_REGISTER_CONTEXT: + if (len != XE_GUC_REGISTER_CONTEXT_MSG_LEN) + goto err_len; + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER, + shift); + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER, + shift); + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR, shift); + break; + case XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC: + if (len < XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN) + goto err_len; + n = xe_map_rd_ring_u32(xe, cmds, head + + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS, size); + if (len != XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN + 2 * n) + goto err_len; + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER, + shift); + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER, + shift); + for (i = 0; i < n; i++) + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR + + 2 * i, shift); + break; + default: + 
break; + } + return true; + +err_len: + xe_gt_err(gt, "Skipped G2G %#x message fixups, unexpected length (%u)\n", action, len); + return false; +} + +/* + * Apply fixups to the next outgoing CT message within given CTB + * @ct: the &xe_guc_ct struct instance representing the target GuC + * @h2g: the &guc_ctb struct instance of the target buffer + * @shift: shift to be added to all GGTT addresses within the CTB + * @mhead: pointer to an integer storing message start position; the + * position is changed to next message before this function return + * @avail: size of the area available for parsing, that is length + * of all remaining messages stored within the CTB + * Return: size of the area available for parsing after one message + * has been parsed, that is length remaining from the updated mhead + */ +static int ct_fixup_ggtt_in_buffer(struct xe_guc_ct *ct, struct guc_ctb *h2g, + s64 shift, u32 *mhead, s32 avail) +{ + struct xe_gt *gt = ct_to_gt(ct); + struct xe_device *xe = ct_to_xe(ct); + u32 msg[GUC_HXG_MSG_MIN_LEN]; + u32 size = h2g->info.size; + u32 head = *mhead; + u32 len; + + xe_gt_assert(gt, avail >= (s32)GUC_CTB_MSG_MIN_LEN); + + /* Read header */ + msg[0] = xe_map_rd_ring_u32(xe, &h2g->cmds, head, size); + len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN; + + if (unlikely(len > (u32)avail)) { + xe_gt_err(gt, "H2G channel broken on read, avail=%d, len=%d, fixups skipped\n", + avail, len); + return 0; + } + + head = (head + GUC_CTB_MSG_MIN_LEN) % size; + if (!ct_fixup_ggtt_in_message(ct, &h2g->cmds, head, msg_len_to_hxg_len(len), size, shift)) + return 0; + *mhead = (head + msg_len_to_hxg_len(len)) % size; + + return avail - len; +} + +/** + * xe_guc_ct_fixup_messages_with_ggtt - Fixup any pending H2G CTB messages + * @ct: pointer to CT struct of the target GuC + * @ggtt_shift: shift to be added to all GGTT addresses within the CTB + * + * Messages in GuC to Host CTB are owned by GuC and any fixups in them + * are made by GuC. But content of the Host to GuC CTB is owned by the + * KMD, so fixups to GGTT references in any pending messages need to be + * applied here. + * This function updates GGTT offsets in payloads of pending H2G CTB + * messages (messages which were not consumed by GuC before the VF got + * paused). 
+ */ +void xe_guc_ct_fixup_messages_with_ggtt(struct xe_guc_ct *ct, s64 ggtt_shift) +{ + struct guc_ctb *h2g = &ct->ctbs.h2g; + struct xe_guc *guc = ct_to_guc(ct); + struct xe_gt *gt = guc_to_gt(guc); + u32 head, tail, size; + s32 avail; + + if (unlikely(h2g->info.broken)) + return; + + h2g->info.head = desc_read(ct_to_xe(ct), h2g, head); + head = h2g->info.head; + tail = READ_ONCE(h2g->info.tail); + size = h2g->info.size; + + if (unlikely(head > size)) + goto corrupted; + + if (unlikely(tail >= size)) + goto corrupted; + + avail = tail - head; + + /* beware of buffer wrap case */ + if (unlikely(avail < 0)) + avail += size; + xe_gt_dbg(gt, "available %d (%u:%u:%u)\n", avail, head, tail, size); + xe_gt_assert(gt, avail >= 0); + + while (avail > 0) + avail = ct_fixup_ggtt_in_buffer(ct, h2g, ggtt_shift, &head, avail); + + return; + +corrupted: + xe_gt_err(gt, "Corrupted H2G descriptor head=%u tail=%u size=%u, fixups not applied\n", + head, tail, size); + h2g->info.broken = true; +} + static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic, bool want_ctb) { @@ -1634,7 +1936,7 @@ static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bo return NULL; if (ct->bo && want_ctb) { - snapshot->ctb_size = ct->bo->size; + snapshot->ctb_size = xe_bo_size(ct->bo); snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL); } @@ -1770,6 +2072,24 @@ void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb) } #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) + +#ifdef CONFIG_FUNCTION_ERROR_INJECTION +/* + * This is a helper function which assists the driver in identifying if a fault + * injection test is currently active, allowing it to reduce unnecessary debug + * output. Typically, the function returns zero, but the fault injection + * framework can alter this to return an error. Since faults are injected + * through this function, it's important to ensure the compiler doesn't optimize + * it into an inline function. To avoid such optimization, the 'noinline' + * attribute is applied. Compiler optimizes the static function defined in the + * header file as an inline function. + */ +noinline int xe_is_injection_active(void) { return 0; } +ALLOW_ERROR_INJECTION(xe_is_injection_active, ERRNO); +#else +int xe_is_injection_active(void) { return 0; } +#endif + static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code) { struct xe_guc_log_snapshot *snapshot_log; @@ -1780,6 +2100,12 @@ static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reaso if (ctb) ctb->info.broken = true; + /* + * Huge dump is getting generated when injecting error for guc CT/MMIO + * functions. So, let us suppress the dump when fault is injected. 
+ */ + if (xe_is_injection_active()) + return; /* Ignore further errors after the first dump until a reset */ if (ct->dead.reported) @@ -1830,7 +2156,6 @@ static void ct_dead_print(struct xe_dead_ct *dead) return; } - /* Can't generate a genuine core dump at this point, so just do the good bits */ drm_puts(&lp, "**** Xe Device Coredump ****\n"); drm_printf(&lp, "Reason: CTB is dead - 0x%X\n", dead->reason); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 82c4ae458dda..18d4225e6502 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -11,6 +11,7 @@ struct drm_printer; struct xe_device; +int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct); int xe_guc_ct_init(struct xe_guc_ct *ct); int xe_guc_ct_enable(struct xe_guc_ct *ct); void xe_guc_ct_disable(struct xe_guc_ct *ct); @@ -22,6 +23,13 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_pr void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb); +void xe_guc_ct_fixup_messages_with_ggtt(struct xe_guc_ct *ct, s64 ggtt_shift); + +static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct) +{ + return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED; +} + static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct) { return ct->state == XE_GUC_CT_STATE_ENABLED; diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index 8e1b9d981d61..8b03b50313d9 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -9,6 +9,7 @@ #include <linux/interrupt.h> #include <linux/iosys-map.h> #include <linux/spinlock_types.h> +#include <linux/stackdepot.h> #include <linux/wait.h> #include <linux/xarray.h> @@ -104,6 +105,18 @@ struct xe_dead_ct { /** snapshot_log: copy of GuC log at point of error */ struct xe_guc_log_snapshot *snapshot_log; }; + +/** struct xe_fast_req_fence - Used to track FAST_REQ messages by fence to match error responses */ +struct xe_fast_req_fence { + /** @fence: sequence number sent in H2G and return in G2H error */ + u16 fence; + /** @action: H2G action code */ + u16 action; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + /** @stack: call stack from when the H2G was sent */ + depot_stack_handle_t stack; +#endif +}; #endif /** @@ -152,6 +165,8 @@ struct xe_guc_ct { #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) /** @dead: information for debugging dead CTs */ struct xe_dead_ct dead; + /** @fast_req: history of FAST_REQ messages for matching with G2H error responses */ + struct xe_fast_req_fence fast_req[SZ_32]; #endif }; diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.c b/drivers/gpu/drm/xe/xe_guc_engine_activity.c index 0fb48f8f05d8..92e1f9f41b8c 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_activity.c +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.c @@ -124,7 +124,7 @@ static void free_engine_activity_buffers(struct engine_activity_buffer *buffer) static bool is_engine_activity_supported(struct xe_guc *guc) { struct xe_uc_fw_version *version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; - struct xe_uc_fw_version required = { 1, 14, 1 }; + struct xe_uc_fw_version required = { .major = 1, .minor = 14, .patch = 1 }; struct xe_gt *gt = guc_to_gt(guc); if (IS_SRIOV_VF(gt_to_xe(gt))) { diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h index 4c39f01e4f52..a3f421e2adc0 100644 --- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h +++ 
b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h @@ -20,6 +20,8 @@ struct xe_exec_queue; struct xe_guc_exec_queue { /** @q: Backpointer to parent xe_exec_queue */ struct xe_exec_queue *q; + /** @rcu: For safe freeing of exported dma fences */ + struct rcu_head rcu; /** @sched: GPU scheduler for this xe_exec_queue */ struct xe_gpu_scheduler sched; /** @entity: Scheduler entity for this xe_exec_queue */ diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 38039c411387..c01ccb35dc75 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -79,7 +79,7 @@ static struct xe_guc_log_snapshot *xe_guc_log_snapshot_alloc(struct xe_guc_log * * Also, can't use vmalloc as might be called from atomic context. So need * to break the buffer up into smaller chunks that can be allocated. */ - snapshot->size = log->bo->size; + snapshot->size = xe_bo_size(log->bo); snapshot->num_chunks = DIV_ROUND_UP(snapshot->size, GUC_LOG_CHUNK_SIZE); snapshot->copy = kcalloc(snapshot->num_chunks, sizeof(*snapshot->copy), diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h index 5b896f5fafaf..f1e2b0be90a9 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.h +++ b/drivers/gpu/drm/xe/xe_guc_log.h @@ -12,7 +12,7 @@ struct drm_printer; struct xe_device; -#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER) +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) #define CRASH_BUFFER_SIZE SZ_1M #define DEBUG_BUFFER_SIZE SZ_8M #define CAPTURE_BUFFER_SIZE SZ_2M diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 18c623992035..68b192fe3b32 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -5,8 +5,11 @@ #include "xe_guc_pc.h" +#include <linux/cleanup.h> #include <linux/delay.h> +#include <linux/jiffies.h> #include <linux/ktime.h> +#include <linux/wait_bit.h> #include <drm/drm_managed.h> #include <drm/drm_print.h> @@ -51,9 +54,12 @@ #define LNL_MERT_FREQ_CAP 800 #define BMG_MERT_FREQ_CAP 2133 +#define BMG_MIN_FREQ 1200 +#define BMG_MERT_FLUSH_FREQ_CAP 2600 #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ +#define SLPC_ACT_FREQ_TIMEOUT_MS 100 /** * DOC: GuC Power Conservation (PC) @@ -141,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc, return -ETIMEDOUT; } +static int wait_for_flush_complete(struct xe_guc_pc *pc) +{ + const unsigned long timeout = msecs_to_jiffies(30); + + if (!wait_var_event_timeout(&pc->flush_freq_limit, + !atomic_read(&pc->flush_freq_limit), + timeout)) + return -ETIMEDOUT; + + return 0; +} + +static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq) +{ + int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC; + int slept, wait = 10; + + for (slept = 0; slept < timeout_us;) { + if (xe_guc_pc_get_act_freq(pc) <= freq) + return 0; + + usleep_range(wait, wait << 1); + slept += wait; + wait <<= 1; + if (slept + wait > timeout_us) + wait = timeout_us - slept; + } + + return -ETIMEDOUT; +} static int pc_action_reset(struct xe_guc_pc *pc) { struct xe_guc_ct *ct = pc_to_ct(pc); @@ -153,7 +189,7 @@ static int pc_action_reset(struct xe_guc_pc *pc) int ret; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC reset failed: %pe\n", ERR_PTR(ret)); @@ -177,7 +213,7 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc) /* Blocking here to ensure the 
results are ready before reading them */ ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action)); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC query task state failed: %pe\n", ERR_PTR(ret)); @@ -200,7 +236,7 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) return -EAGAIN; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC set param[%u]=%u failed: %pe\n", id, value, ERR_PTR(ret)); @@ -222,7 +258,7 @@ static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id) return -EAGAIN; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC unset param failed: %pe", ERR_PTR(ret)); @@ -239,7 +275,7 @@ static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) int ret; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC RC enable mode=%u failed: %pe\n", mode, ERR_PTR(ret)); return ret; @@ -553,6 +589,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) return pc->rpn_freq; } +static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ + int ret; + + lockdep_assert_held(&pc->freq_lock); + + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); + if (ret) + return ret; + + *freq = pc_get_min_freq(pc); + + return 0; +} + /** * xe_guc_pc_get_min_freq - Get the min operational frequency * @pc: The GuC PC @@ -563,26 +618,28 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) */ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - xe_device_assert_mem_access(pc_to_xe(pc)); + lockdep_assert_held(&pc->freq_lock); - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; - ret = pc_action_query_task_state(pc); + ret = pc_set_min_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_min_freq(pc); + pc->user_requested_min = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -596,24 +653,28 @@ out: */ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_set_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_set_min_freq(pc, freq); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); if (ret) - goto out; + return ret; - pc->user_requested_min = freq; + *freq = pc_get_max_freq(pc); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -626,24 +687,28 @@ out: */ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_max_freq_locked(pc, 
freq); +} + +static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_action_query_task_state(pc); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_set_max_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_max_freq(pc); + pc->user_requested_max = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -657,24 +722,14 @@ out: */ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { - int ret; - - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; + if (XE_WA(pc_to_gt(pc), 22019338487)) { + if (wait_for_flush_complete(pc) != 0) + return -EAGAIN; } - ret = pc_set_max_freq(pc, freq); - if (ret) - goto out; - - pc->user_requested_max = freq; + guard(mutex)(&pc->freq_lock); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return xe_guc_pc_set_max_freq_locked(pc, freq); } /** @@ -817,6 +872,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc) static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) { + struct xe_tile *tile = gt_to_tile(pc_to_gt(pc)); int ret; lockdep_assert_held(&pc->freq_lock); @@ -843,6 +899,9 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) if (pc_get_min_freq(pc) > pc->rp0_freq) ret = pc_set_min_freq(pc, pc->rp0_freq); + if (XE_WA(tile->primary_gt, 14022085890)) + ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc))); + out: return ret; } @@ -868,30 +927,117 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } -static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +static bool needs_flush_freq_limit(struct xe_guc_pc *pc) { - int ret = 0; + struct xe_gt *gt = pc_to_gt(pc); - if (XE_WA(pc_to_gt(pc), 22019338487)) { - /* - * Get updated min/max and stash them. - */ - ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); - if (!ret) - ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); - if (ret) - return ret; + return XE_WA(gt, 22019338487) && + pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP; +} + +/** + * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush + * @pc: the xe_guc_pc object + * + * As per the WA, reduce max GT frequency during L2 cache flush + */ +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 max_freq; + int ret; + + if (!needs_flush_freq_limit(pc)) + return; + + guard(mutex)(&pc->freq_lock); + + ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq); + if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) { + ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) { + xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); + return; + } + + atomic_set(&pc->flush_freq_limit, 1); /* - * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. + * If user has previously changed max freq, stash that value to + * restore later, otherwise use the current max. New user + * requests wait on flush. 
*/ - mutex_lock(&pc->freq_lock); - ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); - if (!ret) - ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); - mutex_unlock(&pc->freq_lock); + if (pc->user_requested_max != 0) + pc->stashed_max_freq = pc->user_requested_max; + else + pc->stashed_max_freq = max_freq; } + /* + * Wait for actual freq to go below the flush cap: even if the previous + * max was below cap, the current one might still be above it + */ + ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) + xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); +} + +/** + * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes. + * @pc: the xe_guc_pc object + * + * Retrieve the previous GT max frequency value. + */ +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret = 0; + + if (!needs_flush_freq_limit(pc)) + return; + + if (!atomic_read(&pc->flush_freq_limit)) + return; + + mutex_lock(&pc->freq_lock); + + ret = pc_set_max_freq(>->uc.guc.pc, pc->stashed_max_freq); + if (ret) + xe_gt_err_once(gt, "Failed to restore max freq %u:%d", + pc->stashed_max_freq, ret); + + atomic_set(&pc->flush_freq_limit, 0); + mutex_unlock(&pc->freq_lock); + wake_up_var(&pc->flush_freq_limit); +} + +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +{ + int ret; + + if (!XE_WA(pc_to_gt(pc), 22019338487)) + return 0; + + guard(mutex)(&pc->freq_lock); + + /* + * Get updated min/max and stash them. + */ + ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq); + if (!ret) + ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq); + if (ret) + return ret; + + /* + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. 
+ */ + ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); + if (!ret) + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); + return ret; } @@ -1068,7 +1214,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) goto out; } - memset(pc->bo->vmap.vaddr, 0, size); + xe_map_memset(xe, &pc->bo->vmap, 0, 0, size); slpc_shared_data_write(pc, header.size, size); earlier = ktime_get(); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 0a2664d5c811..52ecdd5ddbff 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -38,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc); +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 2978ac9a249b..c02053948a57 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -15,6 +15,8 @@ struct xe_guc_pc { /** @bo: GGTT buffer object that is shared with GuC PC */ struct xe_bo *bo; + /** @flush_freq_limit: 1 when max freq changes are limited by driver */ + atomic_t flush_freq_limit; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; /** @rpa_freq: HW RPa frequency - The Achievable one */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 6d84a52b660a..cafb47711e9b 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -498,6 +498,15 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc, action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); } + /* explicitly checks some fields that we might fixup later */ + xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]); + xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]); + xe_gt_assert(guc_to_gt(guc), q->width == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]); + xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]); xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE); #undef MAX_MLRC_REG_SIZE @@ -522,6 +531,14 @@ static void __register_exec_queue(struct xe_guc *guc, info->hwlrca_hi, }; + /* explicitly checks some fields that we might fixup later */ + xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == + action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]); + xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == + action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]); + xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == + action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]); + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); } @@ -891,12 +908,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged; + bool wedged = false; xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Kill the run_job / process_msg 
entry points */ xe_sched_submission_stop(sched); @@ -970,10 +988,7 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) */ xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); - if (ctx_timestamp < ctx_job_timestamp) - diff = ctx_timestamp + U32_MAX - ctx_job_timestamp; - else - diff = ctx_timestamp - ctx_job_timestamp; + diff = ctx_timestamp - ctx_job_timestamp; /* * Ensure timeout is within 5% to account for an GuC scheduling latency @@ -1070,7 +1085,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int err = -ETIME; pid_t pid = -1; int i = 0; - bool wedged, skip_timeout_check; + bool wedged = false, skip_timeout_check; /* * TDR has fired before free job worker. Common if exec queue @@ -1078,12 +1093,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * list so job can be freed and kick scheduler ensuring free job is not * lost. */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - - return DRM_GPU_SCHED_STAT_NOMINAL; - } + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) + return DRM_GPU_SCHED_STAT_NO_HANG; /* Kill the run_job entry point */ xe_sched_submission_stop(sched); @@ -1116,7 +1127,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * doesn't work for SRIOV. For now assuming timeouts in wedged mode are * genuine timeouts. */ - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Engine state now stable, disable scheduling to check timestamp */ if (!wedged && exec_queue_registered(q)) { @@ -1251,7 +1263,7 @@ trigger_reset: /* Start fence signaling */ xe_hw_fence_irq_start(q->fence_irq); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; sched_enable: enable_scheduling(q); @@ -1261,10 +1273,8 @@ rearm: * but there is not currently an easy way to do in DRM scheduler. With * some thought, do this in a follow up. */ - xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); - - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_NO_HANG; } static void __guc_exec_queue_fini_async(struct work_struct *w) @@ -1285,7 +1295,11 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); - kfree(ge); + /* + * RCU free due sched being exported via DRM scheduler fences + * (timeline name). 
+ */ + kfree_rcu(ge, rcu); xe_exec_queue_fini(q); xe_pm_runtime_put(guc_to_xe(guc)); } @@ -1468,6 +1482,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) q->guc = ge; ge->q = q; + init_rcu_head(&ge->rcu); init_waitqueue_head(&ge->suspend_wait); for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) @@ -1762,6 +1777,9 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc) { int ret; + if (!guc->submission_state.initialized) + return 0; + /* * Using an atomic here rather than submission_state.lock as this * function can be called while holding the CT lock (engine reset @@ -2068,12 +2086,16 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; u32 guc_id; + u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; - if (unlikely(len < 1)) + if (unlikely(!len || len > 2)) return -EPROTO; guc_id = msg[0]; + if (len == 2) + type = msg[1]; + if (guc_id == GUC_ID_UNKNOWN) { /* * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF @@ -2087,8 +2109,19 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, if (unlikely(!q)) return -EPROTO; - xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d", - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + /* + * The type is HW-defined and changes based on platform, so we don't + * decode it in the kernel and only check if it is valid. + * See bspec 54047 and 72187 for details. + */ + if (type != XE_GUC_CAT_ERR_TYPE_INVALID) + xe_gt_dbg(gt, + "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", + type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + else + xe_gt_dbg(gt, + "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", + xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); trace_xe_exec_queue_memory_cat_error(q); diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index 27d11e06a82b..6d7b62724126 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -11,15 +11,12 @@ #include "xe_device_types.h" #include "xe_drv.h" #include "xe_heci_gsc.h" +#include "regs/xe_gsc_regs.h" #include "xe_platform_types.h" #include "xe_survivability_mode.h" #define GSC_BAR_LENGTH 0x00000FFC -#define DG1_GSC_HECI2_BASE 0x259000 -#define PVC_GSC_HECI2_BASE 0x285000 -#define DG2_GSC_HECI2_BASE 0x374000 - static void heci_gsc_irq_mask(struct irq_data *d) { /* generic irq handling */ diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 6a846e4cb221..7e43b2dd6a32 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -171,7 +171,7 @@ static int huc_auth_via_gsccs(struct xe_huc *huc) sizeof(struct pxp43_new_huc_auth_in)); wr_offset = huc_emit_pxp_auth_msg(xe, &pkt->vmap, wr_offset, xe_bo_ggtt_addr(huc->fw.bo), - huc->fw.bo->size); + xe_bo_size(huc->fw.bo)); do { err = xe_gsc_pkt_submit_kernel(&gt->uc.gsc, ggtt_offset, wr_offset, ggtt_offset + PXP43_HUC_AUTH_INOUT_SIZE, diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 93241fd0a4ba..796ba8c34a16 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -17,6 +17,7 @@ #include "regs/xe_irq_regs.h" #include "xe_assert.h" #include "xe_bo.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_execlist.h" #include "xe_force_wake.h" @@ -693,7 +694,7 @@ static void read_media_fuses(struct xe_gt *gt) if (!(BIT(j) & vdbox_mask)) { 
gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "vcs%u fused off\n", j); + xe_gt_info(gt, "vcs%u fused off\n", j); } } @@ -703,7 +704,7 @@ static void read_media_fuses(struct xe_gt *gt) if (!(BIT(j) & vebox_mask)) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "vecs%u fused off\n", j); + xe_gt_info(gt, "vecs%u fused off\n", j); } } } @@ -728,15 +729,13 @@ static void read_copy_fuses(struct xe_gt *gt) if (!(BIT(j / 2) & bcs_mask)) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "bcs%u fused off\n", j); + xe_gt_info(gt, "bcs%u fused off\n", j); } } } static void read_compute_fuses_from_dss(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); - /* * CCS fusing based on DSS masks only applies to platforms that can * have more than one CCS. @@ -755,14 +754,13 @@ static void read_compute_fuses_from_dss(struct xe_gt *gt) if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "ccs%u fused off\n", j); + xe_gt_info(gt, "ccs%u fused off\n", j); } } } static void read_compute_fuses_from_reg(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); u32 ccs_mask; ccs_mask = xe_mmio_read32(&gt->mmio, XEHP_FUSE4); @@ -774,7 +772,7 @@ static void read_compute_fuses_from_reg(struct xe_gt *gt) if ((ccs_mask & BIT(j)) == 0) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "ccs%u fused off\n", j); + xe_gt_info(gt, "ccs%u fused off\n", j); } } } @@ -789,8 +787,6 @@ static void read_compute_fuses(struct xe_gt *gt) static void check_gsc_availability(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); - if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))) return; @@ -806,7 +802,25 @@ static void check_gsc_availability(struct xe_gt *gt) xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0); xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0); - drm_dbg(&xe->drm, "GSC FW not used, disabling gsccs\n"); + xe_gt_dbg(gt, "GSC FW not used, disabling gsccs\n"); + } +} + +static void check_sw_disable(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + u64 sw_allowed = xe_configfs_get_engines_allowed(to_pci_dev(xe->drm.dev)); + enum xe_hw_engine_id id; + + for (id = 0; id < XE_NUM_HW_ENGINES; ++id) { + if (!(gt->info.engine_mask & BIT(id))) + continue; + + if (!(sw_allowed & BIT(id))) { + gt->info.engine_mask &= ~BIT(id); + xe_gt_info(gt, "%s disabled via configfs\n", + engine_infos[id].name); + } } } @@ -818,6 +832,7 @@ int xe_hw_engines_init_early(struct xe_gt *gt) read_copy_fuses(gt); read_compute_fuses(gt); check_gsc_availability(gt); + check_sw_disable(gt); BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN); BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX); @@ -1044,12 +1059,13 @@ struct xe_hw_engine * xe_hw_engine_lookup(struct xe_device *xe, struct drm_xe_engine_class_instance eci) { + struct xe_gt *gt = xe_device_get_gt(xe, eci.gt_id); unsigned int idx; if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) return NULL; - if (eci.gt_id >= xe->info.gt_count) + if (!gt) return NULL; idx = array_index_nospec(eci.engine_class, diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 2d68c5b5262a..87a6dcb1b4b5 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -13,15 +13,6 @@ #include "xe_vm.h" static void -hw_engine_group_free(struct drm_device *drm, void *arg) -{ - struct xe_hw_engine_group *group = arg; - - destroy_workqueue(group->resume_wq); - kfree(group); -} - 
-static void hw_engine_group_resume_lr_jobs_func(struct work_struct *w) { struct xe_exec_queue *q; @@ -53,7 +44,7 @@ hw_engine_group_alloc(struct xe_device *xe) struct xe_hw_engine_group *group; int err; - group = kzalloc(sizeof(*group), GFP_KERNEL); + group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM); @@ -61,14 +52,14 @@ hw_engine_group_alloc(struct xe_device *xe) if (!group->resume_wq) return ERR_PTR(-ENOMEM); + err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq); + if (err) + return ERR_PTR(err); + init_rwsem(&group->mode_sem); INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); INIT_LIST_HEAD(&group->exec_queue_list); - err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); - if (err) - return ERR_PTR(err); - return group; } diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 0b4f12be3692..b2a0c46dfcd4 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -100,6 +100,9 @@ void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq) spin_unlock_irqrestore(&irq->lock, flags); dma_fence_end_signalling(tmp); } + + /* Safe release of the irq->lock used in dma_fence_init. */ + synchronize_rcu(); } void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq) @@ -165,7 +168,7 @@ static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); return dma_fence->error || - !__dma_fence_is_later(dma_fence->seqno, seqno, dma_fence->ops); + !__dma_fence_is_later(dma_fence, dma_fence->seqno, seqno); } static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 74f31639b37f..f08fc4377d25 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -20,6 +20,8 @@ #include "xe_pcode_api.h" #include "xe_sriov.h" #include "xe_pm.h" +#include "xe_vsec.h" +#include "regs/xe_pmt.h" enum xe_hwmon_reg { REG_TEMP, @@ -51,6 +53,14 @@ enum xe_fan_channel { FAN_MAX, }; +/* Attribute index for powerX_xxx_interval sysfs entries */ +enum sensor_attr_power { + SENSOR_INDEX_PSYS_PL1, + SENSOR_INDEX_PKG_PL1, + SENSOR_INDEX_PSYS_PL2, + SENSOR_INDEX_PKG_PL2, +}; + /* * For platforms that support mailbox commands for power limits, REG_PKG_POWER_SKU_UNIT is * not supported and below are SKU units to be used. @@ -72,8 +82,9 @@ enum xe_fan_channel { * PL*_HWMON_ATTR - mapping of hardware power limits to corresponding hwmon power attribute. */ #define PL1_HWMON_ATTR hwmon_power_max +#define PL2_HWMON_ATTR hwmon_power_cap -#define PWR_ATTR_TO_STR(attr) (((attr) == hwmon_power_max) ? "PL1" : "Invalid") +#define PWR_ATTR_TO_STR(attr) (((attr) == hwmon_power_max) ? "PL1" : "PL2") /* * Timeout for power limit write mailbox command. @@ -124,6 +135,9 @@ struct xe_hwmon { bool boot_power_limit_read; /** @pl1_on_boot: power limit PL1 on boot */ u32 pl1_on_boot[CHANNEL_MAX]; + /** @pl2_on_boot: power limit PL2 on boot */ + u32 pl2_on_boot[CHANNEL_MAX]; + }; static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 attr, int channel, @@ -151,16 +165,18 @@ static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 att /* return the value only if limit is enabled */ if (attr == PL1_HWMON_ATTR) *uval = (val0 & PWR_LIM_EN) ? val0 : 0; + else if (attr == PL2_HWMON_ATTR) + *uval = (val1 & PWR_LIM_EN) ? val1 : 0; else if (attr == hwmon_power_label) - *uval = (val0 & PWR_LIM_EN) ? 
1 : 0; + *uval = (val0 & PWR_LIM_EN) ? 1 : (val1 & PWR_LIM_EN) ? 1 : 0; else *uval = 0; return ret; } -static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, - u32 uval) +static int xe_hwmon_pcode_rmw_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, + u32 clr, u32 set) { struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); u32 val0, val1; @@ -179,7 +195,9 @@ static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 at channel, val0, val1, ret); if (attr == PL1_HWMON_ATTR) - val0 = uval; + val0 = (val0 & ~clr) | set; + else if (attr == PL2_HWMON_ATTR) + val1 = (val1 & ~clr) | set; else return -EIO; @@ -236,12 +254,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg return GT_PERF_STATUS; break; case REG_PKG_ENERGY_STATUS: - if (xe->info.platform == XE_BATTLEMAGE) { - if (channel == CHANNEL_PKG) - return BMG_PACKAGE_ENERGY_STATUS; - else - return BMG_PLATFORM_ENERGY_STATUS; - } else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { return PVC_GT0_PLATFORM_ENERGY_STATUS; } else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) { return PCU_CR_PACKAGE_ENERGY_STATUS; @@ -273,7 +286,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg */ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value) { - u64 reg_val, min, max; + u64 reg_val = 0, min, max; struct xe_device *xe = hwmon->xe; struct xe_reg rapl_limit, pkg_power_sku; struct xe_mmio *mmio = xe_root_tile_mmio(xe); @@ -285,16 +298,6 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channe } else { rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); - - /* - * Valid check of REG_PKG_RAPL_LIMIT is already done in xe_hwmon_power_is_visible. - * So not checking it again here. - */ - if (!xe_reg_is_valid(pkg_power_sku)) { - drm_warn(&xe->drm, "pkg_power_sku invalid\n"); - *value = 0; - goto unlock; - } reg_val = xe_mmio_read32(mmio, rapl_limit); } @@ -327,7 +330,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); int ret = 0; - u32 reg_val; + u32 reg_val, max; struct xe_reg rapl_limit; mutex_lock(&hwmon->hwmon_lock); @@ -339,7 +342,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe if (hwmon->xe->info.has_mbx_power_limits) { drm_dbg(&hwmon->xe->drm, "disabling %s on channel %d\n", PWR_ATTR_TO_STR(attr), channel); - xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, 0); + xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM_EN, 0); xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, ®_val); } else { reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN, 0); @@ -355,25 +358,29 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe /* Computation in 64-bits to avoid overflow. Round to nearest. */ reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); - reg_val = PWR_LIM_EN | REG_FIELD_PREP(PWR_LIM_VAL, reg_val); /* - * Clamp power limit to card-firmware default as maximum, as an additional protection to + * Clamp power limit to GPU firmware default as maximum, as an additional protection to * pcode clamp. 
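For reference, the clr/set convention of the xe_hwmon_pcode_rmw_power_limit() helper introduced above mirrors xe_mmio_rmw32(): clr selects the bits to clear in the chosen limit word (val0 for PL1, val1 for PL2) and set supplies the new bits. A minimal usage sketch, based on the calls made elsewhere in this patch:

        /* Disable a limit: clear only the enable bit, keep the value and time window. */
        xe_hwmon_pcode_rmw_power_limit(hwmon, PL2_HWMON_ATTR, channel, PWR_LIM_EN, 0);

        /* Program and enable a new limit: replace the whole PWR_LIM field. */
        xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, channel, PWR_LIM,
                                       PWR_LIM_EN | REG_FIELD_PREP(PWR_LIM_VAL, reg_val));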
*/ if (hwmon->xe->info.has_mbx_power_limits) { - if (reg_val > REG_FIELD_GET(PWR_LIM_VAL, hwmon->pl1_on_boot[channel])) { - reg_val = REG_FIELD_GET(PWR_LIM_VAL, hwmon->pl1_on_boot[channel]); - drm_dbg(&hwmon->xe->drm, "Clamping power limit to firmware default 0x%x\n", + max = (attr == PL1_HWMON_ATTR) ? + hwmon->pl1_on_boot[channel] : hwmon->pl2_on_boot[channel]; + max = REG_FIELD_PREP(PWR_LIM_VAL, max); + if (reg_val > max) { + reg_val = max; + drm_dbg(&hwmon->xe->drm, + "Clamping power limit to GPU firmware default 0x%x\n", reg_val); } } + reg_val = PWR_LIM_EN | REG_FIELD_PREP(PWR_LIM_VAL, reg_val); + if (hwmon->xe->info.has_mbx_power_limits) - ret = xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, reg_val); + ret = xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM, reg_val); else - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN | PWR_LIM_VAL, - reg_val); + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM, reg_val); unlock: mutex_unlock(&hwmon->hwmon_lock); return ret; @@ -428,16 +435,37 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy) { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); struct xe_hwmon_energy_info *ei = &hwmon->ei[channel]; - u64 reg_val; + u32 reg_val; + int ret = 0; - reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, - channel)); + /* Energy is supported only for card and pkg */ + if (channel > CHANNEL_PKG) { + *energy = 0; + return; + } - if (reg_val >= ei->reg_val_prev) - ei->accum_energy += reg_val - ei->reg_val_prev; - else - ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; + if (hwmon->xe->info.platform == XE_BATTLEMAGE) { + u64 pmt_val; + + ret = xe_pmt_telem_read(to_pci_dev(hwmon->xe->drm.dev), + xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID), + &pmt_val, BMG_ENERGY_STATUS_PMT_OFFSET, sizeof(pmt_val)); + if (ret != sizeof(pmt_val)) { + drm_warn(&hwmon->xe->drm, "energy read from pmt failed, ret %d\n", ret); + *energy = 0; + return; + } + + if (channel == CHANNEL_PKG) + reg_val = REG_FIELD_GET64(ENERGY_PKG, pmt_val); + else + reg_val = REG_FIELD_GET64(ENERGY_CARD, pmt_val); + } else { + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, + channel)); + } + ei->accum_energy += reg_val - ei->reg_val_prev; ei->reg_val_prev = reg_val; *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, @@ -452,8 +480,9 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); u32 x, y, x_w = 2; /* 2 bits */ u64 r, tau4, out; - int channel = to_sensor_dev_attr(attr)->index; - u32 power_attr = PL1_HWMON_ATTR; + int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD; + u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR; + int ret = 0; xe_pm_runtime_get(hwmon->xe); @@ -506,9 +535,9 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a u32 x, y, rxy, x_w = 2; /* 2 bits */ u64 tau4, r, max_win; unsigned long val; + int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD; + u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? 
PL2_HWMON_ATTR : PL1_HWMON_ATTR; int ret; - int channel = to_sensor_dev_attr(attr)->index; - u32 power_attr = PL1_HWMON_ATTR; ret = kstrtoul(buf, 0, &val); if (ret) @@ -535,10 +564,8 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a tau4 = (u64)((1 << x_w) | x) << y; max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); - if (val > max_win) { - drm_warn(&hwmon->xe->drm, "power_interval invalid val 0x%lx\n", val); + if (val > max_win) return -EINVAL; - } /* val in hw units */ val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME) + 1; @@ -563,14 +590,11 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a mutex_lock(&hwmon->hwmon_lock); - if (hwmon->xe->info.has_mbx_power_limits) { - ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r); - r = (r & ~PWR_LIM_TIME) | rxy; - xe_hwmon_pcode_write_power_limit(hwmon, power_attr, channel, r); - } else { + if (hwmon->xe->info.has_mbx_power_limits) + xe_hwmon_pcode_rmw_power_limit(hwmon, power_attr, channel, PWR_LIM_TIME, rxy); + else r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel), PWR_LIM_TIME, rxy); - } mutex_unlock(&hwmon->hwmon_lock); @@ -582,15 +606,25 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a /* PSYS PL1 */ static SENSOR_DEVICE_ATTR(power1_max_interval, 0664, xe_hwmon_power_max_interval_show, - xe_hwmon_power_max_interval_store, CHANNEL_CARD); - + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PSYS_PL1); +/* PKG PL1 */ static SENSOR_DEVICE_ATTR(power2_max_interval, 0664, xe_hwmon_power_max_interval_show, - xe_hwmon_power_max_interval_store, CHANNEL_PKG); + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PKG_PL1); +/* PSYS PL2 */ +static SENSOR_DEVICE_ATTR(power1_cap_interval, 0664, + xe_hwmon_power_max_interval_show, + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PSYS_PL2); +/* PKG PL2 */ +static SENSOR_DEVICE_ATTR(power2_cap_interval, 0664, + xe_hwmon_power_max_interval_show, + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PKG_PL2); static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_power1_max_interval.dev_attr.attr, &sensor_dev_attr_power2_max_interval.dev_attr.attr, + &sensor_dev_attr_power1_cap_interval.dev_attr.attr, + &sensor_dev_attr_power2_cap_interval.dev_attr.attr, NULL }; @@ -600,19 +634,22 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret = 0; - int channel = index ? CHANNEL_PKG : CHANNEL_CARD; - u32 power_attr = PL1_HWMON_ATTR; - u32 uval; + int channel = (index % 2) ? CHANNEL_PKG : CHANNEL_CARD; + u32 power_attr = (index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR; + u32 uval = 0; + struct xe_reg rapl_limit; + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); xe_pm_runtime_get(hwmon->xe); if (hwmon->xe->info.has_mbx_power_limits) { xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, &uval); - ret = (uval & PWR_LIM_EN) ? attr->mode : 0; - } else { - ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, - channel)) ? attr->mode : 0; + } else if (power_attr != PL2_HWMON_ATTR) { + rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); + if (xe_reg_is_valid(rapl_limit)) + uval = xe_mmio_read32(mmio, rapl_limit); } + ret = (uval & PWR_LIM_EN) ? 
attr->mode : 0; xe_pm_runtime_put(hwmon->xe); @@ -632,8 +669,9 @@ static const struct attribute_group *hwmon_groups[] = { static const struct hwmon_channel_info * const hwmon_info[] = { HWMON_CHANNEL_INFO(temp, HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL), - HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT, - HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL), + HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT | + HWMON_P_CAP, + HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CAP), HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL), @@ -754,31 +792,62 @@ xe_hwmon_temp_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) static umode_t xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { - u32 uval; + u32 uval = 0; + struct xe_reg reg; + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); switch (attr) { case hwmon_power_max: + case hwmon_power_cap: if (hwmon->xe->info.has_mbx_power_limits) { xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval); - return (uval) ? 0664 : 0; - } else { - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, - channel)) ? 0664 : 0; + } else if (attr != PL2_HWMON_ATTR) { + reg = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); + if (xe_reg_is_valid(reg)) + uval = xe_mmio_read32(mmio, reg); } + if (uval & PWR_LIM_EN) { + drm_info(&hwmon->xe->drm, "%s is supported on channel %d\n", + PWR_ATTR_TO_STR(attr), channel); + return 0664; + } + drm_dbg(&hwmon->xe->drm, "%s is unsupported on channel %d\n", + PWR_ATTR_TO_STR(attr), channel); + return 0; case hwmon_power_rated_max: - if (hwmon->xe->info.has_mbx_power_limits) + if (hwmon->xe->info.has_mbx_power_limits) { return 0; - else - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, - channel)) ? 0444 : 0; + } else { + reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + if (xe_reg_is_valid(reg)) + uval = xe_mmio_read32(mmio, reg); + return uval ? 0444 : 0; + } case hwmon_power_crit: - case hwmon_power_label: if (channel == CHANNEL_CARD) { xe_hwmon_pcode_read_i1(hwmon, &uval); - return (uval & POWER_SETUP_I1_WATTS) ? (attr == hwmon_power_label) ? - 0444 : 0644 : 0; + return (uval & POWER_SETUP_I1_WATTS) ? 0644 : 0; } break; + case hwmon_power_label: + if (hwmon->xe->info.has_mbx_power_limits) { + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval); + } else { + reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + if (xe_reg_is_valid(reg)) + uval = xe_mmio_read32(mmio, reg); + + if (!uval) { + reg = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); + if (xe_reg_is_valid(reg)) + uval = xe_mmio_read32(mmio, reg); + } + } + if ((!(uval & PWR_LIM_EN)) && channel == CHANNEL_CARD) { + xe_hwmon_pcode_read_i1(hwmon, &uval); + return (uval & POWER_SETUP_I1_WATTS) ? 0444 : 0; + } + return (uval) ? 
0444 : 0; default: return 0; } @@ -790,6 +859,7 @@ xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) { switch (attr) { case hwmon_power_max: + case hwmon_power_cap: xe_hwmon_power_max_read(hwmon, attr, channel, val); return 0; case hwmon_power_rated_max: @@ -806,6 +876,7 @@ static int xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) { switch (attr) { + case hwmon_power_cap: case hwmon_power_max: return xe_hwmon_power_max_write(hwmon, attr, channel, val); case hwmon_power_crit: @@ -888,11 +959,18 @@ xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) static umode_t xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { + long energy = 0; + switch (attr) { case hwmon_energy_input: case hwmon_energy_label: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, - channel)) ? 0444 : 0; + if (hwmon->xe->info.platform == XE_BATTLEMAGE) { + xe_hwmon_energy_get(hwmon, channel, &energy); + return energy ? 0444 : 0; + } else { + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, + channel)) ? 0444 : 0; + } default: return 0; } @@ -1128,22 +1206,32 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) struct xe_reg pkg_power_sku_unit; if (hwmon->xe->info.has_mbx_power_limits) { - /* Check if card firmware support mailbox power limits commands. */ + /* Check if GPU firmware support mailbox power limits commands. */ if (xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_CARD, &hwmon->pl1_on_boot[CHANNEL_CARD]) | xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_PKG, - &hwmon->pl1_on_boot[CHANNEL_PKG])) { + &hwmon->pl1_on_boot[CHANNEL_PKG]) | + xe_hwmon_pcode_read_power_limit(hwmon, PL2_HWMON_ATTR, CHANNEL_CARD, + &hwmon->pl2_on_boot[CHANNEL_CARD]) | + xe_hwmon_pcode_read_power_limit(hwmon, PL2_HWMON_ATTR, CHANNEL_PKG, + &hwmon->pl2_on_boot[CHANNEL_PKG])) { drm_warn(&hwmon->xe->drm, - "Failed to read power limits, check card firmware !\n"); + "Failed to read power limits, check GPU firmware !\n"); } else { drm_info(&hwmon->xe->drm, "Using mailbox commands for power limits\n"); /* Write default limits to read from pcode from now on. */ - xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR, - CHANNEL_CARD, - hwmon->pl1_on_boot[CHANNEL_CARD]); - xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR, - CHANNEL_PKG, - hwmon->pl1_on_boot[CHANNEL_PKG]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME, + hwmon->pl1_on_boot[CHANNEL_CARD]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME, + hwmon->pl1_on_boot[CHANNEL_PKG]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL2_HWMON_ATTR, + CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME, + hwmon->pl2_on_boot[CHANNEL_CARD]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL2_HWMON_ATTR, + CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME, + hwmon->pl2_on_boot[CHANNEL_PKG]); hwmon->scl_shift_power = PWR_UNIT; hwmon->scl_shift_energy = ENERGY_UNIT; hwmon->scl_shift_time = TIME_UNIT; @@ -1227,4 +1315,4 @@ int xe_hwmon_register(struct xe_device *xe) return 0; } - +MODULE_IMPORT_NS("INTEL_PMT_TELEMETRY"); diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c new file mode 100644 index 000000000000..db9c0340be5c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Intel Xe I2C attached Microcontroller Units (MCU) + * + * Copyright (C) 2025 Intel Corporation. 
+ */ + +#include <linux/array_size.h> +#include <linux/container_of.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/i2c.h> +#include <linux/ioport.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/notifier.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/property.h> +#include <linux/regmap.h> +#include <linux/sprintf.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +#include "regs/xe_i2c_regs.h" +#include "regs/xe_irq_regs.h" + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_i2c.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" + +/** + * DOC: Xe I2C devices + * + * Register a platform device for the I2C host controller (Synpsys DesignWare + * I2C) if the registers of that controller are mapped to the MMIO, and also the + * I2C client device for the Add-In Management Controller (the MCU) attached to + * the host controller. + * + * See drivers/i2c/busses/i2c-designware-* for more information on the I2C host + * controller. + */ + +static const char adapter_name[] = "i2c_designware"; + +static const struct property_entry xe_i2c_adapter_properties[] = { + PROPERTY_ENTRY_STRING("compatible", "intel,xe-i2c"), + PROPERTY_ENTRY_U32("clock-frequency", I2C_MAX_FAST_MODE_PLUS_FREQ), + { } +}; + +static inline void xe_i2c_read_endpoint(struct xe_mmio *mmio, void *ep) +{ + u32 *val = ep; + + val[0] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_PREFIX); + val[1] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_POSTFIX); +} + +static void xe_i2c_client_work(struct work_struct *work) +{ + struct xe_i2c *i2c = container_of(work, struct xe_i2c, work); + struct i2c_board_info info = { + .type = "amc", + .flags = I2C_CLIENT_HOST_NOTIFY, + .addr = i2c->ep.addr[1], + }; + + i2c->client[0] = i2c_new_client_device(i2c->adapter, &info); +} + +static int xe_i2c_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + struct xe_i2c *i2c = container_of(nb, struct xe_i2c, bus_notifier); + struct i2c_adapter *adapter = i2c_verify_adapter(data); + struct device *dev = data; + + if (action == BUS_NOTIFY_ADD_DEVICE && + adapter && dev->parent == &i2c->pdev->dev) { + i2c->adapter = adapter; + schedule_work(&i2c->work); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static int xe_i2c_register_adapter(struct xe_i2c *i2c) +{ + struct pci_dev *pci = to_pci_dev(i2c->drm_dev); + struct platform_device *pdev; + struct fwnode_handle *fwnode; + int ret; + + fwnode = fwnode_create_software_node(xe_i2c_adapter_properties, NULL); + if (!fwnode) + return -ENOMEM; + + /* + * Not using platform_device_register_full() here because we don't have + * a handle to the platform_device before it returns. xe_i2c_notifier() + * uses that handle, but it may be called before + * platform_device_register_full() is done. 
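For contrast with the ordering chosen here, the rejected one-shot registration would look roughly like the sketch below (hypothetical, not part of the patch). Because the DesignWare driver may probe and publish the adapter synchronously from within that call, xe_i2c_notifier() could run before i2c->pdev has been assigned:

        struct platform_device_info pinfo = {
                .parent = i2c->drm_dev,
                .fwnode = fwnode,
                .name = adapter_name,
                .id = pci_dev_id(pci),
        };

        pdev = platform_device_register_full(&pinfo);   /* racy for this use case */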
+ */ + pdev = platform_device_alloc(adapter_name, pci_dev_id(pci)); + if (!pdev) { + ret = -ENOMEM; + goto err_fwnode_remove; + } + + if (i2c->adapter_irq) { + struct resource res; + + res = DEFINE_RES_IRQ_NAMED(i2c->adapter_irq, "xe_i2c"); + + ret = platform_device_add_resources(pdev, &res, 1); + if (ret) + goto err_pdev_put; + } + + pdev->dev.parent = i2c->drm_dev; + pdev->dev.fwnode = fwnode; + i2c->adapter_node = fwnode; + i2c->pdev = pdev; + + ret = platform_device_add(pdev); + if (ret) + goto err_pdev_put; + + return 0; + +err_pdev_put: + platform_device_put(pdev); +err_fwnode_remove: + fwnode_remove_software_node(fwnode); + + return ret; +} + +static void xe_i2c_unregister_adapter(struct xe_i2c *i2c) +{ + platform_device_unregister(i2c->pdev); + fwnode_remove_software_node(i2c->adapter_node); +} + +/** + * xe_i2c_irq_handler: Handler for I2C interrupts + * @xe: xe device instance + * @master_ctl: interrupt register + * + * Forward interrupts generated by the I2C host adapter to the I2C host adapter + * driver. + */ +void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) +{ + if (!xe->i2c || !xe->i2c->adapter_irq) + return; + + if (master_ctl & I2C_IRQ) + generic_handle_irq_safe(xe->i2c->adapter_irq); +} + +static int xe_i2c_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw_irq_num) +{ + irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); + return 0; +} + +static const struct irq_domain_ops xe_i2c_irq_ops = { + .map = xe_i2c_irq_map, +}; + +static int xe_i2c_create_irq(struct xe_i2c *i2c) +{ + struct irq_domain *domain; + + if (!(i2c->ep.capabilities & XE_I2C_EP_CAP_IRQ)) + return 0; + + domain = irq_domain_create_linear(dev_fwnode(i2c->drm_dev), 1, &xe_i2c_irq_ops, NULL); + if (!domain) + return -ENOMEM; + + i2c->adapter_irq = irq_create_mapping(domain, 0); + i2c->irqdomain = domain; + + return 0; +} + +static void xe_i2c_remove_irq(struct xe_i2c *i2c) +{ + if (!i2c->irqdomain) + return; + + irq_dispose_mapping(i2c->adapter_irq); + irq_domain_remove(i2c->irqdomain); +} + +static int xe_i2c_read(void *context, unsigned int reg, unsigned int *val) +{ + struct xe_i2c *i2c = context; + + *val = xe_mmio_read32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET)); + + return 0; +} + +static int xe_i2c_write(void *context, unsigned int reg, unsigned int val) +{ + struct xe_i2c *i2c = context; + + xe_mmio_write32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET), val); + + return 0; +} + +static const struct regmap_config i2c_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_read = xe_i2c_read, + .reg_write = xe_i2c_write, + .fast_io = true, +}; + +void xe_i2c_pm_suspend(struct xe_device *xe) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe->i2c || xe->i2c->ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + return; + + xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D3hot); + drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); +} + +void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe->i2c || xe->i2c->ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + return; + + if (d3cold) + xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY); + + xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D0); + drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); +} + +static void xe_i2c_remove(void *data) +{ + struct xe_i2c *i2c = data; + unsigned int i; + + for (i = 0; i < XE_I2C_MAX_CLIENTS; 
i++) + i2c_unregister_device(i2c->client[i]); + + bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier); + xe_i2c_unregister_adapter(i2c); + xe_i2c_remove_irq(i2c); +} + +/** + * xe_i2c_probe: Probe the I2C host adapter and the I2C clients attached to it + * @xe: xe device instance + * + * Register all the I2C devices described in the I2C Endpoint data structure. + * + * Return: 0 on success, error code on failure + */ +int xe_i2c_probe(struct xe_device *xe) +{ + struct device *drm_dev = xe->drm.dev; + struct xe_i2c_endpoint ep; + struct regmap *regmap; + struct xe_i2c *i2c; + int ret; + + if (xe->info.platform != XE_BATTLEMAGE) + return 0; + + xe_i2c_read_endpoint(xe_root_tile_mmio(xe), &ep); + if (ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + return 0; + + i2c = devm_kzalloc(drm_dev, sizeof(*i2c), GFP_KERNEL); + if (!i2c) + return -ENOMEM; + + INIT_WORK(&i2c->work, xe_i2c_client_work); + i2c->mmio = xe_root_tile_mmio(xe); + i2c->drm_dev = drm_dev; + i2c->ep = ep; + xe->i2c = i2c; + + /* PCI PM isn't aware of this device, bring it up and match it with SGUnit state. */ + xe_i2c_pm_resume(xe, true); + + regmap = devm_regmap_init(drm_dev, NULL, i2c, &i2c_regmap_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + i2c->bus_notifier.notifier_call = xe_i2c_notifier; + ret = bus_register_notifier(&i2c_bus_type, &i2c->bus_notifier); + if (ret) + return ret; + + ret = xe_i2c_create_irq(i2c); + if (ret) + goto err_unregister_notifier; + + ret = xe_i2c_register_adapter(i2c); + if (ret) + goto err_remove_irq; + + return devm_add_action_or_reset(drm_dev, xe_i2c_remove, i2c); + +err_remove_irq: + xe_i2c_remove_irq(i2c); + +err_unregister_notifier: + bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_i2c.h b/drivers/gpu/drm/xe/xe_i2c.h new file mode 100644 index 000000000000..b767ed8ce52b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_i2c.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _XE_I2C_H_ +#define _XE_I2C_H_ + +#include <linux/bits.h> +#include <linux/notifier.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +struct device; +struct fwnode_handle; +struct i2c_adapter; +struct i2c_client; +struct irq_domain; +struct platform_device; +struct xe_device; +struct xe_mmio; + +#define XE_I2C_MAX_CLIENTS 3 + +#define XE_I2C_EP_COOKIE_DEVICE 0xde + +/* Endpoint Capabilities */ +#define XE_I2C_EP_CAP_IRQ BIT(0) + +struct xe_i2c_endpoint { + u8 cookie; + u8 capabilities; + u16 addr[XE_I2C_MAX_CLIENTS]; +}; + +struct xe_i2c { + struct fwnode_handle *adapter_node; + struct platform_device *pdev; + struct i2c_adapter *adapter; + struct i2c_client *client[XE_I2C_MAX_CLIENTS]; + + struct notifier_block bus_notifier; + struct work_struct work; + + struct irq_domain *irqdomain; + int adapter_irq; + + struct xe_i2c_endpoint ep; + struct device *drm_dev; + + struct xe_mmio *mmio; +}; + +#if IS_ENABLED(CONFIG_I2C) +int xe_i2c_probe(struct xe_device *xe); +void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl); +void xe_i2c_pm_suspend(struct xe_device *xe); +void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold); +#else +static inline int xe_i2c_probe(struct xe_device *xe) { return 0; } +static inline void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) { } +static inline void xe_i2c_pm_suspend(struct xe_device *xe) { } +static inline void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) { } +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 
5362d3174b06..5df5b8c2a3e4 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -18,10 +18,12 @@ #include "xe_gt.h" #include "xe_guc.h" #include "xe_hw_engine.h" +#include "xe_i2c.h" #include "xe_memirq.h" #include "xe_mmio.h" #include "xe_pxp.h" #include "xe_sriov.h" +#include "xe_tile.h" /* * Interrupt registers for a unit are always consecutive and ordered @@ -160,7 +162,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) dmask = irqs << 16 | irqs; smask = irqs << 16; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { /* Enable interrupts for each engine class */ xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask); if (ccs_mask) @@ -260,7 +262,7 @@ gt_engine_identity(struct xe_device *xe, static void gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir) { - if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt)) + if (instance == OTHER_GUC_INSTANCE && xe_gt_is_main_type(gt)) return xe_guc_irq_handler(&gt->uc.guc, iir); if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt)) return xe_guc_irq_handler(&gt->uc.guc, iir); @@ -476,6 +478,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) if (xe->info.has_heci_cscfi) xe_heci_csc_irq_handler(xe, master_ctl); xe_display_irq_handler(xe, master_ctl); + xe_i2c_irq_handler(xe, master_ctl); gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); } } @@ -550,7 +553,7 @@ static void xelp_irq_reset(struct xe_tile *tile) static void dg1_irq_reset(struct xe_tile *tile) { - if (tile->id == 0) + if (xe_tile_is_root(tile)) dg1_intr_disable(tile_to_xe(tile)); gt_irq_reset(tile); diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 63db66df064b..a2000307d5bf 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -11,6 +11,7 @@ #include "xe_assert.h" #include "xe_bo.h" +#include "xe_gt_tlb_invalidation.h" #include "xe_lmtt.h" #include "xe_map.h" #include "xe_mmio.h" @@ -78,6 +79,9 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level } lmtt_assert(lmtt, xe_bo_is_vram(bo)); + lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE)); + + xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, xe_bo_size(bo)); pt->level = level; pt->bo = bo; @@ -91,6 +95,9 @@ out: static void lmtt_pt_free(struct xe_lmtt_pt *pt) { + lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n", + pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE)); + xe_bo_unpin_map_no_vm(pt->bo); kfree(pt); } @@ -216,6 +223,58 @@ void xe_lmtt_init_hw(struct xe_lmtt *lmtt) lmtt_setup_dir_ptr(lmtt); } +static int lmtt_invalidate_hw(struct xe_lmtt *lmtt) +{ + struct xe_gt_tlb_invalidation_fence fences[XE_MAX_GT_PER_TILE]; + struct xe_gt_tlb_invalidation_fence *fence = fences; + struct xe_tile *tile = lmtt_to_tile(lmtt); + struct xe_gt *gt; + int result = 0; + int err; + u8 id; + + for_each_gt_on_tile(gt, tile, id) { + xe_gt_tlb_invalidation_fence_init(gt, fence, true); + err = xe_gt_tlb_invalidation_all(gt, fence); + result = result ?: err; + fence++; + } + + lmtt_debug(lmtt, "num_fences=%d err=%d\n", (int)(fence - fences), result); + + /* + * It is fine to wait for all fences, even for those which covers the + * invalidation request that failed, as such fence should be already + * marked as signaled. + */ + fence = fences; + for_each_gt_on_tile(gt, tile, id) + xe_gt_tlb_invalidation_fence_wait(fence++); + + return result; +} + +/** + * xe_lmtt_invalidate_hw - Invalidate LMTT hardware. 
+ * @lmtt: the &xe_lmtt to invalidate + * + * Send requests to all GuCs on this tile to invalidate all TLBs. + * + * This function should be called only when running as a PF driver. + */ +void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt) +{ + struct xe_device *xe = lmtt_to_xe(lmtt); + int err; + + lmtt_assert(lmtt, IS_SRIOV_PF(xe)); + + err = lmtt_invalidate_hw(lmtt); + if (err) + xe_sriov_warn(xe, "LMTT%u invalidation failed (%pe)", + lmtt_to_tile(lmtt)->id, ERR_PTR(err)); +} + static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, u64 pte, unsigned int idx) { @@ -226,9 +285,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, switch (lmtt->ops->lmtt_pte_size(level)) { case sizeof(u32): + lmtt_assert(lmtt, !overflows_type(pte, u32)); + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte); break; case sizeof(u64): + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte); break; default: @@ -265,6 +329,7 @@ static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid) return; lmtt_write_pte(lmtt, pd, LMTT_PTE_INVALID, vfid); + lmtt_invalidate_hw(lmtt); lmtt_assert(lmtt, pd->level > 0); lmtt_assert(lmtt, pt->level == pd->level - 1); @@ -386,11 +451,11 @@ static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo u64 addr, vram_offset; lmtt_assert(lmtt, IS_ALIGNED(start, page_size)); - lmtt_assert(lmtt, IS_ALIGNED(bo->size, page_size)); + lmtt_assert(lmtt, IS_ALIGNED(xe_bo_size(bo), page_size)); lmtt_assert(lmtt, xe_bo_is_vram(bo)); vram_offset = vram_region_gpu_offset(bo->ttm.resource); - xe_res_first(bo->ttm.resource, 0, bo->size, &cur); + xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); while (cur.remaining) { addr = xe_res_dma(&cur); addr += vram_offset; /* XXX */ diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h index cb10ef994db6..75a234fbf367 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.h +++ b/drivers/gpu/drm/xe/xe_lmtt.h @@ -15,6 +15,7 @@ struct xe_lmtt_ops; #ifdef CONFIG_PCI_IOV int xe_lmtt_init(struct xe_lmtt *lmtt); void xe_lmtt_init_hw(struct xe_lmtt *lmtt); +void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt); int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range); int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset); void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 63d74e27f54c..6d38411bdeba 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -39,7 +39,33 @@ #define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) #define LRC_PPHWSP_SIZE SZ_4K +#define LRC_INDIRECT_CTX_BO_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K +#define LRC_WA_BB_SIZE SZ_4K + +/* + * Layout of the LRC and associated data allocated as + * lrc->bo: + * + * Region Size + * +============================+=================================+ <- __xe_lrc_ring_offset() + * | Ring | ring_size, see | + * | | xe_lrc_init() | + * +============================+=================================+ <- __xe_lrc_pphwsp_offset() + * | PPHWSP (includes SW state) | 4K | + * +----------------------------+---------------------------------+ <- __xe_lrc_regs_offset() + * | Engine Context Image | n * 4K, see | + * | | xe_gt_lrc_size() | + * 
+----------------------------+---------------------------------+ <- __xe_lrc_indirect_ring_offset() + * | Indirect Ring State Page | 0 or 4k, see | + * | | XE_LRC_FLAG_INDIRECT_RING_STATE | + * +============================+=================================+ <- __xe_lrc_indirect_ctx_offset() + * | Indirect Context Page | 0 or 4k, see | + * | | XE_LRC_FLAG_INDIRECT_CTX | + * +============================+=================================+ <- __xe_lrc_wa_bb_offset() + * | WA BB Per Ctx | 4k | + * +============================+=================================+ <- xe_bo_size(lrc->bo) + */ static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) @@ -47,6 +73,12 @@ lrc_to_xe(struct xe_lrc *lrc) return gt_to_xe(lrc->fence_ctx.gt); } +static bool +gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) +{ + return false; +} + size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) { struct xe_device *xe = gt_to_xe(gt); @@ -581,8 +613,6 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) if (xe_gt_has_indirect_ring_state(hwe->gt)) regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); - - /* TODO: Timestamp */ } static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) @@ -654,8 +684,8 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) #define LRC_SEQNO_PPHWSP_OFFSET 512 #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) #define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) +#define LRC_ENGINE_ID_PPHWSP_OFFSET 1024 #define LRC_PARALLEL_PPHWSP_OFFSET 2048 -#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096 u32 xe_lrc_regs_offset(struct xe_lrc *lrc) { @@ -716,8 +746,23 @@ static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { - /* Indirect ring state page is at the very end of LRC */ - return lrc->size - LRC_INDIRECT_RING_STATE_SIZE; + u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - + LRC_INDIRECT_RING_STATE_SIZE; + + if (lrc->flags & XE_LRC_FLAG_INDIRECT_CTX) + offset -= LRC_INDIRECT_CTX_BO_SIZE; + + return offset; +} + +static inline u32 __xe_lrc_indirect_ctx_offset(struct xe_lrc *lrc) +{ + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_CTX_BO_SIZE; +} + +static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc) +{ + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE; } #define DECL_MAP_ADDR_HELPERS(elem) \ @@ -910,13 +955,11 @@ static void xe_lrc_finish(struct xe_lrc *lrc) { xe_hw_fence_ctx_finish(&lrc->fence_ctx); xe_bo_unpin_map_no_vm(lrc->bo); - xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo); } /* - * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active - * context run ticks. - * @lrc: Pointer to the lrc. + * wa_bb_setup_utilization() - Write commands to wa bb to assist + * in calculating active context run ticks. * * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the * context, but only gets updated when the context switches out. In order to @@ -941,11 +984,15 @@ static void xe_lrc_finish(struct xe_lrc *lrc) * store it in the PPHSWP. 
*/ #define CONTEXT_ACTIVE 1ULL -static void xe_lrc_setup_utilization(struct xe_lrc *lrc) +static ssize_t setup_utilization_wa(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, + size_t max_len) { - u32 *cmd; + u32 *cmd = batch; - cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + if (xe_gt_WARN_ON(lrc->gt, max_len < 12)) + return -ENOSPC; *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; *cmd++ = ENGINE_ID(0).addr; @@ -964,34 +1011,190 @@ static void xe_lrc_setup_utilization(struct xe_lrc *lrc) *cmd++ = upper_32_bits(CONTEXT_ACTIVE); } - *cmd++ = MI_BATCH_BUFFER_END; + return cmd - batch; +} + +struct bo_setup { + ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + u32 *batch, size_t max_size); +}; + +struct bo_setup_state { + /* Input: */ + struct xe_lrc *lrc; + struct xe_hw_engine *hwe; + size_t max_size; + size_t reserve_dw; + unsigned int offset; + const struct bo_setup *funcs; + unsigned int num_funcs; + + /* State: */ + u32 *buffer; + u32 *ptr; + unsigned int written; +}; + +static int setup_bo(struct bo_setup_state *state) +{ + ssize_t remain; + + if (state->lrc->bo->vmap.is_iomem) { + state->buffer = kmalloc(state->max_size, GFP_KERNEL); + if (!state->buffer) + return -ENOMEM; + state->ptr = state->buffer; + } else { + state->ptr = state->lrc->bo->vmap.vaddr + state->offset; + state->buffer = NULL; + } + + remain = state->max_size / sizeof(u32); + + for (size_t i = 0; i < state->num_funcs; i++) { + ssize_t len = state->funcs[i].setup(state->lrc, state->hwe, + state->ptr, remain); + + remain -= len; + + /* + * Caller has asked for at least reserve_dw to remain unused. + */ + if (len < 0 || + xe_gt_WARN_ON(state->lrc->gt, remain < state->reserve_dw)) + goto fail; + + state->ptr += len; + state->written += len; + } + + return 0; + +fail: + kfree(state->buffer); + return -ENOSPC; +} + +static void finish_bo(struct bo_setup_state *state) +{ + if (!state->buffer) + return; + + xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap, + state->offset, state->buffer, + state->written * sizeof(u32)); + kfree(state->buffer); +} + +static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + static const struct bo_setup funcs[] = { + { .setup = setup_utilization_wa }, + }; + struct bo_setup_state state = { + .lrc = lrc, + .hwe = hwe, + .max_size = LRC_WA_BB_SIZE, + .reserve_dw = 1, + .offset = __xe_lrc_wa_bb_offset(lrc), + .funcs = funcs, + .num_funcs = ARRAY_SIZE(funcs), + }; + int ret; + + ret = setup_bo(&state); + if (ret) + return ret; + + *state.ptr++ = MI_BATCH_BUFFER_END; + state.written++; + + finish_bo(&state); xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, - xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + xe_bo_ggtt_addr(lrc->bo) + state.offset + 1); + return 0; } -#define PVC_CTX_ASID (0x2e + 1) -#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) +static int +setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + static struct bo_setup rcs_funcs[] = { + }; + struct bo_setup_state state = { + .lrc = lrc, + .hwe = hwe, + .max_size = (63 * 64) /* max 63 cachelines */, + .offset = __xe_lrc_indirect_ctx_offset(lrc), + }; + int ret; + + if (!(lrc->flags & XE_LRC_FLAG_INDIRECT_CTX)) + return 0; + + if (hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE) { + state.funcs = rcs_funcs; + state.num_funcs = ARRAY_SIZE(rcs_funcs); + } + + if (xe_gt_WARN_ON(lrc->gt, !state.funcs)) + return 0; + + ret = setup_bo(&state); + if (ret) + return ret; + + /* + * Align to 64B cacheline so there's no garbage at the end 
for CS to + * execute: size for indirect ctx must be a multiple of 64. + */ + while (state.written & 0xf) { + *state.ptr++ = MI_NOOP; + state.written++; + } + + finish_bo(&state); + + xe_lrc_write_ctx_reg(lrc, + CTX_CS_INDIRECT_CTX, + (xe_bo_ggtt_addr(lrc->bo) + state.offset) | + /* Size in CLs. */ + (state.written * sizeof(u32) / 64)); + xe_lrc_write_ctx_reg(lrc, + CTX_CS_INDIRECT_CTX_OFFSET, + CTX_INDIRECT_CTX_OFFSET_DEFAULT); + + return 0; +} static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_vm *vm, u32 ring_size, u16 msix_vec, u32 init_flags) { struct xe_gt *gt = hwe->gt; + const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class); + u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); struct iosys_map map; - void *init_data = NULL; u32 arb_enable; - u32 lrc_size; u32 bo_flags; int err; kref_init(&lrc->refcount); lrc->gt = gt; + lrc->size = lrc_size; lrc->flags = 0; - lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); + lrc->ring.size = ring_size; + lrc->ring.tail = 0; + + if (gt_engine_needs_indirect_ctx(gt, hwe->class)) { + lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX; + bo_size += LRC_INDIRECT_CTX_BO_SIZE; + } + if (xe_gt_has_indirect_ring_state(gt)) lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; @@ -1000,52 +1203,36 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, if (vm && vm->xef) /* userspace */ bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; - /* - * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address - * via VM bind calls. - */ - lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, lrc_size, + lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size, ttm_bo_type_kernel, bo_flags); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); - lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, - ttm_bo_type_kernel, - bo_flags); - if (IS_ERR(lrc->bb_per_ctx_bo)) { - err = PTR_ERR(lrc->bb_per_ctx_bo); - goto err_lrc_finish; - } - - lrc->size = lrc_size; - lrc->ring.size = ring_size; - lrc->ring.tail = 0; - xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); - if (!gt->default_lrc[hwe->class]) { - init_data = empty_lrc_data(hwe); - if (!init_data) { - err = -ENOMEM; - goto err_lrc_finish; - } - } - /* * Init Per-Process of HW status Page, LRC / context state to known - * values + * values. If there's already a primed default_lrc, just copy it, otherwise + * it's the early submission to record the lrc: build a new empty one from + * scratch. 
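To make the buffer layout described in the diagram above concrete: with the 4K sizes defined at the top of the file, and for an LRC that uses both the indirect ring state page and the indirect context page, the new offset helpers reduce to the following arithmetic (a sketch mirroring __xe_lrc_wa_bb_offset() and friends; lrc_size here already ends with the indirect ring state page):

        bo_size              = ring_size + lrc_size + LRC_INDIRECT_CTX_BO_SIZE + LRC_WA_BB_SIZE;
        wa_bb_offset         = bo_size - LRC_WA_BB_SIZE;                /* last 4K of the BO */
        indirect_ctx_offset  = wa_bb_offset - LRC_INDIRECT_CTX_BO_SIZE;
        indirect_ring_offset = indirect_ctx_offset - LRC_INDIRECT_RING_STATE_SIZE;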
*/ map = __xe_lrc_pphwsp_map(lrc); - if (!init_data) { + if (gt->default_lrc[hwe->class]) { xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE); + lrc_size - LRC_PPHWSP_SIZE); } else { - xe_map_memcpy_to(xe, &map, 0, init_data, - xe_gt_lrc_size(gt, hwe->class)); + void *init_data = empty_lrc_data(hwe); + + if (!init_data) { + err = -ENOMEM; + goto err_lrc_finish; + } + + xe_map_memcpy_to(xe, &map, 0, init_data, lrc_size); kfree(init_data); } @@ -1099,7 +1286,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0); if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); + xe_lrc_write_ctx_reg(lrc, CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT); @@ -1125,7 +1312,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, map = __xe_lrc_start_seqno_map(lrc); xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - xe_lrc_setup_utilization(lrc); + err = setup_wa_bb(lrc, hwe); + if (err) + goto err_lrc_finish; + + err = setup_indirect_ctx(lrc, hwe); + if (err) + goto err_lrc_finish; return 0; @@ -1719,7 +1912,7 @@ static const struct instr_state xe_hpg_svg_state[] = { { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, }; -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) +u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs) { struct xe_gt *gt = q->hwe->gt; struct xe_device *xe = gt_to_xe(gt); @@ -1754,7 +1947,7 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b if (!state_table) { xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); - return; + return cs; } for (int i = 0; i < state_table_size; i++) { @@ -1777,12 +1970,14 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b instr == CMD_3DSTATE_DRAWING_RECTANGLE) instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; - bb->cs[bb->len] = instr; + *cs = instr; if (!is_single_dw) - bb->cs[bb->len] |= (num_dw - 2); + *cs |= (num_dw - 2); - bb->len += num_dw; + cs += num_dw; } + + return cs; } struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) @@ -1803,7 +1998,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; + snapshot->lrc_size = lrc->size; snapshot->lrc_snapshot = NULL; snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index eb6e8de8c939..b6c8053c581b 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -112,7 +112,7 @@ void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, enum xe_engine_class); -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb); +u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs); struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc); void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); diff --git 
a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index ae24cf6f8dd9..e9883706e004 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -22,14 +22,15 @@ struct xe_lrc { */ struct xe_bo *bo; - /** @size: size of lrc including any indirect ring state page */ + /** @size: size of the lrc and optional indirect ring state */ u32 size; /** @gt: gt which this LRC belongs to */ struct xe_gt *gt; /** @flags: LRC flags */ -#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 +#define XE_LRC_FLAG_INDIRECT_CTX 0x1 +#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x2 u32 flags; /** @refcount: ref count of this lrc */ @@ -53,9 +54,6 @@ struct xe_lrc { /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ u64 ctx_timestamp; - - /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */ - struct xe_bo *bb_per_ctx_bo; }; struct xe_lrc_snapshot; diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h index f62e0c8b67ab..8d67f6ba2d95 100644 --- a/drivers/gpu/drm/xe/xe_map.h +++ b/drivers/gpu/drm/xe/xe_map.h @@ -78,6 +78,24 @@ static inline void xe_map_write32(struct xe_device *xe, struct iosys_map *map, iosys_map_wr(map__, offset__, type__, val__); \ }) +#define xe_map_rd_array(xe__, map__, index__, type__) \ + xe_map_rd(xe__, map__, (index__) * sizeof(type__), type__) + +#define xe_map_wr_array(xe__, map__, index__, type__, val__) \ + xe_map_wr(xe__, map__, (index__) * sizeof(type__), type__, val__) + +#define xe_map_rd_array_u32(xe__, map__, index__) \ + xe_map_rd_array(xe__, map__, index__, u32) + +#define xe_map_wr_array_u32(xe__, map__, index__, val__) \ + xe_map_wr_array(xe__, map__, index__, u32, val__) + +#define xe_map_rd_ring_u32(xe__, map__, index__, size__) \ + xe_map_rd_array_u32(xe__, map__, (index__) % (size__)) + +#define xe_map_wr_ring_u32(xe__, map__, index__, size__, val__) \ + xe_map_wr_array_u32(xe__, map__, (index__) % (size__), val__) + #define xe_map_rd_field(xe__, map__, struct_offset__, struct_type__, field__) ({ \ struct xe_device *__xe = xe__; \ xe_device_assert_mem_access(__xe); \ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 8f8e9fdfb2a8..ba1cff2e4cda 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -82,7 +82,7 @@ struct xe_migrate { * of the instruction. Subtracting the instruction header (1 dword) and * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values. */ -#define MAX_PTE_PER_SDI 0x1FE +#define MAX_PTE_PER_SDI 0x1FEU /** * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue. 
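As an aside (not part of the patch): the U suffix added to MAX_PTE_PER_SDI above presumably lets the constant pass the kernel's type-checked min() against the u32 ptes count in build_pt_update_batch_sram() further down in this patch. For the dword budget that the comment and pte_update_cmd_size() describe, here is a minimal standalone sketch; the helper name and the sample transfer size are invented for the example:

/*
 * Sketch only, not from the patch: the dword budget behind MAX_PTE_PER_SDI,
 * mirroring the shape of pte_update_cmd_size(). 0x3FD dwords remain after the
 * 1 dword header and 2 dword address, i.e. 0x1FE two-dword (qword) PTE values.
 */
#include <stdio.h>

#define MAX_PTE_PER_SDI 0x1FEU

static unsigned long long sdi_num_dwords(unsigned long long pte_entries)
{
        /* one MI_STORE_DATA_IMM per chunk of at most MAX_PTE_PER_SDI PTEs */
        unsigned long long chunks =
                (pte_entries + MAX_PTE_PER_SDI - 1) / MAX_PTE_PER_SDI;

        /* 1 dword header + 2 dword address per chunk, 2 dwords per PTE value */
        return (1 + 2) * chunks + 2 * pte_entries;
}

int main(void)
{
        /* e.g. 8 MiB of 4 KiB pages -> 2048 PTEs -> 5 chunks -> 4111 dwords */
        printf("%llu\n", sdi_num_dwords(2048));
        return 0;
}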
@@ -203,7 +203,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); /* Need to be sure everything fits in the first PT, or create more */ - xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M); + xe_tile_assert(tile, m->batch_base_ofs + xe_bo_size(batch) < SZ_2M); bo = xe_bo_create_pin_map(vm->xe, tile, vm, num_entries * XE_PAGE_SIZE, @@ -214,7 +214,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, return PTR_ERR(bo); /* PT30 & PT31 reserved for 2M identity map */ - pt29_ofs = bo->size - 3 * XE_PAGE_SIZE; + pt29_ofs = xe_bo_size(bo) - 3 * XE_PAGE_SIZE; entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs, pat_index); xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); @@ -236,7 +236,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (!IS_DGFX(xe)) { /* Write out batch too */ m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; - for (i = 0; i < batch->size; + for (i = 0; i < xe_bo_size(batch); i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = vm->pt_ops->pte_encode_bo(batch, i, @@ -247,13 +247,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, level++; } if (xe->info.has_usm) { - xe_tile_assert(tile, batch->size == SZ_1M); + xe_tile_assert(tile, xe_bo_size(batch) == SZ_1M); batch = tile->primary_gt->usm.bb_pool->bo; m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M; - xe_tile_assert(tile, batch->size == SZ_512K); + xe_tile_assert(tile, xe_bo_size(batch) == SZ_512K); - for (i = 0; i < batch->size; + for (i = 0; i < xe_bo_size(batch); i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = vm->pt_ops->pte_encode_bo(batch, i, @@ -306,7 +306,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Identity map the entire vram at 256GiB offset */ if (IS_DGFX(xe)) { - u64 pt30_ofs = bo->size - 2 * XE_PAGE_SIZE; + u64 pt30_ofs = xe_bo_size(bo) - 2 * XE_PAGE_SIZE; xe_migrate_program_identity(xe, vm, bo, map_ofs, IDENTITY_OFFSET, pat_index, pt30_ofs); @@ -321,7 +321,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION]; u64 vram_offset = IDENTITY_OFFSET + DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); - u64 pt31_ofs = bo->size - XE_PAGE_SIZE; + u64 pt31_ofs = xe_bo_size(bo) - XE_PAGE_SIZE; xe_assert(xe, xe->mem.vram.actual_physical_size <= (MAX_NUM_PTE - IDENTITY_OFFSET - IDENTITY_OFFSET / 2) * SZ_1G); @@ -768,7 +768,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; - u64 size = src_bo->size; + u64 size = xe_bo_size(src_bo); struct xe_res_cursor src_it, dst_it, ccs_it; u64 src_L0_ofs, dst_L0_ofs; u32 src_L0_pt, dst_L0_pt; @@ -791,7 +791,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (XE_WARN_ON(copy_ccs && src_bo != dst_bo)) return ERR_PTR(-EINVAL); - if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size)) + if (src_bo != dst_bo && XE_WARN_ON(xe_bo_size(src_bo) != xe_bo_size(dst_bo))) return ERR_PTR(-EINVAL); if (!src_is_vram) @@ -863,7 +863,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); else - emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || 
use_comp_pat, &src_it, src_L0, src); if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) @@ -1064,7 +1064,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_device *xe = gt_to_xe(gt); bool clear_only_system_ccs = false; struct dma_fence *fence = NULL; - u64 size = bo->size; + u64 size = xe_bo_size(bo); struct xe_res_cursor src_it; struct ttm_resource *src = dst; int err; @@ -1076,9 +1076,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, clear_only_system_ccs = true; if (!clear_vram) - xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); + xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &src_it); else - xe_res_first(src, 0, bo->size, &src_it); + xe_res_first(src, 0, xe_bo_size(bo), &src_it); while (size) { u64 clear_L0_ofs; @@ -1407,7 +1407,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m, if (idx == chunk) goto next_cmd; - xe_tile_assert(tile, pt_bo->size == SZ_4K); + xe_tile_assert(tile, xe_bo_size(pt_bo) == SZ_4K); /* Map a PT at most once */ if (pt_bo->update_index < 0) @@ -1553,15 +1553,17 @@ static u32 pte_update_cmd_size(u64 size) u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE); XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER); + /* * MI_STORE_DATA_IMM command is used to update page table. Each - * instruction can update maximumly 0x1ff pte entries. To update - * n (n <= 0x1ff) pte entries, we need: - * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) - * 2 dword for the page table's physical location - * 2*n dword for value of pte to fill (each pte entry is 2 dwords) + * instruction can update maximumly MAX_PTE_PER_SDI pte entries. To + * update n (n <= MAX_PTE_PER_SDI) pte entries, we need: + * + * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) + * - 2 dword for the page table's physical location + * - 2*n dword for value of pte to fill (each pte entry is 2 dwords) */ - num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff); + num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI); num_dword += entries * 2; return num_dword; @@ -1577,7 +1579,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { - u32 chunk = min(0x1ffU, ptes); + u32 chunk = min(MAX_PTE_PER_SDI, ptes); bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = pt_offset; @@ -1815,8 +1817,8 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, xe_bo_assert_held(bo); /* Use bounce buffer for small access and unaligned access */ - if (len & XE_CACHELINE_MASK || - ((uintptr_t)buf | offset) & XE_CACHELINE_MASK) { + if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) || + !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) { int buf_offset = 0; /* @@ -1846,7 +1848,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, (void *)ptr, - sizeof(bounce), 0); + sizeof(bounce), write); if (err) return err; } else { @@ -1866,7 +1868,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, if (IS_ERR(dma_addr)) return PTR_ERR(dma_addr); - xe_res_first(bo->ttm.resource, offset, bo->size - offset, &cursor); + xe_res_first(bo->ttm.resource, offset, xe_bo_size(bo) - offset, &cursor); do { struct dma_fence *__fence; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 7357458bc0d2..e4db8d58ea2d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -22,6 +22,9 @@ #include "xe_macros.h" 
#include "xe_sriov.h" #include "xe_trace.h" +#include "xe_wa.h" + +#include "generated/xe_device_wa_oob.h" static void tiles_fini(void *arg) { @@ -55,6 +58,7 @@ static void tiles_fini(void *arg) static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) { struct xe_tile *tile; + struct xe_gt *gt; u8 id; /* @@ -67,7 +71,7 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) /* Possibly override number of tile based on configuration register */ if (!xe->info.skip_mtcfg) { struct xe_mmio *mmio = xe_root_tile_mmio(xe); - u8 tile_count; + u8 tile_count, gt_count; u32 mtcfg; /* @@ -84,12 +88,15 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) xe->info.tile_count = tile_count; /* - * FIXME: Needs some work for standalone media, but - * should be impossible with multi-tile for now: - * multi-tile platform with standalone media doesn't - * exist + * We've already setup gt_count according to the full + * tile count. Re-calculate it to only include the GTs + * that belong to the remaining tile(s). */ - xe->info.gt_count = xe->info.tile_count; + gt_count = 0; + for_each_gt(gt, xe, id) + if (gt->info.id < tile_count * xe->info.max_gt_per_tile) + gt_count++; + xe->info.gt_count = gt_count; } } @@ -163,7 +170,7 @@ static void mmio_flush_pending_writes(struct xe_mmio *mmio) #define DUMMY_REG_OFFSET 0x130030 int i; - if (mmio->tile->xe->info.platform != XE_LUNARLAKE) + if (!XE_DEVICE_WA(mmio->tile->xe, 15015404425)) return; /* 4 dummy writes */ @@ -176,7 +183,6 @@ u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u8 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); val = readb(mmio->regs + addr); @@ -190,7 +196,6 @@ u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u16 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); val = readw(mmio->regs + addr); @@ -217,7 +222,6 @@ u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u32 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe)) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e4742e27e2cd..107ffe87808c 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -18,29 +18,45 @@ #include "xe_observation.h" #include "xe_sched_job.h" +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define DEFAULT_GUC_LOG_LEVEL 3 +#else +#define DEFAULT_GUC_LOG_LEVEL 1 +#endif + +#define DEFAULT_PROBE_DISPLAY true +#define DEFAULT_VRAM_BAR_SIZE 0 +#define DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE +#define DEFAULT_WEDGED_MODE 1 +#define DEFAULT_SVM_NOTIFIER_SIZE 512 + struct xe_modparam xe_modparam = { - .probe_display = true, - .guc_log_level = 3, - .force_probe = CONFIG_DRM_XE_FORCE_PROBE, - .wedged_mode = 1, - .svm_notifier_size = 512, + .probe_display = DEFAULT_PROBE_DISPLAY, + .guc_log_level = DEFAULT_GUC_LOG_LEVEL, + .force_probe = DEFAULT_FORCE_PROBE, + .wedged_mode = DEFAULT_WEDGED_MODE, + .svm_notifier_size = DEFAULT_SVM_NOTIFIER_SIZE, /* the rest are 0 by default */ }; module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600); -MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2"); +MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size in MiB, must be power of 2 " + "[default=" __stringify(DEFAULT_SVM_NOTIFIER_SIZE) "]"); 
module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); -MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)"); +MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched " + "[default=" __stringify(DEFAULT_PROBE_DISPLAY) "])"); module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, int, 0600); -MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size (in MiB) - <0=disable-resize, 0=max-needed-size[default], >0=force-size"); +MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size in MiB (<0=disable-resize, 0=max-needed-size, >0=force-size " + "[default=" __stringify(DEFAULT_VRAM_BAR_SIZE) "])"); module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600); -MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)"); +MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1=normal, 2..5=verbose-levels " + "[default=" __stringify(DEFAULT_GUC_LOG_LEVEL) "])"); module_param_named_unsafe(guc_firmware_path, xe_modparam.guc_firmware_path, charp, 0400); MODULE_PARM_DESC(guc_firmware_path, @@ -56,7 +72,8 @@ MODULE_PARM_DESC(gsc_firmware_path, module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400); MODULE_PARM_DESC(force_probe, - "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details."); + "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details " + "[default=" DEFAULT_FORCE_PROBE "])"); #ifdef CONFIG_PCI_IOV module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400); @@ -67,7 +84,8 @@ MODULE_PARM_DESC(max_vfs, module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); MODULE_PARM_DESC(wedged_mode, - "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); + "Module's default policy for the wedged mode (0=never, 1=upon-critical-errors, 2=upon-any-hang " + "[default=" __stringify(DEFAULT_WEDGED_MODE) "])"); static int xe_check_nomodeset(void) { diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c new file mode 100644 index 000000000000..61b0a1531a53 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2019-2025, Intel Corporation. All rights reserved. 
+ */ + +#include <linux/intel_dg_nvm_aux.h> +#include <linux/pci.h> + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_mmio.h" +#include "xe_nvm.h" +#include "regs/xe_gsc_regs.h" +#include "xe_sriov.h" + +#define GEN12_GUNIT_NVM_BASE 0x00102040 +#define GEN12_DEBUG_NVM_BASE 0x00101018 + +#define GEN12_CNTL_PROTECTED_NVM_REG 0x0010100C + +#define GEN12_GUNIT_NVM_SIZE 0x80 +#define GEN12_DEBUG_NVM_SIZE 0x4 + +#define NVM_NON_POSTED_ERASE_CHICKEN_BIT BIT(13) + +#define HECI_FW_STATUS_2_NVM_ACCESS_MODE BIT(3) + +static const struct intel_dg_nvm_region regions[INTEL_DG_NVM_REGIONS] = { + [0] = { .name = "DESCRIPTOR", }, + [2] = { .name = "GSC", }, + [9] = { .name = "PADDING", }, + [11] = { .name = "OptionROM", }, + [12] = { .name = "DAM", }, +}; + +static void xe_nvm_release_dev(struct device *dev) +{ +} + +static bool xe_nvm_non_posted_erase(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + + if (xe->info.platform != XE_BATTLEMAGE) + return false; + return !(xe_mmio_read32(>->mmio, XE_REG(GEN12_CNTL_PROTECTED_NVM_REG)) & + NVM_NON_POSTED_ERASE_CHICKEN_BIT); +} + +static bool xe_nvm_writable_override(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + bool writable_override; + resource_size_t base; + + switch (xe->info.platform) { + case XE_BATTLEMAGE: + base = DG2_GSC_HECI2_BASE; + break; + case XE_PVC: + base = PVC_GSC_HECI2_BASE; + break; + case XE_DG2: + base = DG2_GSC_HECI2_BASE; + break; + case XE_DG1: + base = DG1_GSC_HECI2_BASE; + break; + default: + drm_err(&xe->drm, "Unknown platform\n"); + return true; + } + + writable_override = + !(xe_mmio_read32(>->mmio, HECI_FWSTS2(base)) & + HECI_FW_STATUS_2_NVM_ACCESS_MODE); + if (writable_override) + drm_info(&xe->drm, "NVM access overridden by jumper\n"); + return writable_override; +} + +int xe_nvm_init(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct auxiliary_device *aux_dev; + struct intel_dg_nvm_dev *nvm; + int ret; + + if (!xe->info.has_gsc_nvm) + return 0; + + /* No access to internal NVM from VFs */ + if (IS_SRIOV_VF(xe)) + return 0; + + /* Nvm pointer should be NULL here */ + if (WARN_ON(xe->nvm)) + return -EFAULT; + + xe->nvm = kzalloc(sizeof(*nvm), GFP_KERNEL); + if (!xe->nvm) + return -ENOMEM; + + nvm = xe->nvm; + + nvm->writable_override = xe_nvm_writable_override(xe); + nvm->non_posted_erase = xe_nvm_non_posted_erase(xe); + nvm->bar.parent = &pdev->resource[0]; + nvm->bar.start = GEN12_GUNIT_NVM_BASE + pdev->resource[0].start; + nvm->bar.end = nvm->bar.start + GEN12_GUNIT_NVM_SIZE - 1; + nvm->bar.flags = IORESOURCE_MEM; + nvm->bar.desc = IORES_DESC_NONE; + nvm->regions = regions; + + nvm->bar2.parent = &pdev->resource[0]; + nvm->bar2.start = GEN12_DEBUG_NVM_BASE + pdev->resource[0].start; + nvm->bar2.end = nvm->bar2.start + GEN12_DEBUG_NVM_SIZE - 1; + nvm->bar2.flags = IORESOURCE_MEM; + nvm->bar2.desc = IORES_DESC_NONE; + + aux_dev = &nvm->aux_dev; + + aux_dev->name = "nvm"; + aux_dev->id = (pci_domain_nr(pdev->bus) << 16) | pci_dev_id(pdev); + aux_dev->dev.parent = &pdev->dev; + aux_dev->dev.release = xe_nvm_release_dev; + + ret = auxiliary_device_init(aux_dev); + if (ret) { + drm_err(&xe->drm, "xe-nvm aux init failed %d\n", ret); + goto err; + } + + ret = auxiliary_device_add(aux_dev); + if (ret) { + drm_err(&xe->drm, "xe-nvm aux add failed %d\n", ret); + auxiliary_device_uninit(aux_dev); + goto err; + } + return 0; + +err: + kfree(nvm); + xe->nvm = NULL; + return ret; +} + +void xe_nvm_fini(struct xe_device *xe) +{ + struct 
intel_dg_nvm_dev *nvm = xe->nvm; + + if (!xe->info.has_gsc_nvm) + return; + + /* No access to internal NVM from VFs */ + if (IS_SRIOV_VF(xe)) + return; + + /* Nvm pointer should not be NULL here */ + if (WARN_ON(!nvm)) + return; + + auxiliary_device_delete(&nvm->aux_dev); + auxiliary_device_uninit(&nvm->aux_dev); + kfree(nvm); + xe->nvm = NULL; +} diff --git a/drivers/gpu/drm/xe/xe_nvm.h b/drivers/gpu/drm/xe/xe_nvm.h new file mode 100644 index 000000000000..7f3d5f57bed0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_nvm.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2019-2025 Intel Corporation. All rights reserved. + */ + +#ifndef __XE_NVM_H__ +#define __XE_NVM_H__ + +struct xe_device; + +int xe_nvm_init(struct xe_device *xe); + +void xe_nvm_fini(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index fb842fa0552e..d991fbd90f20 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -43,6 +43,12 @@ #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) #define XE_OA_UNIT_INVALID U32_MAX +enum xe_oam_unit_type { + XE_OAM_UNIT_SAG, + XE_OAM_UNIT_SCMI_0, + XE_OAM_UNIT_SCMI_1, +}; + enum xe_oa_submit_deps { XE_OA_SUBMIT_NO_DEPS, XE_OA_SUBMIT_ADD_DEPS, @@ -77,7 +83,7 @@ struct xe_oa_config { struct xe_oa_open_param { struct xe_file *xef; - u32 oa_unit_id; + struct xe_oa_unit *oa_unit; bool sample; u32 metric_set; enum xe_oa_format_name oa_format; @@ -194,7 +200,7 @@ static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *l static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) { - return &stream->hwe->oa_unit->regs; + return &stream->oa_unit->regs; } static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) @@ -397,7 +403,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - int size_exponent = __ffs(stream->oa_buffer.bo->size); + int size_exponent = __ffs(xe_bo_size(stream->oa_buffer.bo)); u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; struct xe_mmio *mmio = &stream->gt->mmio; unsigned long flags; @@ -429,7 +435,7 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ - memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); + memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo)); } static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) @@ -454,7 +460,7 @@ static u32 __oa_ccs_select(struct xe_oa_stream *stream) static u32 __oactrl_used_bits(struct xe_oa_stream *stream) { - return stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? + return stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? OAG_OACONTROL_USED_BITS : OAM_OACONTROL_USED_BITS; } @@ -475,7 +481,7 @@ static void xe_oa_enable(struct xe_oa_stream *stream) __oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE; if (GRAPHICS_VER(stream->oa->xe) >= 20 && - stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) val |= OAG_OACONTROL_OA_PES_DISAG_EN; xe_mmio_rmw32(&stream->gt->mmio, regs->oa_ctrl, __oactrl_used_bits(stream), val); @@ -838,11 +844,16 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) /* Reset PMON Enable to save power. 
*/ xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0); + + if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM || + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) && + GRAPHICS_VER(stream->oa->xe) >= 30) + xe_mmio_rmw32(mmio, OAM_COMPRESSION_T3_CONTROL, OAM_LAT_MEASURE_ENABLE, 0); } static void xe_oa_stream_destroy(struct xe_oa_stream *stream) { - struct xe_oa_unit *u = stream->hwe->oa_unit; + struct xe_oa_unit *u = stream->oa_unit; struct xe_gt *gt = stream->hwe->gt; if (WARN_ON(stream != u->exclusive_stream)) @@ -1054,7 +1065,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) static u32 oag_buf_size_select(const struct xe_oa_stream *stream) { return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, - stream->oa_buffer.bo->size > SZ_16M ? + xe_bo_size(stream->oa_buffer.bo) > SZ_16M ? OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); } @@ -1105,9 +1116,13 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) */ sqcnt1 = SQCNT1_PMON_ENABLE | (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); - xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1); + if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM || + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) && + GRAPHICS_VER(stream->oa->xe) >= 30) + xe_mmio_rmw32(mmio, OAM_COMPRESSION_T3_CONTROL, 0, OAM_LAT_MEASURE_ENABLE); + /* Configure OAR/OAC */ if (stream->exec_q) { ret = xe_oa_configure_oa_context(stream, true); @@ -1139,14 +1154,31 @@ static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *n return -EINVAL; } +static struct xe_oa_unit *xe_oa_lookup_oa_unit(struct xe_oa *oa, u32 oa_unit_id) +{ + struct xe_gt *gt; + int gt_id, i; + + for_each_gt(gt, oa->xe, gt_id) { + for (i = 0; i < gt->oa.num_oa_units; i++) { + struct xe_oa_unit *u = >->oa.oa_unit[i]; + + if (u->oa_unit_id == oa_unit_id) + return u; + } + } + + return NULL; +} + static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, struct xe_oa_open_param *param) { - if (value >= oa->oa_unit_ids) { + param->oa_unit = xe_oa_lookup_oa_unit(oa, value); + if (!param->oa_unit) { drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); return -EINVAL; } - param->oa_unit_id = value; return 0; } @@ -1550,7 +1582,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) { - struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, }; + struct drm_xe_oa_stream_info info = { .oa_buf_size = xe_bo_size(stream->oa_buffer.bo), }; void __user *uaddr = (void __user *)arg; if (copy_to_user(uaddr, &info, sizeof(info))) @@ -1636,7 +1668,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) } /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ - if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) { + if (vma->vm_end - vma->vm_start != xe_bo_size(stream->oa_buffer.bo)) { drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); return -EINVAL; } @@ -1677,13 +1709,13 @@ static const struct file_operations xe_oa_fops = { static int xe_oa_stream_init(struct xe_oa_stream *stream, struct xe_oa_open_param *param) { - struct xe_oa_unit *u = param->hwe->oa_unit; struct xe_gt *gt = param->hwe->gt; unsigned int fw_ref; int ret; stream->exec_q = param->exec_q; stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS; + stream->oa_unit = param->oa_unit; stream->hwe = param->hwe; stream->gt = stream->hwe->gt; stream->oa_buffer.format = 
&stream->oa->oa_formats[param->oa_format]; @@ -1704,7 +1736,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, * buffer whose size, circ_size, is a multiple of the report size */ if (GRAPHICS_VER(stream->oa->xe) >= 20 && - stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) stream->oa_buffer.circ_size = param->oa_buffer_size - param->oa_buffer_size % stream->oa_buffer.format->size; @@ -1762,7 +1794,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n", stream->oa_config->uuid); - WRITE_ONCE(u->exclusive_stream, stream); + WRITE_ONCE(stream->oa_unit->exclusive_stream, stream); hrtimer_setup(&stream->poll_check_timer, xe_oa_poll_check_timer_cb, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -1798,7 +1830,7 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, int ret; /* We currently only allow exclusive access */ - if (param->hwe->oa_unit->exclusive_stream) { + if (param->oa_unit->exclusive_stream) { drm_dbg(&oa->xe->drm, "OA unit already in use\n"); ret = -EBUSY; goto exit; @@ -1874,13 +1906,14 @@ static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent) return div_u64(nom + den - 1, den); } -static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) +static bool oa_unit_supports_oa_format(struct xe_oa_open_param *param, int type) { - switch (hwe->oa_unit->type) { + switch (param->oa_unit->type) { case DRM_XE_OA_UNIT_TYPE_OAG: return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR || type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; case DRM_XE_OA_UNIT_TYPE_OAM: + case DRM_XE_OA_UNIT_TYPE_OAM_SAG: return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; default: return false; @@ -1899,37 +1932,48 @@ u16 xe_oa_unit_id(struct xe_hw_engine *hwe) hwe->oa_unit->oa_unit_id : U16_MAX; } +/* A hwe must be assigned to stream/oa_unit for batch submissions */ static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) { - struct xe_gt *gt; - int i, ret = 0; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int ret = 0; + + /* If not provided, OA unit defaults to OA unit 0 as per uapi */ + if (!param->oa_unit) + param->oa_unit = &xe_device_get_gt(oa->xe, 0)->oa.oa_unit[0]; + /* When we have an exec_q, get hwe from the exec_q */ if (param->exec_q) { - /* When we have an exec_q, get hwe from the exec_q */ param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class, param->engine_instance, true); - } else { - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - - /* Else just get the first hwe attached to the oa unit */ - for_each_gt(gt, oa->xe, i) { - for_each_hw_engine(hwe, gt, id) { - if (xe_oa_unit_id(hwe) == param->oa_unit_id) { - param->hwe = hwe; - goto out; - } - } - } + if (!param->hwe || param->hwe->oa_unit != param->oa_unit) + goto err; + goto out; } -out: - if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) { - drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", - param->exec_q ? 
param->exec_q->class : -1, - param->engine_instance, param->oa_unit_id); - ret = -EINVAL; + + /* Else just get the first hwe attached to the oa unit */ + for_each_hw_engine(hwe, param->oa_unit->gt, id) { + if (hwe->oa_unit == param->oa_unit) { + param->hwe = hwe; + goto out; + } } + /* If we still didn't find a hwe, just get one with a valid oa_unit from the same gt */ + for_each_hw_engine(hwe, param->oa_unit->gt, id) { + if (!hwe->oa_unit) + continue; + + param->hwe = hwe; + goto out; + } +err: + drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", + param->exec_q ? param->exec_q->class : -1, + param->engine_instance, param->oa_unit->oa_unit_id); + ret = -EINVAL; +out: return ret; } @@ -2007,7 +2051,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f f = &oa->oa_formats[param.oa_format]; if (!param.oa_format || !f->size || - !engine_supports_oa_format(param.hwe, f->type)) { + !oa_unit_supports_oa_format(¶m, f->type)) { drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n", param.oa_format, f->type, f->size, param.hwe->class); ret = -EINVAL; @@ -2155,6 +2199,7 @@ static const struct xe_mmio_range gen12_oa_mux_regs[] = { static const struct xe_mmio_range xe2_oa_mux_regs[] = { { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ + { .start = 0xB01C, .end = 0xB01C }, /* LNCF_MISC_CONFIG_REGISTER0 */ { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ { .start = 0xD0E0, .end = 0xD0F4 }, /* VISACTL */ { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ @@ -2448,20 +2493,38 @@ int xe_oa_register(struct xe_device *xe) static u32 num_oa_units_per_gt(struct xe_gt *gt) { - return 1; + if (xe_gt_is_main_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) + return 1; + else if (!IS_DGFX(gt_to_xe(gt))) + return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */ + else + return XE_OAM_UNIT_SCMI_1 + 1; /* SAG + SCMI_0 + SCMI_1 */ } static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) { - if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) { - /* - * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices - * within the gt use the same OAM. All MTL/LNL SKUs list 1 SA MEDIA - */ - xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA); + if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) < 1270) + return XE_OA_UNIT_INVALID; + + xe_gt_WARN_ON(hwe->gt, xe_gt_is_main_type(hwe->gt)); + if (GRAPHICS_VER(gt_to_xe(hwe->gt)) < 20) return 0; - } + /* + * XE_OAM_UNIT_SAG has only GSCCS attached to it, but only on some platforms. Also + * GSCCS cannot be used to submit batches to program the OAM unit. Therefore we don't + * assign an OA unit to GSCCS. This means that XE_OAM_UNIT_SAG is exposed as an OA + * unit without attached engines. Fused off engines can also result in oa_unit's with + * num_engines == 0. OA streams can be opened on all OA units. 
+ */ + else if (hwe->engine_id == XE_HW_ENGINE_GSCCS0) + return XE_OA_UNIT_INVALID; + else if (!IS_DGFX(gt_to_xe(hwe->gt))) + return XE_OAM_UNIT_SCMI_0; + else if (hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE) + return (hwe->instance / 2 & 0x1) + 1; + else if (hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE) + return (hwe->instance & 0x1) + 1; return XE_OA_UNIT_INVALID; } @@ -2475,6 +2538,7 @@ static u32 __hwe_oa_unit(struct xe_hw_engine *hwe) case XE_ENGINE_CLASS_VIDEO_DECODE: case XE_ENGINE_CLASS_VIDEO_ENHANCE: + case XE_ENGINE_CLASS_OTHER: return __hwe_oam_unit(hwe); default: @@ -2514,20 +2578,29 @@ static struct xe_oa_regs __oag_regs(void) static void __xe_oa_init_oa_units(struct xe_gt *gt) { - const u32 mtl_oa_base[] = { 0x13000 }; + /* Actual address is MEDIA_GT_GSI_OFFSET + oam_base_addr[i] */ + const u32 oam_base_addr[] = { + [XE_OAM_UNIT_SAG] = 0x13000, + [XE_OAM_UNIT_SCMI_0] = 0x14000, + [XE_OAM_UNIT_SCMI_1] = 0x14800, + }; int i, num_units = gt->oa.num_oa_units; for (i = 0; i < num_units; i++) { struct xe_oa_unit *u = >->oa.oa_unit[i]; - if (gt->info.type != XE_GT_TYPE_MEDIA) { + if (xe_gt_is_main_type(gt)) { u->regs = __oag_regs(); u->type = DRM_XE_OA_UNIT_TYPE_OAG; - } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { - u->regs = __oam_regs(mtl_oa_base[i]); - u->type = DRM_XE_OA_UNIT_TYPE_OAM; + } else { + xe_gt_assert(gt, GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270); + u->regs = __oam_regs(oam_base_addr[i]); + u->type = i == XE_OAM_UNIT_SAG && GRAPHICS_VER(gt_to_xe(gt)) >= 20 ? + DRM_XE_OA_UNIT_TYPE_OAM_SAG : DRM_XE_OA_UNIT_TYPE_OAM; } + u->gt = gt; + xe_mmio_write32(>->mmio, u->regs.oa_ctrl, 0); /* Ensure MMIO trigger remains disabled till there is a stream */ @@ -2560,10 +2633,6 @@ static int xe_oa_init_gt(struct xe_gt *gt) } } - /* - * Fused off engines can result in oa_unit's with num_engines == 0. These units - * will appear in OA unit query, but no OA streams can be opened on them. - */ gt->oa.num_oa_units = num_oa_units; gt->oa.oa_unit = u; @@ -2574,17 +2643,54 @@ static int xe_oa_init_gt(struct xe_gt *gt) return 0; } +static void xe_oa_print_gt_oa_units(struct xe_gt *gt) +{ + enum xe_hw_engine_id hwe_id; + struct xe_hw_engine *hwe; + struct xe_oa_unit *u; + char buf[256]; + int i, n; + + for (i = 0; i < gt->oa.num_oa_units; i++) { + u = >->oa.oa_unit[i]; + buf[0] = '\0'; + n = 0; + + for_each_hw_engine(hwe, gt, hwe_id) + if (xe_oa_unit_id(hwe) == u->oa_unit_id) + n += scnprintf(buf + n, sizeof(buf) - n, "%s ", hwe->name); + + xe_gt_dbg(gt, "oa_unit %d, type %d, Engines: %s\n", u->oa_unit_id, u->type, buf); + } +} + +static void xe_oa_print_oa_units(struct xe_oa *oa) +{ + struct xe_gt *gt; + int gt_id; + + for_each_gt(gt, oa->xe, gt_id) + xe_oa_print_gt_oa_units(gt); +} + static int xe_oa_init_oa_units(struct xe_oa *oa) { struct xe_gt *gt; int i, ret; + /* Needed for OAM implementation here */ + BUILD_BUG_ON(XE_OAM_UNIT_SAG != 0); + BUILD_BUG_ON(XE_OAM_UNIT_SCMI_0 != 1); + BUILD_BUG_ON(XE_OAM_UNIT_SCMI_1 != 2); + for_each_gt(gt, oa->xe, i) { ret = xe_oa_init_gt(gt); if (ret) return ret; } + xe_oa_print_oa_units(oa); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 52e33c37d5ee..2628f78c4e8d 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -95,6 +95,9 @@ struct xe_oa_unit { /** @oa_unit_id: identifier for the OA unit */ u16 oa_unit_id; + /** @gt: gt associated with the OA unit */ + struct xe_gt *gt; + /** @type: Type of OA unit - OAM, OAG etc. 
*/ enum drm_xe_oa_unit_type type; @@ -182,6 +185,9 @@ struct xe_oa_stream { /** @gt: gt associated with the oa stream */ struct xe_gt *gt; + /** @oa_unit: oa unit for this stream */ + struct xe_oa_unit *oa_unit; + /** @hwe: hardware engine associated with this oa stream */ struct xe_hw_engine *hwe; diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 30fdbdb9341e..2e7cb99ae87a 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -103,7 +103,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { * * Note: There is an implicit assumption in the driver that compression and * coh_1way+ are mutually exclusive. If this is ever not true then userptr - * and imported dma-buf from external device will have uncleared ccs state. + * and imported dma-buf from external device will have uncleared ccs state. See + * also xe_bo_needs_ccs_pages(). */ #define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \ { \ @@ -162,21 +163,35 @@ u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index) static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], int n_entries) { + struct xe_device *xe = gt_to_xe(gt); + for (int i = 0; i < n_entries; i++) { struct xe_reg reg = XE_REG(_PAT_INDEX(i)); xe_mmio_write32(>->mmio, reg, table[i].value); } + + if (xe->pat.pat_ats) + xe_mmio_write32(>->mmio, XE_REG(_PAT_ATS), xe->pat.pat_ats->value); + if (xe->pat.pat_pta) + xe_mmio_write32(>->mmio, XE_REG(_PAT_PTA), xe->pat.pat_pta->value); } static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry table[], int n_entries) { + struct xe_device *xe = gt_to_xe(gt); + for (int i = 0; i < n_entries; i++) { struct xe_reg_mcr reg_mcr = XE_REG_MCR(_PAT_INDEX(i)); xe_gt_mcr_multicast_write(gt, reg_mcr, table[i].value); } + + if (xe->pat.pat_ats) + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe->pat.pat_ats->value); + if (xe->pat.pat_pta) + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe->pat.pat_pta->value); } static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) @@ -303,26 +318,6 @@ static const struct xe_pat_ops xelpg_pat_ops = { .dump = xelpg_dump, }; -static void xe2lpg_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], - int n_entries) -{ - program_pat_mcr(gt, table, n_entries); - xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe2_pat_ats.value); - - if (IS_DGFX(gt_to_xe(gt))) - xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe2_pat_pta.value); -} - -static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], - int n_entries) -{ - program_pat(gt, table, n_entries); - xe_mmio_write32(>->mmio, XE_REG(_PAT_ATS), xe2_pat_ats.value); - - if (IS_DGFX(gt_to_xe(gt))) - xe_mmio_write32(>->mmio, XE_REG(_PAT_PTA), xe2_pat_pta.value); -} - static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); @@ -375,8 +370,8 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) } static const struct xe_pat_ops xe2_pat_ops = { - .program_graphics = xe2lpg_program_pat, - .program_media = xe2lpm_program_pat, + .program_graphics = program_pat_mcr, + .program_media = program_pat, .dump = xe2_dump, }; @@ -385,6 +380,9 @@ void xe_pat_init_early(struct xe_device *xe) if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) { xe->pat.ops = &xe2_pat_ops; xe->pat.table = xe2_pat_table; + xe->pat.pat_ats = &xe2_pat_ats; + if (IS_DGFX(xe)) + xe->pat.pat_pta = &xe2_pat_pta; /* Wa_16023588340. 
XXX: Should use XE_WA */ if (GRAPHICS_VERx100(xe) == 2001) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index ac4beaed58ff..3c40ef426f0c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -38,43 +38,6 @@ enum toggle_d3cold { D3COLD_ENABLE, }; -struct xe_subplatform_desc { - enum xe_subplatform subplatform; - const char *name; - const u16 *pciidlist; -}; - -struct xe_device_desc { - /* Should only ever be set for platforms without GMD_ID */ - const struct xe_ip *pre_gmdid_graphics_ip; - /* Should only ever be set for platforms without GMD_ID */ - const struct xe_ip *pre_gmdid_media_ip; - - const char *platform_name; - const struct xe_subplatform_desc *subplatforms; - - enum xe_platform platform; - - u8 dma_mask_size; - u8 max_remote_tiles:2; - - u8 require_force_probe:1; - u8 is_dgfx:1; - - u8 has_display:1; - u8 has_fan_control:1; - u8 has_heci_gscfi:1; - u8 has_heci_cscfi:1; - u8 has_llc:1; - u8 has_mbx_power_limits:1; - u8 has_pxp:1; - u8 has_sriov:1; - u8 needs_scratch:1; - u8 skip_guc_pc:1; - u8 skip_mtcfg:1; - u8 skip_pcode:1; -}; - __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); @@ -140,7 +103,6 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ - .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ .has_64bit_timestamp = 1, \ @@ -180,9 +142,11 @@ static const struct xe_ip graphics_ips[] = { { 1271, "Xe_LPG", &graphics_xelpg }, { 1274, "Xe_LPG+", &graphics_xelpg }, { 2001, "Xe2_HPG", &graphics_xe2 }, + { 2002, "Xe2_HPG", &graphics_xe2 }, { 2004, "Xe2_LPG", &graphics_xe2 }, { 3000, "Xe3_LPG", &graphics_xe2 }, { 3001, "Xe3_LPG", &graphics_xe2 }, + { 3003, "Xe3_LPG", &graphics_xe2 }, }; /* Pre-GMDID Media IPs */ @@ -195,6 +159,7 @@ static const struct xe_ip media_ips[] = { { 1301, "Xe2_HPM", &media_xelpmp }, { 2000, "Xe2_LPM", &media_xelpmp }, { 3000, "Xe3_LPM", &media_xelpmp }, + { 3002, "Xe3_LPM", &media_xelpmp }, }; static const struct xe_device_desc tgl_desc = { @@ -204,6 +169,7 @@ static const struct xe_device_desc tgl_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -214,6 +180,7 @@ static const struct xe_device_desc rkl_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -226,6 +193,7 @@ static const struct xe_device_desc adl_s_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, @@ -242,6 +210,7 @@ static const struct xe_device_desc adl_p_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, @@ -256,6 +225,7 @@ static const struct xe_device_desc adl_n_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -269,7 +239,9 @@ static const struct xe_device_desc dg1_desc = { PLATFORM(DG1), .dma_mask_size = 39, .has_display = true, + .has_gsc_nvm = 1, .has_heci_gscfi = 1, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -280,6 +252,7 @@ static const u16 dg2_g12_ids[] = { INTEL_DG2_G12_IDS(NOP), 0 }; 
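The max_gt_per_tile field being added to these descriptors gives every GT a fixed id slot within its tile, which the gt->info.id assignments in xe_info_init() further down (and the gt_count recalculation in mmio_multi_tile_setup() earlier in the patch) rely on. A small sketch of the implied numbering, not part of the patch; the helper names are hypothetical:

/*
 * Sketch only: the GT-id layout implied by max_gt_per_tile. The helper names
 * are hypothetical; the formulas match the gt->info.id assignments below.
 */
static inline unsigned int example_primary_gt_id(unsigned int tile_id,
                                                 unsigned int max_gt_per_tile)
{
        return tile_id * max_gt_per_tile;               /* main GT of the tile */
}

static inline unsigned int example_media_gt_id(unsigned int tile_id,
                                               unsigned int max_gt_per_tile)
{
        return tile_id * max_gt_per_tile + 1;           /* media GT, when present */
}

/*
 * With max_gt_per_tile == 2: tile 0 uses GT ids 0 (main) and 1 (media),
 * tile 1 uses ids 2 and 3, so GTs beyond the probed tile count can simply be
 * skipped when gt_count is recounted.
 */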
#define DG2_FEATURES \ DGFX_FEATURES, \ PLATFORM(DG2), \ + .has_gsc_nvm = 1, \ .has_heci_gscfi = 1, \ .subplatforms = (const struct xe_subplatform_desc[]) { \ { XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \ @@ -292,6 +265,7 @@ static const struct xe_device_desc ats_m_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, + .max_gt_per_tile = 1, .require_force_probe = true, DG2_FEATURES, @@ -302,6 +276,7 @@ static const struct xe_device_desc dg2_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, + .max_gt_per_tile = 1, .require_force_probe = true, DG2_FEATURES, @@ -316,7 +291,9 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { PLATFORM(PVC), .dma_mask_size = 52, .has_display = false, + .has_gsc_nvm = 1, .has_heci_gscfi = 1, + .max_gt_per_tile = 1, .max_remote_tiles = 1, .require_force_probe = true, .has_mbx_power_limits = false, @@ -329,6 +306,7 @@ static const struct xe_device_desc mtl_desc = { .dma_mask_size = 46, .has_display = true, .has_pxp = true, + .max_gt_per_tile = 2, }; static const struct xe_device_desc lnl_desc = { @@ -336,6 +314,7 @@ static const struct xe_device_desc lnl_desc = { .dma_mask_size = 46, .has_display = true, .has_pxp = true, + .max_gt_per_tile = 2, .needs_scratch = true, }; @@ -346,7 +325,10 @@ static const struct xe_device_desc bmg_desc = { .has_display = true, .has_fan_control = true, .has_mbx_power_limits = true, + .has_gsc_nvm = 1, .has_heci_cscfi = 1, + .has_sriov = true, + .max_gt_per_tile = 2, .needs_scratch = true, }; @@ -355,7 +337,7 @@ static const struct xe_device_desc ptl_desc = { .dma_mask_size = 46, .has_display = true, .has_sriov = true, - .require_force_probe = true, + .max_gt_per_tile = 2, .needs_scratch = true, }; @@ -589,6 +571,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.is_dgfx = desc->is_dgfx; xe->info.has_fan_control = desc->has_fan_control; xe->info.has_mbx_power_limits = desc->has_mbx_power_limits; + xe->info.has_gsc_nvm = desc->has_gsc_nvm; xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; xe->info.has_llc = desc->has_llc; @@ -602,6 +585,10 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.probe_display && desc->has_display; + + xe_assert(xe, desc->max_gt_per_tile > 0); + xe_assert(xe, desc->max_gt_per_tile <= XE_MAX_GT_PER_TILE); + xe->info.max_gt_per_tile = desc->max_gt_per_tile; xe->info.tile_count = 1 + desc->max_remote_tiles; err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); @@ -701,10 +688,11 @@ static int xe_info_init(struct xe_device *xe, */ for_each_tile(tile, xe, id) { gt = tile->primary_gt; - gt->info.id = xe->info.gt_count++; gt->info.type = XE_GT_TYPE_MAIN; + gt->info.id = tile->id * xe->info.max_gt_per_tile; gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; gt->info.engine_mask = graphics_desc->hw_engine_mask; + xe->info.gt_count++; if (MEDIA_VER(xe) < 13 && media_desc) gt->info.engine_mask |= media_desc->hw_engine_mask; @@ -722,17 +710,10 @@ static int xe_info_init(struct xe_device *xe, gt = tile->media_gt; gt->info.type = XE_GT_TYPE_MEDIA; + gt->info.id = tile->id * xe->info.max_gt_per_tile + 1; gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; gt->info.engine_mask = media_desc->hw_engine_mask; - - /* - * FIXME: At the moment multi-tile and standalone media are - * mutually exclusive 
on current platforms. We'll need to - * come up with a better way to number GTs if we ever wind - * up with platforms that support both together. - */ - drm_WARN_ON(&xe->drm, id != 0); - gt->info.id = xe->info.gt_count++; + xe->info.gt_count++; } return 0; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index ca6b10d35573..4de6f69ed975 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -8,6 +8,47 @@ #include <linux/types.h> +#include "xe_platform_types.h" + +struct xe_subplatform_desc { + enum xe_subplatform subplatform; + const char *name; + const u16 *pciidlist; +}; + +struct xe_device_desc { + /* Should only ever be set for platforms without GMD_ID */ + const struct xe_ip *pre_gmdid_graphics_ip; + /* Should only ever be set for platforms without GMD_ID */ + const struct xe_ip *pre_gmdid_media_ip; + + const char *platform_name; + const struct xe_subplatform_desc *subplatforms; + + enum xe_platform platform; + + u8 dma_mask_size; + u8 max_remote_tiles:2; + u8 max_gt_per_tile:2; + + u8 require_force_probe:1; + u8 is_dgfx:1; + + u8 has_display:1; + u8 has_fan_control:1; + u8 has_gsc_nvm:1; + u8 has_heci_gscfi:1; + u8 has_heci_cscfi:1; + u8 has_llc:1; + u8 has_mbx_power_limits:1; + u8 has_pxp:1; + u8 has_sriov:1; + u8 needs_scratch:1; + u8 skip_guc_pc:1; + u8 skip_mtcfg:1; + u8 skip_pcode:1; +}; + struct xe_graphics_desc { u8 va_bits; u8 vm_max_level; diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 9189117fe825..6a7ddb9005f9 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -336,3 +336,33 @@ int xe_pcode_probe_early(struct xe_device *xe) return xe_pcode_ready(xe, false); } ALLOW_ERROR_INJECTION(xe_pcode_probe_early, ERRNO); /* See xe_pci_probe */ + +/* Helpers with drm device. 
These should only be called by the display side */ +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) + +int intel_pcode_read(struct drm_device *drm, u32 mbox, u32 *val, u32 *val1) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_read(tile, mbox, val, val1); +} + +int intel_pcode_write_timeout(struct drm_device *drm, u32 mbox, u32 val, int timeout_ms) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_write_timeout(tile, mbox, val, timeout_ms); +} + +int intel_pcode_request(struct drm_device *drm, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_request(tile, mbox, request, reply_mask, reply, timeout_base_ms); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h index de38f44f3201..a5584c1c75f9 100644 --- a/drivers/gpu/drm/xe/xe_pcode.h +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -7,8 +7,10 @@ #define _XE_PCODE_H_ #include <linux/types.h> -struct xe_tile; + +struct drm_device; struct xe_device; +struct xe_tile; void xe_pcode_init(struct xe_tile *tile); int xe_pcode_probe_early(struct xe_device *xe); @@ -32,4 +34,12 @@ int xe_pcode_request(struct xe_tile *tile, u32 mbox, u32 request, | FIELD_PREP(PCODE_MB_PARAM1, param1)\ | FIELD_PREP(PCODE_MB_PARAM2, param2)) +/* Helpers with drm device */ +int intel_pcode_read(struct drm_device *drm, u32 mbox, u32 *val, u32 *val1); +int intel_pcode_write_timeout(struct drm_device *drm, u32 mbox, u32 val, int timeout_ms); +#define intel_pcode_write(drm, mbox, val) \ + intel_pcode_write_timeout((drm), (mbox), (val), 1) +int intel_pcode_request(struct drm_device *drm, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms); + #endif diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 0befdea77db1..92bfcba51e19 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -50,6 +50,21 @@ #define READ_PL_FROM_FW 0x1 #define READ_PL_FROM_PCODE 0x0 +#define PCODE_LATE_BINDING 0x5C +#define GET_CAPABILITY_STATUS 0x0 +#define V1_FAN_SUPPORTED REG_BIT(0) +#define VR_PARAMS_SUPPORTED REG_BIT(3) +#define V1_FAN_PROVISIONED REG_BIT(16) +#define VR_PARAMS_PROVISIONED REG_BIT(19) +#define GET_VERSION_LOW 0x1 +#define GET_VERSION_HIGH 0x2 +#define MAJOR_VERSION_MASK REG_GENMASK(31, 16) +#define MINOR_VERSION_MASK REG_GENMASK(15, 0) +#define HOTFIX_VERSION_MASK REG_GENMASK(31, 16) +#define BUILD_VERSION_MASK REG_GENMASK(15, 0) +#define FAN_TABLE 1 +#define VR_CONFIG 2 + #define PCODE_FREQUENCY_CONFIG 0x6e /* Frequency Config Sub Commands (param1) */ #define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0 diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index ff749edc005b..e279b47ba03b 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -19,6 +19,7 @@ #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_guc.h" +#include "xe_i2c.h" #include "xe_irq.h" #include "xe_pcode.h" #include "xe_pxp.h" @@ -134,7 +135,7 @@ int xe_pm_suspend(struct xe_device *xe) /* FIXME: Super racey... 
*/ err = xe_bo_evict_all(xe); if (err) - goto err_pxp; + goto err_display; for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); @@ -146,12 +147,13 @@ int xe_pm_suspend(struct xe_device *xe) xe_display_pm_suspend_late(xe); + xe_i2c_pm_suspend(xe); + drm_dbg(&xe->drm, "Device suspended\n"); return 0; err_display: xe_display_pm_resume(xe); -err_pxp: xe_pxp_pm_resume(xe->pxp); err: drm_dbg(&xe->drm, "Device suspend failed %d\n", err); @@ -191,6 +193,8 @@ int xe_pm_resume(struct xe_device *xe) if (err) goto err; + xe_i2c_pm_resume(xe, xe->d3cold.allowed); + xe_irq_resume(xe); for_each_gt(gt, xe, id) @@ -488,6 +492,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_display_pm_runtime_suspend_late(xe); + xe_i2c_pm_suspend(xe); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return 0; @@ -535,6 +541,8 @@ int xe_pm_runtime_resume(struct xe_device *xe) goto out; } + xe_i2c_pm_resume(xe, xe->d3cold.allowed); + xe_irq_resume(xe); for_each_gt(gt, xe, id) @@ -753,11 +761,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) } /** - * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold + * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold * @xe: xe device instance - * @threshold: VRAM size in bites for the D3cold threshold + * @threshold: VRAM size in MiB for the D3cold threshold * - * Returns 0 for success, negative error code otherwise. + * Return: + * * 0 - success + * * -EINVAL - invalid argument */ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) { diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c index 69df0e3520a5..cab51d826345 100644 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -157,10 +157,13 @@ static bool event_gt_forcewake(struct perf_event *event) return true; } -static bool event_supported(struct xe_pmu *pmu, unsigned int gt, +static bool event_supported(struct xe_pmu *pmu, unsigned int gt_id, unsigned int id) { - if (gt >= XE_MAX_GT_PER_TILE) + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + struct xe_gt *gt = xe_device_get_gt(xe, gt_id); + + if (!gt) return false; return id < sizeof(pmu->supported_events) * BITS_PER_BYTE && diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index b04756a97cdc..c8e63bd23300 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -907,6 +907,11 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); + if (xe_vma_bo(vma)) + xe_bo_assert_held(xe_vma_bo(vma)); + else if (xe_vma_is_userptr(vma)) + lockdep_assert_held(&xe_vma_vm(vma)->userptr.notifier_lock); + if (!(pt_mask & BIT(tile->id))) return false; @@ -1458,6 +1463,7 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) struct xe_vm *vm = pt_update->vops->vm; struct xe_vma_ops *vops = pt_update->vops; struct xe_vma_op *op; + unsigned long i; int err; err = xe_pt_pre_commit(pt_update); @@ -1467,20 +1473,35 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) xe_svm_notifier_lock(vm); list_for_each_entry(op, &vops->list, link) { - struct xe_svm_range *range = op->map_range.range; + struct xe_svm_range *range = NULL; if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) continue; - xe_svm_range_debug(range, "PRE-COMMIT"); + if (op->base.op == DRM_GPUVA_OP_PREFETCH) { + xe_assert(vm->xe, + xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))); + 
xa_for_each(&op->prefetch_range.range, i, range) { + xe_svm_range_debug(range, "PRE-COMMIT"); - xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma)); - xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE); + if (!xe_svm_range_pages_valid(range)) { + xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); + xe_svm_notifier_unlock(vm); + return -ENODATA; + } + } + } else { + xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma)); + xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE); + range = op->map_range.range; - if (!xe_svm_range_pages_valid(range)) { - xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); - xe_svm_notifier_unlock(vm); - return -EAGAIN; + xe_svm_range_debug(range, "PRE-COMMIT"); + + if (!xe_svm_range_pages_valid(range)) { + xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); + xe_svm_notifier_unlock(vm); + return -EAGAIN; + } } } @@ -1974,6 +1995,32 @@ static int unbind_op_prepare(struct xe_tile *tile, return 0; } +static bool +xe_pt_op_check_range_skip_invalidation(struct xe_vm_pgtable_update_op *pt_op, + struct xe_svm_range *range) +{ + struct xe_vm_pgtable_update *update = pt_op->entries; + + XE_WARN_ON(!pt_op->num_entries); + + /* + * We can't skip the invalidation if we are removing PTEs that span more + * than the range, do some checks to ensure we are removing PTEs that + * are invalid. + */ + + if (pt_op->num_entries > 1) + return false; + + if (update->pt->level == 0) + return true; + + if (update->pt->level == 1) + return xe_svm_range_size(range) >= SZ_2M; + + return false; +} + static int unbind_range_prepare(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, @@ -2002,7 +2049,10 @@ static int unbind_range_prepare(struct xe_vm *vm, range->base.itree.last + 1); ++pt_update_ops->current_op; pt_update_ops->needs_svm_lock = true; - pt_update_ops->needs_invalidation = true; + pt_update_ops->needs_invalidation |= xe_vm_has_scratch(vm) || + xe_vm_has_valid_gpu_mapping(tile, range->tile_present, + range->tile_invalidated) || + !xe_pt_op_check_range_skip_invalidation(pt_op, range); xe_pt_commit_prepare_unbind(XE_INVALID_VMA, pt_op->entries, pt_op->num_entries); @@ -2065,11 +2115,20 @@ static int op_prepare(struct xe_vm *vm, { struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); - if (xe_vma_is_cpu_addr_mirror(vma)) - break; + if (xe_vma_is_cpu_addr_mirror(vma)) { + struct xe_svm_range *range; + unsigned long i; - err = bind_op_prepare(vm, tile, pt_update_ops, vma, false); - pt_update_ops->wait_vm_kernel = true; + xa_for_each(&op->prefetch_range.range, i, range) { + err = bind_range_prepare(vm, tile, pt_update_ops, + vma, range); + if (err) + return err; + } + } else { + err = bind_op_prepare(vm, tile, pt_update_ops, vma, false); + pt_update_ops->wait_vm_kernel = true; + } break; } case DRM_GPUVA_OP_DRIVER: @@ -2166,10 +2225,15 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); } - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); + /* All WRITE_ONCE pair with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ + WRITE_ONCE(vma->tile_present, vma->tile_present | BIT(tile->id)); if (invalidate_on_bind) - vma->tile_invalidated |= BIT(tile->id); + WRITE_ONCE(vma->tile_invalidated, + vma->tile_invalidated | BIT(tile->id)); + else + WRITE_ONCE(vma->tile_invalidated, + vma->tile_invalidated & ~BIT(tile->id)); + vma->tile_staged &= ~BIT(tile->id); if (xe_vma_is_userptr(vma)) { lockdep_assert_held_read(&vm->userptr.notifier_lock); 
to_userptr_vma(vma)->userptr.initial_bind = true; @@ -2216,6 +2280,18 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, } } +static void range_present_and_invalidated_tile(struct xe_vm *vm, + struct xe_svm_range *range, + u8 tile_id) +{ + /* All WRITE_ONCE pair with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ + + lockdep_assert_held(&vm->svm.gpusvm.notifier_lock); + + WRITE_ONCE(range->tile_present, range->tile_present | BIT(tile_id)); + WRITE_ONCE(range->tile_invalidated, range->tile_invalidated & ~BIT(tile_id)); +} + static void op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, @@ -2263,27 +2339,28 @@ static void op_commit(struct xe_vm *vm, { struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); - if (!xe_vma_is_cpu_addr_mirror(vma)) + if (xe_vma_is_cpu_addr_mirror(vma)) { + struct xe_svm_range *range = NULL; + unsigned long i; + + xa_for_each(&op->prefetch_range.range, i, range) + range_present_and_invalidated_tile(vm, range, tile->id); + } else { bind_op_commit(vm, tile, pt_update_ops, vma, fence, fence2, false); + } break; } case DRM_GPUVA_OP_DRIVER: { - /* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */ - - if (op->subop == XE_VMA_SUBOP_MAP_RANGE) { - WRITE_ONCE(op->map_range.range->tile_present, - op->map_range.range->tile_present | - BIT(tile->id)); - WRITE_ONCE(op->map_range.range->tile_invalidated, - op->map_range.range->tile_invalidated & - ~BIT(tile->id)); - } else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) { + /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ + if (op->subop == XE_VMA_SUBOP_MAP_RANGE) + range_present_and_invalidated_tile(vm, op->map_range.range, tile->id); + else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) WRITE_ONCE(op->unmap_range.range->tile_present, op->unmap_range.range->tile_present & ~BIT(tile->id)); - } + break; } default: @@ -2476,7 +2553,7 @@ free_ifence: kfree(mfence); kfree(ifence); kill_vm_tile1: - if (err != -EAGAIN && tile->id) + if (err != -EAGAIN && err != -ENODATA && tile->id) xe_vm_kill(vops->vm, false); return ERR_PTR(err); diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c index b5bc15f436fa..3d62008c99f1 100644 --- a/drivers/gpu/drm/xe/xe_pxp.c +++ b/drivers/gpu/drm/xe/xe_pxp.c @@ -504,69 +504,62 @@ int xe_pxp_exec_queue_set_type(struct xe_pxp *pxp, struct xe_exec_queue *q, u8 t return 0; } -static void __exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) +static int __exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) { - spin_lock_irq(&pxp->queues.lock); - list_add_tail(&q->pxp.link, &pxp->queues.list); - spin_unlock_irq(&pxp->queues.lock); + int ret = 0; + + /* + * A queue can be added to the list only if the PXP is in active status, + * otherwise the termination might not handle it correctly. + */ + mutex_lock(&pxp->mutex); + + if (pxp->status == XE_PXP_ACTIVE) { + spin_lock_irq(&pxp->queues.lock); + list_add_tail(&q->pxp.link, &pxp->queues.list); + spin_unlock_irq(&pxp->queues.lock); + } else if (pxp->status == XE_PXP_ERROR || pxp->status == XE_PXP_SUSPENDED) { + ret = -EIO; + } else { + ret = -EBUSY; /* try again later */ + } + + mutex_unlock(&pxp->mutex); + + return ret; } -/** - * xe_pxp_exec_queue_add - add a queue to the PXP list - * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) - * @q: the queue to add to the list - * - * If PXP is enabled and the prerequisites are done, start the PXP ARB - * session (if not already running) and add the queue to the PXP list. 
Note - * that the queue must have previously been marked as using PXP with - * xe_pxp_exec_queue_set_type. - * - * Returns 0 if the PXP ARB session is running and the queue is in the list, - * -ENODEV if PXP is disabled, -EBUSY if the PXP prerequisites are not done, - * other errno value if something goes wrong during the session start. - */ -int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) +static int pxp_start(struct xe_pxp *pxp, u8 type) { int ret = 0; + bool restart = false; if (!xe_pxp_is_enabled(pxp)) return -ENODEV; /* we only support HWDRM sessions right now */ - xe_assert(pxp->xe, q->pxp.type == DRM_XE_PXP_TYPE_HWDRM); - - /* - * Runtime suspend kills PXP, so we take a reference to prevent it from - * happening while we have active queues that use PXP - */ - xe_pm_runtime_get(pxp->xe); + xe_assert(pxp->xe, type == DRM_XE_PXP_TYPE_HWDRM); /* get_readiness_status() returns 0 for in-progress and 1 for done */ ret = xe_pxp_get_readiness_status(pxp); - if (ret <= 0) { - if (!ret) - ret = -EBUSY; - goto out; - } + if (ret <= 0) + return ret ?: -EBUSY; + ret = 0; wait_for_idle: /* * if there is an action in progress, wait for it. We need to wait * outside the lock because the completion is done from within the lock. - * Note that the two action should never be pending at the same time. + * Note that the two actions should never be pending at the same time. */ if (!wait_for_completion_timeout(&pxp->termination, - msecs_to_jiffies(PXP_TERMINATION_TIMEOUT_MS))) { - ret = -ETIMEDOUT; - goto out; - } + msecs_to_jiffies(PXP_TERMINATION_TIMEOUT_MS))) + return -ETIMEDOUT; if (!wait_for_completion_timeout(&pxp->activation, - msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS))) { - ret = -ETIMEDOUT; - goto out; - } + msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS))) + return -ETIMEDOUT; mutex_lock(&pxp->mutex); @@ -574,11 +567,9 @@ wait_for_idle: switch (pxp->status) { case XE_PXP_ERROR: ret = -EIO; - break; + goto out_unlock; case XE_PXP_ACTIVE: - __exec_queue_add(pxp, q); - mutex_unlock(&pxp->mutex); - goto out; + goto out_unlock; case XE_PXP_READY_TO_START: pxp->status = XE_PXP_START_IN_PROGRESS; reinit_completion(&pxp->activation); @@ -586,8 +577,8 @@ wait_for_idle: case XE_PXP_START_IN_PROGRESS: /* If a start is in progress then the completion must not be done */ XE_WARN_ON(completion_done(&pxp->activation)); - mutex_unlock(&pxp->mutex); - goto wait_for_idle; + restart = true; + goto out_unlock; case XE_PXP_NEEDS_TERMINATION: mark_termination_in_progress(pxp); break; @@ -595,29 +586,25 @@ wait_for_idle: case XE_PXP_NEEDS_ADDITIONAL_TERMINATION: /* If a termination is in progress then the completion must not be done */ XE_WARN_ON(completion_done(&pxp->termination)); - mutex_unlock(&pxp->mutex); - goto wait_for_idle; + restart = true; + goto out_unlock; case XE_PXP_SUSPENDED: default: drm_err(&pxp->xe->drm, "unexpected state during PXP start: %u\n", pxp->status); ret = -EIO; - break; + goto out_unlock; } mutex_unlock(&pxp->mutex); - if (ret) - goto out; - if (!completion_done(&pxp->termination)) { ret = pxp_terminate_hw(pxp); if (ret) { drm_err(&pxp->xe->drm, "PXP termination failed before start\n"); mutex_lock(&pxp->mutex); pxp->status = XE_PXP_ERROR; - mutex_unlock(&pxp->mutex); - goto out; + goto out_unlock; } goto wait_for_idle; @@ -639,21 +626,59 @@ wait_for_idle: if (pxp->status != XE_PXP_START_IN_PROGRESS) { drm_err(&pxp->xe->drm, "unexpected state after PXP start: %u\n", pxp->status); pxp->status = XE_PXP_NEEDS_TERMINATION; - mutex_unlock(&pxp->mutex); - goto wait_for_idle; + 
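/*
 * The reworked pxp_start() records "restart" instead of re-entering
 * wait_for_idle with the mutex handled in several places; the lock is
 * dropped once at out_unlock and the wait loop is re-entered from there
 * whenever restart is set.
 */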
restart = true; + goto out_unlock; } /* If everything went ok, update the status and add the queue to the list */ - if (!ret) { + if (!ret) pxp->status = XE_PXP_ACTIVE; - __exec_queue_add(pxp, q); - } else { + else pxp->status = XE_PXP_ERROR; - } +out_unlock: mutex_unlock(&pxp->mutex); -out: + if (restart) + goto wait_for_idle; + + return ret; +} + +/** + * xe_pxp_exec_queue_add - add a queue to the PXP list + * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) + * @q: the queue to add to the list + * + * If PXP is enabled and the prerequisites are done, start the PXP default + * session (if not already running) and add the queue to the PXP list. + * + * Returns 0 if the PXP session is running and the queue is in the list, + * -ENODEV if PXP is disabled, -EBUSY if the PXP prerequisites are not done, + * other errno value if something goes wrong during the session start. + */ +int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) +{ + int ret; + + if (!xe_pxp_is_enabled(pxp)) + return -ENODEV; + + /* + * Runtime suspend kills PXP, so we take a reference to prevent it from + * happening while we have active queues that use PXP + */ + xe_pm_runtime_get(pxp->xe); + +start: + ret = pxp_start(pxp, q->pxp.type); + + if (!ret) { + ret = __exec_queue_add(pxp, q); + if (ret == -EBUSY) + goto start; + } + /* * in the successful case the PM ref is released from * xe_pxp_exec_queue_remove diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 2dbf4066d86f..d517ec9ddcbf 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -141,7 +141,7 @@ query_engine_cycles(struct xe_device *xe, return -EINVAL; eci = &resp.eci; - if (eci->gt_id >= XE_MAX_GT_PER_TILE) + if (eci->gt_id >= xe->info.max_gt_per_tile) return -EINVAL; gt = xe_device_get_gt(xe, eci->gt_id); @@ -368,6 +368,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query struct drm_xe_query_gt_list __user *query_ptr = u64_to_user_ptr(query->data); struct drm_xe_query_gt_list *gt_list; + int iter = 0; u8 id; if (query->size == 0) { @@ -385,12 +386,12 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) - gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA; + gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MEDIA; else - gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN; - gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id; - gt_list->gt_list[id].gt_id = gt->info.id; - gt_list->gt_list[id].reference_clock = gt->info.reference_clock; + gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MAIN; + gt_list->gt_list[iter].tile_id = gt_to_tile(gt)->id; + gt_list->gt_list[iter].gt_id = gt->info.id; + gt_list->gt_list[iter].reference_clock = gt->info.reference_clock; /* * The mem_regions indexes in the mask below need to * directly identify the struct @@ -406,19 +407,21 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query * assumption. 
*/ if (!IS_DGFX(xe)) - gt_list->gt_list[id].near_mem_regions = 0x1; + gt_list->gt_list[iter].near_mem_regions = 0x1; else - gt_list->gt_list[id].near_mem_regions = + gt_list->gt_list[iter].near_mem_regions = BIT(gt_to_tile(gt)->id) << 1; - gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^ - gt_list->gt_list[id].near_mem_regions; + gt_list->gt_list[iter].far_mem_regions = xe->info.mem_region_mask ^ + gt_list->gt_list[iter].near_mem_regions; - gt_list->gt_list[id].ip_ver_major = + gt_list->gt_list[iter].ip_ver_major = REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid); - gt_list->gt_list[id].ip_ver_minor = + gt_list->gt_list[iter].ip_ver_minor = REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid); - gt_list->gt_list[id].ip_ver_rev = + gt_list->gt_list[iter].ip_ver_rev = REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid); + + iter++; } if (copy_to_user(query_ptr, gt_list, size)) { @@ -683,8 +686,8 @@ static int query_oa_units(struct xe_device *xe, du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | DRM_XE_OA_CAPS_OA_BUFFER_SIZE | - DRM_XE_OA_CAPS_WAIT_NUM_REPORTS; - + DRM_XE_OA_CAPS_WAIT_NUM_REPORTS | + DRM_XE_OA_CAPS_OAM; j = 0; for_each_hw_engine(hwe, gt, hwe_id) { if (!xe_hw_engine_is_reserved(hwe) && diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index bc1689db4cd7..7b50c7c1ee21 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -110,13 +110,14 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) return i; } -static int emit_flush_invalidate(u32 *dw, int i) +static int emit_flush_invalidate(u32 addr, u32 val, u32 *dw, int i) { dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | - MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX; - dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR; - dw[i++] = 0; + MI_FLUSH_IMM_DW; + + dw[i++] = addr | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; + dw[i++] = val; return i; } @@ -397,23 +398,20 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, static void emit_migration_job_gen12(struct xe_sched_job *job, struct xe_lrc *lrc, u32 seqno) { + u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc); u32 dw[MAX_JOB_SIZE_DW], i = 0; i = emit_copy_timestamp(lrc, dw, i); - i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), - seqno, dw, i); + i = emit_store_imm_ggtt(saddr, seqno, dw, i); dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */ i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i); - if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) { - /* XXX: Do we need this? Leaving for now. 
*/ - dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(dw, i); - dw[i++] = preparser_disable(false); - } + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(saddr, seqno, dw, i); + dw[i++] = preparser_disable(false); i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i); diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 29e694bb1219..95571b87aa73 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -56,37 +56,61 @@ static bool rule_matches(const struct xe_device *xe, xe->info.subplatform == r->subplatform; break; case XE_RTP_MATCH_GRAPHICS_VERSION: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 == r->ver_start && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 >= r->ver_start && xe->info.graphics_verx100 <= r->ver_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 == r->ver_start; break; case XE_RTP_MATCH_GRAPHICS_STEP: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.step.graphics >= r->step_start && xe->info.step.graphics < r->step_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 == r->ver_start && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_RANGE: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 >= r->ver_start && xe->info.media_verx100 <= r->ver_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_STEP: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.step.media >= r->step_start && xe->info.step.media < r->step_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_ANY_GT: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 == r->ver_start; break; case XE_RTP_MATCH_INTEGRATED: @@ -108,6 +132,9 @@ static bool rule_matches(const struct xe_device *xe, match = hwe->class != r->engine_class; break; case XE_RTP_MATCH_FUNC: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = r->match_func(gt, hwe); break; default: @@ -186,6 +213,11 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx, struct xe_device **xe) { switch (ctx->type) { + case XE_RTP_PROCESS_TYPE_DEVICE: + *hwe = NULL; + *gt = NULL; + *xe = ctx->xe; + break; case XE_RTP_PROCESS_TYPE_GT: *hwe = NULL; *gt = ctx->gt; @@ -326,21 +358,6 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, hwe->engine_id == __ffs(render_compute_mask); } -bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, - const struct xe_hw_engine *hwe) -{ - unsigned int dss_per_gslice = 4; - unsigned int dss; - - if (drm_WARN(>_to_xe(gt)->drm, xe_dss_mask_empty(gt->fuse_topo.g_dss_mask), - "Checking gslice for platform without geometry pipeline\n")) - return false; - - dss = xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0); - - return dss >= dss_per_gslice; -} - bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, const struct xe_hw_engine *hwe) { diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 4fe736a11c42..5ed6c14b9ae3 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h 
+++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -422,7 +422,8 @@ struct xe_reg_sr; #define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__), \ struct xe_hw_engine * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \ - struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }) + struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }, \ + struct xe_device * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_DEVICE }) void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, unsigned long *active_entries, @@ -466,17 +467,6 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, const struct xe_hw_engine *hwe); /* - * xe_rtp_match_first_gslice_fused_off - Match when first gslice is fused off - * - * @gt: GT structure - * @hwe: Engine instance - * - * Returns: true if first gslice is fused off, false otherwise. - */ -bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, - const struct xe_hw_engine *hwe); - -/* * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device * * @gt: GT structure diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 1b76b947c706..f4cf30e298cf 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -110,12 +110,14 @@ struct xe_rtp_entry { }; enum xe_rtp_process_type { + XE_RTP_PROCESS_TYPE_DEVICE, XE_RTP_PROCESS_TYPE_GT, XE_RTP_PROCESS_TYPE_ENGINE, }; struct xe_rtp_process_ctx { union { + struct xe_device *xe; struct xe_gt *gt; struct xe_hw_engine *hwe; }; diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 1905ca590965..d21bf8f26964 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -113,7 +113,8 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, kref_init(&job->refcount); xe_exec_queue_get(job->q); - err = drm_sched_job_init(&job->drm, q->entity, 1, NULL); + err = drm_sched_job_init(&job->drm, q->entity, 1, NULL, + q->xef ? q->xef->drm->client_id : 0); if (err) goto err_free; @@ -216,15 +217,17 @@ void xe_sched_job_set_error(struct xe_sched_job *job, int error) bool xe_sched_job_started(struct xe_sched_job *job) { + struct dma_fence *fence = dma_fence_chain_contained(job->fence); struct xe_lrc *lrc = job->q->lrc[0]; - return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job), - xe_lrc_start_seqno(lrc), - dma_fence_chain_contained(job->fence)->ops); + return !__dma_fence_is_later(fence, + xe_sched_job_lrc_seqno(job), + xe_lrc_start_seqno(lrc)); } bool xe_sched_job_completed(struct xe_sched_job *job) { + struct dma_fence *fence = dma_fence_chain_contained(job->fence); struct xe_lrc *lrc = job->q->lrc[0]; /* @@ -232,9 +235,9 @@ bool xe_sched_job_completed(struct xe_sched_job *job) * parallel handshake is done. 
*/ - return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job), - xe_lrc_seqno(lrc), - dma_fence_chain_contained(job->fence)->ops); + return !__dma_fence_is_later(fence, + xe_sched_job_lrc_seqno(job), + xe_lrc_seqno(lrc)); } void xe_sched_job_arm(struct xe_sched_job *job) diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 86d47aaf0358..1c3c04d52f55 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -5,6 +5,7 @@ #include <linux/shrinker.h> +#include <drm/drm_managed.h> #include <drm/ttm/ttm_backup.h> #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_tt.h> @@ -65,11 +66,15 @@ static s64 xe_shrinker_walk(struct xe_device *xe, struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, mem_type); struct ttm_bo_lru_cursor curs; struct ttm_buffer_object *ttm_bo; + struct ttm_lru_walk_arg arg = { + .ctx = ctx, + .trylock_only = true, + }; if (!man || !man->use_tt) continue; - ttm_bo_lru_for_each_reserved_guarded(&curs, man, ctx, ttm_bo) { + ttm_bo_lru_for_each_reserved_guarded(&curs, man, &arg, ttm_bo) { if (!ttm_bo_shrink_suitable(ttm_bo, ctx)) continue; @@ -81,6 +86,8 @@ static s64 xe_shrinker_walk(struct xe_device *xe, if (*scanned >= to_scan) break; } + /* Trylocks should never error, just fail. */ + xe_assert(xe, !IS_ERR(ttm_bo)); } return freed; @@ -213,24 +220,34 @@ static void xe_shrinker_pm(struct work_struct *work) xe_pm_runtime_put(shrinker->xe); } +static void xe_shrinker_fini(struct drm_device *drm, void *arg) +{ + struct xe_shrinker *shrinker = arg; + + xe_assert(shrinker->xe, !shrinker->shrinkable_pages); + xe_assert(shrinker->xe, !shrinker->purgeable_pages); + shrinker_free(shrinker->shrink); + flush_work(&shrinker->pm_worker); + kfree(shrinker); +} + /** * xe_shrinker_create() - Create an xe per-device shrinker * @xe: Pointer to the xe device. * - * Returns: A pointer to the created shrinker on success, - * Negative error code on failure. + * Return: %0 on success. Negative error code on failure. */ -struct xe_shrinker *xe_shrinker_create(struct xe_device *xe) +int xe_shrinker_create(struct xe_device *xe) { struct xe_shrinker *shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL); if (!shrinker) - return ERR_PTR(-ENOMEM); + return -ENOMEM; shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique); if (!shrinker->shrink) { kfree(shrinker); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } INIT_WORK(&shrinker->pm_worker, xe_shrinker_pm); @@ -240,19 +257,7 @@ struct xe_shrinker *xe_shrinker_create(struct xe_device *xe) shrinker->shrink->scan_objects = xe_shrinker_scan; shrinker->shrink->private_data = shrinker; shrinker_register(shrinker->shrink); + xe->mem.shrinker = shrinker; - return shrinker; -} - -/** - * xe_shrinker_destroy() - Destroy an xe per-device shrinker - * @shrinker: Pointer to the shrinker to destroy. 
- */ -void xe_shrinker_destroy(struct xe_shrinker *shrinker) -{ - xe_assert(shrinker->xe, !shrinker->shrinkable_pages); - xe_assert(shrinker->xe, !shrinker->purgeable_pages); - shrinker_free(shrinker->shrink); - flush_work(&shrinker->pm_worker); - kfree(shrinker); + return drmm_add_action_or_reset(&xe->drm, xe_shrinker_fini, shrinker); } diff --git a/drivers/gpu/drm/xe/xe_shrinker.h b/drivers/gpu/drm/xe/xe_shrinker.h index 28a038f4fcbf..5132ae5192e1 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.h +++ b/drivers/gpu/drm/xe/xe_shrinker.h @@ -11,8 +11,6 @@ struct xe_device; void xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgeable); -struct xe_shrinker *xe_shrinker_create(struct xe_device *xe); - -void xe_shrinker_destroy(struct xe_shrinker *shrinker); +int xe_shrinker_create(struct xe_device *xe); #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 0f721ae17b26..afbdd894bd6e 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -3,6 +3,8 @@ * Copyright © 2023-2024 Intel Corporation */ +#include <linux/debugfs.h> +#include <drm/drm_debugfs.h> #include <drm/drm_managed.h> #include "xe_assert.h" @@ -10,6 +12,8 @@ #include "xe_module.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_service.h" #include "xe_sriov_printk.h" static unsigned int wanted_max_vfs(struct xe_device *xe) @@ -80,9 +84,22 @@ bool xe_sriov_pf_readiness(struct xe_device *xe) */ int xe_sriov_pf_init_early(struct xe_device *xe) { + int err; + xe_assert(xe, IS_SRIOV_PF(xe)); - return drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); + xe->sriov.pf.vfs = drmm_kcalloc(&xe->drm, 1 + xe_sriov_pf_get_totalvfs(xe), + sizeof(*xe->sriov.pf.vfs), GFP_KERNEL); + if (!xe->sriov.pf.vfs) + return -ENOMEM; + + err = drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); + if (err) + return err; + + xe_sriov_pf_service_init(xe); + + return 0; } /** @@ -102,3 +119,45 @@ void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p) drm_printf(p, "supported: %u\n", xe->sriov.pf.driver_max_vfs); drm_printf(p, "enabled: %u\n", pci_num_vf(pdev)); } + +static int simple_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct dentry *parent = node->dent->d_parent; + struct xe_device *xe = parent->d_inode->i_private; + void (*print)(struct xe_device *, struct drm_printer *) = node->info_ent->data; + + print(xe, &p); + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + { .name = "vfs", .show = simple_show, .data = xe_sriov_pf_print_vfs_summary }, + { .name = "versions", .show = simple_show, .data = xe_sriov_pf_service_print_versions }, +}; + +/** + * xe_sriov_pf_debugfs_register - Register PF debugfs attributes. + * @xe: the &xe_device + * @root: the root &dentry + * + * Prepare debugfs attributes exposed by the PF. + */ +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ + struct drm_minor *minor = xe->drm.primary; + struct dentry *parent; + + /* + * /sys/kernel/debug/dri/0/ + * ├── pf + * │  ├── ... 
+ */ + parent = debugfs_create_dir("pf", root); + if (IS_ERR(parent)) + return; + parent->d_inode->i_private = xe; + + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), parent, minor); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h index d1220e70e1c0..c392c3fcf085 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf.h @@ -8,12 +8,14 @@ #include <linux/types.h> +struct dentry; struct drm_printer; struct xe_device; #ifdef CONFIG_PCI_IOV bool xe_sriov_pf_readiness(struct xe_device *xe); int xe_sriov_pf_init_early(struct xe_device *xe); +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root); void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); #else static inline bool xe_sriov_pf_readiness(struct xe_device *xe) @@ -25,6 +27,10 @@ static inline int xe_sriov_pf_init_early(struct xe_device *xe) { return 0; } + +static inline void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ +} #endif #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_sriov_pf_service.c new file mode 100644 index 000000000000..eee3b2a1ba41 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#include "abi/guc_relay_actions_abi.h" + +#include "xe_device_types.h" +#include "xe_sriov.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_printk.h" + +#include "xe_sriov_pf_service.h" +#include "xe_sriov_pf_service_types.h" + +/** + * xe_sriov_pf_service_init - Early initialization of the SR-IOV PF service. + * @xe: the &xe_device to initialize + * + * Performs early initialization of the SR-IOV PF service. + * + * This function can only be called on PF. + */ +void xe_sriov_pf_service_init(struct xe_device *xe) +{ + BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); + BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); + + xe_assert(xe, IS_SRIOV_PF(xe)); + + /* base versions may differ between platforms */ + xe->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; + xe->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; + + /* latest version is same for all platforms */ + xe->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; + xe->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; +} + +/* Return: 0 on success or a negative error code on failure. 
*/ +static int pf_negotiate_version(struct xe_device *xe, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + struct xe_sriov_pf_service_version base = xe->sriov.pf.service.version.base; + struct xe_sriov_pf_service_version latest = xe->sriov.pf.service.version.latest; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, base.major); + xe_assert(xe, base.major <= latest.major); + xe_assert(xe, (base.major < latest.major) || (base.minor <= latest.minor)); + + /* VF doesn't care - return our latest */ + if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && + wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants newer than our - return our latest */ + if (wanted_major > latest.major) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants older than min required - reject */ + if (wanted_major < base.major || + (wanted_major == base.major && wanted_minor < base.minor)) { + return -EPERM; + } + + /* previous major - return wanted, as we should still support it */ + if (wanted_major < latest.major) { + /* XXX: we are not prepared for multi-versions yet */ + xe_assert(xe, base.major == latest.major); + return -ENOPKG; + } + + /* same major - return common minor */ + *major = wanted_major; + *minor = min_t(u32, latest.minor, wanted_minor); + return 0; +} + +static void pf_connect(struct xe_device *xe, u32 vfid, u32 major, u32 minor) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + xe_assert(xe, major || minor); + + xe->sriov.pf.vfs[vfid].version.major = major; + xe->sriov.pf.vfs[vfid].version.minor = minor; +} + +static void pf_disconnect(struct xe_device *xe, u32 vfid) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + + xe->sriov.pf.vfs[vfid].version.major = 0; + xe->sriov.pf.vfs[vfid].version.minor = 0; +} + +/** + * xe_sriov_pf_service_is_negotiated - Check if VF has negotiated given ABI version. + * @xe: the &xe_device + * @vfid: the VF identifier + * @major: the major version to check + * @minor: the minor version to check + * + * Performs early initialization of the SR-IOV PF service. + * + * This function can only be called on PF. + * + * Returns: true if VF can use given ABI version functionality. + */ +bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + + return major == xe->sriov.pf.vfs[vfid].version.major && + minor <= xe->sriov.pf.vfs[vfid].version.minor; +} + +/** + * xe_sriov_pf_service_handshake_vf - Confirm a connection with the VF. + * @xe: the &xe_device + * @vfid: the VF identifier + * @wanted_major: the major service version expected by the VF + * @wanted_minor: the minor service version expected by the VF + * @major: the major service version to be used by the VF + * @minor: the minor service version to be used by the VF + * + * Negotiate a VF/PF ABI version to allow VF use the PF services. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + int err; + + xe_sriov_dbg_verbose(xe, "VF%u wants ABI version %u.%u\n", + vfid, wanted_major, wanted_minor); + + err = pf_negotiate_version(xe, wanted_major, wanted_minor, major, minor); + + if (err < 0) { + xe_sriov_notice(xe, "VF%u failed to negotiate ABI %u.%u (%pe)\n", + vfid, wanted_major, wanted_minor, ERR_PTR(err)); + pf_disconnect(xe, vfid); + } else { + xe_sriov_dbg(xe, "VF%u negotiated ABI version %u.%u\n", + vfid, *major, *minor); + pf_connect(xe, vfid, *major, *minor); + } + + return err; +} + +/** + * xe_sriov_pf_service_reset_vf - Reset a connection with the VF. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * Reset a VF driver negotiated VF/PF ABI version. + * + * After that point, the VF driver will have to perform new version handshake + * to continue use of the PF services again. + * + * This function can only be called on PF. + */ +void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid) +{ + pf_disconnect(xe, vfid); +} + +static void print_pf_version(struct drm_printer *p, const char *name, + const struct xe_sriov_pf_service_version *version) +{ + drm_printf(p, "%s:\t%u.%u\n", name, version->major, version->minor); +} + +/** + * xe_sriov_pf_service_print_versions - Print ABI versions negotiated with VFs. + * @xe: the &xe_device + * @p: the &drm_printer + * + * This function is for PF use only. + */ +void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); + struct xe_sriov_pf_service_version *version; + char name[8]; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + print_pf_version(p, "base", &xe->sriov.pf.service.version.base); + print_pf_version(p, "latest", &xe->sriov.pf.service.version.latest); + + for (n = 1; n <= total_vfs; n++) { + version = &xe->sriov.pf.vfs[n].version; + if (!version->major && !version->minor) + continue; + + print_pf_version(p, xe_sriov_function_name(n, name, sizeof(name)), version); + } +} + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_sriov_pf_service_kunit.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_sriov_pf_service.h new file mode 100644 index 000000000000..d38c18f5ed10 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SERVICE_H_ +#define _XE_SRIOV_PF_SERVICE_H_ + +#include <linux/types.h> + +struct drm_printer; +struct xe_device; + +void xe_sriov_pf_service_init(struct xe_device *xe); +void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p); + +int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor); +bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor); +void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h new file mode 100644 index 000000000000..0835dde358c1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SERVICE_TYPES_H_ +#define _XE_SRIOV_PF_SERVICE_TYPES_H_ + 
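/*
 * Negotiation examples for the versions described below, following the
 * rules in pf_negotiate_version() (numbers are hypothetical): with base
 * 1.0 and latest 1.15, a VF asking for ANY.ANY or for 2.0 is given 1.15;
 * a VF asking for 1.9 keeps 1.9 (same major, common minor); anything
 * older than 1.0 is rejected with -EPERM.
 */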
+#include <linux/types.h> + +/** + * struct xe_sriov_pf_service_version - VF/PF ABI Version. + * @major: the major version of the VF/PF ABI + * @minor: the minor version of the VF/PF ABI + * + * See `GuC Relay Communication`_. + */ +struct xe_sriov_pf_service_version { + u16 major; + u16 minor; +}; + +/** + * struct xe_sriov_pf_service - Data used by the PF service. + * @version: information about VF/PF ABI versions for current platform. + * @version.base: lowest VF/PF ABI version that could be negotiated with VF. + * @version.latest: latest VF/PF ABI version supported by the PF driver. + */ +struct xe_sriov_pf_service { + struct { + struct xe_sriov_pf_service_version base; + struct xe_sriov_pf_service_version latest; + } version; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h new file mode 100644 index 000000000000..956a88f9f213 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_TYPES_H_ +#define _XE_SRIOV_PF_TYPES_H_ + +#include <linux/mutex.h> +#include <linux/types.h> + +#include "xe_sriov_pf_service_types.h" + +/** + * struct xe_sriov_metadata - per-VF device level metadata + */ +struct xe_sriov_metadata { + /** @version: negotiated VF/PF ABI version */ + struct xe_sriov_pf_service_version version; +}; + +/** + * struct xe_device_pf - Xe PF related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_PF mode. + */ +struct xe_device_pf { + /** @device_total_vfs: Maximum number of VFs supported by the device. */ + u16 device_total_vfs; + + /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ + u16 driver_max_vfs; + + /** @master_lock: protects all VFs configurations across GTs */ + struct mutex master_lock; + + /** @service: device level service data. */ + struct xe_sriov_pf_service service; + + /** @vfs: metadata for all VFs. */ + struct xe_sriov_metadata *vfs; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index ca94382a721e..1a138108d139 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -7,9 +7,6 @@ #define _XE_SRIOV_TYPES_H_ #include <linux/build_bug.h> -#include <linux/mutex.h> -#include <linux/types.h> -#include <linux/workqueue_types.h> /** * VFID - Virtual Function Identifier @@ -40,37 +37,4 @@ enum xe_sriov_mode { }; static_assert(XE_SRIOV_MODE_NONE); -/** - * struct xe_device_pf - Xe PF related data - * - * The data in this structure is valid only if driver is running in the - * @XE_SRIOV_MODE_PF mode. - */ -struct xe_device_pf { - /** @device_total_vfs: Maximum number of VFs supported by the device. */ - u16 device_total_vfs; - - /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ - u16 driver_max_vfs; - - /** @master_lock: protects all VFs configurations across GTs */ - struct mutex master_lock; -}; - -/** - * struct xe_device_vf - Xe Virtual Function related data - * - * The data in this structure is valid only if driver is running in the - * @XE_SRIOV_MODE_VF mode. 
- */ -struct xe_device_vf { - /** @migration: VF Migration state data */ - struct { - /** @migration.worker: VF migration recovery worker */ - struct work_struct worker; - /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ - unsigned long gt_flags; - } migration; -}; - #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index c1275e64aa9c..26e243c28994 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -7,12 +7,15 @@ #include "xe_assert.h" #include "xe_device.h" +#include "xe_gt.h" #include "xe_gt_sriov_printk.h" #include "xe_gt_sriov_vf.h" +#include "xe_guc_ct.h" #include "xe_pm.h" #include "xe_sriov.h" #include "xe_sriov_printk.h" #include "xe_sriov_vf.h" +#include "xe_tile_sriov_vf.h" /** * DOC: VF restore procedure in PF KMD and VF KMD @@ -121,6 +124,15 @@ * | | | */ +static bool vf_migration_supported(struct xe_device *xe) +{ + /* + * TODO: Add conditions to allow specific platforms, when they're + * supported at production quality. + */ + return IS_ENABLED(CONFIG_DRM_XE_DEBUG); +} + static void migration_worker_func(struct work_struct *w); /** @@ -130,86 +142,118 @@ static void migration_worker_func(struct work_struct *w); void xe_sriov_vf_init_early(struct xe_device *xe) { INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); + + if (!vf_migration_supported(xe)) + xe_sriov_info(xe, "migration not supported by this module version\n"); } -/** - * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning. +static bool gt_vf_post_migration_needed(struct xe_gt *gt) +{ + return test_bit(gt->info.id, >_to_xe(gt)->sriov.vf.migration.gt_flags); +} + +/* + * Notify GuCs marked in flags about resource fixups apply finished. * @xe: the &xe_device struct instance - * - * After migration, we need to re-query all VF configuration to make sure - * they match previous provisioning. Note that most of VF provisioning - * shall be the same, except GGTT range, since GGTT is not virtualized per-VF. - * - * Returns: 0 if the operation completed successfully, or a negative error - * code otherwise. + * @gt_flags: flags marking to which GTs the notification shall be sent */ -static int vf_post_migration_requery_guc(struct xe_device *xe) +static int vf_post_migration_notify_resfix_done(struct xe_device *xe, unsigned long gt_flags) { struct xe_gt *gt; unsigned int id; - int err, ret = 0; + int err = 0; for_each_gt(gt, xe, id) { - err = xe_gt_sriov_vf_query_config(gt); - ret = ret ?: err; + if (!test_bit(id, >_flags)) + continue; + /* skip asking GuC for RESFIX exit if new recovery request arrived */ + if (gt_vf_post_migration_needed(gt)) + continue; + err = xe_gt_sriov_vf_notify_resfix_done(gt); + if (err) + break; + clear_bit(id, >_flags); } - return ret; -} - -/* - * vf_post_migration_imminent - Check if post-restore recovery is coming. - * @xe: the &xe_device struct instance - * - * Return: True if migration recovery worker will soon be running. Any worker currently - * executing does not affect the result. - */ -static bool vf_post_migration_imminent(struct xe_device *xe) -{ - return xe->sriov.vf.migration.gt_flags != 0 || - work_pending(&xe->sriov.vf.migration.worker); + if (gt_flags && !err) + drm_dbg(&xe->drm, "another recovery imminent, skipped some notifications\n"); + return err; } -/* - * Notify all GuCs about resource fixups apply finished. 
- */ -static void vf_post_migration_notify_resfix_done(struct xe_device *xe) +static int vf_get_next_migrated_gt_id(struct xe_device *xe) { struct xe_gt *gt; unsigned int id; for_each_gt(gt, xe, id) { - if (vf_post_migration_imminent(xe)) - goto skip; - xe_gt_sriov_vf_notify_resfix_done(gt); + if (test_and_clear_bit(id, &xe->sriov.vf.migration.gt_flags)) + return id; } - return; + return -1; +} -skip: - drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n"); +/** + * Perform post-migration fixups on a single GT. + * + * After migration, GuC needs to be re-queried for VF configuration to check + * if it matches previous provisioning. Most of VF provisioning shall be the + * same, except GGTT range, since GGTT is not virtualized per-VF. If GGTT + * range has changed, we have to perform fixups - shift all GGTT references + * used anywhere within the driver. After the fixups in this function succeed, + * it is allowed to ask the GuC bound to this GT to continue normal operation. + * + * Returns: 0 if the operation completed successfully, or a negative error + * code otherwise. + */ +static int gt_vf_post_migration_fixups(struct xe_gt *gt) +{ + s64 shift; + int err; + + err = xe_gt_sriov_vf_query_config(gt); + if (err) + return err; + + shift = xe_gt_sriov_vf_ggtt_shift(gt); + if (shift) { + xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift); + /* FIXME: add the recovery steps */ + xe_guc_ct_fixup_messages_with_ggtt(>->uc.guc.ct, shift); + } + return 0; } static void vf_post_migration_recovery(struct xe_device *xe) { - int err; + unsigned long fixed_gts = 0; + int id, err; drm_dbg(&xe->drm, "migration recovery in progress\n"); xe_pm_runtime_get(xe); - err = vf_post_migration_requery_guc(xe); - if (vf_post_migration_imminent(xe)) - goto defer; - if (unlikely(err)) + + if (!vf_migration_supported(xe)) { + xe_sriov_err(xe, "migration not supported by this module version\n"); + err = -ENOTRECOVERABLE; + goto fail; + } + + while (id = vf_get_next_migrated_gt_id(xe), id >= 0) { + struct xe_gt *gt = xe_device_get_gt(xe, id); + + err = gt_vf_post_migration_fixups(gt); + if (err) + goto fail; + + set_bit(id, &fixed_gts); + } + + err = vf_post_migration_notify_resfix_done(xe, fixed_gts); + if (err) goto fail; - /* FIXME: add the recovery steps */ - vf_post_migration_notify_resfix_done(xe); xe_pm_runtime_put(xe); drm_notice(&xe->drm, "migration recovery ended\n"); return; -defer: - xe_pm_runtime_put(xe); - drm_dbg(&xe->drm, "migration recovery deferred\n"); - return; fail: xe_pm_runtime_put(xe); drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err)); @@ -224,18 +268,23 @@ static void migration_worker_func(struct work_struct *w) vf_post_migration_recovery(xe); } -static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe) +/* + * Check if post-restore recovery is coming on any of GTs. + * @xe: the &xe_device struct instance + * + * Return: True if migration recovery worker will soon be running. Any worker currently + * executing does not affect the result. 
+ */ +static bool vf_ready_to_recovery_on_any_gts(struct xe_device *xe) { struct xe_gt *gt; unsigned int id; for_each_gt(gt, xe, id) { - if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) { - xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n"); - return false; - } + if (test_bit(id, &xe->sriov.vf.migration.gt_flags)) + return true; } - return true; + return false; } /** @@ -250,13 +299,9 @@ void xe_sriov_vf_start_migration_recovery(struct xe_device *xe) xe_assert(xe, IS_SRIOV_VF(xe)); - if (!vf_ready_to_recovery_on_all_gts(xe)) + if (!vf_ready_to_recovery_on_any_gts(xe)) return; - WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0); - /* Ensure other threads see that no flags are set now. */ - smp_mb(); - started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker); drm_info(&xe->drm, "VF migration recovery %s\n", started ? "scheduled" : "already in progress"); diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h new file mode 100644 index 000000000000..8300416a6226 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_TYPES_H_ +#define _XE_SRIOV_VF_TYPES_H_ + +#include <linux/types.h> +#include <linux/workqueue_types.h> + +/** + * struct xe_sriov_vf_relay_version - PF ABI version details. + */ +struct xe_sriov_vf_relay_version { + /** @major: major version. */ + u16 major; + /** @minor: minor version. */ + u16 minor; +}; + +/** + * struct xe_device_vf - Xe Virtual Function related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_VF mode. + */ +struct xe_device_vf { + /** @pf_version: negotiated VF/PF ABI version. */ + struct xe_sriov_vf_relay_version pf_version; + + /** @migration: VF Migration state data */ + struct { + /** @migration.worker: VF migration recovery worker */ + struct work_struct worker; + /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ + unsigned long gt_flags; + } migration; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index c77b5c317fa0..10e88f2c9615 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -5,6 +5,7 @@ #include "xe_step.h" +#include <kunit/visibility.h> #include <linux/bitfield.h> #include "xe_device.h" @@ -255,3 +256,4 @@ const char *xe_step_name(enum xe_step step) return "**"; } } +EXPORT_SYMBOL_IF_KUNIT(xe_step_name); diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index 1f710b3fc599..41705f5d52e3 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -14,6 +14,7 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_heci_gsc.h" +#include "xe_i2c.h" #include "xe_mmio.h" #include "xe_pcode_api.h" #include "xe_vsec.h" @@ -173,20 +174,22 @@ static int enable_survivability_mode(struct pci_dev *pdev) survivability->mode = true; ret = xe_heci_gsc_init(xe); - if (ret) { - /* - * But if it fails, device can't enter survivability - * so move it back for correct error handling - */ - survivability->mode = false; - return ret; - } + if (ret) + goto err; xe_vsec_init(xe); + ret = xe_i2c_probe(xe); + if (ret) + goto err; + dev_err(dev, "In Survivability Mode\n"); return 0; + +err: + survivability->mode = false; + return ret; } /** diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 6345896585de..a7ff5975873f 
100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -3,13 +3,17 @@ * Copyright © 2024 Intel Corporation */ +#include <drm/drm_drv.h> + #include "xe_bo.h" #include "xe_gt_stats.h" #include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_module.h" +#include "xe_pm.h" #include "xe_pt.h" #include "xe_svm.h" +#include "xe_tile.h" #include "xe_ttm_vram_mgr.h" #include "xe_vm.h" #include "xe_vm_types.h" @@ -45,21 +49,6 @@ static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r) return gpusvm_to_vm(r->gpusvm); } -static unsigned long xe_svm_range_start(struct xe_svm_range *range) -{ - return drm_gpusvm_range_start(&range->base); -} - -static unsigned long xe_svm_range_end(struct xe_svm_range *range) -{ - return drm_gpusvm_range_end(&range->base); -} - -static unsigned long xe_svm_range_size(struct xe_svm_range *range) -{ - return drm_gpusvm_range_size(&range->base); -} - #define range_debug(r__, operaton__) \ vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \ "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \ @@ -103,11 +92,6 @@ static void xe_svm_range_free(struct drm_gpusvm_range *range) kfree(range); } -static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r) -{ - return container_of(r, struct xe_svm_range, base); -} - static void xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range, const struct mmu_notifier_range *mmu_range) @@ -161,7 +145,12 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r, for_each_tile(tile, xe, id) if (xe_pt_zap_ptes_range(tile, vm, range)) { tile_mask |= BIT(id); - range->tile_invalidated |= BIT(id); + /* + * WRITE_ONCE pairs with READ_ONCE in + * xe_vm_has_valid_gpu_mapping() + */ + WRITE_ONCE(range->tile_invalidated, + range->tile_invalidated | BIT(id)); } return tile_mask; @@ -187,14 +176,9 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, { struct xe_vm *vm = gpusvm_to_vm(gpusvm); struct xe_device *xe = vm->xe; - struct xe_tile *tile; struct drm_gpusvm_range *r, *first; - struct xe_gt_tlb_invalidation_fence - fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; u64 adj_start = mmu_range->start, adj_end = mmu_range->end; u8 tile_mask = 0; - u8 id; - u32 fence_id = 0; long err; xe_svm_assert_in_notifier(vm); @@ -240,42 +224,8 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, xe_device_wmb(xe); - for_each_tile(tile, xe, id) { - if (tile_mask & BIT(id)) { - int err; - - xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[fence_id], true); - - err = xe_gt_tlb_invalidation_range(tile->primary_gt, - &fence[fence_id], - adj_start, - adj_end, - vm->usm.asid); - if (WARN_ON_ONCE(err < 0)) - goto wait; - ++fence_id; - - if (!tile->media_gt) - continue; - - xe_gt_tlb_invalidation_fence_init(tile->media_gt, - &fence[fence_id], true); - - err = xe_gt_tlb_invalidation_range(tile->media_gt, - &fence[fence_id], - adj_start, - adj_end, - vm->usm.asid); - if (WARN_ON_ONCE(err < 0)) - goto wait; - ++fence_id; - } - } - -wait: - for (id = 0; id < fence_id; ++id) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); + err = xe_vm_range_tilemask_tlb_invalidation(vm, adj_start, adj_end, tile_mask); + WARN_ON_ONCE(err); range_notifier_event_end: r = first; @@ -349,7 +299,7 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w) up_write(&vm->lock); } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) static struct xe_vram_region *page_to_vr(struct page *page) { @@ -537,16 +487,18 @@ static int 
xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr, return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM); } -static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation) +static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation) { return container_of(devmem_allocation, struct xe_bo, devmem_allocation); } -static void xe_svm_devmem_release(struct drm_gpusvm_devmem *devmem_allocation) +static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation) { struct xe_bo *bo = to_xe_bo(devmem_allocation); + struct xe_device *xe = xe_bo_device(bo); xe_bo_put_async(bo); + xe_pm_runtime_put(xe); } static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset) @@ -559,7 +511,7 @@ static struct drm_buddy *tile_to_buddy(struct xe_tile *tile) return &tile->mem.vram.ttm.mm; } -static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation, +static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation, unsigned long npages, unsigned long *pfn) { struct xe_bo *bo = to_xe_bo(devmem_allocation); @@ -582,7 +534,7 @@ static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocatio return 0; } -static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = { +static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = { .devmem_release = xe_svm_devmem_release, .populate_devmem_pfn = xe_svm_populate_devmem_pfn, .copy_to_devmem = xe_svm_copy_to_devmem, @@ -662,84 +614,140 @@ static bool xe_svm_range_is_valid(struct xe_svm_range *range, struct xe_tile *tile, bool devmem_only) { - /* - * Advisory only check whether the range currently has a valid mapping, - * READ_ONCE pairs with WRITE_ONCE in xe_pt.c - */ - return ((READ_ONCE(range->tile_present) & - ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) && - (!devmem_only || xe_svm_range_in_vram(range)); + return (xe_vm_has_valid_gpu_mapping(tile, range->tile_present, + range->tile_invalidated) && + (!devmem_only || xe_svm_range_in_vram(range))); +} + +/** xe_svm_range_migrate_to_smem() - Move range pages from VRAM to SMEM + * @vm: xe_vm pointer + * @range: Pointer to the SVM range structure + * + * The xe_svm_range_migrate_to_smem() checks range has pages in VRAM + * and migrates them to SMEM + */ +void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range) +{ + if (xe_svm_range_in_vram(range)) + drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); +} + +/** + * xe_svm_range_validate() - Check if the SVM range is valid + * @vm: xe_vm pointer + * @range: Pointer to the SVM range structure + * @tile_mask: Mask representing the tiles to be checked + * @devmem_preferred : if true range needs to be in devmem + * + * The xe_svm_range_validate() function checks if a range is + * valid and located in the desired memory region. + * + * Return: true if the range is valid, false otherwise + */ +bool xe_svm_range_validate(struct xe_vm *vm, + struct xe_svm_range *range, + u8 tile_mask, bool devmem_preferred) +{ + bool ret; + + xe_svm_notifier_lock(vm); + + ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask && + (devmem_preferred == range->base.flags.has_devmem_pages); + + xe_svm_notifier_unlock(vm); + + return ret; +} + +/** + * xe_svm_find_vma_start - Find start of CPU VMA + * @vm: xe_vm pointer + * @start: start address + * @end: end address + * @vma: Pointer to struct xe_vma + * + * + * This function searches for a cpu vma, within the specified + * range [start, end] in the given VM. 
It adjusts the range based on the + * xe_vma start and end addresses. If no cpu VMA is found, it returns ULONG_MAX. + * + * Return: The starting address of the VMA within the range, + * or ULONG_MAX if no VMA is found + */ +u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 start, u64 end, struct xe_vma *vma) +{ + return drm_gpusvm_find_vma_start(&vm->svm.gpusvm, + max(start, xe_vma_start(vma)), + min(end, xe_vma_end(vma))); } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) { return &tile->mem.vram; } -static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, - struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx) +static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, + unsigned long start, unsigned long end, + struct mm_struct *mm, + unsigned long timeslice_ms) { - struct mm_struct *mm = vm->svm.gpusvm.mm; + struct xe_tile *tile = container_of(dpagemap, typeof(*tile), mem.vram.dpagemap); + struct xe_device *xe = tile_to_xe(tile); + struct device *dev = xe->drm.dev; struct xe_vram_region *vr = tile_to_vr(tile); struct drm_buddy_block *block; struct list_head *blocks; struct xe_bo *bo; - ktime_t end = 0; - int err; + ktime_t time_end = 0; + int err, idx; - range_debug(range, "ALLOCATE VRAM"); + if (!drm_dev_enter(&xe->drm, &idx)) + return -ENODEV; - if (!mmget_not_zero(mm)) - return -EFAULT; - mmap_read_lock(mm); + xe_pm_runtime_get(xe); -retry: - bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL, - xe_svm_range_size(range), + retry: + bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL, end - start, ttm_bo_type_device, XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_CPU_ADDR_MIRROR); if (IS_ERR(bo)) { err = PTR_ERR(bo); - if (xe_vm_validate_should_retry(NULL, err, &end)) + if (xe_vm_validate_should_retry(NULL, err, &time_end)) goto retry; - goto unlock; + goto out_pm_put; } - drm_gpusvm_devmem_init(&bo->devmem_allocation, - vm->xe->drm.dev, mm, - &gpusvm_devmem_ops, - &tile->mem.vram.dpagemap, - xe_svm_range_size(range)); + drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, + &dpagemap_devmem_ops, + &tile->mem.vram.dpagemap, + end - start); blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; list_for_each_entry(block, blocks, link) block->private = vr; xe_bo_get(bo); - err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base, - &bo->devmem_allocation, ctx); + + /* Ensure the device has a pm ref while there are device pages active. 
*/ + xe_pm_runtime_get_noresume(xe); + err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm, + start, end, timeslice_ms, + xe_svm_devm_owner(xe)); if (err) xe_svm_devmem_release(&bo->devmem_allocation); xe_bo_unlock(bo); xe_bo_put(bo); -unlock: - mmap_read_unlock(mm); - mmput(mm); +out_pm_put: + xe_pm_runtime_put(xe); + drm_dev_exit(idx); return err; } -#else -static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, - struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx) -{ - return -EOPNOTSUPP; -} #endif static bool supports_4K_migration(struct xe_device *xe) @@ -750,21 +758,31 @@ static bool supports_4K_migration(struct xe_device *xe) return true; } -static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, - struct xe_vma *vma) +/** + * xe_svm_range_needs_migrate_to_vram() - SVM range needs migrate to VRAM or not + * @range: SVM range for which migration needs to be decided + * @vma: vma which has range + * @preferred_region_is_vram: preferred region for range is vram + * + * Return: True for range needing migration and migration is supported else false + */ +bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, + bool preferred_region_is_vram) { struct xe_vm *vm = range_to_vm(&range->base); u64 range_size = xe_svm_range_size(range); - if (!range->base.flags.migrate_devmem) + if (!range->base.flags.migrate_devmem || !preferred_region_is_vram) return false; - if (xe_svm_range_in_vram(range)) { - drm_dbg(&vm->xe->drm, "Range is already in VRAM\n"); + xe_assert(vm->xe, IS_DGFX(vm->xe)); + + if (preferred_region_is_vram && xe_svm_range_in_vram(range)) { + drm_info(&vm->xe->drm, "Range is already in VRAM\n"); return false; } - if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) { + if (preferred_region_is_vram && range_size < SZ_64K && !supports_4K_migration(vm->xe)) { drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); return false; } @@ -792,20 +810,19 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, struct drm_gpusvm_ctx ctx = { .read_only = xe_vma_read_only(vma), .devmem_possible = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), .check_pages_threshold = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0, + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? SZ_64K : 0, .devmem_only = atomic && IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), .timeslice_ms = atomic && IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0, + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? + vm->xe->atomic_svm_timeslice_ms : 0, }; struct xe_svm_range *range; - struct drm_gpusvm_range *r; - struct drm_exec exec; struct dma_fence *fence; - int migrate_try_count = ctx.devmem_only ? 3 : 1; struct xe_tile *tile = gt_to_tile(gt); + int migrate_try_count = ctx.devmem_only ? 
3 : 1; ktime_t end = 0; int err; @@ -820,24 +837,22 @@ retry: if (err) return err; - r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr, - xe_vma_start(vma), xe_vma_end(vma), - &ctx); - if (IS_ERR(r)) - return PTR_ERR(r); + range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx); - if (ctx.devmem_only && !r->flags.migrate_devmem) + if (IS_ERR(range)) + return PTR_ERR(range); + + if (ctx.devmem_only && !range->base.flags.migrate_devmem) return -EACCES; - range = to_xe_range(r); if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) return 0; range_debug(range, "PAGE FAULT"); if (--migrate_try_count >= 0 && - xe_svm_range_needs_migrate_to_vram(range, vma)) { - err = xe_svm_alloc_vram(vm, tile, range, &ctx); + xe_svm_range_needs_migrate_to_vram(range, vma, IS_DGFX(vm->xe))) { + err = xe_svm_alloc_vram(tile, range, &ctx); ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (err) { if (migrate_try_count || !ctx.devmem_only) { @@ -855,16 +870,11 @@ retry: } range_debug(range, "GET PAGES"); - err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx); + err = xe_svm_range_get_pages(vm, range, &ctx); /* Corner where CPU mappings have changed */ if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) { ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (migrate_try_count > 0 || !ctx.devmem_only) { - if (err == -EOPNOTSUPP) { - range_debug(range, "PAGE FAULT - EVICT PAGES"); - drm_gpusvm_range_evict(&vm->svm.gpusvm, - &range->base); - } drm_dbg(&vm->xe->drm, "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); @@ -884,30 +894,21 @@ retry: range_debug(range, "PAGE FAULT - BIND"); retry_bind: - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - err = drm_exec_lock_obj(&exec, vm->gpuvm.r_obj); - drm_exec_retry_on_contention(&exec); - if (err) { - drm_exec_fini(&exec); - goto err_out; - } - - fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id)); - if (IS_ERR(fence)) { - drm_exec_fini(&exec); - err = PTR_ERR(fence); - if (err == -EAGAIN) { - ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ - range_debug(range, "PAGE FAULT - RETRY BIND"); - goto retry; - } - if (xe_vm_validate_should_retry(&exec, err, &end)) - goto retry_bind; - goto err_out; + xe_vm_lock(vm, false); + fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id)); + if (IS_ERR(fence)) { + xe_vm_unlock(vm); + err = PTR_ERR(fence); + if (err == -EAGAIN) { + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ + range_debug(range, "PAGE FAULT - RETRY BIND"); + goto retry; } + if (xe_vm_validate_should_retry(NULL, err, &end)) + goto retry_bind; + goto err_out; } - drm_exec_fini(&exec); + xe_vm_unlock(vm); dma_fence_wait(fence, false); dma_fence_put(fence); @@ -943,10 +944,84 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end) */ int xe_svm_bo_evict(struct xe_bo *bo) { - return drm_gpusvm_evict_to_ram(&bo->devmem_allocation); + return drm_pagemap_evict_to_ram(&bo->devmem_allocation); +} + +/** + * xe_svm_range_find_or_insert() - Find or insert GPU SVM range + * @vm: xe_vm pointer + * @addr: address for which range needs to be found/inserted + * @vma: Pointer to struct xe_vma which mirrors CPU + * @ctx: GPU SVM context + * + * This function finds or inserts a newly allocated SVM range based on the + * address. + * + * Return: Pointer to the SVM range on success, ERR_PTR() on failure.
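/*
 * Editor's note (illustration only): the fault handler above bounds VRAM
 * migration attempts with migrate_try_count and doubles ctx.timeslice_ms each
 * time it has to retry. The self-contained sketch below shows only that
 * bounded retry-with-backoff shape; try_migrate()/try_get_pages() are made-up
 * stand-ins for the fallible steps, not driver functions.
 */
#include <stdbool.h>
#include <stdio.h>

static bool try_migrate(int attempt)   { return attempt >= 1; }
static bool try_get_pages(int attempt) { return attempt >= 1; }

static int handle_fault(bool devmem_only)
{
	int migrate_try_count = devmem_only ? 3 : 1;
	unsigned int timeslice_ms = devmem_only ? 5 : 0;
	int attempt = 0;

retry:
	if (--migrate_try_count >= 0 && !try_migrate(attempt)) {
		timeslice_ms <<= 1;	/* double the timeslice before retrying */
		attempt++;
		goto retry;
	}

	if (!try_get_pages(attempt)) {
		timeslice_ms <<= 1;
		attempt++;
		if (migrate_try_count > 0 || !devmem_only)
			goto retry;
		return -1;	/* out of attempts, report back to the caller */
	}

	printf("bound after %d retry(ies), timeslice now %u ms\n",
	       attempt, timeslice_ms);
	return 0;
}

int main(void)
{
	return handle_fault(true) ? 1 : 0;
}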
+ */ +struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, + struct xe_vma *vma, struct drm_gpusvm_ctx *ctx) +{ + struct drm_gpusvm_range *r; + + r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, max(addr, xe_vma_start(vma)), + xe_vma_start(vma), xe_vma_end(vma), ctx); + if (IS_ERR(r)) + return ERR_PTR(PTR_ERR(r)); + + return to_xe_range(r); +} + +/** + * xe_svm_range_get_pages() - Get pages for a SVM range + * @vm: Pointer to the struct xe_vm + * @range: Pointer to the xe SVM range structure + * @ctx: GPU SVM context + * + * This function gets pages for a SVM range and ensures they are mapped for + * DMA access. In case of failure with -EOPNOTSUPP, it evicts the range. + * + * Return: 0 on success, negative error code on failure. + */ +int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, + struct drm_gpusvm_ctx *ctx) +{ + int err = 0; + + err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, &range->base, ctx); + if (err == -EOPNOTSUPP) { + range_debug(range, "PAGE FAULT - EVICT PAGES"); + drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); + } + + return err; } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) + +/** + * xe_svm_alloc_vram()- Allocate device memory pages for range, + * migrating existing data. + * @tile: tile to allocate vram from + * @range: SVM range + * @ctx: DRM GPU SVM context + * + * Return: 0 on success, error code on failure. + */ +int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) +{ + struct drm_pagemap *dpagemap; + + xe_assert(tile_to_xe(tile), range->base.flags.migrate_devmem); + range_debug(range, "ALLOCATE VRAM"); + + dpagemap = xe_tile_local_pagemap(tile); + return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range), + xe_svm_range_end(range), + range->base.gpusvm->mm, + ctx->timeslice_ms); +} static struct drm_pagemap_device_addr xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, @@ -972,6 +1047,7 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, static const struct drm_pagemap_ops xe_drm_pagemap_ops = { .device_map = xe_drm_pagemap_device_map, + .populate_mm = xe_drm_pagemap_populate_mm, }; /** @@ -1003,7 +1079,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) vr->pagemap.range.start = res->start; vr->pagemap.range.end = res->end; vr->pagemap.nr_range = 1; - vr->pagemap.ops = drm_gpusvm_pagemap_ops_get(); + vr->pagemap.ops = drm_pagemap_pagemap_ops_get(); vr->pagemap.owner = xe_svm_devm_owner(xe); addr = devm_memremap_pages(dev, &vr->pagemap); @@ -1024,6 +1100,13 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) return 0; } #else +int xe_svm_alloc_vram(struct xe_tile *tile, + struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) +{ + return -EOPNOTSUPP; +} + int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) { return 0; diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index 30fc78b85b30..da9a69ea0bb1 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -70,6 +70,26 @@ int xe_svm_bo_evict(struct xe_bo *bo); void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); +int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx); + +struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, + struct xe_vma *vma, struct drm_gpusvm_ctx *ctx); + +int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, 
+ struct drm_gpusvm_ctx *ctx); + +bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, + bool preferred_region_is_vram); + +void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range); + +bool xe_svm_range_validate(struct xe_vm *vm, + struct xe_svm_range *range, + u8 tile_mask, bool devmem_preferred); + +u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma); + /** * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping * @range: SVM range @@ -82,6 +102,53 @@ static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) return range->base.flags.has_dma_mapping; } +/** + * to_xe_range - Convert a drm_gpusvm_range pointer to a xe_svm_range + * @r: Pointer to the drm_gpusvm_range structure + * + * This function takes a pointer to a drm_gpusvm_range structure and + * converts it to a pointer to the containing xe_svm_range structure. + * + * Return: Pointer to the xe_svm_range structure + */ +static inline struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r) +{ + return container_of(r, struct xe_svm_range, base); +} + +/** + * xe_svm_range_start() - SVM range start address + * @range: SVM range + * + * Return: start address of range. + */ +static inline unsigned long xe_svm_range_start(struct xe_svm_range *range) +{ + return drm_gpusvm_range_start(&range->base); +} + +/** + * xe_svm_range_end() - SVM range end address + * @range: SVM range + * + * Return: end address of range. + */ +static inline unsigned long xe_svm_range_end(struct xe_svm_range *range) +{ + return drm_gpusvm_range_end(&range->base); +} + +/** + * xe_svm_range_size() - SVM range size + * @range: SVM range + * + * Return: Size of range. + */ +static inline unsigned long xe_svm_range_size(struct xe_svm_range *range) +{ + return drm_gpusvm_range_size(&range->base); +} + #define xe_svm_assert_in_notifier(vm__) \ lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) @@ -97,6 +164,8 @@ void xe_svm_flush(struct xe_vm *vm); #include <linux/interval_tree.h> struct drm_pagemap_device_addr; +struct drm_gpusvm_ctx; +struct drm_gpusvm_range; struct xe_bo; struct xe_gt; struct xe_vm; @@ -167,6 +236,73 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) { } +static inline int +xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) +{ + return -EOPNOTSUPP; +} + +static inline +struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, + struct xe_vma *vma, struct drm_gpusvm_ctx *ctx) +{ + return ERR_PTR(-EINVAL); +} + +static inline +int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, + struct drm_gpusvm_ctx *ctx) +{ + return -EINVAL; +} + +static inline struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r) +{ + return NULL; +} + +static inline unsigned long xe_svm_range_start(struct xe_svm_range *range) +{ + return 0; +} + +static inline unsigned long xe_svm_range_end(struct xe_svm_range *range) +{ + return 0; +} + +static inline unsigned long xe_svm_range_size(struct xe_svm_range *range) +{ + return 0; +} + +static inline +bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, + u32 region) +{ + return false; +} + +static inline +void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range) +{ +} + +static inline +bool xe_svm_range_validate(struct xe_vm *vm, + struct xe_svm_range *range, + u8 tile_mask, bool devmem_preferred) +{ + return false; +} + +static 
inline +u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma) +{ + return ULONG_MAX; +} + #define xe_svm_assert_in_notifier(...) do {} while (0) #define xe_svm_range_has_dma_mapping(...) false diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 0771acbbf367..86e9811e60ba 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -10,6 +10,7 @@ #include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_memirq.h" #include "xe_migrate.h" #include "xe_pcode.h" #include "xe_sa.h" @@ -87,13 +88,9 @@ */ static int xe_tile_alloc(struct xe_tile *tile) { - struct drm_device *drm = &tile_to_xe(tile)->drm; - - tile->mem.ggtt = drmm_kzalloc(drm, sizeof(*tile->mem.ggtt), - GFP_KERNEL); + tile->mem.ggtt = xe_ggtt_alloc(tile); if (!tile->mem.ggtt) return -ENOMEM; - tile->mem.ggtt->tile = tile; return 0; } @@ -178,6 +175,12 @@ int xe_tile_init_noalloc(struct xe_tile *tile) int xe_tile_init(struct xe_tile *tile) { + int err; + + err = xe_memirq_init(&tile->memirq); + if (err) + return err; + tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); if (IS_ERR(tile->mem.kernel_bb_pool)) return PTR_ERR(tile->mem.kernel_bb_pool); diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index eb939316d55b..cc33e8733983 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -16,4 +16,21 @@ int xe_tile_init(struct xe_tile *tile); void xe_tile_migrate_wait(struct xe_tile *tile); +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) +{ + return &tile->mem.vram.dpagemap; +} +#else +static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) +{ + return NULL; +} +#endif + +static inline bool xe_tile_is_root(struct xe_tile *tile) +{ + return tile->id == 0; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.c b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c new file mode 100644 index 000000000000..f221dbed16f0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "regs/xe_gtt_defs.h" + +#include "xe_assert.h" +#include "xe_ggtt.h" +#include "xe_gt_sriov_vf.h" +#include "xe_sriov.h" +#include "xe_sriov_printk.h" +#include "xe_tile_sriov_vf.h" +#include "xe_wopcm.h" + +static int vf_init_ggtt_balloons(struct xe_tile *tile) +{ + struct xe_ggtt *ggtt = tile->mem.ggtt; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + tile->sriov.vf.ggtt_balloon[0] = xe_ggtt_node_init(ggtt); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) + return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); + + tile->sriov.vf.ggtt_balloon[1] = xe_ggtt_node_init(ggtt); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { + xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]); + return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); + } + + return 0; +} + +/** + * xe_tile_sriov_vf_balloon_ggtt_locked - Insert balloon nodes to limit used GGTT address range. + * @tile: the &xe_tile struct instance + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile) +{ + u64 ggtt_base = xe_gt_sriov_vf_ggtt_base(tile->primary_gt); + u64 ggtt_size = xe_gt_sriov_vf_ggtt(tile->primary_gt); + struct xe_device *xe = tile_to_xe(tile); + u64 wopcm = xe_wopcm_size(xe); + u64 start, end; + int err; + + xe_tile_assert(tile, IS_SRIOV_VF(xe)); + xe_tile_assert(tile, ggtt_size); + lockdep_assert_held(&tile->mem.ggtt->lock); + + /* + * VF can only use part of the GGTT as allocated by the PF: + * + * WOPCM GUC_GGTT_TOP + * |<------------ Total GGTT size ------------------>| + * + * VF GGTT base -->|<- size ->| + * + * +--------------------+----------+-----------------+ + * |////////////////////| block |\\\\\\\\\\\\\\\\\| + * +--------------------+----------+-----------------+ + * + * |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->| + */ + + if (ggtt_base < wopcm || ggtt_base > GUC_GGTT_TOP || + ggtt_size > GUC_GGTT_TOP - ggtt_base) { + xe_sriov_err(xe, "tile%u: Invalid GGTT configuration: %#llx-%#llx\n", + tile->id, ggtt_base, ggtt_base + ggtt_size - 1); + return -ERANGE; + } + + start = wopcm; + end = ggtt_base; + if (end != start) { + err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[0], + start, end); + if (err) + return err; + } + + start = ggtt_base + ggtt_size; + end = GUC_GGTT_TOP; + if (end != start) { + err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[1], + start, end); + if (err) { + xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]); + return err; + } + } + + return 0; +} + +static int vf_balloon_ggtt(struct xe_tile *tile) +{ + struct xe_ggtt *ggtt = tile->mem.ggtt; + int err; + + mutex_lock(&ggtt->lock); + err = xe_tile_sriov_vf_balloon_ggtt_locked(tile); + mutex_unlock(&ggtt->lock); + + return err; +} + +/** + * xe_tile_sriov_vf_deballoon_ggtt_locked - Remove balloon nodes. + * @tile: the &xe_tile struct instance + */ +void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile) +{ + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[1]); + xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]); +} + +static void vf_deballoon_ggtt(struct xe_tile *tile) +{ + mutex_lock(&tile->mem.ggtt->lock); + xe_tile_sriov_vf_deballoon_ggtt_locked(tile); + mutex_unlock(&tile->mem.ggtt->lock); +} + +static void vf_fini_ggtt_balloons(struct xe_tile *tile) +{ + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[1]); + xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]); +} + +static void cleanup_ggtt(struct drm_device *drm, void *arg) +{ + struct xe_tile *tile = arg; + + vf_deballoon_ggtt(tile); + vf_fini_ggtt_balloons(tile); +} + +/** + * xe_tile_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration. + * @tile: the &xe_tile + * + * This function is for VF use only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + int err; + + err = vf_init_ggtt_balloons(tile); + if (err) + return err; + + err = vf_balloon_ggtt(tile); + if (err) { + vf_fini_ggtt_balloons(tile); + return err; + } + + return drmm_add_action_or_reset(&xe->drm, cleanup_ggtt, tile); +} + +/** + * DOC: GGTT nodes shifting during VF post-migration recovery + * + * The first fixup applied to the VF KMD structures as part of post-migration + * recovery is shifting nodes within &xe_ggtt instance. 
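/*
 * Editor's note (illustration only): xe_tile_sriov_vf_balloon_ggtt_locked()
 * above carves two balloon nodes out of the GGTT, one covering
 * [WOPCM, VF base) and one covering [VF base + size, GUC_GGTT_TOP), after
 * checking that the PF-provisioned range actually fits. The standalone sketch
 * below reproduces only that arithmetic with made-up example values;
 * struct balloon and compute_balloons() are not driver types.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct balloon { uint64_t start, end; };	/* half-open range [start, end) */

static int compute_balloons(uint64_t wopcm, uint64_t ggtt_top,
			    uint64_t vf_base, uint64_t vf_size,
			    struct balloon out[2])
{
	/* Same sanity check as the driver: the VF slice must sit inside the GGTT. */
	if (vf_base < wopcm || vf_base > ggtt_top || vf_size > ggtt_top - vf_base)
		return -1;

	out[0] = (struct balloon){ wopcm, vf_base };		  /* below the VF slice */
	out[1] = (struct balloon){ vf_base + vf_size, ggtt_top }; /* above the VF slice */
	return 0;	/* the driver skips a balloon whose range is empty */
}

int main(void)
{
	struct balloon b[2];

	/* Example only: 1 MiB WOPCM, 4 GiB GGTT top, 256 MiB VF slice at 1 GiB. */
	if (compute_balloons(1ULL << 20, 4ULL << 30, 1ULL << 30, 256ULL << 20, b))
		return 1;

	for (int i = 0; i < 2; i++)
		printf("balloon[%d] = [%#" PRIx64 ", %#" PRIx64 ")\n",
		       i, b[i].start, b[i].end);
	return 0;
}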
The nodes are moved + * from range previously assigned to this VF, into newly provisioned area. + * The changes include balloons, which are resized accordingly. + * + * The balloon nodes are there to eliminate unavailable ranges from use: one + * reserves the GGTT area below the range for current VF, and another one + * reserves area above. + * + * Below is a GGTT layout of example VF, with a certain address range assigned to + * said VF, and inaccessible areas above and below: + * + * 0 4GiB + * |<--------------------------- Total GGTT size ----------------------------->| + * WOPCM GUC_TOP + * |<-------------- Area mappable by xe_ggtt instance ---------------->| + * + * +---+---------------------------------+----------+----------------------+---+ + * |\\\|/////////////////////////////////| VF mem |//////////////////////|\\\| + * +---+---------------------------------+----------+----------------------+---+ + * + * Hardware enforced access rules before migration: + * + * |<------- inaccessible for VF ------->|<VF owned>|<-- inaccessible for VF ->| + * + * GGTT nodes used for tracking allocations: + * + * |<---------- balloon ------------>|<- nodes->|<----- balloon ------>| + * + * After the migration, GGTT area assigned to the VF might have shifted, either + * to lower or to higher address. But we expect the total size and extra areas to + * be identical, as migration can only happen between matching platforms. + * Below is an example of GGTT layout of the VF after migration. Content of the + * GGTT for VF has been moved to a new area, and we receive its address from GuC: + * + * +---+----------------------+----------+---------------------------------+---+ + * |\\\|//////////////////////| VF mem |/////////////////////////////////|\\\| + * +---+----------------------+----------+---------------------------------+---+ + * + * Hardware enforced access rules after migration: + * + * |<- inaccessible for VF -->|<VF owned>|<------- inaccessible for VF ------->| + * + * So the VF has a new slice of GGTT assigned, and during migration process, the + * memory content was copied to that new area. But the &xe_ggtt nodes are still + * tracking allocations using the old addresses. The nodes within VF owned area + * have to be shifted, and balloon nodes need to be resized to properly mask out + * areas not owned by the VF. + * + * Fixed &xe_ggtt nodes used for tracking allocations: + * + * |<------ balloon ------>|<- nodes->|<----------- balloon ----------->| + * + * Due to use of GPU profiles, we do not expect the old and new GGTT ares to + * overlap; but our node shifting will fix addresses properly regardless. + */ + +/** + * xe_tile_sriov_vf_fixup_ggtt_nodes - Shift GGTT allocations to match assigned range. + * @tile: the &xe_tile struct instance + * @shift: the shift value + * + * Since Global GTT is not virtualized, each VF has an assigned range + * within the global space. This range might have changed during migration, + * which requires all memory addresses pointing to GGTT to be shifted. 
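/*
 * Editor's note (illustration only): as described above, post-migration
 * recovery shifts every tracked GGTT node by a signed offset (conceptually
 * the new VF base minus the old one) before the balloons are re-inserted.
 * The standalone sketch below shows only that arithmetic, including a
 * negative shift; struct node and shift_nodes() are invented for the example
 * and are not the driver's xe_ggtt node types.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct node { uint64_t start, size; };

static void shift_nodes(struct node *nodes, int count, int64_t shift)
{
	for (int i = 0; i < count; i++)
		nodes[i].start += (uint64_t)shift;	/* wraps correctly for negative shifts */
}

int main(void)
{
	struct node nodes[] = { { 0x40000000, 0x1000 }, { 0x40002000, 0x2000 } };
	uint64_t old_base = 0x40000000, new_base = 0x20000000;
	int64_t shift = (int64_t)(new_base - old_base);	/* negative: slice moved down */

	shift_nodes(nodes, 2, shift);
	for (int i = 0; i < 2; i++)
		printf("node %d now starts at %#" PRIx64 "\n", i, nodes[i].start);
	return 0;
}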
+ */ +void xe_tile_sriov_vf_fixup_ggtt_nodes(struct xe_tile *tile, s64 shift) +{ + struct xe_ggtt *ggtt = tile->mem.ggtt; + + mutex_lock(&ggtt->lock); + + xe_tile_sriov_vf_deballoon_ggtt_locked(tile); + xe_ggtt_shift_nodes_locked(ggtt, shift); + xe_tile_sriov_vf_balloon_ggtt_locked(tile); + + mutex_unlock(&ggtt->lock); +} diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.h b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h new file mode 100644 index 000000000000..93eb043171e8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TILE_SRIOV_VF_H_ +#define _XE_TILE_SRIOV_VF_H_ + +#include <linux/types.h> + +struct xe_tile; + +int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile); +int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile); +void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile); +void xe_tile_sriov_vf_fixup_ggtt_nodes(struct xe_tile *tile, s64 shift); + +#endif diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index ccebd5f0878e..86323cf3be2c 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -33,7 +33,7 @@ DECLARE_EVENT_CLASS(xe_bo, TP_fast_assign( __assign_str(dev); - __entry->size = bo->size; + __entry->size = xe_bo_size(bo); __entry->flags = bo->flags; __entry->vm = bo->vm; ), @@ -73,7 +73,7 @@ TRACE_EVENT(xe_bo_move, TP_fast_assign( __entry->bo = bo; - __entry->size = bo->size; + __entry->size = xe_bo_size(bo); __assign_str(new_placement_name); __assign_str(old_placement_name); __assign_str(device_id); diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 49ddbda7cdef..828b45b24c23 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -98,6 +98,11 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) }, + { XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, XE_RTP_END_VERSION_UNDEFINED), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) + }, }; static const struct xe_rtp_entry_sr lrc_tunings[] = { diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 3a8751a8b92d..3e0c3af235f2 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -33,6 +33,22 @@ uc_to_xe(struct xe_uc *uc) } /* Should be called once at driver load only */ +int xe_uc_init_noalloc(struct xe_uc *uc) +{ + int ret; + + ret = xe_guc_init_noalloc(&uc->guc); + if (ret) + goto err; + + /* HuC and GSC have no early dependencies and will be initialized during xe_uc_init(). 
*/ + return 0; + +err: + xe_gt_err(uc_to_gt(uc), "Failed to early initialize uC (%pe)\n", ERR_PTR(ret)); + return ret; +} + int xe_uc_init(struct xe_uc *uc) { int ret; @@ -56,15 +72,17 @@ int xe_uc_init(struct xe_uc *uc) if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; - if (IS_SRIOV_VF(uc_to_xe(uc))) - return 0; + if (!IS_SRIOV_VF(uc_to_xe(uc))) { + ret = xe_wopcm_init(&uc->wopcm); + if (ret) + goto err; + } - ret = xe_wopcm_init(&uc->wopcm); + ret = xe_guc_min_load_for_hwconfig(&uc->guc); if (ret) goto err; return 0; - err: xe_gt_err(uc_to_gt(uc), "Failed to initialize uC (%pe)\n", ERR_PTR(ret)); return ret; @@ -126,28 +144,7 @@ int xe_uc_sanitize_reset(struct xe_uc *uc) return uc_reset(uc); } -/** - * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig - * @uc: The UC object - * - * Return: 0 on success, negative error code on error. - */ -int xe_uc_init_hwconfig(struct xe_uc *uc) -{ - int ret; - - /* GuC submission not enabled, nothing to do */ - if (!xe_device_uc_enabled(uc_to_xe(uc))) - return 0; - - ret = xe_guc_min_load_for_hwconfig(&uc->guc); - if (ret) - return ret; - - return 0; -} - -static int vf_uc_init_hw(struct xe_uc *uc) +static int vf_uc_load_hw(struct xe_uc *uc) { int err; @@ -161,22 +158,30 @@ static int vf_uc_init_hw(struct xe_uc *uc) err = xe_gt_sriov_vf_connect(uc_to_gt(uc)); if (err) - return err; + goto err_out; uc->guc.submission_state.enabled = true; - err = xe_gt_record_default_lrcs(uc_to_gt(uc)); + err = xe_guc_opt_in_features_enable(&uc->guc); if (err) return err; + err = xe_gt_record_default_lrcs(uc_to_gt(uc)); + if (err) + goto err_out; + return 0; + +err_out: + xe_guc_sanitize(&uc->guc); + return err; } /* * Should be called during driver load, after every GT reset, and after every * suspend to reload / auth the firmwares. 
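/*
 * Editor's note (illustration only): the rework above routes later failures
 * in xe_uc_load_hw() through a single err_out label that sanitizes the GuC
 * before returning. The standalone sketch below shows that single-exit
 * goto-unwind idiom in isolation; the step names are placeholders, not
 * driver calls.
 */
#include <stdio.h>

static int step(const char *name, int fail)
{
	printf("%s: %s\n", name, fail ? "failed" : "ok");
	return fail ? -1 : 0;
}

static void sanitize(void)
{
	printf("unwind: sanitize partially brought-up state\n");
}

static int load_hw(int fail_at)
{
	int ret;

	ret = step("upload firmware", fail_at == 1);
	if (ret)
		goto err_out;

	ret = step("enable submission", fail_at == 2);
	if (ret)
		goto err_out;

	ret = step("start power controller", fail_at == 3);
	if (ret)
		goto err_out;

	return 0;

err_out:
	sanitize();	/* one place to undo everything done so far */
	return ret;
}

int main(void)
{
	load_hw(0);	/* clean load */
	load_hw(2);	/* failure mid-way takes the common unwind path */
	return 0;
}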
*/ -int xe_uc_init_hw(struct xe_uc *uc) +int xe_uc_load_hw(struct xe_uc *uc) { int ret; @@ -185,7 +190,7 @@ int xe_uc_init_hw(struct xe_uc *uc) return 0; if (IS_SRIOV_VF(uc_to_xe(uc))) - return vf_uc_init_hw(uc); + return vf_uc_load_hw(uc); ret = xe_huc_upload(&uc->huc); if (ret) @@ -201,15 +206,15 @@ int xe_uc_init_hw(struct xe_uc *uc) ret = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (ret) - return ret; + goto err_out; ret = xe_guc_post_load_init(&uc->guc); if (ret) - return ret; + goto err_out; ret = xe_guc_pc_start(&uc->guc.pc); if (ret) - return ret; + goto err_out; xe_guc_engine_activity_enable_stats(&uc->guc); @@ -221,11 +226,10 @@ int xe_uc_init_hw(struct xe_uc *uc) xe_gsc_load_start(&uc->gsc); return 0; -} -int xe_uc_fini_hw(struct xe_uc *uc) -{ - return xe_uc_sanitize_reset(uc); +err_out: + xe_guc_sanitize(&uc->guc); + return ret; } int xe_uc_reset_prepare(struct xe_uc *uc) diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index c23e6f5e2514..21c9306098cf 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -8,11 +8,10 @@ struct xe_uc; +int xe_uc_init_noalloc(struct xe_uc *uc); int xe_uc_init(struct xe_uc *uc); -int xe_uc_init_hwconfig(struct xe_uc *uc); int xe_uc_init_post_hwconfig(struct xe_uc *uc); -int xe_uc_init_hw(struct xe_uc *uc); -int xe_uc_fini_hw(struct xe_uc *uc); +int xe_uc_load_hw(struct xe_uc *uc); void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 2741849bbf4d..9bbdde604923 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -16,6 +16,7 @@ #include "xe_gsc.h" #include "xe_gt.h" #include "xe_gt_printk.h" +#include "xe_gt_sriov_vf.h" #include "xe_guc.h" #include "xe_map.h" #include "xe_mmio.h" @@ -114,10 +115,11 @@ struct fw_blobs_by_type { #define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \ - fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \ + fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 47, 0)) \ + fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \ + fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ - fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \ + fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \ fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \ fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \ @@ -126,6 +128,7 @@ struct fw_blobs_by_type { fw_def(TIGERLAKE, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ + fw_def(PANTHERLAKE, GT_TYPE_ANY, no_ver(xe, huc, ptl)) \ fw_def(BATTLEMAGE, GT_TYPE_ANY, no_ver(xe, huc, bmg)) \ fw_def(LUNARLAKE, GT_TYPE_ANY, no_ver(xe, huc, lnl)) \ fw_def(METEORLAKE, GT_TYPE_ANY, no_ver(i915, huc_gsc, mtl)) \ @@ -662,11 +665,39 @@ do { \ ver_->major, ver_->minor, ver_->patch); \ } while (0) +static void uc_fw_vf_override(struct xe_uc_fw *uc_fw) +{ + struct xe_uc_fw_version *compat = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY]; + struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted; + + /* Only GuC/HuC are supported */ + if 
(uc_fw->type != XE_UC_FW_TYPE_GUC && uc_fw->type != XE_UC_FW_TYPE_HUC) + uc_fw->path = NULL; + + /* VF will support only firmwares that driver can autoselect */ + xe_uc_fw_change_status(uc_fw, uc_fw->path ? + XE_UC_FIRMWARE_PRELOADED : + XE_UC_FIRMWARE_NOT_SUPPORTED); + + if (!xe_uc_fw_is_supported(uc_fw)) + return; + + /* PF is doing the loading, so we don't need a path on the VF */ + uc_fw->path = "Loaded by PF"; + + /* The GuC versions are set up during the VF bootstrap */ + if (uc_fw->type == XE_UC_FW_TYPE_GUC) { + uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY; + xe_gt_sriov_vf_guc_versions(uc_fw_to_gt(uc_fw), wanted, compat); + } +} + static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmware_p) { struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct drm_printer p = xe_gt_info_printer(gt); struct device *dev = xe->drm.dev; - struct drm_printer p = drm_info_printer(dev); const struct firmware *fw = NULL; int err; @@ -675,20 +706,13 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar * before we're looked at the HW caps to see if we have uc support */ BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED); - xe_assert(xe, !uc_fw->status); - xe_assert(xe, !uc_fw->path); + xe_gt_assert(gt, !uc_fw->status); + xe_gt_assert(gt, !uc_fw->path); uc_fw_auto_select(xe, uc_fw); if (IS_SRIOV_VF(xe)) { - /* Only GuC/HuC are supported */ - if (uc_fw->type != XE_UC_FW_TYPE_GUC && - uc_fw->type != XE_UC_FW_TYPE_HUC) - uc_fw->path = NULL; - /* VF will support only firmwares that driver can autoselect */ - xe_uc_fw_change_status(uc_fw, uc_fw->path ? - XE_UC_FIRMWARE_PRELOADED : - XE_UC_FIRMWARE_NOT_SUPPORTED); + uc_fw_vf_override(uc_fw); return 0; } @@ -700,7 +724,7 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar if (!xe_uc_fw_is_supported(uc_fw)) { if (uc_fw->type == XE_UC_FW_TYPE_GUC) { - drm_err(&xe->drm, "No GuC firmware defined for platform\n"); + xe_gt_err(gt, "No GuC firmware defined for platform\n"); return -ENOENT; } return 0; @@ -709,7 +733,7 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar /* an empty path means the firmware is disabled */ if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) { xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED); - drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type)); + xe_gt_dbg(gt, "%s disabled\n", xe_uc_fw_type_repr(uc_fw->type)); return 0; } @@ -742,10 +766,10 @@ fail: XE_UC_FIRMWARE_MISSING : XE_UC_FIRMWARE_ERROR); - drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n", - xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); - drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n", - xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL); + xe_gt_notice(gt, "%s firmware %s: fetch failed with error %pe\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, ERR_PTR(err)); + xe_gt_info(gt, "%s firmware(s) can be downloaded from %s\n", + xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL); release_firmware(fw); /* OK even if fw is NULL */ diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index ad3b35a0e6eb..914026015019 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -65,6 +65,8 @@ enum xe_uc_fw_type { * struct xe_uc_fw_version - Version for XE micro controller firmware */ struct xe_uc_fw_version { + /** @branch: branch version of the FW (not always available) */ + u16 branch; /** 
@major: major version of the FW */ u16 major; /** @minor: minor version of the FW */ diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 861577746929..2035604121e6 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -732,7 +732,9 @@ int xe_vm_userptr_pin(struct xe_vm *vm) DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); + down_read(&vm->userptr.notifier_lock); err = xe_vm_invalidate_vma(&uvma->vma); + up_read(&vm->userptr.notifier_lock); xe_vm_unlock(vm); if (err) break; @@ -798,21 +800,47 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) } ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); +static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) +{ + struct xe_vma *vma; + + vma = gpuva_to_vma(op->base.prefetch.va); + + if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) + xa_destroy(&op->prefetch_range.range); +} + +static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) +{ + struct xe_vma_op *op; + + if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) + return; + + list_for_each_entry(op, &vops->list, link) + xe_vma_svm_prefetch_op_fini(op); +} + static void xe_vma_ops_fini(struct xe_vma_ops *vops) { int i; + xe_vma_svm_prefetch_ops_fini(vops); + for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) kfree(vops->pt_update_ops[i].ops); } -static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask) +static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) { int i; + if (!inc_val) + return; + for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) if (BIT(i) & tile_mask) - ++vops->pt_update_ops[i].num_ops; + vops->pt_update_ops[i].num_ops += inc_val; } static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, @@ -842,7 +870,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, xe_vm_populate_rebind(op, vma, tile_mask); list_add_tail(&op->link, &vops->list); - xe_vma_ops_incr_pt_update_ops(vops, tile_mask); + xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); return 0; } @@ -977,7 +1005,7 @@ xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, xe_vm_populate_range_rebind(op, vma, range, tile_mask); list_add_tail(&op->link, &vops->list); - xe_vma_ops_incr_pt_update_ops(vops, tile_mask); + xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); return 0; } @@ -1062,7 +1090,7 @@ xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops, xe_vm_populate_range_unbind(op, range); list_add_tail(&op->link, &vops->list); - xe_vma_ops_incr_pt_update_ops(vops, range->tile_present); + xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1); return 0; } @@ -2141,6 +2169,35 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, return err; } +static bool vma_matches(struct xe_vma *vma, u64 page_addr) +{ + if (page_addr > xe_vma_end(vma) - 1 || + page_addr + SZ_4K - 1 < xe_vma_start(vma)) + return false; + + return true; +} + +/** + * xe_vm_find_vma_by_addr() - Find a VMA by its address + * + * @vm: the xe_vm the vma belongs to + * @page_addr: address to look up + */ +struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) +{ + struct xe_vma *vma = NULL; + + if (vm->usm.last_fault_vma) { /* Fast lookup */ + if (vma_matches(vm->usm.last_fault_vma, page_addr)) + vma = vm->usm.last_fault_vma; + } + if (!vma) + vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); + + return vma; +} + static const u32 region_to_mem_type[] = { XE_PL_TT, XE_PL_VRAM0, @@ -2221,13 +2278,25 @@ static bool 
__xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) return true; } +static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) +{ + struct drm_gpuva_op *__op; + + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + + xe_vma_svm_prefetch_op_fini(op); + } +} + /* * Create operations list from IOCTL arguments, setup operations fields so parse * and commit steps are decoupled from IOCTL arguments. This step can fail. */ static struct drm_gpuva_ops * -vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, - u64 bo_offset_or_userptr, u64 addr, u64 range, +vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, + struct xe_bo *bo, u64 bo_offset_or_userptr, + u64 addr, u64 range, u32 operation, u32 flags, u32 prefetch_region, u16 pat_index) { @@ -2235,6 +2304,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct drm_gpuva_ops *ops; struct drm_gpuva_op *__op; struct drm_gpuvm_bo *vm_bo; + u64 range_end = addr + range; int err; lockdep_assert_held_write(&vm->lock); @@ -2296,14 +2366,80 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, op->map.invalidate_on_bind = __xe_vm_needs_clear_scratch_pages(vm, flags); } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { - op->prefetch.region = prefetch_region; - } + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); + struct xe_svm_range *svm_range; + struct drm_gpusvm_ctx ctx = {}; + struct xe_tile *tile; + u8 id, tile_mask = 0; + u32 i; + + if (!xe_vma_is_cpu_addr_mirror(vma)) { + op->prefetch.region = prefetch_region; + break; + } + + ctx.read_only = xe_vma_read_only(vma); + ctx.devmem_possible = IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); + + for_each_tile(tile, vm->xe, id) + tile_mask |= 0x1 << id; + + xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); + op->prefetch_range.region = prefetch_region; + op->prefetch_range.ranges_count = 0; +alloc_next_range: + svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); + + if (PTR_ERR(svm_range) == -ENOENT) { + u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); + + addr = ret == ULONG_MAX ? 
0 : ret; + if (addr) + goto alloc_next_range; + else + goto print_op_label; + } + + if (IS_ERR(svm_range)) { + err = PTR_ERR(svm_range); + goto unwind_prefetch_ops; + } + + if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region)) { + xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); + goto check_next_range; + } + + err = xa_alloc(&op->prefetch_range.range, + &i, svm_range, xa_limit_32b, + GFP_KERNEL); + if (err) + goto unwind_prefetch_ops; + + op->prefetch_range.ranges_count++; + vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; + xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); +check_next_range: + if (range_end > xe_svm_range_end(svm_range) && + xe_svm_range_end(svm_range) < xe_vma_end(vma)) { + addr = xe_svm_range_end(svm_range); + goto alloc_next_range; + } + } +print_op_label: print_op(vm->xe, __op); } return ops; + +unwind_prefetch_ops: + xe_svm_prefetch_gpuva_ops_fini(ops); + drm_gpuva_ops_free(&vm->gpuvm, ops); + return ERR_PTR(err); } + ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, @@ -2498,7 +2634,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, !op->map.is_cpu_addr_mirror) || op->map.invalidate_on_bind) xe_vma_ops_incr_pt_update_ops(vops, - op->tile_mask); + op->tile_mask, 1); break; } case DRM_GPUVA_OP_REMAP: @@ -2507,6 +2643,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, gpuva_to_vma(op->base.remap.unmap->va); bool skip = xe_vma_is_cpu_addr_mirror(old); u64 start = xe_vma_start(old), end = xe_vma_end(old); + int num_remap_ops = 0; if (op->base.remap.prev) start = op->base.remap.prev->va.addr + @@ -2559,7 +2696,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, (ULL)op->remap.start, (ULL)op->remap.range); } else { - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + num_remap_ops++; } } @@ -2588,11 +2725,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, (ULL)op->remap.start, (ULL)op->remap.range); } else { - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + num_remap_ops++; } } if (!skip) - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + num_remap_ops++; + + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); break; } case DRM_GPUVA_OP_UNMAP: @@ -2604,7 +2743,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, return -EBUSY; if (!xe_vma_is_cpu_addr_mirror(vma)) - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); break; case DRM_GPUVA_OP_PREFETCH: vma = gpuva_to_vma(op->base.prefetch.va); @@ -2615,8 +2754,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, return err; } - if (!xe_vma_is_cpu_addr_mirror(vma)) - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + if (xe_vma_is_cpu_addr_mirror(vma)) + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, + op->prefetch_range.ranges_count); + else + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); + break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); @@ -2742,6 +2885,57 @@ static int check_ufence(struct xe_vma *vma) return 0; } +static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) +{ + bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); + int err = 0; + + struct xe_svm_range *svm_range; + struct drm_gpusvm_ctx ctx = 
{}; + struct xe_tile *tile; + unsigned long i; + u32 region; + + if (!xe_vma_is_cpu_addr_mirror(vma)) + return 0; + + region = op->prefetch_range.region; + + ctx.read_only = xe_vma_read_only(vma); + ctx.devmem_possible = devmem_possible; + ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; + + /* TODO: Threading the migration */ + xa_for_each(&op->prefetch_range.range, i, svm_range) { + if (!region) + xe_svm_range_migrate_to_smem(vm, svm_range); + + if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) { + tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0]; + err = xe_svm_alloc_vram(tile, svm_range, &ctx); + if (err) { + drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); + return -ENODATA; + } + xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); + } + + err = xe_svm_range_get_pages(vm, svm_range, &ctx); + if (err) { + drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); + if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) + err = -ENODATA; + return err; + } + xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); + } + + return err; +} + static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, struct xe_vma_op *op) { @@ -2779,7 +2973,12 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_PREFETCH: { struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); - u32 region = op->prefetch.region; + u32 region; + + if (xe_vma_is_cpu_addr_mirror(vma)) + region = op->prefetch_range.region; + else + region = op->prefetch.region; xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); @@ -2798,6 +2997,25 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, return err; } +static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) +{ + struct xe_vma_op *op; + int err; + + if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) + return 0; + + list_for_each_entry(op, &vops->list, link) { + if (op->base.op == DRM_GPUVA_OP_PREFETCH) { + err = prefetch_ranges(vm, op); + if (err) + return err; + } + } + + return 0; +} + static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, struct xe_vma_ops *vops) @@ -3239,6 +3457,7 @@ static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, vops->q = q; vops->syncs = syncs; vops->num_syncs = num_syncs; + vops->flags = 0; } static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, @@ -3247,9 +3466,9 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, { u16 coh_mode; - if (XE_IOCTL_DBG(xe, range > bo->size) || + if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || XE_IOCTL_DBG(xe, obj_offset > - bo->size - range)) { + xe_bo_size(bo) - range)) { return -EINVAL; } @@ -3446,7 +3665,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; u16 pat_index = bind_ops[i].pat_index; - ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, + ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, addr, range, op, flags, prefetch_region, pat_index); if (IS_ERR(ops[i])) { @@ -3479,6 +3698,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (err) goto unwind_ops; + err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); + if (err) + 
goto unwind_ops; + fence = vm_bind_ioctl_ops_execute(vm, &vops); if (IS_ERR(fence)) err = PTR_ERR(fence); @@ -3548,7 +3771,7 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, xe_vma_ops_init(&vops, vm, q, NULL, 0); - ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size, + ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), DRM_XE_VM_BIND_OP_MAP, 0, 0, vm->xe->pat.idx[cache_lvl]); if (IS_ERR(ops)) { @@ -3620,6 +3843,68 @@ void xe_vm_unlock(struct xe_vm *vm) } /** + * xe_vm_range_tilemask_tlb_invalidation - Issue a TLB invalidation on this tilemask for an + * address range + * @vm: The VM + * @start: start address + * @end: end address + * @tile_mask: mask for which gt's issue tlb invalidation + * + * Issue a range based TLB invalidation for gt's in tilemask + * + * Returns 0 for success, negative error code otherwise. + */ +int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start, + u64 end, u8 tile_mask) +{ + struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; + struct xe_tile *tile; + u32 fence_id = 0; + u8 id; + int err; + + if (!tile_mask) + return 0; + + for_each_tile(tile, vm->xe, id) { + if (tile_mask & BIT(id)) { + xe_gt_tlb_invalidation_fence_init(tile->primary_gt, + &fence[fence_id], true); + + err = xe_gt_tlb_invalidation_range(tile->primary_gt, + &fence[fence_id], + start, + end, + vm->usm.asid); + if (err) + goto wait; + ++fence_id; + + if (!tile->media_gt) + continue; + + xe_gt_tlb_invalidation_fence_init(tile->media_gt, + &fence[fence_id], true); + + err = xe_gt_tlb_invalidation_range(tile->media_gt, + &fence[fence_id], + start, + end, + vm->usm.asid); + if (err) + goto wait; + ++fence_id; + } + } + +wait: + for (id = 0; id < fence_id; ++id) + xe_gt_tlb_invalidation_fence_wait(&fence[id]); + + return err; +} + +/** * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock * @vma: VMA to invalidate * @@ -3632,28 +3917,34 @@ void xe_vm_unlock(struct xe_vm *vm) int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; + struct xe_vm *vm = xe_vma_vm(vma); struct xe_tile *tile; - struct xe_gt_tlb_invalidation_fence - fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; - u8 id; - u32 fence_id = 0; + u8 tile_mask = 0; int ret = 0; + u8 id; xe_assert(xe, !xe_vma_is_null(vma)); xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma)); trace_xe_vma_invalidate(vma); - vm_dbg(&xe_vma_vm(vma)->xe->drm, + vm_dbg(&vm->xe->drm, "INVALIDATE: addr=0x%016llx, range=0x%016llx", xe_vma_start(vma), xe_vma_size(vma)); - /* Check that we don't race with page-table updates */ + /* + * Check that we don't race with page-table updates, tile_invalidated + * update is safe + */ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { if (xe_vma_is_userptr(vma)) { + lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) || + (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) && + lockdep_is_held(&xe_vm_resv(vm)->lock.base))); + WARN_ON_ONCE(!mmu_interval_check_retry (&to_userptr_vma(vma)->userptr.notifier, to_userptr_vma(vma)->userptr.notifier_seq)); - WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)), + WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP)); } else { @@ -3661,39 +3952,17 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) } } - for_each_tile(tile, xe, id) { - if (xe_pt_zap_ptes(tile, vma)) { - xe_device_wmb(xe); - xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[fence_id], - true); - - ret = 
xe_gt_tlb_invalidation_vma(tile->primary_gt, - &fence[fence_id], vma); - if (ret) - goto wait; - ++fence_id; - - if (!tile->media_gt) - continue; - - xe_gt_tlb_invalidation_fence_init(tile->media_gt, - &fence[fence_id], - true); + for_each_tile(tile, xe, id) + if (xe_pt_zap_ptes(tile, vma)) + tile_mask |= BIT(id); - ret = xe_gt_tlb_invalidation_vma(tile->media_gt, - &fence[fence_id], vma); - if (ret) - goto wait; - ++fence_id; - } - } + xe_device_wmb(xe); -wait: - for (id = 0; id < fence_id; ++id) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); + ret = xe_vm_range_tilemask_tlb_invalidation(xe_vma_vm(vma), xe_vma_start(vma), + xe_vma_end(vma), tile_mask); - vma->tile_invalidated = vma->tile_mask; + /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ + WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); return ret; } diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 494af6bdc646..3475a118f666 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -169,6 +169,8 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma) !xe_vma_is_cpu_addr_mirror(vma); } +struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr); + /** * to_userptr_vma() - Return a pointer to an embedding userptr vma * @vma: Pointer to the embedded struct xe_vma @@ -226,6 +228,9 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, struct xe_svm_range *range); +int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start, + u64 end, u8 tile_mask); + int xe_vm_invalidate_vma(struct xe_vma *vma); int xe_vm_validate_protected(struct xe_vm *vm); @@ -370,6 +375,25 @@ static inline bool xe_vm_is_validating(struct xe_vm *vm) return false; } +/** + * xe_vm_has_valid_gpu_mapping() - Advisory helper to check if VMA or SVM range has + * a valid GPU mapping + * @tile: The tile which the GPU mapping belongs to + * @tile_present: Tile present mask + * @tile_invalidated: Tile invalidated mask + * + * The READ_ONCEs pair with WRITE_ONCEs in either the TLB invalidation paths + * (xe_vm.c, xe_svm.c) or the binding paths (xe_pt.c). These are not reliable + * without the notifier lock in userptr or SVM cases, and not reliable without + * the BO dma-resv lock in the BO case. As such, they should only be used in + * opportunistic cases (e.g., skipping a page fault fix or not skipping a TLB + * invalidation) where it is harmless. + * + * Return: True is there are valid GPU pages, False otherwise + */ +#define xe_vm_has_valid_gpu_mapping(tile, tile_present, tile_invalidated) \ + ((READ_ONCE(tile_present) & ~READ_ONCE(tile_invalidated)) & BIT((tile)->id)) + #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); #else diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 1979e9bdbdf3..bed6088e1bb3 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -100,14 +100,21 @@ struct xe_vma { struct work_struct destroy_work; }; - /** @tile_invalidated: VMA has been invalidated */ + /** + * @tile_invalidated: Tile mask of binding are invalidated for this VMA. + * protected by BO's resv and for userptrs, vm->userptr.notifier_lock in + * write mode for writing or vm->userptr.notifier_lock in read mode and + * the vm->resv. For stable reading, BO's resv or userptr + * vm->userptr.notifier_lock in read mode is required. Can be + * opportunistically read with READ_ONCE outside of locks. 
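/*
 * Editor's note (illustration only): xe_vm_invalidate_vma() above now
 * collects a tile mask from xe_pt_zap_ptes(), hands it to
 * xe_vm_range_tilemask_tlb_invalidation(), and only then publishes
 * tile_invalidated with WRITE_ONCE. The standalone sketch below shows the
 * "issue one invalidation per selected GT, then wait on exactly the fences
 * that were issued" shape of that helper; the fence type and functions are
 * placeholders, not driver APIs.
 */
#include <stdio.h>

#define MAX_TILES	2
#define MAX_GT_PER_TILE	2

struct fake_fence { int issued; };

static void fence_issue(struct fake_fence *f, int tile, const char *gt)
{
	printf("invalidate tile %d, %s GT\n", tile, gt);
	f->issued = 1;
}

static void fence_wait(const struct fake_fence *f)
{
	if (f->issued)
		printf("waiting for invalidation fence\n");
}

static void invalidate_tilemask(unsigned int tile_mask)
{
	struct fake_fence fence[MAX_TILES * MAX_GT_PER_TILE] = { 0 };
	unsigned int fence_id = 0;

	for (int tile = 0; tile < MAX_TILES; tile++) {
		if (!(tile_mask & (1u << tile)))
			continue;

		fence_issue(&fence[fence_id++], tile, "primary");
		/* this toy example assumes every tile also has a media GT */
		fence_issue(&fence[fence_id++], tile, "media");
	}

	for (unsigned int i = 0; i < fence_id; i++)
		fence_wait(&fence[i]);
}

int main(void)
{
	invalidate_tilemask(0x2);	/* only tile 1 had PTEs zapped */
	return 0;
}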
+ */ u8 tile_invalidated; /** @tile_mask: Tile mask of where to create binding for this VMA */ u8 tile_mask; /** - * @tile_present: GT mask of binding are present for this VMA. + * @tile_present: Tile mask of binding are present for this VMA. * protected by vm->lock, vm->resv and for userptrs, * vm->userptr.notifier_lock for writing. Needs either for reading, * but if reading is done under the vm->lock only, it needs to be held @@ -382,6 +389,16 @@ struct xe_vma_op_unmap_range { struct xe_svm_range *range; }; +/** struct xe_vma_op_prefetch_range - VMA prefetch range operation */ +struct xe_vma_op_prefetch_range { + /** @range: xarray for SVM ranges data */ + struct xarray range; + /** @ranges_count: number of svm ranges to map */ + u32 ranges_count; + /** @region: memory region to prefetch to */ + u32 region; +}; + /** enum xe_vma_op_flags - flags for VMA operation */ enum xe_vma_op_flags { /** @XE_VMA_OP_COMMITTED: VMA operation committed */ @@ -424,6 +441,8 @@ struct xe_vma_op { struct xe_vma_op_map_range map_range; /** @unmap_range: VMA unmap range operation specific data */ struct xe_vma_op_unmap_range unmap_range; + /** @prefetch_range: VMA prefetch range operation specific data */ + struct xe_vma_op_prefetch_range prefetch_range; }; }; @@ -441,6 +460,9 @@ struct xe_vma_ops { u32 num_syncs; /** @pt_update_ops: page table update operations */ struct xe_vm_pgtable_update_ops pt_update_ops[XE_MAX_TILES_PER_DEVICE]; + /** @flag: signify the properties within xe_vma_ops*/ +#define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0) + u32 flags; #ifdef TEST_VM_OPS_ERROR /** @inject_error: inject error to test error handling */ bool inject_error; diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c index b378848d3b7b..8f23a27871b6 100644 --- a/drivers/gpu/drm/xe/xe_vsec.c +++ b/drivers/gpu/drm/xe/xe_vsec.c @@ -24,6 +24,7 @@ #define BMG_DEVICE_ID 0xE2F8 static struct intel_vsec_header bmg_telemetry = { + .rev = 1, .length = 0x10, .id = VSEC_ID_TELEMETRY, .num_entries = 2, @@ -32,28 +33,19 @@ static struct intel_vsec_header bmg_telemetry = { .offset = BMG_DISCOVERY_OFFSET, }; -static struct intel_vsec_header bmg_punit_crashlog = { +static struct intel_vsec_header bmg_crashlog = { + .rev = 1, .length = 0x10, .id = VSEC_ID_CRASHLOG, - .num_entries = 1, - .entry_size = 4, + .num_entries = 2, + .entry_size = 6, .tbir = 0, .offset = BMG_DISCOVERY_OFFSET + 0x60, }; -static struct intel_vsec_header bmg_oobmsm_crashlog = { - .length = 0x10, - .id = VSEC_ID_CRASHLOG, - .num_entries = 1, - .entry_size = 4, - .tbir = 0, - .offset = BMG_DISCOVERY_OFFSET + 0x78, -}; - static struct intel_vsec_header *bmg_capabilities[] = { &bmg_telemetry, - &bmg_punit_crashlog, - &bmg_oobmsm_crashlog, + &bmg_crashlog, NULL }; @@ -149,8 +141,8 @@ static int xe_guid_decode(u32 guid, int *index, u32 *offset) return 0; } -static int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, - u32 count) +int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, + u32 count) { struct xe_device *xe = pdev_to_xe_device(pdev); void __iomem *telem_addr = xe->mmio.regs + BMG_TELEMETRY_OFFSET; diff --git a/drivers/gpu/drm/xe/xe_vsec.h b/drivers/gpu/drm/xe/xe_vsec.h index 5777c53faec2..dabfb4e02d70 100644 --- a/drivers/gpu/drm/xe/xe_vsec.h +++ b/drivers/gpu/drm/xe/xe_vsec.h @@ -4,8 +4,12 @@ #ifndef _XE_VSEC_H_ #define _XE_VSEC_H_ +#include <linux/types.h> + +struct pci_dev; struct xe_device; void xe_vsec_init(struct xe_device *xe); +int xe_pmt_telem_read(struct pci_dev *pdev, 
u32 guid, u64 *data, loff_t user_offset, u32 count); #endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 67196baa4249..22a98600fd8f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -10,6 +10,7 @@ #include <linux/compiler_types.h> #include <linux/fault-inject.h> +#include <generated/xe_device_wa_oob.h> #include <generated/xe_wa_oob.h> #include "regs/xe_engine_regs.h" @@ -285,6 +286,18 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + { XE_RTP_NAME("16021865536"), + XE_RTP_RULES(MEDIA_VERSION(3002), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("16021867713"), + XE_RTP_RULES(MEDIA_VERSION(3002), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, { XE_RTP_NAME("14021486841"), XE_RTP_RULES(MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), ENGINE_CLASS(VIDEO_DECODE)), @@ -503,10 +516,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) }, - { XE_RTP_NAME("16018737384"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) - }, /* * These two workarounds are the same, just applying to different * engines. Although Wa_18032095049 (for the RCS) isn't required on @@ -533,31 +542,38 @@ static const struct xe_rtp_entry_sr engine_was[] = { /* Xe2_HPG */ { XE_RTP_NAME("16018712365"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) }, { XE_RTP_NAME("16018737384"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) }, { XE_RTP_NAME("14019988906"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) }, { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) }, { XE_RTP_NAME("14020338487"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) }, { XE_RTP_NAME("18032247524"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) }, { XE_RTP_NAME("14018471104"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + 
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) }, /* @@ -566,7 +582,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { * apply this to all engines for simplicity. */ { XE_RTP_NAME("16021639441"), - XE_RTP_RULES(GRAPHICS_VERSION(2001)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002)), XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), GHWSP_CSB_REPORT_DIS | PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, @@ -578,11 +594,12 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS)) }, { XE_RTP_NAME("14021402888"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, - { XE_RTP_NAME("14021821874"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + { XE_RTP_NAME("14021821874, 14022954250"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) }, @@ -640,6 +657,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION(3003), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, }; static const struct xe_rtp_entry_sr lrc_was[] = { @@ -774,7 +795,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT)) }, { XE_RTP_NAME("18033852989"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) }, { XE_RTP_NAME("14021567978"), @@ -807,7 +828,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN)) }, { XE_RTP_NAME("14019386621"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) }, { XE_RTP_NAME("14020756599"), @@ -824,13 +845,17 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_AUTOSTRIP)) }, { XE_RTP_NAME("15016589081"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, { XE_RTP_NAME("22021007897"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) }, + { XE_RTP_NAME("18033852989"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) + }, /* Xe3_LPG */ { XE_RTP_NAME("14021490052"), @@ -852,9 +877,34 @@ static __maybe_unused const struct xe_rtp_entry oob_was[] = { static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT); +static __maybe_unused const struct xe_rtp_entry device_oob_was[] = { +#include <generated/xe_device_wa_oob.c> + {} +}; + 
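
The device_oob_was[] table above is assembled the same way as the per-GT OOB table: the body of the array is spliced in from a build-generated file, presumably produced by the same OOB generator from a device-level rules file following the existing xe_wa_oob.rules pattern. A rough, assumed illustration of that flow is sketched below; the workaround ID 1234567890 and its rule are placeholders, not entries from the real rules file.

/*
 * Assumed shape of the generated artifacts (placeholder ID and rule only):
 *
 * device rules file:
 *      1234567890      PLATFORM(PANTHERLAKE)
 *
 * generated/xe_device_wa_oob.h, queried via XE_DEVICE_WA_OOB_<id> and
 * bounded by _XE_DEVICE_WA_OOB_COUNT:
 *      XE_DEVICE_WA_OOB_1234567890,
 *      _XE_DEVICE_WA_OOB_COUNT
 *
 * generated/xe_device_wa_oob.c, spliced into device_oob_was[] above:
 */
{ XE_RTP_NAME("1234567890"),
  XE_RTP_RULES(PLATFORM(PANTHERLAKE)) },
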
+static_assert(ARRAY_SIZE(device_oob_was) - 1 == _XE_DEVICE_WA_OOB_COUNT); + __diag_pop(); /** + * xe_wa_process_device_oob - process OOB workaround table + * @xe: device instance to process workarounds for + * + * process OOB workaround table for this device, marking in @xe the + * workarounds that are active. + */ + +void xe_wa_process_device_oob(struct xe_device *xe) +{ + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(xe); + + xe_rtp_process_ctx_enable_active_tracking(&ctx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was)); + + xe->wa_active.oob_initialized = true; + xe_rtp_process(&ctx, device_oob_was); +} + +/** * xe_wa_process_oob - process OOB workaround table * @gt: GT instance to process workarounds for * @@ -923,6 +973,28 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe) } /** + * xe_wa_device_init - initialize device with workaround oob bookkeeping + * @xe: Xe device instance to initialize + * + * Returns 0 for success, negative with error code otherwise + */ +int xe_wa_device_init(struct xe_device *xe) +{ + unsigned long *p; + + p = drmm_kzalloc(&xe->drm, + sizeof(*p) * BITS_TO_LONGS(ARRAY_SIZE(device_oob_was)), + GFP_KERNEL); + + if (!p) + return -ENOMEM; + + xe->wa_active.oob = p; + + return 0; +} + +/** * xe_wa_init - initialize gt with workaround bookkeeping * @gt: GT instance to initialize * @@ -956,6 +1028,16 @@ int xe_wa_init(struct xe_gt *gt) } ALLOW_ERROR_INJECTION(xe_wa_init, ERRNO); /* See xe_pci_probe() */ +void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p) +{ + size_t idx; + + drm_printf(p, "Device OOB Workarounds\n"); + for_each_set_bit(idx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was)) + if (device_oob_was[idx].name) + drm_printf_indent(p, 1, "%s\n", device_oob_was[idx].name); +} + void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p) { size_t idx; diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h index 52337405b5bc..f3880c65cb8d 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -13,17 +13,19 @@ struct xe_gt; struct xe_hw_engine; struct xe_tile; +int xe_wa_device_init(struct xe_device *xe); int xe_wa_init(struct xe_gt *gt); +void xe_wa_process_device_oob(struct xe_device *xe); void xe_wa_process_oob(struct xe_gt *gt); void xe_wa_process_gt(struct xe_gt *gt); void xe_wa_process_engine(struct xe_hw_engine *hwe); void xe_wa_process_lrc(struct xe_hw_engine *hwe); void xe_wa_apply_tile_workarounds(struct xe_tile *tile); +void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p); void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); /** - * XE_WA - Out-of-band workarounds, that don't fit the lifecycle any - * other more specific type + * XE_WA - Out-of-band workarounds, to be queried and called as needed. * @gt__: gt instance * @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h */ @@ -32,4 +34,20 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob); \ }) +/** + * XE_DEVICE_WA - Out-of-band Device workarounds, to be queried and called + * as needed. 
+ * @xe__: xe_device + * @id__: XE_DEVICE_WA_OOB_<id__>, as generated by build system in generated/xe_device_wa_oob.h + */ +#define XE_DEVICE_WA(xe__, id__) ({ \ + xe_assert(xe__, (xe__)->wa_active.oob_initialized); \ + test_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \ +}) + +#define XE_DEVICE_WA_DISABLE(xe__, id__) ({ \ + xe_assert(xe__, (xe__)->wa_active.oob_initialized); \ + clear_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \ +}) + #endif diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 9efc5accd43d..e990f20eccfe 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -21,7 +21,8 @@ GRAPHICS_VERSION_RANGE(1270, 1274) MEDIA_VERSION(1300) PLATFORM(DG2) -14018094691 GRAPHICS_VERSION(2004) +14018094691 GRAPHICS_VERSION_RANGE(2001, 2002) + GRAPHICS_VERSION(2004) 14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) 18024947630 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) @@ -30,17 +31,19 @@ GRAPHICS_VERSION(2004) 13011645652 GRAPHICS_VERSION(2004) GRAPHICS_VERSION(3001) -14022293748 GRAPHICS_VERSION(2001) +14022293748 GRAPHICS_VERSION_RANGE(2001, 2002) GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) -22019794406 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(3003) +22019794406 GRAPHICS_VERSION_RANGE(2001, 2002) GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) + GRAPHICS_VERSION(3003) 22019338487 MEDIA_VERSION(2000) - GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) 22019338487_display PLATFORM(LUNARLAKE) -16023588340 GRAPHICS_VERSION(2001) +16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) 14019789679 GRAPHICS_VERSION(1255) GRAPHICS_VERSION_RANGE(1270, 2004) no_media_l3 MEDIA_VERSION(3000) @@ -57,5 +60,15 @@ no_media_l3 MEDIA_VERSION(3000) GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0) 16023105232 GRAPHICS_VERSION_RANGE(2001, 3001) MEDIA_VERSION_RANGE(1301, 3000) + MEDIA_VERSION(3002) + GRAPHICS_VERSION(3003) 16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) MEDIA_VERSION_RANGE(1300, 3000) + MEDIA_VERSION(3002) + GRAPHICS_VERSION(3003) + +# SoC workaround - currently applies to all platforms with the following +# primary GT GMDID +14022085890 GRAPHICS_VERSION(2001) + +15015404425_disable PLATFORM(PANTHERLAKE), MEDIA_STEP(B0, FOREVER) |
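
Taken together, the new device-scope pieces form a small lifecycle: allocate the tracking bitmap, evaluate the rules once per device, then query (or clear) individual workarounds by ID. The sketch below shows that assumed ordering from a hypothetical probe-time caller; example_device_probe(), apply_soc_level_workaround(), soc_handles_it_in_firmware() and the ID 1234567890 are placeholders, not code from this series.

/*
 * Hypothetical consumer of the device-level OOB workaround tracking.
 * All names other than the xe_wa_* helpers and the XE_DEVICE_WA* macros
 * are placeholders for illustration only.
 */
static int example_device_probe(struct xe_device *xe)
{
        int err;

        /* Allocate xe->wa_active.oob before any rules are evaluated. */
        err = xe_wa_device_init(xe);
        if (err)
                return err;

        /* Match the device OOB rules against this device once. */
        xe_wa_process_device_oob(xe);

        /* Anywhere later, gate code on a device workaround by ID ... */
        if (XE_DEVICE_WA(xe, 1234567890))
                apply_soc_level_workaround(xe);

        /* ... or clear one that a runtime check shows is not needed. */
        if (soc_handles_it_in_firmware(xe))
                XE_DEVICE_WA_DISABLE(xe, 1234567890);

        return 0;
}

The GT-scoped entries added to xe_wa_oob.rules at the end of this diff (14022085890 and 15015404425_disable) are queried the same way through the existing XE_WA(gt__, id__) macro.
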