Diffstat (limited to 'drivers/gpu/drm/xe')
208 files changed, 10193 insertions, 4127 deletions
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 5c2f459a2925..2bb2bc052120 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -1,7 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config DRM_XE
-	tristate "Intel Xe Graphics"
-	depends on DRM && PCI && MMU && (m || (y && KUNIT=y))
+	tristate "Intel Xe2 Graphics"
+	depends on DRM && PCI
+	depends on KUNIT || !KUNIT
+	depends on INTEL_VSEC || !INTEL_VSEC
+	depends on X86_PLATFORM_DEVICES || !(X86 && ACPI)
 	select INTERVAL_TREE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
@@ -27,10 +30,8 @@ config DRM_XE
 	select BACKLIGHT_CLASS_DEVICE if ACPI
 	select INPUT if ACPI
 	select ACPI_VIDEO if X86 && ACPI
-	select X86_PLATFORM_DEVICES if X86 && ACPI
 	select ACPI_WMI if X86 && ACPI
 	select SYNC_FILE
-	select IOSF_MBI
 	select CRC32
 	select SND_HDA_I915 if SND_HDA_CORE
 	select CEC_CORE if CEC_NOTIFIER
@@ -39,14 +40,15 @@ config DRM_XE
 	select DRM_TTM_HELPER
 	select DRM_EXEC
 	select DRM_GPUVM
-	select DRM_GPUSVM if !UML && DEVICE_PRIVATE
 	select DRM_SCHED
 	select MMU_NOTIFIER
 	select WANT_DEV_COREDUMP
 	select AUXILIARY_BUS
 	select HMM_MIRROR
+	select REGMAP if I2C
 	help
-	  Experimental driver for Intel Xe series GPUs
+	  Driver for Intel Xe2 series GPUs and later. Experimental support
+	  for Xe series is also available.
 
 	  If "M" is selected, the module will be called xe.
 
@@ -74,14 +76,29 @@ config DRM_XE_DP_TUNNEL
 	  If in doubt say "Y".
 
-config DRM_XE_DEVMEM_MIRROR
-	bool "Enable device memory mirror"
+config DRM_XE_GPUSVM
+	bool "Enable CPU to GPU address mirroring"
 	depends on DRM_XE
+	depends on !UML
+	depends on DEVICE_PRIVATE
+	default y
+	select DRM_GPUSVM
+	help
+	  Enable this option if you want support for CPU to GPU address
+	  mirroring.
+
+	  If in doubt say "Y".
+
+config DRM_XE_PAGEMAP
+	bool "Enable device memory pool for SVM"
+	depends on DRM_XE_GPUSVM
 	select GET_FREE_REGION
 	default y
 	help
-	  Disable this option only if you want to compile out without device
-	  memory mirror. Will reduce KMD memory footprint when disabled.
+	  Disable this option only if you don't want to expose local device
+	  memory for SVM. Will reduce KMD memory footprint when disabled.
+
+	  If in doubt say "Y".
 
 config DRM_XE_FORCE_PROBE
 	string "Force probe xe for selected Intel hardware IDs"
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 0d749ed44878..01735c6ece8b 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -86,12 +86,17 @@ config DRM_XE_KUNIT_TEST
 	  If in doubt, say "N".
 
-config DRM_XE_LARGE_GUC_BUFFER
-	bool "Enable larger guc log buffer"
+config DRM_XE_DEBUG_GUC
+	bool "Enable extra GuC related debug options"
+	depends on DRM_XE_DEBUG
 	default n
+	select STACKDEPOT
 	help
 	  Choose this option when debugging guc issues.
-	  Buffer should be large enough for complex issues.
+	  The GuC log buffer is increased to the maximum allowed, which should
+	  be large enough for complex issues. The tracking of FAST_REQ messages
+	  is extended to include a record of the calling stack, which is then
+	  dumped on a FAST_REQ error notification.
 
 	  Recommended for driver developers only.
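
As a point of reference for the DRM_XE_DEBUG_GUC help text above, the following is a minimal, hypothetical sketch of the generic stackdepot pattern that help text describes: record a calling stack when a FAST_REQ is sent, then print it when an error notification arrives. It uses only the standard <linux/stackdepot.h> and <linux/stacktrace.h> APIs; the function names are illustrative and are not the xe driver's actual implementation.

/*
 * Illustrative sketch only, not xe driver code. Shows the generic
 * stackdepot usage that CONFIG_DRM_XE_DEBUG_GUC's FAST_REQ tracking
 * builds on (hence the "select STACKDEPOT" above).
 */
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

static depot_stack_handle_t record_fast_req_stack(void)
{
	unsigned long entries[16];
	unsigned int nr;

	/* Capture the current call chain, skipping this helper's own frame. */
	nr = stack_trace_save(entries, ARRAY_SIZE(entries), 1);

	/* Deduplicate and store it; the compact handle is cheap to keep per request. */
	return stack_depot_save(entries, nr, GFP_ATOMIC);
}

static void dump_fast_req_stack(depot_stack_handle_t handle)
{
	/* On a FAST_REQ error notification, print the saved call chain. */
	if (handle)
		stack_depot_print(handle);
}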
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 9699b08585f7..07c71a29963d 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -21,6 +21,13 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ $(src)/xe_wa_oob.rules $(call cmd,wa_oob) +generated_device_oob := $(obj)/generated/xe_device_wa_oob.c $(obj)/generated/xe_device_wa_oob.h +quiet_cmd_device_wa_oob = GEN $(notdir $(generated_device_oob)) + cmd_device_wa_oob = mkdir -p $(@D); $^ $(generated_device_oob) +$(obj)/generated/%_device_wa_oob.c $(obj)/generated/%_device_wa_oob.h: $(obj)/xe_gen_wa_oob \ + $(src)/xe_device_wa_oob.rules + $(call cmd,device_wa_oob) + # Please keep these build lists sorted! # core driver code @@ -80,6 +87,7 @@ xe-y += xe_bb.o \ xe_mmio.o \ xe_mocs.o \ xe_module.o \ + xe_nvm.o \ xe_oa.o \ xe_observation.o \ xe_pat.o \ @@ -124,13 +132,15 @@ xe-y += xe_bb.o \ xe_wait_user_fence.o \ xe_wopcm.o +xe-$(CONFIG_I2C) += xe_i2c.o xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o -xe-$(CONFIG_DRM_GPUSVM) += xe_svm.o +xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o # graphics hardware monitoring (HWMON) support xe-$(CONFIG_HWMON) += xe_hwmon.o xe-$(CONFIG_PERF_EVENTS) += xe_pmu.o +xe-$(CONFIG_CONFIGFS_FS) += xe_configfs.o # graphics virtualization (SR-IOV) support xe-y += \ @@ -138,7 +148,8 @@ xe-y += \ xe_guc_relay.o \ xe_memirq.o \ xe_sriov.o \ - xe_sriov_vf.o + xe_sriov_vf.o \ + xe_tile_sriov_vf.o xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ @@ -152,7 +163,8 @@ xe-$(CONFIG_PCI_IOV) += \ xe_lmtt_2l.o \ xe_lmtt_ml.o \ xe_pci_sriov.o \ - xe_sriov_pf.o + xe_sriov_pf.o \ + xe_sriov_pf_service.o # include helpers for tests even when XE is built-in ifdef CONFIG_DRM_XE_KUNIT_TEST @@ -185,7 +197,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ display/intel_fbdev_fb.o \ display/xe_display.o \ display/xe_display_misc.o \ - display/xe_display_rps.o \ + display/xe_display_rpm.o \ display/xe_display_wa.o \ display/xe_dsb_buffer.o \ display/xe_fb_pin.o \ @@ -196,7 +208,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ # SOC code shared with i915 xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-soc/intel_dram.o \ - i915-soc/intel_pch.o \ i915-soc/intel_rom.o # Display code shared with i915 @@ -204,7 +215,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/icl_dsi.o \ i915-display/intel_alpm.o \ i915-display/intel_atomic.o \ - i915-display/intel_atomic_plane.o \ i915-display/intel_audio.o \ i915-display/intel_backlight.o \ i915-display/intel_bios.o \ @@ -254,6 +264,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_fbc.o \ i915-display/intel_fdi.o \ i915-display/intel_fifo_underrun.o \ + i915-display/intel_flipq.o \ i915-display/intel_frontbuffer.o \ i915-display/intel_global_state.o \ i915-display/intel_gmbus.o \ @@ -270,7 +281,9 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_modeset_verify.o \ i915-display/intel_panel.o \ i915-display/intel_pfit.o \ + i915-display/intel_plane.o \ i915-display/intel_pmdemand.o \ + i915-display/intel_pch.o \ i915-display/intel_pps.o \ i915-display/intel_psr.o \ i915-display/intel_qp_tables.o \ @@ -336,4 +349,4 @@ $(obj)/%.hdrtest: $(src)/%.h FORCE $(call if_changed_dep,hdrtest) uses_generated_oob := $(addprefix $(obj)/, $(xe-y)) -$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h +$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h $(obj)/generated/xe_device_wa_oob.h diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index ec516e838ee8..81eb046aeebf 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ 
b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -141,6 +141,8 @@ enum xe_guc_action { XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C, + XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER = 0x550D, + XE_GUC_ACTION_OPT_IN_FEATURE_KLV = 0x550E, XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000, XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002, XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003, @@ -160,6 +162,37 @@ enum xe_guc_preempt_options { XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, }; +enum xe_guc_register_context_param_offsets { + XE_GUC_REGISTER_CONTEXT_DATA_0_MBZ = 0, + XE_GUC_REGISTER_CONTEXT_DATA_1_FLAGS, + XE_GUC_REGISTER_CONTEXT_DATA_2_CONTEXT_INDEX, + XE_GUC_REGISTER_CONTEXT_DATA_3_ENGINE_CLASS, + XE_GUC_REGISTER_CONTEXT_DATA_4_ENGINE_SUBMIT_MASK, + XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER, + XE_GUC_REGISTER_CONTEXT_DATA_6_WQ_DESC_ADDR_UPPER, + XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER, + XE_GUC_REGISTER_CONTEXT_DATA_8_WQ_BUF_BASE_UPPER, + XE_GUC_REGISTER_CONTEXT_DATA_9_WQ_BUF_SIZE, + XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR, + XE_GUC_REGISTER_CONTEXT_MSG_LEN, +}; + +enum xe_guc_register_context_multi_lrc_param_offsets { + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_0_MBZ = 0, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_1_FLAGS, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_2_PARENT_CONTEXT, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_3_ENGINE_CLASS, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_4_ENGINE_SUBMIT_MASK, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_6_WQ_DESC_ADDR_UPPER, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_8_WQ_BUF_BASE_UPPER, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_9_WQ_BUF_SIZE, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR, + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN = 11, +}; + enum xe_guc_report_status { XE_GUC_REPORT_STATUS_UNKNOWN = 0x0, XE_GUC_REPORT_STATUS_ACKED = 0x1, @@ -239,4 +272,7 @@ enum xe_guc_g2g_type { #define XE_G2G_DEREGISTER_TILE REG_GENMASK(15, 12) #define XE_G2G_DEREGISTER_TYPE REG_GENMASK(11, 8) +/* invalid type for XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR */ +#define XE_GUC_CAT_ERR_TYPE_INVALID 0xdeadbeef + #endif diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h index 2c627a21648f..ecf748fd87df 100644 --- a/drivers/gpu/drm/xe/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h @@ -6,8 +6,7 @@ #ifndef _ABI_GUC_ERRORS_ABI_H #define _ABI_GUC_ERRORS_ABI_H -enum xe_guc_response_status { - XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0, +enum xe_guc_response { XE_GUC_RESPONSE_ERROR_PROTOCOL = 0x04, XE_GUC_RESPONSE_INVALID_STATE = 0x0A, XE_GUC_RESPONSE_UNSUPPORTED_VERSION = 0x0B, @@ -21,12 +20,20 @@ enum xe_guc_response_status { XE_GUC_RESPONSE_CANNOT_COMPLETE_ACTION = 0x41, XE_GUC_RESPONSE_INVALID_KLV_DATA = 0x50, XE_GUC_RESPONSE_INVALID_PARAMS = 0x60, + XE_GUC_RESPONSE_INVALID_CONTEXT_INDEX = 0x61, + XE_GUC_RESPONSE_INVALID_CONTEXT_REGISTRATION = 0x62, + XE_GUC_RESPONSE_INVALID_DOORBELL_ID = 0x63, + XE_GUC_RESPONSE_INVALID_ENGINE_ID = 0x64, XE_GUC_RESPONSE_INVALID_BUFFER_RANGE = 0x70, XE_GUC_RESPONSE_INVALID_BUFFER = 0x71, + XE_GUC_RESPONSE_BUFFER_ALREADY_REGISTERED = 0x72, XE_GUC_RESPONSE_INVALID_GGTT_ADDRESS = 0x80, XE_GUC_RESPONSE_PENDING_ACTION = 0x90, + XE_GUC_RESPONSE_CONTEXT_NOT_REGISTERED = 0x100, + 
XE_GUC_RESPONSE_CONTEXT_ALREADY_REGISTERED = 0X101, XE_GUC_RESPONSE_INVALID_SIZE = 0x102, XE_GUC_RESPONSE_MALFORMED_KLV = 0x103, + XE_GUC_RESPONSE_INVALID_CONTEXT = 0x104, XE_GUC_RESPONSE_INVALID_KLV_KEY = 0x105, XE_GUC_RESPONSE_DATA_TOO_LARGE = 0x106, XE_GUC_RESPONSE_VF_MIGRATED = 0x107, @@ -40,10 +47,11 @@ enum xe_guc_response_status { XE_GUC_RESPONSE_CTB_NOT_REGISTERED = 0x304, XE_GUC_RESPONSE_CTB_IN_USE = 0x305, XE_GUC_RESPONSE_CTB_INVALID_DESC = 0x306, + XE_GUC_RESPONSE_HW_TIMEOUT = 0x30C, XE_GUC_RESPONSE_CTB_SOURCE_INVALID_DESCRIPTOR = 0x30D, XE_GUC_RESPONSE_CTB_DESTINATION_INVALID_DESCRIPTOR = 0x30E, XE_GUC_RESPONSE_INVALID_CONFIG_STATE = 0x30F, - XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, + XE_GUC_RESPONSE_GENERIC_FAIL = 0xF000, }; enum xe_guc_load_status { diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index d633f1c739e4..0366a9da5977 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -16,6 +16,7 @@ * +===+=======+==============================================================+ * | 0 | 31:16 | **KEY** - KLV key identifier | * | | | - `GuC Self Config KLVs`_ | + * | | | - `GuC Opt In Feature KLVs`_ | * | | | - `GuC VGT Policy KLVs`_ | * | | | - `GuC VF Configuration KLVs`_ | * | | | | @@ -125,6 +126,33 @@ enum { }; /** + * DOC: GuC Opt In Feature KLVs + * + * `GuC KLV`_ keys available for use with OPT_IN_FEATURE_KLV + * + * _`GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE` : 0x4001 + * Adds an extra dword to the XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR G2H + * containing the type of the CAT error. On HW that does not support + * reporting the CAT error type, the extra dword is set to 0xdeadbeef. + * + * _`GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH` : 0x4003 + * This KLV enables the Dynamic Inhibit Context Switch optimization, which + * consists in the GuC setting the CTX_CTRL_INHIBIT_SYN_CTX_SWITCH bit to + * zero in the CTX_CONTEXT_CONTROL register of LRCs that are submitted + * to an oversubscribed engine. This will cause those contexts to be + * switched out immediately if they hit an unsatisfied semaphore wait + * (instead of waiting the full timeslice duration). The bit is instead set + * to one if a single context is queued on the engine, to avoid it being + * switched out if there isn't another context that can run in its place. + */ + +#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_KEY 0x4001 +#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_LEN 0u + +#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_KEY 0x4003 +#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN 0u + +/** * DOC: GuC VGT Policy KLVs * * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY. 
@@ -367,6 +395,7 @@ enum xe_guc_klv_ids { GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a, + GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH = 0x900b, }; #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h b/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h deleted file mode 100644 index 21fec9cc837c..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef __INTEL_RPS_H__ -#define __INTEL_RPS_H__ - -#define gen5_rps_irq_handler(x) ({}) - -#endif /* __INTEL_RPS_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index dfec5108d2c3..9b7572e06f34 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -13,7 +13,6 @@ #include <drm/drm_drv.h> #include "i915_utils.h" -#include "intel_runtime_pm.h" #include "xe_device.h" /* for xe_device_has_flat_ccs() */ #include "xe_device_types.h" @@ -22,28 +21,12 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) return container_of(dev, struct drm_i915_private, drm); } +/* compat platform checks only for soc/ usage */ #define IS_PLATFORM(xe, x) ((xe)->info.platform == x) -#define INTEL_INFO(dev_priv) (&((dev_priv)->info)) -#define IS_I830(dev_priv) (dev_priv && 0) -#define IS_I845G(dev_priv) (dev_priv && 0) -#define IS_I85X(dev_priv) (dev_priv && 0) -#define IS_I865G(dev_priv) (dev_priv && 0) #define IS_I915G(dev_priv) (dev_priv && 0) #define IS_I915GM(dev_priv) (dev_priv && 0) -#define IS_I945G(dev_priv) (dev_priv && 0) -#define IS_I945GM(dev_priv) (dev_priv && 0) -#define IS_I965G(dev_priv) (dev_priv && 0) -#define IS_I965GM(dev_priv) (dev_priv && 0) -#define IS_G45(dev_priv) (dev_priv && 0) -#define IS_GM45(dev_priv) (dev_priv && 0) -#define IS_G4X(dev_priv) (dev_priv && 0) #define IS_PINEVIEW(dev_priv) (dev_priv && 0) -#define IS_G33(dev_priv) (dev_priv && 0) -#define IS_IRONLAKE(dev_priv) (dev_priv && 0) -#define IS_IRONLAKE_M(dev_priv) (dev_priv && 0) -#define IS_SANDYBRIDGE(dev_priv) (dev_priv && 0) #define IS_IVYBRIDGE(dev_priv) (dev_priv && 0) -#define IS_IVB_GT1(dev_priv) (dev_priv && 0) #define IS_VALLEYVIEW(dev_priv) (dev_priv && 0) #define IS_CHERRYVIEW(dev_priv) (dev_priv && 0) #define IS_HASWELL(dev_priv) (dev_priv && 0) @@ -71,39 +54,10 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) #define IS_HASWELL_ULT(dev_priv) (dev_priv && 0) #define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0) -#define IS_BROADWELL_ULX(dev_priv) (dev_priv && 0) #define IS_MOBILE(xe) (xe && 0) -#define IS_TIGERLAKE_UY(xe) (xe && 0) -#define IS_COMETLAKE_ULX(xe) (xe && 0) -#define IS_COFFEELAKE_ULX(xe) (xe && 0) -#define IS_KABYLAKE_ULX(xe) (xe && 0) -#define IS_SKYLAKE_ULX(xe) (xe && 0) -#define IS_HASWELL_ULX(xe) (xe && 0) -#define IS_COMETLAKE_ULT(xe) (xe && 0) -#define IS_COFFEELAKE_ULT(xe) (xe && 0) -#define IS_KABYLAKE_ULT(xe) (xe && 0) -#define IS_SKYLAKE_ULT(xe) (xe && 0) - -#define IS_DG2_G10(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G10) -#define IS_DG2_G11(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G11) -#define IS_DG2_G12(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G12) -#define IS_RAPTORLAKE_U(xe) ((xe)->info.subplatform == 
XE_SUBPLATFORM_ALDERLAKE_P_RPLU) -#define IS_ICL_WITH_PORT_F(xe) (xe && 0) #define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe)) - #define HAS_128_BYTE_Y_TILING(xe) (xe || 1) -#ifdef CONFIG_ARM64 -/* - * arm64 indirectly includes linux/rtc.h, - * which defines a irq_lock, so include it - * here before #define-ing it - */ -#include <linux/rtc.h> -#endif - -#define irq_lock irq.lock - #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h index a473aa6697d0..4fcd3bf6b76f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h @@ -6,37 +6,6 @@ #ifndef __INTEL_PCODE_H__ #define __INTEL_PCODE_H__ -#include "intel_uncore.h" #include "xe_pcode.h" -static inline int -snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val, - int fast_timeout_us, int slow_timeout_ms) -{ - return xe_pcode_write_timeout(__compat_uncore_to_tile(uncore), mbox, val, - slow_timeout_ms ?: 1); -} - -static inline int -snb_pcode_write(struct intel_uncore *uncore, u32 mbox, u32 val) -{ - - return xe_pcode_write(__compat_uncore_to_tile(uncore), mbox, val); -} - -static inline int -snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1) -{ - return xe_pcode_read(__compat_uncore_to_tile(uncore), mbox, val, val1); -} - -static inline int -skl_pcode_request(struct intel_uncore *uncore, u32 mbox, - u32 request, u32 reply_mask, u32 reply, - int timeout_base_ms) -{ - return xe_pcode_request(__compat_uncore_to_tile(uncore), mbox, request, reply_mask, reply, - timeout_base_ms); -} - #endif /* __INTEL_PCODE_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h deleted file mode 100644 index 274042bff1be..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h +++ /dev/null @@ -1,76 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef __INTEL_RUNTIME_PM_H__ -#define __INTEL_RUNTIME_PM_H__ - -#include "intel_wakeref.h" -#include "xe_device_types.h" -#include "xe_pm.h" - -#define intel_runtime_pm xe_runtime_pm - -static inline void disable_rpm_wakeref_asserts(void *rpm) -{ -} - -static inline void enable_rpm_wakeref_asserts(void *rpm) -{ -} - -static inline bool -intel_runtime_pm_suspended(struct xe_runtime_pm *pm) -{ - struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); - - return pm_runtime_suspended(xe->drm.dev); -} - -static inline intel_wakeref_t intel_runtime_pm_get(struct xe_runtime_pm *pm) -{ - struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); - - return xe_pm_runtime_resume_and_get(xe) ? INTEL_WAKEREF_DEF : NULL; -} - -static inline intel_wakeref_t intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm) -{ - struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); - - return xe_pm_runtime_get_if_in_use(xe) ? 
INTEL_WAKEREF_DEF : NULL; -} - -static inline intel_wakeref_t intel_runtime_pm_get_noresume(struct xe_runtime_pm *pm) -{ - struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); - - xe_pm_runtime_get_noresume(xe); - - return INTEL_WAKEREF_DEF; -} - -static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm) -{ - struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); - - xe_pm_runtime_put(xe); -} - -static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, intel_wakeref_t wakeref) -{ - if (wakeref) - intel_runtime_pm_put_unchecked(pm); -} - -#define intel_runtime_pm_get_raw intel_runtime_pm_get -#define intel_runtime_pm_put_raw intel_runtime_pm_put -#define assert_rpm_wakelock_held(x) do { } while (0) -#define assert_rpm_raw_wakeref_held(x) do { } while (0) - -#define with_intel_runtime_pm(rpm, wf) \ - for ((wf) = intel_runtime_pm_get(rpm); (wf); \ - intel_runtime_pm_put((rpm), (wf)), (wf) = NULL) - -#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h index 0c1e88e36a1e..d012f02bc84f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -24,13 +24,6 @@ static inline struct xe_mmio *__compat_uncore_to_mmio(struct intel_uncore *uncor return xe_root_tile_mmio(xe); } -static inline struct xe_tile *__compat_uncore_to_tile(struct intel_uncore *uncore) -{ - struct xe_device *xe = container_of(uncore, struct xe_device, uncore); - - return xe_device_get_root_tile(xe); -} - static inline u32 intel_uncore_read(struct intel_uncore *uncore, i915_reg_t i915_reg) { @@ -110,12 +103,13 @@ static inline int intel_wait_for_register(struct intel_uncore *uncore, static inline int intel_wait_for_register_fw(struct intel_uncore *uncore, i915_reg_t i915_reg, u32 mask, - u32 value, unsigned int timeout) + u32 value, unsigned int timeout, + u32 *out_value) { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value, - timeout * USEC_PER_MSEC, NULL, false); + timeout * USEC_PER_MSEC, out_value, false); } static inline int diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h deleted file mode 100644 index 9c46556d33a4..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#include "../../../i915/soc/intel_pch.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h new file mode 100644 index 000000000000..69e1935e9cdf --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2013-2021 Intel Corporation + */ + +#ifndef _VLV_IOSF_SB_H_ +#define _VLV_IOSF_SB_H_ + +#include <linux/types.h> + +#include "vlv_iosf_sb_reg.h" + +struct drm_device; + +enum vlv_iosf_sb_unit { + VLV_IOSF_SB_BUNIT, + VLV_IOSF_SB_CCK, + VLV_IOSF_SB_CCU, + VLV_IOSF_SB_DPIO, + VLV_IOSF_SB_DPIO_2, + VLV_IOSF_SB_FLISDSI, + VLV_IOSF_SB_GPIO, + VLV_IOSF_SB_NC, + VLV_IOSF_SB_PUNIT, +}; + +static inline void vlv_iosf_sb_get(struct drm_device *drm, unsigned long ports) +{ +} +static inline u32 vlv_iosf_sb_read(struct drm_device *drm, enum vlv_iosf_sb_unit unit, u32 addr) +{ + return 0; +} +static inline int 
vlv_iosf_sb_write(struct drm_device *drm, enum vlv_iosf_sb_unit unit, u32 addr, u32 val) +{ + return 0; +} +static inline void vlv_iosf_sb_put(struct drm_device *drm, unsigned long ports) +{ +} + +#endif /* _VLV_IOSF_SB_H_ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb_reg.h index 949f134ce3cf..cb7fa8e794a6 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb_reg.h @@ -3,4 +3,4 @@ * Copyright © 2023 Intel Corporation */ -#include "../../i915/vlv_sideband_reg.h" +#include "../../i915/vlv_iosf_sb_reg.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h deleted file mode 100644 index ec6f12de5727..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h +++ /dev/null @@ -1,132 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2013-2021 Intel Corporation - */ - -#ifndef _VLV_SIDEBAND_H_ -#define _VLV_SIDEBAND_H_ - -#include <linux/types.h> - -#include "vlv_sideband_reg.h" - -enum pipe; -struct drm_i915_private; - -enum { - VLV_IOSF_SB_BUNIT, - VLV_IOSF_SB_CCK, - VLV_IOSF_SB_CCU, - VLV_IOSF_SB_DPIO, - VLV_IOSF_SB_FLISDSI, - VLV_IOSF_SB_GPIO, - VLV_IOSF_SB_NC, - VLV_IOSF_SB_PUNIT, -}; - -static inline void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports) -{ -} -static inline u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg) -{ - return 0; -} -static inline void vlv_iosf_sb_write(struct drm_i915_private *i915, - u8 port, u32 reg, u32 val) -{ -} -static inline void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports) -{ -} -static inline void vlv_bunit_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg) -{ - return 0; -} -static inline void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val) -{ -} -static inline void vlv_bunit_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_cck_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg) -{ - return 0; -} -static inline void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val) -{ -} -static inline void vlv_cck_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_ccu_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg) -{ - return 0; -} -static inline void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val) -{ -} -static inline void vlv_ccu_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_dpio_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_dpio_read(struct drm_i915_private *i915, int pipe, int reg) -{ - return 0; -} -static inline void vlv_dpio_write(struct drm_i915_private *i915, - int pipe, int reg, u32 val) -{ -} -static inline void vlv_dpio_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_flisdsi_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg) -{ - return 0; -} -static inline void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val) -{ -} -static inline void vlv_flisdsi_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_nc_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr) -{ - return 0; -} -static 
inline void vlv_nc_put(struct drm_i915_private *i915) -{ -} -static inline void vlv_punit_get(struct drm_i915_private *i915) -{ -} -static inline u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) -{ - return 0; -} -static inline int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val) -{ - return 0; -} -static inline void vlv_punit_put(struct drm_i915_private *i915) -{ -} - -#endif /* _VLV_SIDEBAND_H_ */ diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c index 27437c22bd70..910632f57c3d 100644 --- a/drivers/gpu/drm/xe/display/intel_bo.c +++ b/drivers/gpu/drm/xe/display/intel_bo.c @@ -1,7 +1,12 @@ // SPDX-License-Identifier: MIT /* Copyright © 2024 Intel Corporation */ +#include <drm/drm_cache.h> #include <drm/drm_gem.h> +#include <drm/drm_panic.h> + +#include "intel_fb.h" +#include "intel_display_types.h" #include "xe_bo.h" #include "intel_bo.h" @@ -59,3 +64,89 @@ void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj) { /* FIXME */ } + +struct xe_panic_data { + struct page **pages; + int page; + void *vaddr; +}; + +struct xe_framebuffer { + struct intel_framebuffer base; + struct xe_panic_data panic; +}; + +static inline struct xe_panic_data *to_xe_panic_data(struct intel_framebuffer *fb) +{ + return &container_of_const(fb, struct xe_framebuffer, base)->panic; +} + +static void xe_panic_kunmap(struct xe_panic_data *panic) +{ + if (panic->vaddr) { + drm_clflush_virt_range(panic->vaddr, PAGE_SIZE); + kunmap_local(panic->vaddr); + panic->vaddr = NULL; + } +} + +/* + * The scanout buffer pages are not mapped, so for each pixel, + * use kmap_local_page_try_from_panic() to map the page, and write the pixel. + * Try to keep the map from the previous pixel, to avoid too much map/unmap. 
+ */ +static void xe_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int x, + unsigned int y, u32 color) +{ + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; + struct xe_panic_data *panic = to_xe_panic_data(fb); + struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base)); + unsigned int new_page; + unsigned int offset; + + if (fb->panic_tiling) + offset = fb->panic_tiling(sb->width, x, y); + else + offset = y * sb->pitch[0] + x * sb->format->cpp[0]; + + new_page = offset >> PAGE_SHIFT; + offset = offset % PAGE_SIZE; + if (new_page != panic->page) { + xe_panic_kunmap(panic); + panic->page = new_page; + panic->vaddr = ttm_bo_kmap_try_from_panic(&bo->ttm, + panic->page); + } + if (panic->vaddr) { + u32 *pix = panic->vaddr + offset; + *pix = color; + } +} + +struct intel_framebuffer *intel_bo_alloc_framebuffer(void) +{ + struct xe_framebuffer *xe_fb; + + xe_fb = kzalloc(sizeof(*xe_fb), GFP_KERNEL); + if (xe_fb) + return &xe_fb->base; + return NULL; +} + +int intel_bo_panic_setup(struct drm_scanout_buffer *sb) +{ + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; + struct xe_panic_data *panic = to_xe_panic_data(fb); + + panic->page = -1; + sb->set_pixel = xe_panic_page_set_pixel; + return 0; +} + +void intel_bo_panic_finish(struct intel_framebuffer *fb) +{ + struct xe_panic_data *panic = to_xe_panic_data(fb); + + xe_panic_kunmap(panic); + panic->page = -1; +} diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index 3a1e505ff182..fba9617a75a5 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -5,6 +5,7 @@ #include <drm/drm_fb_helper.h> +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_fb.h" #include "intel_fbdev_fb.h" @@ -45,7 +46,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, NULL, size, ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT | XE_BO_FLAG_PINNED); + XE_BO_FLAG_GGTT); if (!IS_ERR(obj)) drm_info(&xe->drm, "Allocated fbdev into stolen\n"); else @@ -56,7 +57,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size, ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_FLAG_GGTT | XE_BO_FLAG_PINNED); + XE_BO_FLAG_GGTT); } if (IS_ERR(obj)) { @@ -65,7 +66,11 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, goto err; } - fb = intel_framebuffer_create(&obj->ttm.base, &mode_cmd); + fb = intel_framebuffer_create(&obj->ttm.base, + drm_get_format_info(dev, + mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd); if (IS_ERR(fb)) { xe_bo_unpin_map_no_vm(obj); goto err; @@ -79,11 +84,11 @@ err: return ERR_CAST(fb); } -int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, - struct drm_gem_object *_obj, struct i915_vma *vma) +int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, + struct drm_gem_object *_obj, struct i915_vma *vma) { struct xe_bo *obj = gem_to_xe_bo(_obj); - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct pci_dev *pdev = to_pci_dev(display->drm->dev); if (!(obj->flags & XE_BO_FLAG_SYSTEM)) { if (obj->flags & XE_BO_FLAG_STOLEN) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 0b0aca7a25af..e2e0771cf274 100644 --- 
a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -20,6 +20,7 @@ #include "intel_audio.h" #include "intel_bw.h" #include "intel_display.h" +#include "intel_display_core.h" #include "intel_display_driver.h" #include "intel_display_irq.h" #include "intel_display_types.h" @@ -38,7 +39,9 @@ static bool has_display(struct xe_device *xe) { - return HAS_DISPLAY(&xe->display); + struct intel_display *display = xe->display; + + return HAS_DISPLAY(display); } /** @@ -46,6 +49,8 @@ static bool has_display(struct xe_device *xe) * early on * @pdev: PCI device * + * Note: This is called before xe or display device creation. + * * Returns: true if probe needs to be deferred, false otherwise */ bool xe_display_driver_probe_defer(struct pci_dev *pdev) @@ -63,6 +68,8 @@ bool xe_display_driver_probe_defer(struct pci_dev *pdev) * Set features and function hooks in @driver that are needed for driving the * display IP. This sets the driver's capability of driving display, regardless * if the device has it enabled + * + * Note: This is called before xe or display device creation. */ void xe_display_driver_set_hooks(struct drm_driver *driver) { @@ -81,37 +88,10 @@ static void unset_display_features(struct xe_device *xe) xe->drm.driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); } -static void display_destroy(struct drm_device *dev, void *dummy) -{ - struct xe_device *xe = to_xe_device(dev); - - destroy_workqueue(xe->display.hotplug.dp_wq); -} - -/** - * xe_display_create - create display struct - * @xe: XE device instance - * - * Initialize all fields used by the display part. - * - * TODO: once everything can be inside a single struct, make the struct opaque - * to the rest of xe and return it to be xe->display. - * - * Returns: 0 on success - */ -int xe_display_create(struct xe_device *xe) -{ - spin_lock_init(&xe->display.fb_tracking.lock); - - xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0); - - return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); -} - static void xe_display_fini_early(void *arg) { struct xe_device *xe = arg; - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -124,7 +104,7 @@ static void xe_display_fini_early(void *arg) int xe_display_init_early(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; int err; if (!xe->info.probe_display) @@ -133,9 +113,6 @@ int xe_display_init_early(struct xe_device *xe) /* Fake uncore lock */ spin_lock_init(&xe->uncore.lock); - /* This must be called before any calls to HAS_PCH_* */ - intel_detect_pch(xe); - intel_display_driver_early_probe(display); /* Early display init.. */ @@ -145,9 +122,11 @@ int xe_display_init_early(struct xe_device *xe) * Fill the dram structure to get the system dram info. This will be * used for memory latency calculation. 
*/ - intel_dram_detect(xe); + err = intel_dram_detect(xe); + if (err) + goto err_opregion; - intel_bw_init_hw(xe); + intel_bw_init_hw(display); intel_display_device_info_runtime_init(display); @@ -171,9 +150,9 @@ err_opregion: static void xe_display_fini(void *arg) { struct xe_device *xe = arg; - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; - intel_hpd_poll_fini(xe); + intel_hpd_poll_fini(display); intel_hdcp_component_fini(display); intel_audio_deinit(display); intel_display_driver_remove(display); @@ -181,7 +160,7 @@ static void xe_display_fini(void *arg) int xe_display_init(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; int err; if (!xe->info.probe_display) @@ -196,7 +175,7 @@ int xe_display_init(struct xe_device *xe) void xe_display_register(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -207,7 +186,7 @@ void xe_display_register(struct xe_device *xe) void xe_display_unregister(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -220,16 +199,18 @@ void xe_display_unregister(struct xe_device *xe) void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) { + struct intel_display *display = xe->display; + if (!xe->info.probe_display) return; if (master_ctl & DISPLAY_IRQ) - gen11_display_irq_handler(xe); + gen11_display_irq_handler(display); } void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -240,19 +221,23 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) void xe_display_irq_reset(struct xe_device *xe) { + struct intel_display *display = xe->display; + if (!xe->info.probe_display) return; - gen11_display_irq_reset(xe); + gen11_display_irq_reset(display); } void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { + struct intel_display *display = xe->display; + if (!xe->info.probe_display) return; if (gt->info.id == XE_GT0) - gen11_de_irq_postinstall(xe); + gen11_de_irq_postinstall(display); } static bool suspend_to_idle(void) @@ -287,7 +272,7 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe) static void xe_display_enable_d3cold(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -305,12 +290,12 @@ static void xe_display_enable_d3cold(struct xe_device *xe) intel_dmc_suspend(display); if (has_display(xe)) - intel_hpd_poll_enable(xe); + intel_hpd_poll_enable(display); } static void xe_display_disable_d3cold(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -322,10 +307,10 @@ static void xe_display_disable_d3cold(struct xe_device *xe) intel_display_driver_init_hw(display); - intel_hpd_init(xe); + intel_hpd_init(display); if (has_display(xe)) - intel_hpd_poll_disable(xe); + intel_hpd_poll_disable(display); intel_opregion_resume(display); @@ -334,7 +319,7 @@ static void xe_display_disable_d3cold(struct xe_device *xe) void xe_display_pm_suspend(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display 
*display = xe->display; bool s2idle = suspend_to_idle(); if (!xe->info.probe_display) @@ -355,11 +340,11 @@ void xe_display_pm_suspend(struct xe_device *xe) xe_display_flush_cleanup_work(xe); - intel_hpd_cancel_work(xe); + intel_hpd_cancel_work(display); if (has_display(xe)) { intel_display_driver_suspend_access(display); - intel_encoder_suspend_all(&xe->display); + intel_encoder_suspend_all(display); } intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold); @@ -369,7 +354,7 @@ void xe_display_pm_suspend(struct xe_device *xe) void xe_display_pm_shutdown(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -385,7 +370,7 @@ void xe_display_pm_shutdown(struct xe_device *xe) xe_display_flush_cleanup_work(xe); intel_dp_mst_suspend(display); - intel_hpd_cancel_work(xe); + intel_hpd_cancel_work(display); if (has_display(xe)) intel_display_driver_suspend_access(display); @@ -400,6 +385,8 @@ void xe_display_pm_shutdown(struct xe_device *xe) void xe_display_pm_runtime_suspend(struct xe_device *xe) { + struct intel_display *display = xe->display; + if (!xe->info.probe_display) return; @@ -408,12 +395,12 @@ void xe_display_pm_runtime_suspend(struct xe_device *xe) return; } - intel_hpd_poll_enable(xe); + intel_hpd_poll_enable(display); } void xe_display_pm_suspend_late(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; bool s2idle = suspend_to_idle(); if (!xe->info.probe_display) @@ -424,7 +411,7 @@ void xe_display_pm_suspend_late(struct xe_device *xe) void xe_display_pm_runtime_suspend_late(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -442,7 +429,7 @@ void xe_display_pm_runtime_suspend_late(struct xe_device *xe) void xe_display_pm_shutdown_late(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -457,7 +444,7 @@ void xe_display_pm_shutdown_late(struct xe_device *xe) void xe_display_pm_resume_early(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -467,7 +454,7 @@ void xe_display_pm_resume_early(struct xe_device *xe) void xe_display_pm_resume(struct xe_device *xe) { - struct intel_display *display = &xe->display; + struct intel_display *display = xe->display; if (!xe->info.probe_display) return; @@ -482,7 +469,7 @@ void xe_display_pm_resume(struct xe_device *xe) if (has_display(xe)) intel_display_driver_resume_access(display); - intel_hpd_init(xe); + intel_hpd_init(display); if (has_display(xe)) { intel_display_driver_resume(display); @@ -491,7 +478,7 @@ void xe_display_pm_resume(struct xe_device *xe) } if (has_display(xe)) - intel_hpd_poll_disable(xe); + intel_hpd_poll_disable(display); intel_opregion_resume(display); @@ -502,6 +489,8 @@ void xe_display_pm_resume(struct xe_device *xe) void xe_display_pm_runtime_resume(struct xe_device *xe) { + struct intel_display *display = xe->display; + if (!xe->info.probe_display) return; @@ -510,9 +499,9 @@ void xe_display_pm_runtime_resume(struct xe_device *xe) return; } - intel_hpd_init(xe); - intel_hpd_poll_disable(xe); - skl_watermark_ipc_update(xe); + intel_hpd_init(display); + intel_hpd_poll_disable(display); + skl_watermark_ipc_update(display); } @@ 
-523,6 +512,17 @@ static void display_device_remove(struct drm_device *dev, void *arg) intel_display_device_remove(display); } +/** + * xe_display_probe - probe display and create display struct + * @xe: XE device instance + * + * Initialize all fields used by the display part. + * + * TODO: once everything can be inside a single struct, make the struct opaque + * to the rest of xe and return it to be xe->display. + * + * Returns: 0 on success + */ int xe_display_probe(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -533,11 +533,15 @@ int xe_display_probe(struct xe_device *xe) goto no_display; display = intel_display_device_probe(pdev); + if (IS_ERR(display)) + return PTR_ERR(display); err = drmm_add_action_or_reset(&xe->drm, display_device_remove, display); if (err) return err; + xe->display = display; + if (has_display(xe)) return 0; diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index 46e14f8dee28..e533aa4750bc 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -15,8 +15,6 @@ struct drm_driver; bool xe_display_driver_probe_defer(struct pci_dev *pdev); void xe_display_driver_set_hooks(struct drm_driver *driver); -int xe_display_create(struct xe_device *xe); - int xe_display_probe(struct xe_device *xe); int xe_display_init_early(struct xe_device *xe); @@ -46,8 +44,6 @@ static inline int xe_display_driver_probe_defer(struct pci_dev *pdev) { return 0 static inline void xe_display_driver_set_hooks(struct drm_driver *driver) { } static inline void xe_display_driver_remove(struct xe_device *xe) {} -static inline int xe_display_create(struct xe_device *xe) { return 0; } - static inline int xe_display_probe(struct xe_device *xe) { return 0; } static inline int xe_display_init_early(struct xe_device *xe) { return 0; } diff --git a/drivers/gpu/drm/xe/display/xe_display_rpm.c b/drivers/gpu/drm/xe/display/xe_display_rpm.c new file mode 100644 index 000000000000..3825376e98cc --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_display_rpm.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include "intel_display_core.h" +#include "intel_display_rpm.h" +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_pm.h" + +static struct xe_device *display_to_xe(struct intel_display *display) +{ + return to_xe_device(display->drm); +} + +struct ref_tracker *intel_display_rpm_get_raw(struct intel_display *display) +{ + return intel_display_rpm_get(display); +} + +void intel_display_rpm_put_raw(struct intel_display *display, struct ref_tracker *wakeref) +{ + intel_display_rpm_put(display, wakeref); +} + +struct ref_tracker *intel_display_rpm_get(struct intel_display *display) +{ + return xe_pm_runtime_resume_and_get(display_to_xe(display)) ? INTEL_WAKEREF_DEF : NULL; +} + +struct ref_tracker *intel_display_rpm_get_if_in_use(struct intel_display *display) +{ + return xe_pm_runtime_get_if_in_use(display_to_xe(display)) ? 
INTEL_WAKEREF_DEF : NULL; +} + +struct ref_tracker *intel_display_rpm_get_noresume(struct intel_display *display) +{ + xe_pm_runtime_get_noresume(display_to_xe(display)); + + return INTEL_WAKEREF_DEF; +} + +void intel_display_rpm_put(struct intel_display *display, struct ref_tracker *wakeref) +{ + if (wakeref) + xe_pm_runtime_put(display_to_xe(display)); +} + +void intel_display_rpm_put_unchecked(struct intel_display *display) +{ + xe_pm_runtime_put(display_to_xe(display)); +} + +bool intel_display_rpm_suspended(struct intel_display *display) +{ + struct xe_device *xe = display_to_xe(display); + + return pm_runtime_suspended(xe->drm.dev); +} + +void assert_display_rpm_held(struct intel_display *display) +{ + /* FIXME */ +} + +void intel_display_rpm_assert_block(struct intel_display *display) +{ + /* FIXME */ +} + +void intel_display_rpm_assert_unblock(struct intel_display *display) +{ + /* FIXME */ +} diff --git a/drivers/gpu/drm/xe/display/xe_display_rps.c b/drivers/gpu/drm/xe/display/xe_display_rps.c deleted file mode 100644 index fa616f9688a5..000000000000 --- a/drivers/gpu/drm/xe/display/xe_display_rps.c +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023 Intel Corporation - */ - -#include "intel_display_rps.h" - -void intel_display_rps_boost_after_vblank(struct drm_crtc *crtc, - struct dma_fence *fence) -{ -} - -void intel_display_rps_mark_interactive(struct intel_display *display, - struct intel_atomic_state *state, - bool interactive) -{ -} diff --git a/drivers/gpu/drm/xe/display/xe_display_wa.c b/drivers/gpu/drm/xe/display/xe_display_wa.c index 68e3d1959ad6..68d1387d81a0 100644 --- a/drivers/gpu/drm/xe/display/xe_display_wa.c +++ b/drivers/gpu/drm/xe/display/xe_display_wa.c @@ -3,14 +3,16 @@ * Copyright © 2024 Intel Corporation */ +#include "intel_display_core.h" #include "intel_display_wa.h" - #include "xe_device.h" #include "xe_wa.h" #include <generated/xe_wa_oob.h> -bool intel_display_needs_wa_16023588340(struct drm_i915_private *i915) +bool intel_display_needs_wa_16023588340(struct intel_display *display) { - return XE_WA(xe_root_mmio_gt(i915), 16023588340); + struct xe_device *xe = to_xe_device(display->drm); + + return XE_WA(xe_root_mmio_gt(xe), 16023588340); } diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index f95375451e2f..9f941fc2e36b 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); - xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -30,12 +27,9 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); - xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) @@ -74,9 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) { + struct xe_device *xe = 
dsb_buf->vma->bo->tile->xe; + /* * The memory barrier here is to ensure coherency of DSB vs MMIO, * both for weak ordering archs and discrete cards. */ - xe_device_wmb(dsb_buf->vma->bo->tile->xe); + xe_device_wmb(xe); + xe_device_l2_flush(xe); } diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index d918ae1c8061..c38fba18effe 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -6,6 +6,7 @@ #include <drm/ttm/ttm_bo.h> #include "i915_vma.h" +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_dpt.h" #include "intel_fb.h" @@ -23,6 +24,7 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ struct xe_device *xe = xe_bo_device(bo); struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; u32 column, row; + u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); /* TODO: Maybe rewrite so we can traverse the bo addresses sequentially, * by writing dpt/ggtt in a different order? @@ -32,10 +34,9 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ u32 src_idx = src_stride * (height - 1) + column + bo_ofs; for (row = 0; row < height; row++) { - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_NONE]); + u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE); - iosys_map_wr(map, *dpt_ofs, u64, pte); + iosys_map_wr(map, *dpt_ofs, u64, pte | addr); *dpt_ofs += 8; src_idx -= src_stride; } @@ -55,17 +56,15 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, { struct xe_device *xe = xe_bo_device(bo); struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; - u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index) - = ggtt->pt_ops->pte_encode_bo; u32 column, row; + u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); for (row = 0; row < height; row++) { u32 src_idx = src_stride * row + bo_ofs; for (column = 0; column < width; column++) { - iosys_map_wr(map, *dpt_ofs, u64, - pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_NONE])); + u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE); + iosys_map_wr(map, *dpt_ofs, u64, pte | addr); *dpt_ofs += 8; src_idx++; @@ -129,13 +128,13 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, return PTR_ERR(dpt); if (view->type == I915_GTT_VIEW_NORMAL) { + u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); u32 x; for (x = 0; x < size / XE_PAGE_SIZE; x++) { - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_NONE]); + u64 addr = xe_bo_addr(bo, x * XE_PAGE_SIZE, XE_PAGE_SIZE); - iosys_map_wr(&dpt->vmap, x * 8, u64, pte); + iosys_map_wr(&dpt->vmap, x * 8, u64, pte | addr); } } else if (view->type == I915_GTT_VIEW_REMAPPED) { const struct intel_remapped_info *remap_info = &view->remapped; @@ -164,6 +163,9 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, vma->dpt = dpt; vma->node = dpt->ggtt_node[tile0->id]; + + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return 0; } @@ -173,15 +175,15 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo { struct xe_device *xe = xe_bo_device(bo); u32 column, row; + u64 pte = ggtt->pt_ops->pte_encode_flags(bo, xe->pat.idx[XE_CACHE_NONE]); for (column = 0; column < width; column++) { u32 src_idx = src_stride * (height - 1) + column + bo_ofs; for (row = 0; row < 
height; row++) { - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_NONE]); + u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE); - ggtt->pt_ops->ggtt_set_pte(ggtt, *ggtt_ofs, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, *ggtt_ofs, pte | addr); *ggtt_ofs += XE_PAGE_SIZE; src_idx -= src_stride; } @@ -199,14 +201,15 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, struct drm_gem_object *obj = intel_fb_bo(&fb->base); struct xe_bo *bo = gem_to_xe_bo(obj); struct xe_device *xe = to_xe_device(fb->base.dev); - struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; + struct xe_tile *tile0 = xe_device_get_root_tile(xe); + struct xe_ggtt *ggtt = tile0->mem.ggtt; u32 align; int ret; /* TODO: Consider sharing framebuffer mapping? * embed i915_vma inside intel_framebuffer */ - xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); + xe_pm_runtime_get_noresume(xe); ret = mutex_lock_interruptible(&ggtt->lock); if (ret) goto out; @@ -215,29 +218,22 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) align = max_t(u32, align, SZ_64K); - if (bo->ggtt_node[ggtt->tile->id] && view->type == I915_GTT_VIEW_NORMAL) { - vma->node = bo->ggtt_node[ggtt->tile->id]; + if (bo->ggtt_node[tile0->id] && view->type == I915_GTT_VIEW_NORMAL) { + vma->node = bo->ggtt_node[tile0->id]; } else if (view->type == I915_GTT_VIEW_NORMAL) { - u32 x, size = bo->ttm.base.size; - vma->node = xe_ggtt_node_init(ggtt); if (IS_ERR(vma->node)) { ret = PTR_ERR(vma->node); goto out_unlock; } - ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); + ret = xe_ggtt_node_insert_locked(vma->node, xe_bo_size(bo), align, 0); if (ret) { xe_ggtt_node_fini(vma->node); goto out_unlock; } - for (x = 0; x < size; x += XE_PAGE_SIZE) { - u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, - xe->pat.idx[XE_CACHE_NONE]); - - ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node->base.start + x, pte); - } + xe_ggtt_map_bo(ggtt, vma->node, bo, xe->pat.idx[XE_CACHE_NONE]); } else { u32 i, ggtt_ofs; const struct intel_rotation_info *rot_info = &view->rotated; @@ -271,7 +267,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, out_unlock: mutex_unlock(&ggtt->lock); out: - xe_pm_runtime_put(tile_to_xe(ggtt->tile)); + xe_pm_runtime_put(xe); return ret; } @@ -333,8 +329,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; - /* Ensure DPT writes are flushed */ - xe_device_l2_flush(xe); return vma; err_unpin: @@ -348,7 +342,7 @@ err: static void __xe_unpin_fb_vma(struct i915_vma *vma) { - u8 tile_id = vma->node->ggtt->tile->id; + u8 tile_id = xe_device_get_root_tile(xe_bo_device(vma->bo))->id; if (!refcount_dec_and_test(&vma->ref)) return; @@ -389,6 +383,7 @@ static bool reuse_vma(struct intel_plane_state *new_plane_state, { struct intel_framebuffer *fb = to_intel_framebuffer(new_plane_state->hw.fb); struct xe_device *xe = to_xe_device(fb->base.dev); + struct intel_display *display = xe->display; struct i915_vma *vma; if (old_plane_state->hw.fb == new_plane_state->hw.fb && @@ -399,8 +394,8 @@ static bool reuse_vma(struct intel_plane_state *new_plane_state, goto found; } - if (fb == intel_fbdev_framebuffer(xe->display.fbdev.fbdev)) { - vma = intel_fbdev_vma_pointer(xe->display.fbdev.fbdev); + if (fb == intel_fbdev_framebuffer(display->fbdev.fbdev)) { + vma = intel_fbdev_vma_pointer(display->fbdev.fbdev); if (vma) goto found; } @@ -463,3 +458,8 @@ u64 
intel_dpt_offset(struct i915_vma *dpt_vma) { return 0; } + +void intel_fb_get_map(struct i915_vma *vma, struct iosys_map *map) +{ + *map = vma->bo->vmap; +} diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 7c02323e9531..30f1073141fc 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -9,7 +9,6 @@ #include "abi/gsc_command_header_abi.h" #include "intel_hdcp_gsc.h" -#include "intel_hdcp_gsc_message.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_device_types.h" @@ -22,7 +21,8 @@ #define HECI_MEADDRESS_HDCP 18 -struct intel_hdcp_gsc_message { +struct intel_hdcp_gsc_context { + struct xe_device *xe; struct xe_bo *hdcp_bo; u64 hdcp_cmd_in; u64 hdcp_cmd_out; @@ -30,14 +30,9 @@ struct intel_hdcp_gsc_message { #define HDCP_GSC_HEADER_SIZE sizeof(struct intel_gsc_mtl_header) -bool intel_hdcp_gsc_cs_required(struct intel_display *display) +bool intel_hdcp_gsc_check_status(struct drm_device *drm) { - return DISPLAY_VER(display) >= 14; -} - -bool intel_hdcp_gsc_check_status(struct intel_display *display) -{ - struct xe_device *xe = to_xe_device(display->drm); + struct xe_device *xe = to_xe_device(drm); struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *gt = tile->media_gt; struct xe_gsc *gsc = >->uc.gsc; @@ -69,10 +64,9 @@ out: } /*This function helps allocate memory for the command that we will send to gsc cs */ -static int intel_hdcp_gsc_initialize_message(struct intel_display *display, - struct intel_hdcp_gsc_message *hdcp_message) +static int intel_hdcp_gsc_initialize_message(struct xe_device *xe, + struct intel_hdcp_gsc_context *gsc_context) { - struct xe_device *xe = to_xe_device(display->drm); struct xe_bo *bo = NULL; u64 cmd_in, cmd_out; int ret = 0; @@ -84,113 +78,69 @@ static int intel_hdcp_gsc_initialize_message(struct intel_display *display, XE_BO_FLAG_GGTT); if (IS_ERR(bo)) { - drm_err(display->drm, "Failed to allocate bo for HDCP streaming command!\n"); + drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n"); ret = PTR_ERR(bo); goto out; } cmd_in = xe_bo_ggtt_addr(bo); cmd_out = cmd_in + PAGE_SIZE; - xe_map_memset(xe, &bo->vmap, 0, 0, bo->size); + xe_map_memset(xe, &bo->vmap, 0, 0, xe_bo_size(bo)); + + gsc_context->hdcp_bo = bo; + gsc_context->hdcp_cmd_in = cmd_in; + gsc_context->hdcp_cmd_out = cmd_out; + gsc_context->xe = xe; - hdcp_message->hdcp_bo = bo; - hdcp_message->hdcp_cmd_in = cmd_in; - hdcp_message->hdcp_cmd_out = cmd_out; out: return ret; } -static int intel_hdcp_gsc_hdcp2_init(struct intel_display *display) +struct intel_hdcp_gsc_context *intel_hdcp_gsc_context_alloc(struct drm_device *drm) { - struct intel_hdcp_gsc_message *hdcp_message; + struct xe_device *xe = to_xe_device(drm); + struct intel_hdcp_gsc_context *gsc_context; int ret; - hdcp_message = kzalloc(sizeof(*hdcp_message), GFP_KERNEL); - - if (!hdcp_message) - return -ENOMEM; + gsc_context = kzalloc(sizeof(*gsc_context), GFP_KERNEL); + if (!gsc_context) + return ERR_PTR(-ENOMEM); /* * NOTE: No need to lock the comp mutex here as it is already * going to be taken before this function called */ - ret = intel_hdcp_gsc_initialize_message(display, hdcp_message); + ret = intel_hdcp_gsc_initialize_message(xe, gsc_context); if (ret) { - drm_err(display->drm, "Could not initialize hdcp_message\n"); - kfree(hdcp_message); - return ret; + drm_err(&xe->drm, "Could not initialize gsc_context\n"); + kfree(gsc_context); + gsc_context = ERR_PTR(ret); } - display->hdcp.hdcp_message = 
hdcp_message; - return ret; -} - -static const struct i915_hdcp_ops gsc_hdcp_ops = { - .initiate_hdcp2_session = intel_hdcp_gsc_initiate_session, - .verify_receiver_cert_prepare_km = - intel_hdcp_gsc_verify_receiver_cert_prepare_km, - .verify_hprime = intel_hdcp_gsc_verify_hprime, - .store_pairing_info = intel_hdcp_gsc_store_pairing_info, - .initiate_locality_check = intel_hdcp_gsc_initiate_locality_check, - .verify_lprime = intel_hdcp_gsc_verify_lprime, - .get_session_key = intel_hdcp_gsc_get_session_key, - .repeater_check_flow_prepare_ack = - intel_hdcp_gsc_repeater_check_flow_prepare_ack, - .verify_mprime = intel_hdcp_gsc_verify_mprime, - .enable_hdcp_authentication = intel_hdcp_gsc_enable_authentication, - .close_hdcp_session = intel_hdcp_gsc_close_session, -}; - -int intel_hdcp_gsc_init(struct intel_display *display) -{ - struct i915_hdcp_arbiter *data; - int ret; - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - mutex_lock(&display->hdcp.hdcp_mutex); - display->hdcp.arbiter = data; - display->hdcp.arbiter->hdcp_dev = display->drm->dev; - display->hdcp.arbiter->ops = &gsc_hdcp_ops; - ret = intel_hdcp_gsc_hdcp2_init(display); - if (ret) - kfree(data); - - mutex_unlock(&display->hdcp.hdcp_mutex); - - return ret; + return gsc_context; } -void intel_hdcp_gsc_fini(struct intel_display *display) +void intel_hdcp_gsc_context_free(struct intel_hdcp_gsc_context *gsc_context) { - struct intel_hdcp_gsc_message *hdcp_message = - display->hdcp.hdcp_message; - struct i915_hdcp_arbiter *arb = display->hdcp.arbiter; - - if (hdcp_message) { - xe_bo_unpin_map_no_vm(hdcp_message->hdcp_bo); - kfree(hdcp_message); - display->hdcp.hdcp_message = NULL; - } + if (!gsc_context) + return; - kfree(arb); - display->hdcp.arbiter = NULL; + xe_bo_unpin_map_no_vm(gsc_context->hdcp_bo); + kfree(gsc_context); } static int xe_gsc_send_sync(struct xe_device *xe, - struct intel_hdcp_gsc_message *hdcp_message, + struct intel_hdcp_gsc_context *gsc_context, u32 msg_size_in, u32 msg_size_out, u32 addr_out_off) { - struct xe_gt *gt = hdcp_message->hdcp_bo->tile->media_gt; - struct iosys_map *map = &hdcp_message->hdcp_bo->vmap; + struct xe_gt *gt = gsc_context->hdcp_bo->tile->media_gt; + struct iosys_map *map = &gsc_context->hdcp_bo->vmap; struct xe_gsc *gsc = >->uc.gsc; int ret; - ret = xe_gsc_pkt_submit_kernel(gsc, hdcp_message->hdcp_cmd_in, msg_size_in, - hdcp_message->hdcp_cmd_out, msg_size_out); + ret = xe_gsc_pkt_submit_kernel(gsc, gsc_context->hdcp_cmd_in, msg_size_in, + gsc_context->hdcp_cmd_out, msg_size_out); if (ret) { drm_err(&xe->drm, "failed to send gsc HDCP msg (%d)\n", ret); return ret; @@ -205,12 +155,12 @@ static int xe_gsc_send_sync(struct xe_device *xe, return ret; } -ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, - size_t msg_in_len, u8 *msg_out, - size_t msg_out_len) +ssize_t intel_hdcp_gsc_msg_send(struct intel_hdcp_gsc_context *gsc_context, + void *msg_in, size_t msg_in_len, + void *msg_out, size_t msg_out_len) { + struct xe_device *xe = gsc_context->xe; const size_t max_msg_size = PAGE_SIZE - HDCP_GSC_HEADER_SIZE; - struct intel_hdcp_gsc_message *hdcp_message; u64 host_session_id; u32 msg_size_in, msg_size_out; u32 addr_out_off, addr_in_wr_off = 0; @@ -223,15 +173,14 @@ ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, msg_size_in = msg_in_len + HDCP_GSC_HEADER_SIZE; msg_size_out = msg_out_len + HDCP_GSC_HEADER_SIZE; - hdcp_message = xe->display.hdcp.hdcp_message; addr_out_off = PAGE_SIZE; host_session_id = 
xe_gsc_create_host_session_id(); xe_pm_runtime_get_noresume(xe); - addr_in_wr_off = xe_gsc_emit_header(xe, &hdcp_message->hdcp_bo->vmap, + addr_in_wr_off = xe_gsc_emit_header(xe, &gsc_context->hdcp_bo->vmap, addr_in_wr_off, HECI_MEADDRESS_HDCP, host_session_id, msg_in_len); - xe_map_memcpy_to(xe, &hdcp_message->hdcp_bo->vmap, addr_in_wr_off, + xe_map_memcpy_to(xe, &gsc_context->hdcp_bo->vmap, addr_in_wr_off, msg_in, msg_in_len); /* * Keep sending request in case the pending bit is set no need to add @@ -240,7 +189,7 @@ ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, * 20 times each message 50 ms apart */ do { - ret = xe_gsc_send_sync(xe, hdcp_message, msg_size_in, msg_size_out, + ret = xe_gsc_send_sync(xe, gsc_context, msg_size_in, msg_size_out, addr_out_off); /* Only try again if gsc says so */ @@ -254,7 +203,7 @@ ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, if (ret) goto out; - xe_map_memcpy_from(xe, msg_out, &hdcp_message->hdcp_bo->vmap, + xe_map_memcpy_from(xe, msg_out, &gsc_context->hdcp_bo->vmap, addr_out_off + HDCP_GSC_HEADER_SIZE, msg_out_len); diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 4ca0cb571194..dcbc4b2d3fd9 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -10,14 +10,15 @@ #include "xe_ggtt.h" #include "xe_mmio.h" -#include "i915_reg.h" -#include "intel_atomic_plane.h" #include "intel_crtc.h" #include "intel_display.h" +#include "intel_display_core.h" +#include "intel_display_regs.h" #include "intel_display_types.h" #include "intel_fb.h" #include "intel_fb_pin.h" #include "intel_frontbuffer.h" +#include "intel_plane.h" #include "intel_plane_initial.h" #include "xe_bo.h" #include "xe_wa.h" @@ -83,16 +84,12 @@ initial_plane_bo(struct xe_device *xe, if (plane_config->size == 0) return NULL; - flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT; + flags = XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT; base = round_down(plane_config->base, page_size); if (IS_DGFX(xe)) { - u64 __iomem *gte = tile0->mem.ggtt->gsm; - u64 pte; + u64 pte = xe_ggtt_read_pte(tile0->mem.ggtt, base); - gte += base / XE_PAGE_SIZE; - - pte = ioread64(gte); if (!(pte & XE_GGTT_PTE_DM)) { drm_err(&xe->drm, "Initial plane programming missing DM bit\n"); @@ -187,7 +184,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; if (intel_framebuffer_init(to_intel_framebuffer(fb), - &bo->ttm.base, &mode_cmd)) { + &bo->ttm.base, fb->format, &mode_cmd)) { drm_dbg_kms(&xe->drm, "intel fb init failed\n"); goto err_bo; } diff --git a/drivers/gpu/drm/xe/display/xe_tdf.c b/drivers/gpu/drm/xe/display/xe_tdf.c index 2a7fccbeb1d5..78bda4c47874 100644 --- a/drivers/gpu/drm/xe/display/xe_tdf.c +++ b/drivers/gpu/drm/xe/display/xe_tdf.c @@ -3,9 +3,9 @@ * Copyright © 2024 Intel Corporation */ -#include "xe_device.h" -#include "intel_display_types.h" +#include "intel_display_core.h" #include "intel_tdf.h" +#include "xe_device.h" void intel_td_flush(struct intel_display *display) { diff --git a/drivers/gpu/drm/xe/instructions/xe_alu_commands.h b/drivers/gpu/drm/xe/instructions/xe_alu_commands.h new file mode 100644 index 000000000000..2987b10d3e16 --- /dev/null +++ b/drivers/gpu/drm/xe/instructions/xe_alu_commands.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_ALU_COMMANDS_H_ +#define _XE_ALU_COMMANDS_H_ + +#include "instructions/xe_instr_defs.h" + +/* Instruction Opcodes 
*/ +#define CS_ALU_OPCODE_NOOP 0x000 +#define CS_ALU_OPCODE_FENCE_RD 0x001 +#define CS_ALU_OPCODE_FENCE_WR 0x002 +#define CS_ALU_OPCODE_LOAD 0x080 +#define CS_ALU_OPCODE_LOADINV 0x480 +#define CS_ALU_OPCODE_LOAD0 0x081 +#define CS_ALU_OPCODE_LOAD1 0x481 +#define CS_ALU_OPCODE_LOADIND 0x082 +#define CS_ALU_OPCODE_ADD 0x100 +#define CS_ALU_OPCODE_SUB 0x101 +#define CS_ALU_OPCODE_AND 0x102 +#define CS_ALU_OPCODE_OR 0x103 +#define CS_ALU_OPCODE_XOR 0x104 +#define CS_ALU_OPCODE_SHL 0x105 +#define CS_ALU_OPCODE_SHR 0x106 +#define CS_ALU_OPCODE_SAR 0x107 +#define CS_ALU_OPCODE_STORE 0x180 +#define CS_ALU_OPCODE_STOREINV 0x580 +#define CS_ALU_OPCODE_STOREIND 0x181 + +/* Instruction Operands */ +#define CS_ALU_OPERAND_REG(n) REG_FIELD_PREP(GENMASK(3, 0), (n)) +#define CS_ALU_OPERAND_REG0 0x0 +#define CS_ALU_OPERAND_REG1 0x1 +#define CS_ALU_OPERAND_REG2 0x2 +#define CS_ALU_OPERAND_REG3 0x3 +#define CS_ALU_OPERAND_REG4 0x4 +#define CS_ALU_OPERAND_REG5 0x5 +#define CS_ALU_OPERAND_REG6 0x6 +#define CS_ALU_OPERAND_REG7 0x7 +#define CS_ALU_OPERAND_REG8 0x8 +#define CS_ALU_OPERAND_REG9 0x9 +#define CS_ALU_OPERAND_REG10 0xa +#define CS_ALU_OPERAND_REG11 0xb +#define CS_ALU_OPERAND_REG12 0xc +#define CS_ALU_OPERAND_REG13 0xd +#define CS_ALU_OPERAND_REG14 0xe +#define CS_ALU_OPERAND_REG15 0xf +#define CS_ALU_OPERAND_SRCA 0x20 +#define CS_ALU_OPERAND_SRCB 0x21 +#define CS_ALU_OPERAND_ACCU 0x31 +#define CS_ALU_OPERAND_ZF 0x32 +#define CS_ALU_OPERAND_CF 0x33 +#define CS_ALU_OPERAND_NA 0 /* N/A operand */ + +/* Command Streamer ALU Instructions */ +#define CS_ALU_INSTR(opcode, op1, op2) (REG_FIELD_PREP(GENMASK(31, 20), (opcode)) | \ + REG_FIELD_PREP(GENMASK(19, 10), (op1)) | \ + REG_FIELD_PREP(GENMASK(9, 0), (op2))) + +#define __CS_ALU_INSTR(opcode, op1, op2) CS_ALU_INSTR(CS_ALU_OPCODE_##opcode, \ + CS_ALU_OPERAND_##op1, \ + CS_ALU_OPERAND_##op2) + +#define CS_ALU_INSTR_NOOP __CS_ALU_INSTR(NOOP, NA, NA) +#define CS_ALU_INSTR_LOAD(op1, op2) __CS_ALU_INSTR(LOAD, op1, op2) +#define CS_ALU_INSTR_LOADINV(op1, op2) __CS_ALU_INSTR(LOADINV, op1, op2) +#define CS_ALU_INSTR_LOAD0(op1) __CS_ALU_INSTR(LOAD0, op1, NA) +#define CS_ALU_INSTR_LOAD1(op1) __CS_ALU_INSTR(LOAD1, op1, NA) +#define CS_ALU_INSTR_ADD __CS_ALU_INSTR(ADD, NA, NA) +#define CS_ALU_INSTR_SUB __CS_ALU_INSTR(SUB, NA, NA) +#define CS_ALU_INSTR_AND __CS_ALU_INSTR(AND, NA, NA) +#define CS_ALU_INSTR_OR __CS_ALU_INSTR(OR, NA, NA) +#define CS_ALU_INSTR_XOR __CS_ALU_INSTR(XOR, NA, NA) +#define CS_ALU_INSTR_STORE(op1, op2) __CS_ALU_INSTR(STORE, op1, op2) +#define CS_ALU_INSTR_STOREINV(op1, op2) __CS_ALU_INSTR(STOREINV, op1, op2) + +#endif diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h index 31d28a67ef6a..457881af8af9 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h @@ -137,6 +137,7 @@ #define CMD_3DSTATE_CLIP_MESH GFXPIPE_3D_CMD(0x0, 0x81) #define CMD_3DSTATE_SBE_MESH GFXPIPE_3D_CMD(0x0, 0x82) #define CMD_3DSTATE_CPSIZE_CONTROL_BUFFER GFXPIPE_3D_CMD(0x0, 0x83) +#define CMD_3DSTATE_COARSE_PIXEL GFXPIPE_3D_CMD(0x0, 0x89) #define CMD_3DSTATE_DRAWING_RECTANGLE GFXPIPE_3D_CMD(0x1, 0x0) #define CMD_3DSTATE_CHROMA_KEY GFXPIPE_3D_CMD(0x1, 0x4) diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index 5a47991b4b81..e3f5e8bb3ebc 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -32,6 +32,7 @@ 
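[Illustrative aside, not part of the patch: the CS_ALU_INSTR() layout introduced above packs an opcode into bits 31:20 and two operands into bits 19:10 and 9:0, so an ALU program is simply an array of dwords built from the convenience wrappers. Below is a minimal sketch of the four dwords for GPR2 = GPR0 + GPR1; only names defined in xe_alu_commands.h are used, and the array name itself is hypothetical.]

#include "instructions/xe_alu_commands.h"

/* Hypothetical sketch: ALU program computing GPR2 = GPR0 + GPR1 */
static const u32 gpr2_add_alu_sketch[] = {
	CS_ALU_INSTR_LOAD(SRCA, REG0),	/* SRCA <- GPR0 */
	CS_ALU_INSTR_LOAD(SRCB, REG1),	/* SRCB <- GPR1 */
	CS_ALU_INSTR_ADD,		/* ACCU <- SRCA + SRCB */
	CS_ALU_INSTR_STORE(REG2, ACCU),	/* GPR2 <- ACCU */
};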
#define MI_BATCH_BUFFER_END __MI_INSTR(0xA) #define MI_TOPOLOGY_FILTER __MI_INSTR(0xD) #define MI_FORCE_WAKEUP __MI_INSTR(0x1D) +#define MI_MATH(n) (__MI_INSTR(0x1A) | XE_INSTR_NUM_DW((n) + 1)) #define MI_STORE_DATA_IMM __MI_INSTR(0x20) #define MI_SDI_GGTT REG_BIT(22) @@ -65,6 +66,10 @@ #define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4)) #define MI_LRM_USE_GGTT REG_BIT(22) +#define MI_LOAD_REGISTER_REG (__MI_INSTR(0x2a) | XE_INSTR_NUM_DW(3)) +#define MI_LRR_DST_CS_MMIO REG_BIT(19) +#define MI_LRR_SRC_CS_MMIO REG_BIT(18) + #define MI_COPY_MEM_MEM (__MI_INSTR(0x2e) | XE_INSTR_NUM_DW(5)) #define MI_COPY_MEM_MEM_SRC_GGTT REG_BIT(22) #define MI_COPY_MEM_MEM_DST_GGTT REG_BIT(21) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 891f928d80ce..7ade41e2b7b3 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -193,6 +193,10 @@ #define PREEMPT_GPGPU_LEVEL_MASK PREEMPT_GPGPU_LEVEL(1, 1) #define PREEMPT_3D_OBJECT_LEVEL REG_BIT(0) +#define CS_GPR_DATA(base, n) XE_REG((base) + 0x600 + (n) * 4) +#define CS_GPR_REG(base, n) CS_GPR_DATA((base), (n) * 2) +#define CS_GPR_REG_UDW(base, n) CS_GPR_DATA((base), (n) * 2 + 1) + #define VDBOX_CGCTL3F08(base) XE_REG((base) + 0x3f08) #define CG3DDISHRS_CLKGATE_DIS REG_BIT(5) diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h index 7702364b65f1..9b66cc972a63 100644 --- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h @@ -16,6 +16,10 @@ #define MTL_GSC_HECI1_BASE 0x00116000 #define MTL_GSC_HECI2_BASE 0x00117000 +#define DG1_GSC_HECI2_BASE 0x00259000 +#define PVC_GSC_HECI2_BASE 0x00285000 +#define DG2_GSC_HECI2_BASE 0x00374000 + #define HECI_H_CSR(base) XE_REG((base) + 0x4) #define HECI_H_CSR_IE REG_BIT(0) #define HECI_H_CSR_IS REG_BIT(1) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 181913967ac9..5cd5ab8529c5 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -62,7 +62,6 @@ #define LE_SSE_MASK REG_GENMASK(18, 17) #define LE_SSE(value) REG_FIELD_PREP(LE_SSE_MASK, value) #define LE_COS_MASK REG_GENMASK(16, 15) -#define LE_COS(value) REG_FIELD_PREP(LE_COS_MASK) #define LE_SCF_MASK REG_BIT(14) #define LE_SCF(value) REG_FIELD_PREP(LE_SCF_MASK, value) #define LE_PFM_MASK REG_GENMASK(13, 11) @@ -393,6 +392,18 @@ #define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) +#define LSN_VC_REG2 XE_REG_MCR(0xb0c8) +#define LSN_LNI_WGT_MASK REG_GENMASK(31, 28) +#define LSN_LNI_WGT(value) REG_FIELD_PREP(LSN_LNI_WGT_MASK, value) +#define LSN_LNE_WGT_MASK REG_GENMASK(27, 24) +#define LSN_LNE_WGT(value) REG_FIELD_PREP(LSN_LNE_WGT_MASK, value) +#define LSN_DIM_X_WGT_MASK REG_GENMASK(23, 20) +#define LSN_DIM_X_WGT(value) REG_FIELD_PREP(LSN_DIM_X_WGT_MASK, value) +#define LSN_DIM_Y_WGT_MASK REG_GENMASK(19, 16) +#define LSN_DIM_Y_WGT(value) REG_FIELD_PREP(LSN_DIM_Y_WGT_MASK, value) +#define LSN_DIM_Z_WGT_MASK REG_GENMASK(15, 12) +#define LSN_DIM_Z_WGT(value) REG_FIELD_PREP(LSN_DIM_Z_WGT_MASK, value) + #define L3SQCREG2 XE_REG_MCR(0xb104) #define COMPMEMRD256BOVRFETCHEN REG_BIT(20) diff --git a/drivers/gpu/drm/xe/regs/xe_i2c_regs.h b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h new file mode 100644 index 000000000000..af781c8e4a80 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _XE_I2C_REGS_H_ +#define _XE_I2C_REGS_H_ + 
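[Illustrative aside, not part of the patch: a hedged sketch of how the new MI_MATH(), MI_LOAD_REGISTER_REG and CS_GPR_REG() definitions could be combined. Assumptions, none of which come from the patch: "cs" is a dword write pointer into a batch, "base" is the engine MMIO base, "src" is some readable 32-bit register whose raw offset is exposed via struct xe_reg's addr field, GPR1 already holds the second operand, and MI_MATH(n) takes the number of ALU dwords that follow, as the XE_INSTR_NUM_DW(n + 1) encoding above suggests.]

#include "instructions/xe_alu_commands.h"
#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"

/* Hypothetical emitter: GPR0 <- src, then GPR0 <- GPR0 + GPR1 */
static u32 *emit_gpr0_add_sketch(u32 *cs, u32 base, struct xe_reg src)
{
	/*
	 * Load the low dword of GPR0 from src; the ALU operates on 64-bit
	 * GPRs, so real code would also initialize CS_GPR_REG_UDW(base, 0).
	 */
	*cs++ = MI_LOAD_REGISTER_REG;
	*cs++ = src.addr;			/* source register offset */
	*cs++ = CS_GPR_REG(base, 0).addr;	/* destination: GPR0, low dword */

	*cs++ = MI_MATH(4);			/* four ALU dwords follow */
	*cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0);
	*cs++ = CS_ALU_INSTR_LOAD(SRCB, REG1);
	*cs++ = CS_ALU_INSTR_ADD;
	*cs++ = CS_ALU_INSTR_STORE(REG0, ACCU);	/* GPR0 <- ACCU */

	return cs;
}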
+#include <linux/pci_regs.h> + +#include "xe_reg_defs.h" +#include "xe_regs.h" + +#define I2C_BRIDGE_OFFSET (SOC_BASE + 0xd9000) +#define I2C_CONFIG_SPACE_OFFSET (SOC_BASE + 0xf6000) +#define I2C_MEM_SPACE_OFFSET (SOC_BASE + 0xf7400) + +#define REG_SG_REMAP_ADDR_PREFIX XE_REG(SOC_BASE + 0x0164) +#define REG_SG_REMAP_ADDR_POSTFIX XE_REG(SOC_BASE + 0x0168) + +#define I2C_CONFIG_CMD XE_REG(I2C_CONFIG_SPACE_OFFSET + PCI_COMMAND) +#define I2C_CONFIG_PMCSR XE_REG(I2C_CONFIG_SPACE_OFFSET + 0x84) + +#endif /* _XE_I2C_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h index f0ecfcac4003..13635e4331d4 100644 --- a/drivers/gpu/drm/xe/regs/xe_irq_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h @@ -19,6 +19,7 @@ #define MASTER_IRQ REG_BIT(31) #define GU_MISC_IRQ REG_BIT(29) #define DISPLAY_IRQ REG_BIT(16) +#define I2C_IRQ REG_BIT(12) #define GT_DW_IRQ(x) REG_BIT(x) /* diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 994af591a2e8..1b101edb838b 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -12,9 +12,13 @@ #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) #define CTX_BB_PER_CTX_PTR (0x12 + 1) +#define CTX_CS_INDIRECT_CTX (0x14 + 1) +#define CTX_CS_INDIRECT_CTX_OFFSET (0x16 + 1) #define CTX_TIMESTAMP (0x22 + 1) #define CTX_TIMESTAMP_UDW (0x24 + 1) #define CTX_INDIRECT_RING_STATE (0x26 + 1) +#define CTX_ACC_CTR_THOLD (0x2a + 1) +#define CTX_ASID (0x2e + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) @@ -36,4 +40,7 @@ #define INDIRECT_CTX_RING_START_UDW (0x08 + 1) #define INDIRECT_CTX_RING_CTL (0x0a + 1) +#define CTX_INDIRECT_CTX_OFFSET_MASK REG_GENMASK(15, 6) +#define CTX_INDIRECT_CTX_OFFSET_DEFAULT REG_FIELD_PREP(CTX_INDIRECT_CTX_OFFSET_MASK, 0xd) + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h index f5e5234857c1..ef2bf984723f 100644 --- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h @@ -38,10 +38,11 @@ #define TEMP_MASK REG_GENMASK(7, 0) #define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) -#define PKG_PWR_LIM_1 REG_GENMASK(14, 0) -#define PKG_PWR_LIM_1_EN REG_BIT(15) -#define PKG_PWR_LIM_1_TIME REG_GENMASK(23, 17) -#define PKG_PWR_LIM_1_TIME_X REG_GENMASK(23, 22) -#define PKG_PWR_LIM_1_TIME_Y REG_GENMASK(21, 17) +#define PWR_LIM_VAL REG_GENMASK(14, 0) +#define PWR_LIM_EN REG_BIT(15) +#define PWR_LIM REG_GENMASK(15, 0) +#define PWR_LIM_TIME REG_GENMASK(23, 17) +#define PWR_LIM_TIME_X REG_GENMASK(23, 22) +#define PWR_LIM_TIME_Y REG_GENMASK(21, 17) #endif /* _XE_MCHBAR_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index a79ad2da070c..e693a50706f8 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -97,4 +97,7 @@ #define OAM_STATUS(base) XE_REG((base) + OAM_STATUS_OFFSET) #define OAM_MMIO_TRG(base) XE_REG((base) + OAM_MMIO_TRG_OFFSET) +#define OAM_COMPRESSION_T3_CONTROL XE_REG(0x1c2e00) +#define OAM_LAT_MEASURE_ENABLE REG_BIT(4) + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h index 8846eb9ce2a4..fb097607b86c 100644 --- a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h @@ -18,13 +18,10 @@ #define PVC_GT0_PLATFORM_ENERGY_STATUS XE_REG(0x28106c) #define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080) -#define 
BMG_PACKAGE_POWER_SKU XE_REG(0x138098) -#define BMG_PACKAGE_POWER_SKU_UNIT XE_REG(0x1380dc) -#define BMG_PACKAGE_ENERGY_STATUS XE_REG(0x138120) +#define BMG_FAN_1_SPEED XE_REG(0x138140) +#define BMG_FAN_2_SPEED XE_REG(0x138170) +#define BMG_FAN_3_SPEED XE_REG(0x1381a0) #define BMG_VRAM_TEMPERATURE XE_REG(0x1382c0) #define BMG_PACKAGE_TEMPERATURE XE_REG(0x138434) -#define BMG_PACKAGE_RAPL_LIMIT XE_REG(0x138440) -#define BMG_PLATFORM_ENERGY_STATUS XE_REG(0x138458) -#define BMG_PLATFORM_POWER_LIMIT XE_REG(0x138460) #endif /* _XE_PCODE_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h index f45abcd96ba8..2995d72c3f78 100644 --- a/drivers/gpu/drm/xe/regs/xe_pmt.h +++ b/drivers/gpu/drm/xe/regs/xe_pmt.h @@ -5,11 +5,16 @@ #ifndef _XE_PMT_H_ #define _XE_PMT_H_ -#define SOC_BASE 0x280000 +#include "xe_regs.h" #define BMG_PMT_BASE_OFFSET 0xDB000 #define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET) +#define PUNIT_TELEMETRY_GUID XE_REG(BMG_DISCOVERY_OFFSET + 0x4) +#define BMG_ENERGY_STATUS_PMT_OFFSET (0x30) +#define ENERGY_PKG REG_GENMASK64(31, 0) +#define ENERGY_CARD REG_GENMASK64(63, 32) + #define BMG_TELEMETRY_BASE_OFFSET 0xE0000 #define BMG_TELEMETRY_OFFSET (SOC_BASE + BMG_TELEMETRY_BASE_OFFSET) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 3abb17d2ca33..1926b4044314 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -7,6 +7,8 @@ #include "regs/xe_reg_defs.h" +#define SOC_BASE 0x280000 + #define GU_CNTL_PROTECTED XE_REG(0x10100C) #define DRIVERINT_FLR_DIS REG_BIT(31) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 9fde67ca989f..bb469096d072 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -60,7 +60,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, } /* Evict to system. CCS data should be copied. */ - ret = xe_bo_evict(bo, true); + ret = xe_bo_evict(bo); if (ret) { KUNIT_FAIL(test, "Failed to evict bo.\n"); return ret; @@ -106,7 +106,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, } /* Check last CCS value, or at least last value in page. */ - offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size); + offset = xe_device_ccs_bytes(tile_to_xe(tile), xe_bo_size(bo)); offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1; if (cpu_map[offset] != get_val) { KUNIT_FAIL(test, @@ -252,7 +252,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc for_each_gt(__gt, xe, id) xe_gt_sanitize(__gt); - err = xe_bo_restore_kernel(xe); + err = xe_bo_restore_early(xe); /* * Snapshotting the CTB and copying back a potentially old * version seems risky, depending on what might have been @@ -273,7 +273,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc goto cleanup_all; } - err = xe_bo_restore_user(xe); + err = xe_bo_restore_late(xe); if (err) { KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err)); goto cleanup_all; @@ -514,9 +514,9 @@ static int shrink_test_run_device(struct xe_device *xe) * other way around, they may not be subject to swapping... 
*/ if (alloced < purgeable) { - xe_ttm_tt_account_subtract(&xe_tt->ttm); + xe_ttm_tt_account_subtract(xe, &xe_tt->ttm); xe_tt->purgeable = true; - xe_ttm_tt_account_add(&xe_tt->ttm); + xe_ttm_tt_account_add(xe, &xe_tt->ttm); bo->ttm.priority = 0; spin_lock(&bo->ttm.bdev->lru_lock); ttm_bo_move_to_lru_tail(&bo->ttm); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index cedd3e88a6fb..c53f67ce4b0a 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -65,7 +65,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, * the exporter and the importer should be the same bo. */ swap(exported->ttm.base.dma_buf, dmabuf); - ret = xe_bo_evict(exported, true); + ret = xe_bo_evict(exported); swap(exported->ttm.base.dma_buf, dmabuf); if (ret) { if (ret != -EINTR && ret != -ERESTARTSYS) diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c deleted file mode 100644 index b683585db852..000000000000 --- a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 AND MIT -/* - * Copyright © 2024 Intel Corporation - */ - -#include <kunit/test.h> - -#include "xe_device.h" -#include "xe_kunit_helpers.h" -#include "xe_pci_test.h" - -static int pf_service_test_init(struct kunit *test) -{ - struct xe_pci_fake_data fake = { - .sriov_mode = XE_SRIOV_MODE_PF, - .platform = XE_TIGERLAKE, /* some random platform */ - .subplatform = XE_SUBPLATFORM_NONE, - }; - struct xe_device *xe; - struct xe_gt *gt; - - test->priv = &fake; - xe_kunit_helper_xe_device_test_init(test); - - xe = test->priv; - KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); - - gt = xe_device_get_gt(xe, 0); - pf_init_versions(gt); - - /* - * sanity check: - * - all supported platforms VF/PF ABI versions must be defined - * - base version can't be newer than latest - */ - KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.latest.major); - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.minor, - gt->sriov.pf.service.version.latest.minor); - - test->priv = gt; - return 0; -} - -static void pf_negotiate_any(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, VF2PF_HANDSHAKE_MAJOR_ANY, - VF2PF_HANDSHAKE_MINOR_ANY, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_base_match(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.base.minor); -} - -static void pf_negotiate_base_newer(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor + 1, - &major, 
&minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_GE(test, minor, gt->sriov.pf.service.version.base.minor); - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); - else - KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); -} - -static void pf_negotiate_base_next(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major + 1, 0, - &major, &minor)); - KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_LE(test, major, gt->sriov.pf.service.version.latest.major); - if (major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); - else - KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); -} - -static void pf_negotiate_base_older(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (!gt->sriov.pf.service.version.base.minor) - kunit_skip(test, "no older minor\n"); - - KUNIT_ASSERT_NE(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor - 1, - &major, &minor)); -} - -static void pf_negotiate_base_prev(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_NE(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major - 1, 1, - &major, &minor)); -} - -static void pf_negotiate_latest_match(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_newer(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_next(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major + 1, 0, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_older(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (!gt->sriov.pf.service.version.latest.minor) - kunit_skip(test, "no older minor\n"); - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor - 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor - 1); -} - -static void pf_negotiate_latest_prev(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; 
- - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - kunit_skip(test, "no prev major"); - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major - 1, - gt->sriov.pf.service.version.base.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major - 1); - KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); -} - -static struct kunit_case pf_service_test_cases[] = { - KUNIT_CASE(pf_negotiate_any), - KUNIT_CASE(pf_negotiate_base_match), - KUNIT_CASE(pf_negotiate_base_newer), - KUNIT_CASE(pf_negotiate_base_next), - KUNIT_CASE(pf_negotiate_base_older), - KUNIT_CASE(pf_negotiate_base_prev), - KUNIT_CASE(pf_negotiate_latest_match), - KUNIT_CASE(pf_negotiate_latest_newer), - KUNIT_CASE(pf_negotiate_latest_next), - KUNIT_CASE(pf_negotiate_latest_older), - KUNIT_CASE(pf_negotiate_latest_prev), - {} -}; - -static struct kunit_suite pf_service_suite = { - .name = "pf_service", - .test_cases = pf_service_test_cases, - .init = pf_service_test_init, -}; - -kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c index 6faffcd74869..d266882adc0e 100644 --- a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c +++ b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c @@ -32,7 +32,7 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device * bo->tile = tile; bo->ttm.bdev = &xe->ttm; - bo->size = size; + bo->ttm.base.size = size; iosys_map_set_vaddr(&bo->vmap, buf); if (flags & XE_BO_FLAG_GGTT) { @@ -42,10 +42,8 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device * KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo->ggtt_node[tile->id]); KUNIT_ASSERT_EQ(test, 0, - drm_mm_insert_node_in_range(&ggtt->mm, - &bo->ggtt_node[tile->id]->base, - bo->size, SZ_4K, - 0, 0, U64_MAX, 0)); + xe_ggtt_node_insert(bo->ggtt_node[tile->id], + xe_bo_size(bo), SZ_4K)); } return bo; @@ -67,8 +65,9 @@ static int guc_buf_test_init(struct kunit *test) ggtt = xe_device_get_root_tile(test->priv)->mem.ggtt; guc = &xe_device_get_gt(test->priv, 0)->uc.guc; - drm_mm_init(&ggtt->mm, DUT_GGTT_START, DUT_GGTT_SIZE); - mutex_init(&ggtt->lock); + KUNIT_ASSERT_EQ(test, 0, + xe_ggtt_init_kunit(ggtt, DUT_GGTT_START, + DUT_GGTT_START + DUT_GGTT_SIZE)); kunit_activate_static_stub(test, xe_managed_bo_create_pin_map, replacement_xe_managed_bo_create_pin_map); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index d5fe0ea889ad..edd1e701aa1c 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -74,13 +74,13 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, { struct xe_device *xe = tile_to_xe(m->tile); u64 retval, expected = 0; - bool big = bo->size >= SZ_2M; + bool big = xe_bo_size(bo) >= SZ_2M; struct dma_fence *fence; const char *str = big ? "Copying big bo" : "Copying small bo"; int err; struct xe_bo *remote = xe_bo_create_locked(xe, m->tile, NULL, - bo->size, + xe_bo_size(bo), ttm_bo_type_kernel, region | XE_BO_FLAG_NEEDS_CPU_ACCESS | @@ -105,7 +105,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, goto out_unlock; } - xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); + xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote)); fence = xe_migrate_clear(m, remote, remote->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); if (!sanity_fence_failed(xe, fence, big ? 
"Clearing remote big bo" : @@ -113,15 +113,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &remote->vmap, 0, u64); check(retval, expected, "remote first offset should be cleared", test); - retval = xe_map_rd(xe, &remote->vmap, remote->size - 8, u64); + retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(remote) - 8, u64); check(retval, expected, "remote last offset should be cleared", test); } dma_fence_put(fence); /* Try to copy 0xc0 from remote to vram with 2MB or 64KiB/4KiB pages */ - xe_map_memset(xe, &remote->vmap, 0, 0xc0, remote->size); - xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size); + xe_map_memset(xe, &remote->vmap, 0, 0xc0, xe_bo_size(remote)); + xe_map_memset(xe, &bo->vmap, 0, 0xd0, xe_bo_size(bo)); expected = 0xc0c0c0c0c0c0c0c0; fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource, @@ -131,15 +131,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &bo->vmap, 0, u64); check(retval, expected, "remote -> vram bo first offset should be copied", test); - retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64); + retval = xe_map_rd(xe, &bo->vmap, xe_bo_size(bo) - 8, u64); check(retval, expected, "remote -> vram bo offset should be copied", test); } dma_fence_put(fence); /* And other way around.. slightly hacky.. */ - xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); - xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size); + xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote)); + xe_map_memset(xe, &bo->vmap, 0, 0xc0, xe_bo_size(bo)); fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource, remote->ttm.resource, false); @@ -148,7 +148,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &remote->vmap, 0, u64); check(retval, expected, "vram -> remote bo first offset should be copied", test); - retval = xe_map_rd(xe, &remote->vmap, bo->size - 8, u64); + retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(bo) - 8, u64); check(retval, expected, "vram -> remote bo last offset should be copied", test); } @@ -202,8 +202,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_VRAM_IF_DGFX(tile)); if (IS_ERR(big)) { KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big)); goto vunmap; @@ -211,8 +210,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_VRAM_IF_DGFX(tile)); if (IS_ERR(pt)) { KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", PTR_ERR(pt)); @@ -222,8 +220,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) tiny = xe_bo_create_pin_map(xe, tile, m->q->vm, 2 * SZ_4K, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_VRAM_IF_DGFX(tile)); if (IS_ERR(tiny)) { KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n", PTR_ERR(tiny)); @@ -248,9 +245,9 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) if (m->q->vm->flags & XE_VM_FLAG_64K) expected |= XE_PTE_PS64; if (xe_bo_is_vram(pt)) - xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); + xe_res_first(pt->ttm.resource, 0, xe_bo_size(pt), &src_it); else - xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it); + xe_res_first_sg(xe_bo_sg(pt), 0, xe_bo_size(pt), 
&src_it); emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false, &src_it, XE_PAGE_SIZE, pt->ttm.resource); @@ -279,7 +276,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* Clear a small bo */ kunit_info(test, "Clearing small buffer object\n"); - xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); + xe_map_memset(xe, &tiny->vmap, 0, 0x22, xe_bo_size(tiny)); expected = 0; fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); @@ -289,7 +286,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) dma_fence_put(fence); retval = xe_map_rd(xe, &tiny->vmap, 0, u32); check(retval, expected, "Command clear small first value", test); - retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32); + retval = xe_map_rd(xe, &tiny->vmap, xe_bo_size(tiny) - 4, u32); check(retval, expected, "Command clear small last value", test); kunit_info(test, "Copying small buffer object to system\n"); @@ -301,7 +298,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* Clear a big bo */ kunit_info(test, "Clearing big buffer object\n"); - xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); + xe_map_memset(xe, &big->vmap, 0, 0x11, xe_bo_size(big)); expected = 0; fence = xe_migrate_clear(m, big, big->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); @@ -311,7 +308,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) dma_fence_put(fence); retval = xe_map_rd(xe, &big->vmap, 0, u32); check(retval, expected, "Command clear big first value", test); - retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32); + retval = xe_map_rd(xe, &big->vmap, xe_bo_size(big) - 4, u32); check(retval, expected, "Command clear big last value", test); kunit_info(test, "Copying big buffer object to system\n"); @@ -373,7 +370,7 @@ static struct dma_fence *blt_copy(struct xe_tile *tile, struct xe_migrate *m = tile->migrate; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; - u64 size = src_bo->size; + u64 size = xe_bo_size(src_bo); struct xe_res_cursor src_it, dst_it; struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource; u64 src_L0_ofs, dst_L0_ofs; @@ -501,7 +498,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, long ret; expected = 0xd0d0d0d0d0d0d0d0; - xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size); + xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo)); fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test); if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) { @@ -512,7 +509,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, dma_fence_put(fence); kunit_info(test, "Evict vram buffer object\n"); - ret = xe_bo_evict(vram_bo, true); + ret = xe_bo_evict(vram_bo); if (ret) { KUNIT_FAIL(test, "Failed to evict bo.\n"); return; @@ -526,7 +523,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); check(retval, expected, "Clear evicted vram data first value", test); - retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64); + retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64); check(retval, expected, "Clear evicted vram data last value", test); fence = blt_copy(tile, vram_bo, ccs_bo, @@ -535,7 +532,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64); check(retval, 0, "Clear ccs data first 
value", test); - retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64); + retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64); check(retval, 0, "Clear ccs data last value", test); } dma_fence_put(fence); @@ -565,7 +562,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); check(retval, expected, "Restored value must be equal to initial value", test); - retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64); + retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64); check(retval, expected, "Restored value must be equal to initial value", test); fence = blt_copy(tile, vram_bo, ccs_bo, @@ -573,7 +570,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) { retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64); check(retval, 0, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64); + retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64); check(retval, 0, "Clear ccs data last value", test); } dma_fence_put(fence); @@ -586,7 +583,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, u64 expected, retval; expected = 0xd0d0d0d0d0d0d0d0; - xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size); + xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo)); fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test); if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) { @@ -600,7 +597,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Decompressed value must be equal to initial value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Decompressed value must be equal to initial value", test); } dma_fence_put(fence); @@ -618,7 +615,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Clear main buffer first value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Clear main buffer last value", test); } dma_fence_put(fence); @@ -628,7 +625,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Clear ccs data last value", test); } dma_fence_put(fence); diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 1d3e2e50c355..9c715e59f030 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -12,49 +12,79 @@ #include <kunit/test-bug.h> #include <kunit/visibility.h> +static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc) +{ + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%u.%02u %s", + param->verx100 / 100, 
param->verx100 % 100, param->name); +} + +KUNIT_ARRAY_PARAM(graphics_ip, graphics_ips, xe_ip_kunit_desc); +KUNIT_ARRAY_PARAM(media_ip, media_ips, xe_ip_kunit_desc); + +static void xe_pci_id_kunit_desc(const struct pci_device_id *param, char *desc) +{ + const struct xe_device_desc *dev_desc = + (const struct xe_device_desc *)param->driver_data; + + if (dev_desc) + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "0x%X (%s)", + param->device, dev_desc->platform_name); +} + +KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc); + /** - * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs - * @xe_fn: Function to call for each device. + * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * - * This function iterates over the descriptors for all graphics IPs recognized - * by the driver and calls @xe_fn: for each one of them. + * This function prepares struct xe_ip parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. + * + * Return: pointer to the next parameter or NULL if no more parameters */ -void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn) +const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc) { - const struct xe_graphics_desc *desc, *last = NULL; - - for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) { - desc = graphics_ips[i].desc; - if (desc == last) - continue; - - xe_fn(desc); - last = desc; - } + return graphics_ip_gen_params(prev, desc); } -EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_graphics_ip); +EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param); /** - * xe_call_for_each_media_ip - Iterate over all recognized media IPs - * @xe_fn: Function to call for each device. + * xe_pci_media_ip_gen_param - Generate media struct xe_ip parameters + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares struct xe_ip parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. * - * This function iterates over the descriptors for all media IPs recognized - * by the driver and calls @xe_fn: for each one of them. + * Return: pointer to the next parameter or NULL if no more parameters */ -void xe_call_for_each_media_ip(xe_media_fn xe_fn) +const void *xe_pci_media_ip_gen_param(const void *prev, char *desc) { - const struct xe_media_desc *desc, *last = NULL; + return media_ip_gen_params(prev, desc); +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param); - for (int i = 0; i < ARRAY_SIZE(media_ips); i++) { - desc = media_ips[i].desc; - if (desc == last) - continue; +/** + * xe_pci_id_gen_param - Generate struct pci_device_id parameters + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares struct pci_device_id parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. + * + * Return: pointer to the next parameter or NULL if no more parameters + */ +const void *xe_pci_id_gen_param(const void *prev, char *desc) +{ + const struct pci_device_id *pci = pci_id_gen_params(prev, desc); - xe_fn(desc); - last = desc; - } + return pci->driver_data ? 
pci : NULL; } -EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_media_ip); +EXPORT_SYMBOL_IF_KUNIT(xe_pci_id_gen_param); static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid) diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index 744a37583d2d..37b344df2dc3 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -14,9 +14,10 @@ #include "xe_pci_test.h" #include "xe_pci_types.h" -static void check_graphics_ip(const struct xe_graphics_desc *graphics) +static void check_graphics_ip(struct kunit *test) { - struct kunit *test = kunit_get_current_test(); + const struct xe_ip *param = test->param_value; + const struct xe_graphics_desc *graphics = param->desc; u64 mask = graphics->hw_engine_mask; /* RCS, CCS, and BCS engines are allowed on the graphics IP */ @@ -28,9 +29,10 @@ static void check_graphics_ip(const struct xe_graphics_desc *graphics) KUNIT_ASSERT_EQ(test, mask, 0); } -static void check_media_ip(const struct xe_media_desc *media) +static void check_media_ip(struct kunit *test) { - struct kunit *test = kunit_get_current_test(); + const struct xe_ip *param = test->param_value; + const struct xe_media_desc *media = param->desc; u64 mask = media->hw_engine_mask; /* VCS, VECS and GSCCS engines are allowed on the media IP */ @@ -42,19 +44,21 @@ static void check_media_ip(const struct xe_media_desc *media) KUNIT_ASSERT_EQ(test, mask, 0); } -static void xe_gmdid_graphics_ip(struct kunit *test) +static void check_platform_gt_count(struct kunit *test) { - xe_call_for_each_graphics_ip(check_graphics_ip); -} + const struct pci_device_id *pci = test->param_value; + const struct xe_device_desc *desc = + (const struct xe_device_desc *)pci->driver_data; + int max_gt = desc->max_gt_per_tile; -static void xe_gmdid_media_ip(struct kunit *test) -{ - xe_call_for_each_media_ip(check_media_ip); + KUNIT_ASSERT_GT(test, max_gt, 0); + KUNIT_ASSERT_LE(test, max_gt, XE_MAX_GT_PER_TILE); } static struct kunit_case xe_pci_tests[] = { - KUNIT_CASE(xe_gmdid_graphics_ip), - KUNIT_CASE(xe_gmdid_media_ip), + KUNIT_CASE_PARAM(check_graphics_ip, xe_pci_graphics_ip_gen_param), + KUNIT_CASE_PARAM(check_media_ip, xe_pci_media_ip_gen_param), + KUNIT_CASE_PARAM(check_platform_gt_count, xe_pci_id_gen_param), {} }; diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index ede46800aff1..ce4d2b86b778 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -12,15 +12,6 @@ #include "xe_sriov_types.h" struct xe_device; -struct xe_graphics_desc; -struct xe_media_desc; - -typedef int (*xe_device_fn)(struct xe_device *); -typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *); -typedef void (*xe_media_fn)(const struct xe_media_desc *); - -void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); -void xe_call_for_each_media_ip(xe_media_fn xe_fn); struct xe_pci_fake_data { enum xe_sriov_mode sriov_mode; @@ -34,6 +25,9 @@ struct xe_pci_fake_data { int xe_pci_fake_device_init(struct xe_device *xe); +const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc); +const void *xe_pci_media_ip_gen_param(const void *prev, char *desc); +const void *xe_pci_id_gen_param(const void *prev, char *desc); const void *xe_pci_live_device_gen_param(const void *prev, char *desc); #endif diff --git a/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c new file mode 100644 index 
000000000000..ba95e29b597d --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2024-2025 Intel Corporation + */ + +#include <kunit/test.h> + +#include "xe_device.h" +#include "xe_kunit_helpers.h" +#include "xe_pci_test.h" + +static int pf_service_test_init(struct kunit *test) +{ + struct xe_pci_fake_data fake = { + .sriov_mode = XE_SRIOV_MODE_PF, + .platform = XE_TIGERLAKE, /* some random platform */ + .subplatform = XE_SUBPLATFORM_NONE, + }; + struct xe_device *xe; + + test->priv = &fake; + xe_kunit_helper_xe_device_test_init(test); + + xe = test->priv; + KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); + + xe_sriov_pf_service_init(xe); + /* + * sanity check: + * - all supported platforms VF/PF ABI versions must be defined + * - base version can't be newer than latest + */ + KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.latest.major); + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.minor, + xe->sriov.pf.service.version.latest.minor); + return 0; +} + +static void pf_negotiate_any(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, VF2PF_HANDSHAKE_MAJOR_ANY, + VF2PF_HANDSHAKE_MINOR_ANY, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_base_match(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.base.minor); +} + +static void pf_negotiate_base_newer(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_GE(test, minor, xe->sriov.pf.service.version.base.minor); + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_next(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_LE(test, major, xe->sriov.pf.service.version.latest.major); + if (major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_older(struct kunit *test) +{ + struct xe_device 
*xe = test->priv; + u32 major, minor; + + if (!xe->sriov.pf.service.version.base.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor - 1, + &major, &minor)); +} + +static void pf_negotiate_base_prev(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major - 1, 1, + &major, &minor)); +} + +static void pf_negotiate_latest_match(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_newer(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_next(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_older(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (!xe->sriov.pf.service.version.latest.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor - 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor - 1); +} + +static void pf_negotiate_latest_prev(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + kunit_skip(test, "no prev major"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major - 1, + xe->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major - 1); + KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major); +} + +static struct kunit_case pf_service_test_cases[] = { + KUNIT_CASE(pf_negotiate_any), + KUNIT_CASE(pf_negotiate_base_match), + KUNIT_CASE(pf_negotiate_base_newer), + KUNIT_CASE(pf_negotiate_base_next), + KUNIT_CASE(pf_negotiate_base_older), + KUNIT_CASE(pf_negotiate_base_prev), + KUNIT_CASE(pf_negotiate_latest_match), + KUNIT_CASE(pf_negotiate_latest_newer), + KUNIT_CASE(pf_negotiate_latest_next), + KUNIT_CASE(pf_negotiate_latest_older), + KUNIT_CASE(pf_negotiate_latest_prev), + {} +}; + +static struct kunit_suite pf_service_suite = { + .name = "pf_service", + .test_cases = pf_service_test_cases, + 
.init = pf_service_test_init, +}; + +kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 9570672fce33..5ce0e26822f2 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -19,7 +19,7 @@ static int bb_prefetch(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt)) + if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt)) /* * RCS and CCS require 1K, although other engines would be * okay with 512. diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h index fafacd73dcc3..b5cc65506696 100644 --- a/drivers/gpu/drm/xe/xe_bb.h +++ b/drivers/gpu/drm/xe/xe_bb.h @@ -14,7 +14,7 @@ struct xe_gt; struct xe_exec_queue; struct xe_sched_job; -struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm); +struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm); struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb); struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 64f9c936eea0..18f27da47a36 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -19,6 +19,8 @@ #include <kunit/static_stub.h> +#include <trace/events/gpu_mem.h> + #include "xe_device.h" #include "xe_dma_buf.h" #include "xe_drm_client.h" @@ -55,6 +57,8 @@ static struct ttm_placement sys_placement = { .placement = &sys_placement_flags, }; +static struct ttm_placement purge_placement; + static const struct ttm_place tt_placement_flags[] = { { .fpfn = 0, @@ -189,11 +193,18 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo, static bool force_contiguous(u32 bo_flags) { + if (bo_flags & XE_BO_FLAG_STOLEN) + return true; /* users expect this */ + else if (bo_flags & XE_BO_FLAG_PINNED && + !(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) + return true; /* needs vmap */ + /* * For eviction / restore on suspend / resume objects pinned in VRAM * must be contiguous, also only contiguous BOs support xe_bo_vmap. */ - return bo_flags & (XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT); + return bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS && + bo_flags & XE_BO_FLAG_PINNED; } static void add_vram(struct xe_device *xe, struct xe_bo *bo, @@ -281,6 +292,8 @@ int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, static void xe_evict_flags(struct ttm_buffer_object *tbo, struct ttm_placement *placement) { + struct xe_device *xe = container_of(tbo->bdev, typeof(*xe), ttm); + bool device_unplugged = drm_dev_is_unplugged(&xe->drm); struct xe_bo *bo; if (!xe_bo_is_xe_bo(tbo)) { @@ -290,7 +303,7 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, return; } - *placement = sys_placement; + *placement = device_unplugged ? purge_placement : sys_placement; return; } @@ -300,6 +313,11 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, return; } + if (device_unplugged && !tbo->base.dma_buf) { + *placement = purge_placement; + return; + } + /* * For xe, sg bos that are evicted to system just triggers a * rebind of the sg list upon subsequent validation to XE_PL_TT. @@ -320,15 +338,13 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, /* struct xe_ttm_tt - Subclassed ttm_tt for xe */ struct xe_ttm_tt { struct ttm_tt ttm; - /** @xe - The xe device */ - struct xe_device *xe; struct sg_table sgt; struct sg_table *sg; /** @purgeable: Whether the content of the pages of @ttm is purgeable. 
*/ bool purgeable; }; -static int xe_tt_map_sg(struct ttm_tt *tt) +static int xe_tt_map_sg(struct xe_device *xe, struct ttm_tt *tt) { struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); unsigned long num_pages = tt->num_pages; @@ -343,13 +359,13 @@ static int xe_tt_map_sg(struct ttm_tt *tt) ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages, num_pages, 0, (u64)num_pages << PAGE_SHIFT, - xe_sg_segment_size(xe_tt->xe->drm.dev), + xe_sg_segment_size(xe->drm.dev), GFP_KERNEL); if (ret) return ret; xe_tt->sg = &xe_tt->sgt; - ret = dma_map_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL, + ret = dma_map_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); if (ret) { sg_free_table(xe_tt->sg); @@ -360,12 +376,12 @@ static int xe_tt_map_sg(struct ttm_tt *tt) return 0; } -static void xe_tt_unmap_sg(struct ttm_tt *tt) +static void xe_tt_unmap_sg(struct xe_device *xe, struct ttm_tt *tt) { struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); if (xe_tt->sg) { - dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, + dma_unmap_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL, 0); sg_free_table(xe_tt->sg); xe_tt->sg = NULL; @@ -384,24 +400,37 @@ struct sg_table *xe_bo_sg(struct xe_bo *bo) * Account ttm pages against the device shrinker's shrinkable and * purgeable counts. */ -static void xe_ttm_tt_account_add(struct ttm_tt *tt) +static void xe_ttm_tt_account_add(struct xe_device *xe, struct ttm_tt *tt) { struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); if (xe_tt->purgeable) - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, tt->num_pages); + xe_shrinker_mod_pages(xe->mem.shrinker, 0, tt->num_pages); else - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, tt->num_pages, 0); + xe_shrinker_mod_pages(xe->mem.shrinker, tt->num_pages, 0); } -static void xe_ttm_tt_account_subtract(struct ttm_tt *tt) +static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt) { struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); if (xe_tt->purgeable) - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, -(long)tt->num_pages); + xe_shrinker_mod_pages(xe->mem.shrinker, 0, -(long)tt->num_pages); else - xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, -(long)tt->num_pages, 0); + xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0); +} + +static void update_global_total_pages(struct ttm_device *ttm_dev, + long num_pages) +{ +#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) + struct xe_device *xe = ttm_to_xe_device(ttm_dev); + u64 global_total_pages = + atomic64_add_return(num_pages, &xe->global_total_pages); + + trace_gpu_mem_total(xe->drm.primary->index, 0, + global_total_pages << PAGE_SHIFT); +#endif } static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, @@ -420,11 +449,10 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, return NULL; tt = &xe_tt->ttm; - xe_tt->xe = xe; extra_pages = 0; if (xe_bo_needs_ccs_pages(bo)) - extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size), + extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)), PAGE_SIZE); /* @@ -511,21 +539,25 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, return err; xe_tt->purgeable = false; - xe_ttm_tt_account_add(tt); + xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt); + update_global_total_pages(ttm_dev, tt->num_pages); return 0; } static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt) { + struct xe_device *xe = 
ttm_to_xe_device(ttm_dev); + if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) && !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE)) return; - xe_tt_unmap_sg(tt); + xe_tt_unmap_sg(xe, tt); ttm_pool_free(&ttm_dev->pool, tt); - xe_ttm_tt_account_subtract(tt); + xe_ttm_tt_account_subtract(xe, tt); + update_global_total_pages(ttm_dev, -(long)tt->num_pages); } static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt) @@ -657,11 +689,20 @@ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo, struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm); struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + bool device_unplugged = drm_dev_is_unplugged(&xe->drm); struct sg_table *sg; xe_assert(xe, attach); xe_assert(xe, ttm_bo->ttm); + if (device_unplugged && new_res->mem_type == XE_PL_SYSTEM && + ttm_bo->sg) { + dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL); + ttm_bo->sg = NULL; + } + if (new_res->mem_type == XE_PL_SYSTEM) goto out; @@ -764,7 +805,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, /* Bo creation path, moving to system or TT. */ if ((!old_mem && ttm) && !handle_system_ccs) { if (new_mem->mem_type == XE_PL_TT) - ret = xe_tt_map_sg(ttm); + ret = xe_tt_map_sg(xe, ttm); if (!ret) ttm_bo_move_null(ttm_bo, new_mem); goto out; @@ -787,7 +828,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, (!ttm && ttm_bo->type == ttm_bo_type_device); if (new_mem->mem_type == XE_PL_TT) { - ret = xe_tt_map_sg(ttm); + ret = xe_tt_map_sg(xe, ttm); if (ret) goto out; } @@ -816,21 +857,6 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } - /* Reject BO eviction if BO is bound to current VM. */ - if (evict && ctx->resv) { - struct drm_gpuvm_bo *vm_bo; - - drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) { - struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); - - if (xe_vm_resv(vm) == ctx->resv && - xe_vm_in_preempt_fence_mode(vm)) { - ret = -EBUSY; - goto out; - } - } - } - /* * Failed multi-hop where the old_mem is still marked as * TTM_PL_FLAG_TEMPORARY, should just be a dummy move. @@ -898,79 +924,44 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, xe_pm_runtime_get_noresume(xe); } - if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { - /* - * Kernel memory that is pinned should only be moved on suspend - * / resume, some of the pinned memory is required for the - * device to resume / use the GPU to move other evicted memory - * (user memory) around. This likely could be optimized a bit - * further where we find the minimum set of pinned memory - * required for resume but for simplity doing a memcpy for all - * pinned memory. 
- */ - ret = xe_bo_vmap(bo); - if (!ret) { - ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem); - - /* Create a new VMAP once kernel BO back in VRAM */ - if (!ret && resource_is_vram(new_mem)) { - struct xe_vram_region *vram = res_to_mem_region(new_mem); - void __iomem *new_addr = vram->mapping + - (new_mem->start << PAGE_SHIFT); + if (move_lacks_source) { + u32 flags = 0; - if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) { - ret = -EINVAL; - xe_pm_runtime_put(xe); - goto out; - } + if (mem_type_is_vram(new_mem->mem_type)) + flags |= XE_MIGRATE_CLEAR_FLAG_FULL; + else if (handle_system_ccs) + flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA; - xe_assert(xe, new_mem->start == - bo->placements->fpfn); - - iosys_map_set_vaddr_iomem(&bo->vmap, new_addr); - } - } + fence = xe_migrate_clear(migrate, bo, new_mem, flags); } else { - if (move_lacks_source) { - u32 flags = 0; - - if (mem_type_is_vram(new_mem->mem_type)) - flags |= XE_MIGRATE_CLEAR_FLAG_FULL; - else if (handle_system_ccs) - flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA; - - fence = xe_migrate_clear(migrate, bo, new_mem, flags); - } - else - fence = xe_migrate_copy(migrate, bo, bo, old_mem, - new_mem, handle_system_ccs); - if (IS_ERR(fence)) { - ret = PTR_ERR(fence); - xe_pm_runtime_put(xe); - goto out; - } - if (!move_lacks_source) { - ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, - true, new_mem); - if (ret) { - dma_fence_wait(fence, false); - ttm_bo_move_null(ttm_bo, new_mem); - ret = 0; - } - } else { - /* - * ttm_bo_move_accel_cleanup() may blow up if - * bo->resource == NULL, so just attach the - * fence and set the new resource. - */ - dma_resv_add_fence(ttm_bo->base.resv, fence, - DMA_RESV_USAGE_KERNEL); + fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem, + handle_system_ccs); + } + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + xe_pm_runtime_put(xe); + goto out; + } + if (!move_lacks_source) { + ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true, + new_mem); + if (ret) { + dma_fence_wait(fence, false); ttm_bo_move_null(ttm_bo, new_mem); + ret = 0; } - - dma_fence_put(fence); + } else { + /* + * ttm_bo_move_accel_cleanup() may blow up if + * bo->resource == NULL, so just attach the + * fence and set the new resource. + */ + dma_resv_add_fence(ttm_bo->base.resv, fence, + DMA_RESV_USAGE_KERNEL); + ttm_bo_move_null(ttm_bo, new_mem); } + dma_fence_put(fence); xe_pm_runtime_put(xe); out: @@ -983,7 +974,7 @@ out: if (timeout < 0) ret = timeout; - xe_tt_unmap_sg(ttm_bo->ttm); + xe_tt_unmap_sg(xe, ttm_bo->ttm); } return ret; @@ -993,6 +984,7 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, unsigned long *scanned) { + struct xe_device *xe = ttm_to_xe_device(bo->bdev); long lret; /* Fake move to system, without copying data. 
*/ @@ -1007,7 +999,7 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx, if (lret) return lret; - xe_tt_unmap_sg(bo->ttm); + xe_tt_unmap_sg(xe, bo->ttm); ttm_bo_move_null(bo, new_resource); } @@ -1018,11 +1010,30 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx, .allow_move = false}); if (lret > 0) - xe_ttm_tt_account_subtract(bo->ttm); + xe_ttm_tt_account_subtract(xe, bo->ttm); return lret; } +static bool +xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) +{ + struct drm_gpuvm_bo *vm_bo; + + if (!ttm_bo_eviction_valuable(bo, place)) + return false; + + if (!xe_bo_is_xe_bo(bo)) + return true; + + drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) { + if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm))) + return false; + } + + return true; +} + /** * xe_bo_shrink() - Try to shrink an xe bo. * @ctx: The struct ttm_operation_ctx used for shrinking. @@ -1049,7 +1060,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); struct ttm_place place = {.mem_type = bo->resource->mem_type}; struct xe_bo *xe_bo = ttm_to_xe_bo(bo); - struct xe_device *xe = xe_tt->xe; + struct xe_device *xe = ttm_to_xe_device(bo->bdev); bool needs_rpm; long lret = 0L; @@ -1057,7 +1068,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, (flags.purge && !xe_tt->purgeable)) return -EBUSY; - if (!ttm_bo_eviction_valuable(bo, &place)) + if (!xe_bo_eviction_valuable(bo, &place)) return -EBUSY; if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo)) @@ -1086,7 +1097,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, xe_pm_runtime_put(xe); if (lret > 0) - xe_ttm_tt_account_subtract(tt); + xe_ttm_tt_account_subtract(xe, tt); out_unref: xe_bo_put(xe_bo); @@ -1095,6 +1106,80 @@ out_unref: } /** + * xe_bo_notifier_prepare_pinned() - Prepare a pinned VRAM object to be backed + * up in system memory. + * @bo: The buffer object to prepare. + * + * On successful completion, the object backup pages are allocated. Expectation + * is that this is called from the PM notifier, prior to suspend/hibernation. + * + * Return: 0 on success. Negative error code on failure. + */ +int xe_bo_notifier_prepare_pinned(struct xe_bo *bo) +{ + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); + struct xe_bo *backup; + int ret = 0; + + xe_bo_lock(bo, false); + + xe_assert(xe, !bo->backup_obj); + + /* + * Since this is called from the PM notifier we might have raced with + * someone unpinning this after we dropped the pinned list lock and + * grabbing the above bo lock. + */ + if (!xe_bo_is_pinned(bo)) + goto out_unlock_bo; + + if (!xe_bo_is_vram(bo)) + goto out_unlock_bo; + + if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) + goto out_unlock_bo; + + backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo), + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); + if (IS_ERR(backup)) { + ret = PTR_ERR(backup); + goto out_unlock_bo; + } + + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ + ttm_bo_pin(&backup->ttm); + bo->backup_obj = backup; + +out_unlock_bo: + xe_bo_unlock(bo); + return ret; +} + +/** + * xe_bo_notifier_unprepare_pinned() - Undo the previous prepare operation. + * @bo: The buffer object to undo the prepare for. + * + * Always returns 0. The backup object is removed, if still present. 
Expectation + * it that this called from the PM notifier when undoing the prepare step. + * + * Return: Always returns 0. + */ +int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo) +{ + xe_bo_lock(bo, false); + if (bo->backup_obj) { + ttm_bo_unpin(&bo->backup_obj->ttm); + xe_bo_put(bo->backup_obj); + bo->backup_obj = NULL; + } + xe_bo_unlock(bo); + + return 0; +} + +/** * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory * @bo: The buffer object to move. * @@ -1107,59 +1192,100 @@ out_unref: */ int xe_bo_evict_pinned(struct xe_bo *bo) { - struct ttm_place place = { - .mem_type = XE_PL_TT, - }; - struct ttm_placement placement = { - .placement = &place, - .num_placement = 1, - }; - struct ttm_operation_ctx ctx = { - .interruptible = false, - .gfp_retry_mayfail = true, - }; - struct ttm_resource *new_mem; - int ret; + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); + struct xe_bo *backup = bo->backup_obj; + bool backup_created = false; + bool unmap = false; + int ret = 0; - xe_bo_assert_held(bo); + xe_bo_lock(bo, false); - if (WARN_ON(!bo->ttm.resource)) - return -EINVAL; + if (WARN_ON(!bo->ttm.resource)) { + ret = -EINVAL; + goto out_unlock_bo; + } - if (WARN_ON(!xe_bo_is_pinned(bo))) - return -EINVAL; + if (WARN_ON(!xe_bo_is_pinned(bo))) { + ret = -EINVAL; + goto out_unlock_bo; + } if (!xe_bo_is_vram(bo)) - return 0; + goto out_unlock_bo; + + if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) + goto out_unlock_bo; + + if (!backup) { + backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, + NULL, xe_bo_size(bo), + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); + if (IS_ERR(backup)) { + ret = PTR_ERR(backup); + goto out_unlock_bo; + } + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ + backup_created = true; + } - ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx); - if (ret) - return ret; + if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) { + struct xe_migrate *migrate; + struct dma_fence *fence; + + if (bo->tile) + migrate = bo->tile->migrate; + else + migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type); - if (!bo->ttm.ttm) { - bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0); - if (!bo->ttm.ttm) { - ret = -ENOMEM; - goto err_res_free; + ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); + if (ret) + goto out_backup; + + ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1); + if (ret) + goto out_backup; + + fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource, + backup->ttm.resource, false); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + goto out_backup; } - } - ret = ttm_bo_populate(&bo->ttm, &ctx); - if (ret) - goto err_res_free; + dma_resv_add_fence(bo->ttm.base.resv, fence, + DMA_RESV_USAGE_KERNEL); + dma_resv_add_fence(backup->ttm.base.resv, fence, + DMA_RESV_USAGE_KERNEL); + dma_fence_put(fence); + } else { + ret = xe_bo_vmap(backup); + if (ret) + goto out_backup; - ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); - if (ret) - goto err_res_free; + if (iosys_map_is_null(&bo->vmap)) { + ret = xe_bo_vmap(bo); + if (ret) + goto out_backup; + unmap = true; + } - ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL); - if (ret) - goto err_res_free; + xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0, + xe_bo_size(bo)); + } - return 0; + if (!bo->backup_obj) + bo->backup_obj = backup; -err_res_free: - ttm_resource_free(&bo->ttm, &new_mem); +out_backup: + xe_bo_vunmap(backup); + if (ret && backup_created) + 
xe_bo_put(backup); +out_unlock_bo: + if (unmap) + xe_bo_vunmap(bo); + xe_bo_unlock(bo); return ret; } @@ -1180,50 +1306,110 @@ int xe_bo_restore_pinned(struct xe_bo *bo) .interruptible = false, .gfp_retry_mayfail = false, }; - struct ttm_resource *new_mem; - struct ttm_place *place = &bo->placements[0]; + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); + struct xe_bo *backup = bo->backup_obj; + bool unmap = false; int ret; - xe_bo_assert_held(bo); + if (!backup) + return 0; - if (WARN_ON(!bo->ttm.resource)) - return -EINVAL; + xe_bo_lock(bo, false); - if (WARN_ON(!xe_bo_is_pinned(bo))) - return -EINVAL; + if (!xe_bo_is_pinned(backup)) { + ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx); + if (ret) + goto out_unlock_bo; + } - if (WARN_ON(xe_bo_is_vram(bo))) - return -EINVAL; + if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) { + struct xe_migrate *migrate; + struct dma_fence *fence; - if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo))) - return -EINVAL; + if (bo->tile) + migrate = bo->tile->migrate; + else + migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type); - if (!mem_type_is_vram(place->mem_type)) - return 0; + ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); + if (ret) + goto out_unlock_bo; - ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx); - if (ret) - return ret; + ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1); + if (ret) + goto out_unlock_bo; - ret = ttm_bo_populate(&bo->ttm, &ctx); - if (ret) - goto err_res_free; + fence = xe_migrate_copy(migrate, backup, bo, + backup->ttm.resource, bo->ttm.resource, + false); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + goto out_unlock_bo; + } - ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); - if (ret) - goto err_res_free; + dma_resv_add_fence(bo->ttm.base.resv, fence, + DMA_RESV_USAGE_KERNEL); + dma_resv_add_fence(backup->ttm.base.resv, fence, + DMA_RESV_USAGE_KERNEL); + dma_fence_put(fence); + } else { + ret = xe_bo_vmap(backup); + if (ret) + goto out_unlock_bo; - ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL); - if (ret) - goto err_res_free; + if (iosys_map_is_null(&bo->vmap)) { + ret = xe_bo_vmap(bo); + if (ret) + goto out_backup; + unmap = true; + } - return 0; + xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr, + xe_bo_size(bo)); + } -err_res_free: - ttm_resource_free(&bo->ttm, &new_mem); + bo->backup_obj = NULL; + +out_backup: + xe_bo_vunmap(backup); + if (!bo->backup_obj) { + if (xe_bo_is_pinned(backup)) + ttm_bo_unpin(&backup->ttm); + xe_bo_put(backup); + } +out_unlock_bo: + if (unmap) + xe_bo_vunmap(bo); + xe_bo_unlock(bo); return ret; } +int xe_bo_dma_unmap_pinned(struct xe_bo *bo) +{ + struct ttm_buffer_object *ttm_bo = &bo->ttm; + struct ttm_tt *tt = ttm_bo->ttm; + + if (tt) { + struct xe_ttm_tt *xe_tt = container_of(tt, typeof(*xe_tt), ttm); + + if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) { + dma_buf_unmap_attachment(ttm_bo->base.import_attach, + ttm_bo->sg, + DMA_BIDIRECTIONAL); + ttm_bo->sg = NULL; + xe_tt->sg = NULL; + } else if (xe_tt->sg) { + dma_unmap_sgtable(ttm_to_xe_device(ttm_bo->bdev)->drm.dev, + xe_tt->sg, + DMA_BIDIRECTIONAL, 0); + sg_free_table(xe_tt->sg); + xe_tt->sg = NULL; + } + } + + return 0; +} + static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo, unsigned long page_offset) { @@ -1371,6 +1557,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, struct xe_res_cursor cursor; struct xe_vram_region *vram; int bytes_left = len; + int err = 0; xe_bo_assert_held(bo); 
xe_device_assert_mem_access(xe); @@ -1378,13 +1565,18 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, if (!mem_type_is_vram(ttm_bo->resource->mem_type)) return -EIO; - /* FIXME: Use GPU for non-visible VRAM */ - if (!xe_ttm_resource_visible(ttm_bo->resource)) - return -EIO; + if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) { + struct xe_migrate *migrate = + mem_type_to_migrate(xe, ttm_bo->resource->mem_type); + + err = xe_migrate_access_memory(migrate, bo, offset, buf, len, + write); + goto out; + } vram = res_to_mem_region(ttm_bo->resource); xe_res_first(ttm_bo->resource, offset & PAGE_MASK, - bo->size - (offset & PAGE_MASK), &cursor); + xe_bo_size(bo) - (offset & PAGE_MASK), &cursor); do { unsigned long page_offset = (offset & ~PAGE_MASK); @@ -1404,7 +1596,8 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, xe_res_next(&cursor, PAGE_SIZE); } while (bytes_left); - return len; +out: + return err ?: len; } const struct ttm_device_funcs xe_ttm_funcs = { @@ -1418,7 +1611,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { .io_mem_pfn = xe_ttm_io_mem_pfn, .access_memory = xe_ttm_access_memory, .release_notify = xe_ttm_bo_release_notify, - .eviction_valuable = ttm_bo_eviction_valuable, + .eviction_valuable = xe_bo_eviction_valuable, .delete_mem_notify = xe_ttm_bo_delete_mem_notify, .swap_notify = xe_ttm_bo_swap_notify, }; @@ -1448,6 +1641,9 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) if (bo->vm && xe_bo_is_user(bo)) xe_vm_put(bo->vm); + if (bo->parent_obj) + xe_bo_put(bo->parent_obj); + mutex_lock(&xe->mem_access.vram_userfault.lock); if (!list_empty(&bo->vram_userfault_link)) list_del(&bo->vram_userfault_link); @@ -1680,7 +1876,6 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, bo->ccs_cleared = false; bo->tile = tile; - bo->size = size; bo->flags = flags; bo->cpu_caching = cpu_caching; bo->ttm.base.funcs = &xe_gem_object_funcs; @@ -1858,7 +2053,7 @@ __xe_bo_create_locked(struct xe_device *xe, if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo, - start + bo->size, U64_MAX); + start + xe_bo_size(bo), U64_MAX); } else { err = xe_ggtt_insert_bo(t->mem.ggtt, bo); } @@ -1947,7 +2142,7 @@ struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, flags |= XE_BO_FLAG_GGTT; bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, - flags | XE_BO_FLAG_NEEDS_CPU_ACCESS, + flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED, alignment); if (IS_ERR(bo)) return bo; @@ -1979,21 +2174,6 @@ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags); } -struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, - const void *data, size_t size, - enum ttm_bo_type type, u32 flags) -{ - struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - type, flags); - if (IS_ERR(bo)) - return bo; - - xe_map_memcpy_to(xe, &bo->vmap, 0, data, size); - - return bo; -} - static void __xe_bo_unpin_map_no_vm(void *arg) { xe_bo_unpin_map_no_vm(arg); @@ -2049,13 +2229,14 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str struct xe_bo *bo; u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT; - dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE; + dst_flags |= (*src)->flags & (XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); xe_assert(xe, IS_DGFX(xe)); xe_assert(xe, 
!(*src)->vmap.is_iomem); bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, - (*src)->size, dst_flags); + xe_bo_size(*src), dst_flags); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -2073,10 +2254,16 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res) { struct xe_device *xe = ttm_to_xe_device(res->bo->bdev); - if (res->mem_type == XE_PL_STOLEN) + switch (res->mem_type) { + case XE_PL_STOLEN: return xe_ttm_stolen_gpu_offset(xe); - - return res_to_mem_region(res)->dpa_base; + case XE_PL_TT: + case XE_PL_SYSTEM: + return 0; + default: + return res_to_mem_region(res)->dpa_base; + } + return 0; } /** @@ -2102,17 +2289,14 @@ int xe_bo_pin_external(struct xe_bo *bo) if (err) return err; - if (xe_bo_is_vram(bo)) { - spin_lock(&xe->pinned.lock); - list_add_tail(&bo->pinned_link, - &xe->pinned.external_vram); - spin_unlock(&xe->pinned.lock); - } + spin_lock(&xe->pinned.lock); + list_add_tail(&bo->pinned_link, &xe->pinned.late.external); + spin_unlock(&xe->pinned.lock); } ttm_bo_pin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) - xe_ttm_tt_account_subtract(bo->ttm.ttm); + xe_ttm_tt_account_subtract(xe, bo->ttm.ttm); /* * FIXME: If we always use the reserve / unreserve functions for locking @@ -2149,31 +2333,18 @@ int xe_bo_pin(struct xe_bo *bo) if (err) return err; - /* - * For pinned objects in on DGFX, which are also in vram, we expect - * these to be in contiguous VRAM memory. Required eviction / restore - * during suspend / resume (force restore to same physical address). - */ - if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && - bo->flags & XE_BO_FLAG_INTERNAL_TEST)) { - if (mem_type_is_vram(place->mem_type)) { - xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS); - - place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) - - vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT; - place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT); - } - } - if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) { spin_lock(&xe->pinned.lock); - list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); + if (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) + list_add_tail(&bo->pinned_link, &xe->pinned.late.kernel_bo_present); + else + list_add_tail(&bo->pinned_link, &xe->pinned.early.kernel_bo_present); spin_unlock(&xe->pinned.lock); } ttm_bo_pin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) - xe_ttm_tt_account_subtract(bo->ttm.ttm); + xe_ttm_tt_account_subtract(xe, bo->ttm.ttm); /* * FIXME: If we always use the reserve / unreserve functions for locking @@ -2209,7 +2380,7 @@ void xe_bo_unpin_external(struct xe_bo *bo) ttm_bo_unpin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) - xe_ttm_tt_account_add(bo->ttm.ttm); + xe_ttm_tt_account_add(xe, bo->ttm.ttm); /* * FIXME: If we always use the reserve / unreserve functions for locking @@ -2231,10 +2402,17 @@ void xe_bo_unpin(struct xe_bo *bo) xe_assert(xe, !list_empty(&bo->pinned_link)); list_del_init(&bo->pinned_link); spin_unlock(&xe->pinned.lock); + + if (bo->backup_obj) { + if (xe_bo_is_pinned(bo->backup_obj)) + ttm_bo_unpin(&bo->backup_obj->ttm); + xe_bo_put(bo->backup_obj); + bo->backup_obj = NULL; + } } ttm_bo_unpin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) - xe_ttm_tt_account_add(bo->ttm.ttm); + xe_ttm_tt_account_add(xe, bo->ttm.ttm); } /** @@ -2260,6 +2438,8 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) .no_wait_gpu = false, .gfp_retry_mayfail = true, }; + struct pin_cookie cookie; + int ret; if (vm) { 
lockdep_assert_held(&vm->lock); @@ -2269,8 +2449,12 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) ctx.resv = xe_vm_resv(vm); } + cookie = xe_vm_set_validating(vm, allow_res_evict); trace_xe_bo_validate(bo); - return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); + ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); + xe_vm_clear_validating(vm, allow_res_evict, cookie); + + return ret; } bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo) @@ -2342,7 +2526,7 @@ int xe_bo_vmap(struct xe_bo *bo) * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap * to use struct iosys_map. */ - ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap); + ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap); if (ret) return ret; @@ -2386,7 +2570,7 @@ typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe, u64 value); static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = { - [DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_set_pxp_type, + [DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type, }; static int gem_create_user_ext_set_property(struct xe_device *xe, @@ -2398,7 +2582,7 @@ static int gem_create_user_ext_set_property(struct xe_device *xe, int err; u32 idx; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -2435,7 +2619,7 @@ static int gem_create_user_extensions(struct xe_device *xe, struct xe_bo *bo, if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -2759,19 +2943,17 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) /** * xe_bo_evict - Evict an object to evict placement * @bo: The buffer object to migrate. - * @force_alloc: Set force_alloc in ttm_operation_ctx * * On successful completion, the object memory will be moved to evict * placement. This function blocks until the object has been fully moved. * * Return: 0 on success. Negative error code on failure. */ -int xe_bo_evict(struct xe_bo *bo, bool force_alloc) +int xe_bo_evict(struct xe_bo *bo) { struct ttm_operation_ctx ctx = { .interruptible = false, .no_wait_gpu = false, - .force_alloc = force_alloc, .gfp_retry_mayfail = true, }; struct ttm_placement placement; @@ -2813,6 +2995,14 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo) if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM)) return false; + /* + * Compression implies coh_none, therefore we know for sure that WB + * memory can't currently use compression, which is likely one of the + * common cases. 
+ */ + if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB) + return false; + return true; } @@ -2888,7 +3078,7 @@ void xe_bo_put(struct xe_bo *bo) #endif for_each_tile(tile, xe_bo_device(bo), id) if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt) - might_lock(&bo->ggtt_node[id]->ggtt->lock); + xe_ggtt_might_lock(bo->ggtt_node[id]->ggtt); drm_gem_object_put(&bo->ttm.base); } } diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index ec3e4446d027..02e8cde4c6b2 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -39,20 +39,23 @@ #define XE_BO_FLAG_NEEDS_64K BIT(15) #define XE_BO_FLAG_NEEDS_2M BIT(16) #define XE_BO_FLAG_GGTT_INVALIDATE BIT(17) -#define XE_BO_FLAG_GGTT0 BIT(18) -#define XE_BO_FLAG_GGTT1 BIT(19) -#define XE_BO_FLAG_GGTT2 BIT(20) -#define XE_BO_FLAG_GGTT3 BIT(21) -#define XE_BO_FLAG_GGTT_ALL (XE_BO_FLAG_GGTT0 | \ - XE_BO_FLAG_GGTT1 | \ - XE_BO_FLAG_GGTT2 | \ - XE_BO_FLAG_GGTT3) -#define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(22) +#define XE_BO_FLAG_PINNED_NORESTORE BIT(18) +#define XE_BO_FLAG_PINNED_LATE_RESTORE BIT(19) +#define XE_BO_FLAG_GGTT0 BIT(20) +#define XE_BO_FLAG_GGTT1 BIT(21) +#define XE_BO_FLAG_GGTT2 BIT(22) +#define XE_BO_FLAG_GGTT3 BIT(23) +#define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(24) /* this one is trigger internally only */ #define XE_BO_FLAG_INTERNAL_TEST BIT(30) #define XE_BO_FLAG_INTERNAL_64K BIT(31) +#define XE_BO_FLAG_GGTT_ALL (XE_BO_FLAG_GGTT0 | \ + XE_BO_FLAG_GGTT1 | \ + XE_BO_FLAG_GGTT2 | \ + XE_BO_FLAG_GGTT3) + #define XE_BO_FLAG_GGTTx(tile) \ (XE_BO_FLAG_GGTT0 << (tile)->id) @@ -115,9 +118,6 @@ struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, size_t size, u64 offset, enum ttm_bo_type type, u32 flags, u64 alignment); -struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, - const void *data, size_t size, - enum ttm_bo_type type, u32 flags); struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, size_t size, u32 flags); struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, @@ -235,6 +235,19 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) return xe_bo_addr(bo, 0, page_size); } +/** + * xe_bo_size() - Xe BO size + * @bo: The bo object. + * + * Simple helper to return Xe BO's size. 
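 *
 * This helper replaces direct reads of the old &xe_bo.size field, which is
 * dropped elsewhere in this series in favour of the size tracked by the
 * embedded TTM object. A minimal usage sketch (mirroring the ttm_bo_kmap()
 * conversion in xe_bo_vmap(); the local variable is illustrative only):
 *
 *	size_t npages = xe_bo_size(bo) >> PAGE_SHIFT;
 *
 *	ret = ttm_bo_kmap(&bo->ttm, 0, npages, &bo->kmap);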
+ * + * Return: Xe BO's size + */ +static inline size_t xe_bo_size(struct xe_bo *bo) +{ + return bo->ttm.base.size; +} + static inline u32 __xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id) { @@ -243,7 +256,7 @@ __xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id) if (XE_WARN_ON(!ggtt_node)) return 0; - XE_WARN_ON(ggtt_node->base.size > bo->size); + XE_WARN_ON(ggtt_node->base.size > xe_bo_size(bo)); XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32)); return ggtt_node->base.start; } @@ -271,11 +284,15 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res); bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); int xe_bo_migrate(struct xe_bo *bo, u32 mem_type); -int xe_bo_evict(struct xe_bo *bo, bool force_alloc); +int xe_bo_evict(struct xe_bo *bo); int xe_bo_evict_pinned(struct xe_bo *bo); +int xe_bo_notifier_prepare_pinned(struct xe_bo *bo); +int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo); int xe_bo_restore_pinned(struct xe_bo *bo); +int xe_bo_dma_unmap_pinned(struct xe_bo *bo); + extern const struct ttm_device_funcs xe_ttm_funcs; extern const char *const xe_mem_type_to_name[]; @@ -293,7 +310,7 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo); static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo) { - return PAGE_ALIGN(bo->ttm.base.size); + return PAGE_ALIGN(xe_bo_size(bo)); } static inline bool xe_bo_has_pages(struct xe_bo *bo) diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 6a40eedd9db1..7484ce55a303 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -10,28 +10,103 @@ #include "xe_ggtt.h" #include "xe_tile.h" +typedef int (*xe_pinned_fn)(struct xe_bo *bo); + +static int xe_bo_apply_to_pinned(struct xe_device *xe, + struct list_head *pinned_list, + struct list_head *new_list, + const xe_pinned_fn pinned_fn) +{ + LIST_HEAD(still_in_list); + struct xe_bo *bo; + int ret = 0; + + spin_lock(&xe->pinned.lock); + while (!ret) { + bo = list_first_entry_or_null(pinned_list, typeof(*bo), + pinned_link); + if (!bo) + break; + xe_bo_get(bo); + list_move_tail(&bo->pinned_link, &still_in_list); + spin_unlock(&xe->pinned.lock); + + ret = pinned_fn(bo); + if (ret && pinned_list != new_list) { + spin_lock(&xe->pinned.lock); + /* + * We might no longer be pinned, since PM notifier can + * call this. If the pinned link is now empty, keep it + * that way. + */ + if (!list_empty(&bo->pinned_link)) + list_move(&bo->pinned_link, pinned_list); + spin_unlock(&xe->pinned.lock); + } + xe_bo_put(bo); + spin_lock(&xe->pinned.lock); + } + list_splice_tail(&still_in_list, new_list); + spin_unlock(&xe->pinned.lock); + + return ret; +} + /** - * xe_bo_evict_all - evict all BOs from VRAM + * xe_bo_notifier_prepare_all_pinned() - Pre-allocate the backing pages for all + * pinned VRAM objects which need to be saved. + * @xe: xe device + * + * Should be called from PM notifier when preparing for s3/s4. * + * Return: 0 on success, negative error code on error. + */ +int xe_bo_notifier_prepare_all_pinned(struct xe_device *xe) +{ + int ret; + + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present, + &xe->pinned.early.kernel_bo_present, + xe_bo_notifier_prepare_pinned); + if (!ret) + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, + &xe->pinned.late.kernel_bo_present, + xe_bo_notifier_prepare_pinned); + + return ret; +} + +/** + * xe_bo_notifier_unprepare_all_pinned() - Remove the backing pages for all + * pinned VRAM objects which have been restored. 
* @xe: xe device * - * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next - * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU. - * All eviction magic done via TTM calls. + * Should be called from PM notifier after exiting s3/s4 (either on success or + * failure). + */ +void xe_bo_notifier_unprepare_all_pinned(struct xe_device *xe) +{ + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present, + &xe->pinned.early.kernel_bo_present, + xe_bo_notifier_unprepare_pinned); + + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, + &xe->pinned.late.kernel_bo_present, + xe_bo_notifier_unprepare_pinned); +} + +/** + * xe_bo_evict_all_user - evict all non-pinned user BOs from VRAM + * @xe: xe device * - * Evict == move VRAM BOs to temporary (typically system) memory. + * Evict non-pinned user BOs (via GPU). * - * This function should be called before the device goes into a suspend state - * where the VRAM loses power. + * Evict == move VRAM BOs to temporary (typically system) memory. */ -int xe_bo_evict_all(struct xe_device *xe) +int xe_bo_evict_all_user(struct xe_device *xe) { struct ttm_device *bdev = &xe->ttm; - struct xe_bo *bo; - struct xe_tile *tile; - struct list_head still_in_list; u32 mem_type; - u8 id; int ret; /* User memory */ @@ -57,34 +132,38 @@ int xe_bo_evict_all(struct xe_device *xe) } } - /* Pinned user memory in VRAM */ - INIT_LIST_HEAD(&still_in_list); - spin_lock(&xe->pinned.lock); - for (;;) { - bo = list_first_entry_or_null(&xe->pinned.external_vram, - typeof(*bo), pinned_link); - if (!bo) - break; - xe_bo_get(bo); - list_move_tail(&bo->pinned_link, &still_in_list); - spin_unlock(&xe->pinned.lock); + return 0; +} - xe_bo_lock(bo, false); - ret = xe_bo_evict_pinned(bo); - xe_bo_unlock(bo); - xe_bo_put(bo); - if (ret) { - spin_lock(&xe->pinned.lock); - list_splice_tail(&still_in_list, - &xe->pinned.external_vram); - spin_unlock(&xe->pinned.lock); - return ret; - } +/** + * xe_bo_evict_all - evict all BOs from VRAM + * @xe: xe device + * + * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next + * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU. + * All eviction magic done via TTM calls. + * + * Evict == move VRAM BOs to temporary (typically system) memory. + * + * This function should be called before the device goes into a suspend state + * where the VRAM loses power. 
+ */ +int xe_bo_evict_all(struct xe_device *xe) +{ + struct xe_tile *tile; + u8 id; + int ret; - spin_lock(&xe->pinned.lock); - } - list_splice_tail(&still_in_list, &xe->pinned.external_vram); - spin_unlock(&xe->pinned.lock); + ret = xe_bo_evict_all_user(xe); + if (ret) + return ret; + + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, + &xe->pinned.late.evicted, xe_bo_evict_pinned); + + if (!ret) + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, + &xe->pinned.late.evicted, xe_bo_evict_pinned); /* * Wait for all user BO to be evicted as those evictions depend on the @@ -93,32 +172,47 @@ int xe_bo_evict_all(struct xe_device *xe) for_each_tile(tile, xe, id) xe_tile_migrate_wait(tile); - spin_lock(&xe->pinned.lock); - for (;;) { - bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present, - typeof(*bo), pinned_link); - if (!bo) - break; - xe_bo_get(bo); - list_move_tail(&bo->pinned_link, &xe->pinned.evicted); - spin_unlock(&xe->pinned.lock); + if (ret) + return ret; - xe_bo_lock(bo, false); - ret = xe_bo_evict_pinned(bo); - xe_bo_unlock(bo); - xe_bo_put(bo); - if (ret) - return ret; + return xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present, + &xe->pinned.early.evicted, + xe_bo_evict_pinned); +} - spin_lock(&xe->pinned.lock); +static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + int ret; + + ret = xe_bo_restore_pinned(bo); + if (ret) + return ret; + + if (bo->flags & XE_BO_FLAG_GGTT) { + struct xe_tile *tile; + u8 id; + + for_each_tile(tile, xe_bo_device(bo), id) { + if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile))) + continue; + + xe_ggtt_map_bo_unlocked(tile->mem.ggtt, bo); + } } - spin_unlock(&xe->pinned.lock); + + /* + * We expect validate to trigger a move VRAM and our move code + * should setup the iosys map. + */ + xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) || + !iosys_map_is_null(&bo->vmap)); return 0; } /** - * xe_bo_restore_kernel - restore kernel BOs to VRAM + * xe_bo_restore_early - restore early phase kernel BOs to VRAM * * @xe: xe device * @@ -128,111 +222,130 @@ int xe_bo_evict_all(struct xe_device *xe) * This function should be called early, before trying to init the GT, on device * resume. */ -int xe_bo_restore_kernel(struct xe_device *xe) +int xe_bo_restore_early(struct xe_device *xe) { - struct xe_bo *bo; - int ret; + return xe_bo_apply_to_pinned(xe, &xe->pinned.early.evicted, + &xe->pinned.early.kernel_bo_present, + xe_bo_restore_and_map_ggtt); +} - spin_lock(&xe->pinned.lock); - for (;;) { - bo = list_first_entry_or_null(&xe->pinned.evicted, - typeof(*bo), pinned_link); - if (!bo) - break; - xe_bo_get(bo); - list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); - spin_unlock(&xe->pinned.lock); +/** + * xe_bo_restore_late - restore pinned late phase BOs + * + * @xe: xe device + * + * Move pinned user and kernel BOs which can use blitter from temporary + * (typically system) memory to VRAM. All moves done via TTM calls. + * + * This function should be called late, after GT init, on device resume. 
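 *
 * A rough sketch of the intended ordering across a suspend/resume cycle;
 * the actual call sites live in the PM code outside these hunks, so the
 * exact wiring below is assumed from the helper descriptions:
 *
 *	xe_bo_notifier_prepare_all_pinned(xe);		// PM notifier, preparing for s3/s4
 *	xe_bo_evict_all(xe);				// suspend, before VRAM loses power
 *
 *	xe_bo_restore_early(xe);			// resume, before GT init
 *	xe_bo_restore_late(xe);				// resume, after GT init
 *	xe_bo_notifier_unprepare_all_pinned(xe);	// PM notifier, after exiting s3/s4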
+ */ +int xe_bo_restore_late(struct xe_device *xe) +{ + struct xe_tile *tile; + int ret, id; - xe_bo_lock(bo, false); - ret = xe_bo_restore_pinned(bo); - xe_bo_unlock(bo); - if (ret) { - xe_bo_put(bo); - return ret; - } + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.evicted, + &xe->pinned.late.kernel_bo_present, + xe_bo_restore_and_map_ggtt); - if (bo->flags & XE_BO_FLAG_GGTT) { - struct xe_tile *tile; - u8 id; + for_each_tile(tile, xe, id) + xe_tile_migrate_wait(tile); - for_each_tile(tile, xe, id) { - if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile))) - continue; + if (ret) + return ret; - mutex_lock(&tile->mem.ggtt->lock); - xe_ggtt_map_bo(tile->mem.ggtt, bo); - mutex_unlock(&tile->mem.ggtt->lock); - } - } + if (!IS_DGFX(xe)) + return 0; - /* - * We expect validate to trigger a move VRAM and our move code - * should setup the iosys map. - */ - xe_assert(xe, !iosys_map_is_null(&bo->vmap)); + /* Pinned user memory in VRAM should be validated on resume */ + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, + &xe->pinned.late.external, + xe_bo_restore_pinned); - xe_bo_put(bo); + /* Wait for restore to complete */ + for_each_tile(tile, xe, id) + xe_tile_migrate_wait(tile); - spin_lock(&xe->pinned.lock); - } - spin_unlock(&xe->pinned.lock); + return ret; +} - return 0; +static void xe_bo_pci_dev_remove_pinned(struct xe_device *xe) +{ + struct xe_tile *tile; + unsigned int id; + + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, + &xe->pinned.late.external, + xe_bo_dma_unmap_pinned); + for_each_tile(tile, xe, id) + xe_tile_migrate_wait(tile); } /** - * xe_bo_restore_user - restore pinned user BOs to VRAM - * - * @xe: xe device + * xe_bo_pci_dev_remove_all() - Handle bos when the pci_device is about to be removed + * @xe: The xe device. * - * Move pinned user BOs from temporary (typically system) memory to VRAM via - * CPU. All moves done via TTM calls. + * On pci_device removal we need to drop all dma mappings and move + * the data of exported bos out to system. This includes SVM bos and + * exported dma-buf bos. This is done by evicting all bos, but + * the evict placement in xe_evict_flags() is chosen such that all + * bos except those mentioned are purged, and thus their memory + * is released. * - * This function should be called late, after GT init, on device resume. + * For pinned bos, we're unmapping dma. */ -int xe_bo_restore_user(struct xe_device *xe) +void xe_bo_pci_dev_remove_all(struct xe_device *xe) { - struct xe_bo *bo; - struct xe_tile *tile; - struct list_head still_in_list; - u8 id; - int ret; + unsigned int mem_type; - if (!IS_DGFX(xe)) - return 0; + /* + * Move pagemap bos and exported dma-buf to system, and + * purge everything else. 
+ */ + for (mem_type = XE_PL_VRAM1; mem_type >= XE_PL_TT; --mem_type) { + struct ttm_resource_manager *man = + ttm_manager_type(&xe->ttm, mem_type); - /* Pinned user memory in VRAM should be validated on resume */ - INIT_LIST_HEAD(&still_in_list); - spin_lock(&xe->pinned.lock); - for (;;) { - bo = list_first_entry_or_null(&xe->pinned.external_vram, - typeof(*bo), pinned_link); - if (!bo) - break; - list_move_tail(&bo->pinned_link, &still_in_list); - xe_bo_get(bo); - spin_unlock(&xe->pinned.lock); + if (man) { + int ret = ttm_resource_manager_evict_all(&xe->ttm, man); - xe_bo_lock(bo, false); - ret = xe_bo_restore_pinned(bo); - xe_bo_unlock(bo); - xe_bo_put(bo); - if (ret) { - spin_lock(&xe->pinned.lock); - list_splice_tail(&still_in_list, - &xe->pinned.external_vram); - spin_unlock(&xe->pinned.lock); - return ret; + drm_WARN_ON(&xe->drm, ret); } - - spin_lock(&xe->pinned.lock); } - list_splice_tail(&still_in_list, &xe->pinned.external_vram); - spin_unlock(&xe->pinned.lock); - /* Wait for restore to complete */ - for_each_tile(tile, xe, id) - xe_tile_migrate_wait(tile); + xe_bo_pci_dev_remove_pinned(xe); +} - return 0; +static void xe_bo_pinned_fini(void *arg) +{ + struct xe_device *xe = arg; + + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, + &xe->pinned.late.kernel_bo_present, + xe_bo_dma_unmap_pinned); + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present, + &xe->pinned.early.kernel_bo_present, + xe_bo_dma_unmap_pinned); +} + +/** + * xe_bo_pinned_init() - Initialize pinned bo tracking + * @xe: The xe device. + * + * Initializes the lists and locks required for pinned bo + * tracking and registers a callback to dma-unmap + * any remaining pinned bos on pci device removal. + * + * Return: %0 on success, negative error code on error. 
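 *
 * The list heads initialized here imply pinned-BO tracking state in
 * struct xe_device shaped roughly as below (a sketch only; the matching
 * xe_device_types.h change is not part of these hunks):
 *
 *	struct {
 *		spinlock_t lock;
 *		struct {
 *			struct list_head kernel_bo_present;
 *			struct list_head evicted;
 *		} early;
 *		struct {
 *			struct list_head kernel_bo_present;
 *			struct list_head evicted;
 *			struct list_head external;
 *		} late;
 *	} pinned;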
+ */ +int xe_bo_pinned_init(struct xe_device *xe) +{ + spin_lock_init(&xe->pinned.lock); + INIT_LIST_HEAD(&xe->pinned.early.kernel_bo_present); + INIT_LIST_HEAD(&xe->pinned.early.evicted); + INIT_LIST_HEAD(&xe->pinned.late.kernel_bo_present); + INIT_LIST_HEAD(&xe->pinned.late.evicted); + INIT_LIST_HEAD(&xe->pinned.late.external); + + return devm_add_action_or_reset(xe->drm.dev, xe_bo_pinned_fini, xe); } diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h index 746894798852..e8385cb7f5e9 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.h +++ b/drivers/gpu/drm/xe/xe_bo_evict.h @@ -9,7 +9,13 @@ struct xe_device; int xe_bo_evict_all(struct xe_device *xe); -int xe_bo_restore_kernel(struct xe_device *xe); -int xe_bo_restore_user(struct xe_device *xe); +int xe_bo_evict_all_user(struct xe_device *xe); +int xe_bo_notifier_prepare_all_pinned(struct xe_device *xe); +void xe_bo_notifier_unprepare_all_pinned(struct xe_device *xe); +int xe_bo_restore_early(struct xe_device *xe); +int xe_bo_restore_late(struct xe_device *xe); +void xe_bo_pci_dev_remove_all(struct xe_device *xe); + +int xe_bo_pinned_init(struct xe_device *xe); #endif diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 15a92e3d4898..ff560d82496f 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -28,8 +28,10 @@ struct xe_vm; struct xe_bo { /** @ttm: TTM base buffer object */ struct ttm_buffer_object ttm; - /** @size: Size of this buffer object */ - size_t size; + /** @backup_obj: The backup object when pinned and suspended (vram only) */ + struct xe_bo *backup_obj; + /** @parent_obj: Ref to parent bo if this a backup_obj */ + struct xe_bo *parent_obj; /** @flags: flags for this buffer object */ u32 flags; /** @vm: VM this BO is attached to, for extobj this will be NULL */ @@ -82,7 +84,7 @@ struct xe_bo { u16 cpu_caching; /** @devmem_allocation: SVM device memory allocation */ - struct drm_gpusvm_devmem devmem_allocation; + struct drm_pagemap_devmem devmem_allocation; /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ struct list_head vram_userfault_link; diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c new file mode 100644 index 000000000000..e9b46a2d0019 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/bitops.h> +#include <linux/configfs.h> +#include <linux/find.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/string.h> + +#include "xe_configfs.h" +#include "xe_module.h" + +#include "xe_hw_engine_types.h" + +/** + * DOC: Xe Configfs + * + * Overview + * ========= + * + * Configfs is a filesystem-based manager of kernel objects. XE KMD registers a + * configfs subsystem called ``'xe'`` that creates a directory in the mounted configfs directory + * The user can create devices under this directory and configure them as necessary + * See Documentation/filesystems/configfs.rst for more information about how configfs works. + * + * Create devices + * =============== + * + * In order to create a device, the user has to create a directory inside ``'xe'``:: + * + * mkdir /sys/kernel/config/xe/0000:03:00.0/ + * + * Every device created is populated by the driver with entries that can be + * used to configure it:: + * + * /sys/kernel/config/xe/ + * .. 0000:03:00.0/ + * ... 
survivability_mode + * + * Configure Attributes + * ==================== + * + * Survivability mode: + * ------------------- + * + * Enable survivability mode on supported cards. This setting only takes + * effect when probing the device. Example to enable it:: + * + * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode + * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind (Enters survivability mode if supported) + * + * Allowed engines: + * ---------------- + * + * Allow only a set of engine(s) to be available, disabling the other engines + * even if they are available in hardware. This is applied after HW fuses are + * considered on each tile. Examples: + * + * Allow only one render and one copy engines, nothing else:: + * + * # echo 'rcs0,bcs0' > /sys/kernel/config/xe/0000:03:00.0/engines_allowed + * + * Allow only compute engines and first copy engine:: + * + * # echo 'ccs*,bcs0' > /sys/kernel/config/xe/0000:03:00.0/engines_allowed + * + * Note that the engine names are the per-GT hardware names. On multi-tile + * platforms, writing ``rcs0,bcs0`` to this file would allow the first render + * and copy engines on each tile. + * + * The requested configuration may not be supported by the platform and driver + * may fail to probe. For example: if at least one copy engine is expected to be + * available for migrations, but it's disabled. This is intended for debugging + * purposes only. + * + * Remove devices + * ============== + * + * The created device directories can be removed using ``rmdir``:: + * + * rmdir /sys/kernel/config/xe/0000:03:00.0/ + */ + +struct xe_config_device { + struct config_group group; + + bool survivability_mode; + u64 engines_allowed; + + /* protects attributes */ + struct mutex lock; +}; + +struct engine_info { + const char *cls; + u64 mask; +}; + +/* Some helpful macros to aid on the sizing of buffer allocation when parsing */ +#define MAX_ENGINE_CLASS_CHARS 5 +#define MAX_ENGINE_INSTANCE_CHARS 2 + +static const struct engine_info engine_info[] = { + { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK }, + { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK }, + { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK }, + { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK }, + { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK }, + { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK }, +}; + +static struct xe_config_device *to_xe_config_device(struct config_item *item) +{ + return container_of(to_config_group(item), struct xe_config_device, group); +} + +static ssize_t survivability_mode_show(struct config_item *item, char *page) +{ + struct xe_config_device *dev = to_xe_config_device(item); + + return sprintf(page, "%d\n", dev->survivability_mode); +} + +static ssize_t survivability_mode_store(struct config_item *item, const char *page, size_t len) +{ + struct xe_config_device *dev = to_xe_config_device(item); + bool survivability_mode; + int ret; + + ret = kstrtobool(page, &survivability_mode); + if (ret) + return ret; + + mutex_lock(&dev->lock); + dev->survivability_mode = survivability_mode; + mutex_unlock(&dev->lock); + + return len; +} + +static ssize_t engines_allowed_show(struct config_item *item, char *page) +{ + struct xe_config_device *dev = to_xe_config_device(item); + char *p = page; + + for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { + u64 mask = engine_info[i].mask; + + if ((dev->engines_allowed & mask) == mask) { + p += sprintf(p, "%s*\n", engine_info[i].cls); + } else if (mask & dev->engines_allowed) { + u16 bit0 = __ffs64(mask), bit; + + mask &= 
dev->engines_allowed; + + for_each_set_bit(bit, (const unsigned long *)&mask, 64) + p += sprintf(p, "%s%u\n", engine_info[i].cls, + bit - bit0); + } + } + + return p - page; +} + +static bool lookup_engine_mask(const char *pattern, u64 *mask) +{ + for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { + u8 instance; + u16 bit; + + if (!str_has_prefix(pattern, engine_info[i].cls)) + continue; + + pattern += strlen(engine_info[i].cls); + + if (!strcmp(pattern, "*")) { + *mask = engine_info[i].mask; + return true; + } + + if (kstrtou8(pattern, 10, &instance)) + return false; + + bit = __ffs64(engine_info[i].mask) + instance; + if (bit >= fls64(engine_info[i].mask)) + return false; + + *mask = BIT_ULL(bit); + return true; + } + + return false; +} + +static ssize_t engines_allowed_store(struct config_item *item, const char *page, + size_t len) +{ + struct xe_config_device *dev = to_xe_config_device(item); + size_t patternlen, p; + u64 mask, val = 0; + + for (p = 0; p < len; p += patternlen + 1) { + char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1]; + + patternlen = strcspn(page + p, ",\n"); + if (patternlen >= sizeof(buf)) + return -EINVAL; + + memcpy(buf, page + p, patternlen); + buf[patternlen] = '\0'; + + if (!lookup_engine_mask(buf, &mask)) + return -EINVAL; + + val |= mask; + } + + mutex_lock(&dev->lock); + dev->engines_allowed = val; + mutex_unlock(&dev->lock); + + return len; +} + +CONFIGFS_ATTR(, survivability_mode); +CONFIGFS_ATTR(, engines_allowed); + +static struct configfs_attribute *xe_config_device_attrs[] = { + &attr_survivability_mode, + &attr_engines_allowed, + NULL, +}; + +static void xe_config_device_release(struct config_item *item) +{ + struct xe_config_device *dev = to_xe_config_device(item); + + mutex_destroy(&dev->lock); + kfree(dev); +} + +static struct configfs_item_operations xe_config_device_ops = { + .release = xe_config_device_release, +}; + +static const struct config_item_type xe_config_device_type = { + .ct_item_ops = &xe_config_device_ops, + .ct_attrs = xe_config_device_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct config_group *xe_config_make_device_group(struct config_group *group, + const char *name) +{ + unsigned int domain, bus, slot, function; + struct xe_config_device *dev; + struct pci_dev *pdev; + int ret; + + ret = sscanf(name, "%04x:%02x:%02x.%x", &domain, &bus, &slot, &function); + if (ret != 4) + return ERR_PTR(-EINVAL); + + pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function)); + if (!pdev) + return ERR_PTR(-ENODEV); + pci_dev_put(pdev); + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return ERR_PTR(-ENOMEM); + + /* Default values */ + dev->engines_allowed = U64_MAX; + + config_group_init_type_name(&dev->group, name, &xe_config_device_type); + + mutex_init(&dev->lock); + + return &dev->group; +} + +static struct configfs_group_operations xe_config_device_group_ops = { + .make_group = xe_config_make_device_group, +}; + +static const struct config_item_type xe_configfs_type = { + .ct_group_ops = &xe_config_device_group_ops, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem xe_configfs = { + .su_group = { + .cg_item = { + .ci_namebuf = "xe", + .ci_type = &xe_configfs_type, + }, + }, +}; + +static struct xe_config_device *configfs_find_group(struct pci_dev *pdev) +{ + struct config_item *item; + char name[64]; + + snprintf(name, sizeof(name), "%04x:%02x:%02x.%x", pci_domain_nr(pdev->bus), + pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + + 
mutex_lock(&xe_configfs.su_mutex); + item = config_group_find_item(&xe_configfs.su_group, name); + mutex_unlock(&xe_configfs.su_mutex); + + if (!item) + return NULL; + + return to_xe_config_device(item); +} + +/** + * xe_configfs_get_survivability_mode - get configfs survivability mode attribute + * @pdev: pci device + * + * find the configfs group that belongs to the pci device and return + * the survivability mode attribute + * + * Return: survivability mode if config group is found, false otherwise + */ +bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) +{ + struct xe_config_device *dev = configfs_find_group(pdev); + bool mode; + + if (!dev) + return false; + + mode = dev->survivability_mode; + config_item_put(&dev->group.cg_item); + + return mode; +} + +/** + * xe_configfs_clear_survivability_mode - clear configfs survivability mode attribute + * @pdev: pci device + * + * find the configfs group that belongs to the pci device and clear survivability + * mode attribute + */ +void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) +{ + struct xe_config_device *dev = configfs_find_group(pdev); + + if (!dev) + return; + + mutex_lock(&dev->lock); + dev->survivability_mode = 0; + mutex_unlock(&dev->lock); + + config_item_put(&dev->group.cg_item); +} + +/** + * xe_configfs_get_engines_allowed - get engine allowed mask from configfs + * @pdev: pci device + * + * Find the configfs group that belongs to the pci device and return + * the mask of engines allowed to be used. + * + * Return: engine mask with allowed engines + */ +u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) +{ + struct xe_config_device *dev = configfs_find_group(pdev); + u64 engines_allowed; + + if (!dev) + return U64_MAX; + + engines_allowed = dev->engines_allowed; + config_item_put(&dev->group.cg_item); + + return engines_allowed; +} + +int __init xe_configfs_init(void) +{ + struct config_group *root = &xe_configfs.su_group; + int ret; + + config_group_init(root); + mutex_init(&xe_configfs.su_mutex); + ret = configfs_register_subsystem(&xe_configfs); + if (ret) { + pr_err("Error %d while registering %s subsystem\n", + ret, root->cg_item.ci_namebuf); + return ret; + } + + return 0; +} + +void __exit xe_configfs_exit(void) +{ + configfs_unregister_subsystem(&xe_configfs); +} + diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h new file mode 100644 index 000000000000..fb8764008089 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_configfs.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ +#ifndef _XE_CONFIGFS_H_ +#define _XE_CONFIGFS_H_ + +#include <linux/limits.h> +#include <linux/types.h> + +struct pci_dev; + +#if IS_ENABLED(CONFIG_CONFIGFS_FS) +int xe_configfs_init(void); +void xe_configfs_exit(void); +bool xe_configfs_get_survivability_mode(struct pci_dev *pdev); +void xe_configfs_clear_survivability_mode(struct pci_dev *pdev); +u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev); +#else +static inline int xe_configfs_init(void) { return 0; } +static inline void xe_configfs_exit(void) { } +static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; } +static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) { } +static inline u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { return U64_MAX; } +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index d0503959a8ed..26e9d146ccbf 100644 --- 
a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -20,7 +20,9 @@ #include "xe_pm.h" #include "xe_pxp_debugfs.h" #include "xe_sriov.h" +#include "xe_sriov_pf.h" #include "xe_step.h" +#include "xe_wa.h" #ifdef CONFIG_DRM_XE_DEBUG #include "xe_bo_evict.h" @@ -82,9 +84,28 @@ static int sriov_info(struct seq_file *m, void *data) return 0; } +static int workarounds(struct xe_device *xe, struct drm_printer *p) +{ + xe_pm_runtime_get(xe); + xe_wa_device_dump(xe, p); + xe_pm_runtime_put(xe); + + return 0; +} + +static int workaround_info(struct seq_file *m, void *data) +{ + struct xe_device *xe = node_to_xe(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + workarounds(xe, &p); + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"info", info, 0}, { .name = "sriov_info", .show = sriov_info, }, + { .name = "workarounds", .show = workaround_info, }, }; static int forcewake_open(struct inode *inode, struct file *file) @@ -191,6 +212,41 @@ static const struct file_operations wedged_mode_fops = { .write = wedged_mode_set, }; +static ssize_t atomic_svm_timeslice_ms_show(struct file *f, char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + char buf[32]; + int len = 0; + + len = scnprintf(buf, sizeof(buf), "%d\n", xe->atomic_svm_timeslice_ms); + + return simple_read_from_buffer(ubuf, size, pos, buf, len); +} + +static ssize_t atomic_svm_timeslice_ms_set(struct file *f, + const char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + u32 atomic_svm_timeslice_ms; + ssize_t ret; + + ret = kstrtouint_from_user(ubuf, size, 0, &atomic_svm_timeslice_ms); + if (ret) + return ret; + + xe->atomic_svm_timeslice_ms = atomic_svm_timeslice_ms; + + return size; +} + +static const struct file_operations atomic_svm_timeslice_ms_fops = { + .owner = THIS_MODULE, + .read = atomic_svm_timeslice_ms_show, + .write = atomic_svm_timeslice_ms_set, +}; + void xe_debugfs_register(struct xe_device *xe) { struct ttm_device *bdev = &xe->ttm; @@ -211,6 +267,9 @@ void xe_debugfs_register(struct xe_device *xe) debugfs_create_file("wedged_mode", 0600, root, xe, &wedged_mode_fops); + debugfs_create_file("atomic_svm_timeslice_ms", 0600, root, xe, + &atomic_svm_timeslice_ms_fops); + for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { man = ttm_manager_type(bdev, mem_type); @@ -235,4 +294,7 @@ void xe_debugfs_register(struct xe_device *xe) xe_pxp_debugfs_register(xe->pxp); fault_create_debugfs_attr("fail_gt_reset", root, >_reset_failure); + + if (IS_SRIOV_PF(xe)) + xe_sriov_pf_debugfs_register(xe, root); } diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 81b9d9bb3f57..203e3038cc81 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -80,7 +80,8 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q) return &q->gt->uc.guc; } -static ssize_t __xe_devcoredump_read(char *buffer, size_t count, +static ssize_t __xe_devcoredump_read(char *buffer, ssize_t count, + ssize_t start, struct xe_devcoredump *coredump) { struct xe_device *xe; @@ -94,7 +95,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, ss = &coredump->snapshot; iter.data = buffer; - iter.start = 0; + iter.start = start; iter.remain = count; p = drm_coredump_printer(&iter); @@ -168,12 +169,34 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) ss->vm = NULL; } +#define 
XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) + +/** + * xe_devcoredump_read() - Read data from the Xe device coredump snapshot + * @buffer: Destination buffer to copy the coredump data into + * @offset: Offset in the coredump data to start reading from + * @count: Number of bytes to read + * @data: Pointer to the xe_devcoredump structure + * @datalen: Length of the data (unused) + * + * Reads a chunk of the coredump snapshot data into the provided buffer. + * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX), + * it is read directly from a pre-written buffer. For larger devcoredumps, + * the pre-written buffer must be periodically repopulated from the snapshot + * state due to kmalloc size limitations. + * + * Return: Number of bytes copied on success, or a negative error code on failure. + */ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { struct xe_devcoredump *coredump = data; struct xe_devcoredump_snapshot *ss; - ssize_t byte_copied; + ssize_t byte_copied = 0; + u32 chunk_offset; + ssize_t new_chunk_position; + bool pm_needed = false; + int ret = 0; if (!coredump) return -ENODEV; @@ -183,25 +206,45 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); + pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX; + if (pm_needed) + xe_pm_runtime_get(gt_to_xe(ss->gt)); + mutex_lock(&coredump->lock); if (!ss->read.buffer) { - mutex_unlock(&coredump->lock); - return -ENODEV; + ret = -ENODEV; + goto unlock; } - if (offset >= ss->read.size) { - mutex_unlock(&coredump->lock); - return 0; + if (offset >= ss->read.size) + goto unlock; + + new_chunk_position = div_u64_rem(offset, + XE_DEVCOREDUMP_CHUNK_MAX, + &chunk_offset); + + if (offset >= ss->read.chunk_position + XE_DEVCOREDUMP_CHUNK_MAX || + offset < ss->read.chunk_position) { + ss->read.chunk_position = new_chunk_position * + XE_DEVCOREDUMP_CHUNK_MAX; + + __xe_devcoredump_read(ss->read.buffer, + XE_DEVCOREDUMP_CHUNK_MAX, + ss->read.chunk_position, coredump); } byte_copied = count < ss->read.size - offset ? count : ss->read.size - offset; - memcpy(buffer, ss->read.buffer + offset, byte_copied); + memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied); +unlock: mutex_unlock(&coredump->lock); - return byte_copied; + if (pm_needed) + xe_pm_runtime_put(gt_to_xe(ss->gt)); + + return byte_copied ? 
byte_copied : ret; } static void xe_devcoredump_free(void *data) @@ -254,17 +297,32 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); - xe_pm_runtime_put(xe); + ss->read.chunk_position = 0; /* Calculate devcoredump size */ - ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); - - ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); - if (!ss->read.buffer) - return; + ss->read.size = __xe_devcoredump_read(NULL, LONG_MAX, 0, coredump); + + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) { + ss->read.buffer = kvmalloc(XE_DEVCOREDUMP_CHUNK_MAX, + GFP_USER); + if (!ss->read.buffer) + goto put_pm; + + __xe_devcoredump_read(ss->read.buffer, + XE_DEVCOREDUMP_CHUNK_MAX, + 0, coredump); + } else { + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); + if (!ss->read.buffer) + goto put_pm; + + __xe_devcoredump_read(ss->read.buffer, ss->read.size, 0, + coredump); + xe_devcoredump_snapshot_free(ss); + } - __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); - xe_devcoredump_snapshot_free(ss); +put_pm: + xe_pm_runtime_put(xe); } static void devcoredump_snapshot(struct xe_devcoredump *coredump, @@ -273,13 +331,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; struct xe_guc *guc = exec_queue_to_guc(q); - u32 adj_logical_mask = q->logical_mask; - u32 width_mask = (0x1 << q->width) - 1; const char *process_name = "no process"; - unsigned int fw_ref; bool cookie; - int i; ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); @@ -295,14 +349,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); cookie = dma_fence_begin_signalling(); - for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { - if (adj_logical_mask & BIT(i)) { - adj_logical_mask |= width_mask << i; - i += q->width; - } else { - ++i; - } - } /* keep going if fw fails as we still want to save the memory and SW data */ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); @@ -425,7 +471,7 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffi if (offset & 3) drm_printf(p, "Offset not word aligned: %zu", offset); - line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL); + line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_ATOMIC); if (!line_buff) { drm_printf(p, "Failed to allocate line buffer\n"); return; diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 1a1d16a96b2d..a174385a6d83 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -66,6 +66,8 @@ struct xe_devcoredump_snapshot { struct { /** @read.size: size of devcoredump in human readable format */ ssize_t size; + /** @read.chunk_position: position of devcoredump chunk */ + ssize_t chunk_position; /** @read.buffer: buffer of devcoredump in human readable format */ char *buffer; } read; diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 00191227bc95..5bd2f7d7b4ea 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -23,8 +23,10 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_bo.h" +#include "xe_bo_evict.h" #include "xe_debugfs.h" #include "xe_devcoredump.h" +#include "xe_device_sysfs.h" #include "xe_dma_buf.h" #include "xe_drm_client.h" #include "xe_drv.h" 
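The devcoredump changes above split large dumps into 1.5 GiB chunks (XE_DEVCOREDUMP_CHUNK_MAX): a read offset is mapped to the base of the chunk that contains it plus an offset inside that chunk, and the cached buffer is only repopulated when the requested offset falls outside the currently cached chunk. The following is a minimal userspace sketch of that arithmetic only, not driver code; the names (CHUNK_MAX, chunk_state, needs_refill) are illustrative assumptions, and only the 1.5 GiB chunk size is taken from the patch.

/*
 * Sketch (not driver code) of the chunk bookkeeping used by
 * xe_devcoredump_read() above: split an offset into a chunk base plus an
 * in-chunk offset, and refill the cache only when the offset leaves the
 * cached window.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CHUNK_MAX ((uint64_t)(512 + 1024) << 20)	/* SZ_512M + SZ_1G = 1.5 GiB */

struct chunk_state {
	uint64_t chunk_position;	/* base offset of the cached chunk */
};

/* True when the cached chunk does not cover this offset and must be refilled. */
static bool needs_refill(const struct chunk_state *st, uint64_t offset)
{
	return offset < st->chunk_position ||
	       offset >= st->chunk_position + CHUNK_MAX;
}

static uint64_t chunk_offset(uint64_t offset)
{
	return offset % CHUNK_MAX;	/* position inside the active chunk */
}

static uint64_t chunk_base(uint64_t offset)
{
	return (offset / CHUNK_MAX) * CHUNK_MAX;	/* start of the containing chunk */
}

int main(void)
{
	struct chunk_state st = { .chunk_position = 0 };
	/* Example offsets: inside chunk 0, just past it, and well into chunk 1. */
	uint64_t offsets[] = { 4096, CHUNK_MAX + 8, 3 * (CHUNK_MAX / 2) + 64 };

	for (size_t i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
		uint64_t off = offsets[i];

		if (needs_refill(&st, off)) {
			st.chunk_position = chunk_base(off);
			printf("refill cache from snapshot at base %llu\n",
			       (unsigned long long)st.chunk_position);
		}
		printf("offset %llu -> copy from cache offset %llu\n",
		       (unsigned long long)off,
		       (unsigned long long)chunk_offset(off));
	}
	return 0;
}

In the driver the refill is the extra __xe_devcoredump_read() call into the pre-allocated buffer; the sketch only prints when that would happen.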
@@ -38,12 +40,14 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_guc_pc.h" #include "xe_hw_engine_group.h" #include "xe_hwmon.h" +#include "xe_i2c.h" #include "xe_irq.h" -#include "xe_memirq.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_nvm.h" #include "xe_oa.h" #include "xe_observation.h" #include "xe_pat.h" @@ -64,6 +68,7 @@ #include "xe_wait_user_fence.h" #include "xe_wa.h" +#include <generated/xe_device_wa_oob.h> #include <generated/xe_wa_oob.h> static int xe_file_open(struct drm_device *dev, struct drm_file *file) @@ -400,9 +405,6 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) if (xe->unordered_wq) destroy_workqueue(xe->unordered_wq); - if (!IS_ERR_OR_NULL(xe->mem.shrinker)) - xe_shrinker_destroy(xe->mem.shrinker); - if (xe->destroy_wq) destroy_workqueue(xe->destroy_wq); @@ -436,13 +438,14 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, if (err) goto err; - xe->mem.shrinker = xe_shrinker_create(xe); - if (IS_ERR(xe->mem.shrinker)) - return ERR_CAST(xe->mem.shrinker); + err = xe_shrinker_create(xe); + if (err) + goto err; xe->info.devid = pdev->device; xe->info.revid = pdev->revision; xe->info.force_execlist = xe_modparam.force_execlist; + xe->atomic_svm_timeslice_ms = 5; err = xe_irq_init(xe); if (err) @@ -467,10 +470,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xa_erase(&xe->usm.asid_to_vm, asid); } - spin_lock_init(&xe->pinned.lock); - INIT_LIST_HEAD(&xe->pinned.kernel_bo_present); - INIT_LIST_HEAD(&xe->pinned.external_vram); - INIT_LIST_HEAD(&xe->pinned.evicted); + err = xe_bo_pinned_init(xe); + if (err) + goto err; xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", WQ_MEM_RECLAIM); @@ -492,10 +494,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, if (err) goto err; - err = xe_display_create(xe); - if (WARN_ON(err)) - goto err; - return xe; err: @@ -505,7 +503,15 @@ ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */ static bool xe_driver_flr_disabled(struct xe_device *xe) { - return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS; + if (IS_SRIOV_VF(xe)) + return true; + + if (xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) { + drm_info(&xe->drm, "Driver-FLR disabled by BIOS\n"); + return true; + } + + return false; } /* @@ -523,7 +529,7 @@ static bool xe_driver_flr_disabled(struct xe_device *xe) */ static void __xe_driver_flr(struct xe_device *xe) { - const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */ + const unsigned int flr_timeout = 3 * USEC_PER_SEC; /* specs recommend a 3s wait */ struct xe_mmio *mmio = xe_root_tile_mmio(xe); int ret; @@ -569,10 +575,8 @@ static void __xe_driver_flr(struct xe_device *xe) static void xe_driver_flr(struct xe_device *xe) { - if (xe_driver_flr_disabled(xe)) { - drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n"); + if (xe_driver_flr_disabled(xe)) return; - } __xe_driver_flr(xe); } @@ -677,6 +681,7 @@ static void sriov_update_device_info(struct xe_device *xe) /* disable features that are not available/applicable to VFs */ if (IS_SRIOV_VF(xe)) { xe->info.probe_display = 0; + xe->info.has_heci_cscfi = 0; xe->info.has_heci_gscfi = 0; xe->info.skip_guc_pc = 1; xe->info.skip_pcode = 1; @@ -697,6 +702,9 @@ int xe_device_probe_early(struct xe_device *xe) { int err; + xe_wa_device_init(xe); + xe_wa_process_device_oob(xe); + err = xe_mmio_probe_early(xe); if (err) return err; @@ -706,7 +714,7 @@ int 
xe_device_probe_early(struct xe_device *xe) sriov_update_device_info(xe); err = xe_pcode_probe_early(xe); - if (err) { + if (err || xe_survivability_mode_is_requested(xe)) { int save_err = err; /* @@ -729,6 +737,7 @@ int xe_device_probe_early(struct xe_device *xe) return 0; } +ALLOW_ERROR_INJECTION(xe_device_probe_early, ERRNO); /* See xe_pci_probe() */ static int probe_has_flat_ccs(struct xe_device *xe) { @@ -781,45 +790,16 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; - err = xe_ttm_sys_mgr_init(xe); - if (err) - return err; - for_each_gt(gt, xe, id) { err = xe_gt_init_early(gt); if (err) return err; - - /* - * Only after this point can GT-specific MMIO operations - * (including things like communication with the GuC) - * be performed. - */ - xe_gt_mmio_init(gt); } for_each_tile(tile, xe, id) { - if (IS_SRIOV_VF(xe)) { - xe_guc_comm_init_early(&tile->primary_gt->uc.guc); - err = xe_gt_sriov_vf_bootstrap(tile->primary_gt); - if (err) - return err; - err = xe_gt_sriov_vf_query_config(tile->primary_gt); - if (err) - return err; - } err = xe_ggtt_init_early(tile->mem.ggtt); if (err) return err; - err = xe_memirq_init(&tile->memirq); - if (err) - return err; - } - - for_each_gt(gt, xe, id) { - err = xe_gt_init_hwconfig(gt); - if (err) - return err; } err = xe_devcoredump_init(xe); @@ -847,6 +827,14 @@ int xe_device_probe(struct xe_device *xe) return err; } + /* + * Allow allocations only now to ensure xe_display_init_early() + * is the first to allocate, always. + */ + err = xe_ttm_sys_mgr_init(xe); + if (err) + return err; + /* Allocate and map stolen after potential VRAM resize */ err = xe_ttm_stolen_mgr_init(xe); if (err) @@ -878,6 +866,12 @@ int xe_device_probe(struct xe_device *xe) return err; } + if (xe->tiles->media_gt && + XE_WA(xe->tiles->media_gt, 15015404425_disable)) + XE_DEVICE_WA_DISABLE(xe, 15015404425); + + xe_nvm_init(xe); + err = xe_heci_gsc_init(xe); if (err) return err; @@ -908,12 +902,20 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err_unregister_display; + err = xe_device_sysfs_init(xe); + if (err) + goto err_unregister_display; + xe_debugfs_register(xe); err = xe_hwmon_register(xe); if (err) goto err_unregister_display; + err = xe_i2c_probe(xe); + if (err) + goto err_unregister_display; + for_each_gt(gt, xe, id) xe_gt_sanitize_freq(gt); @@ -931,7 +933,11 @@ void xe_device_remove(struct xe_device *xe) { xe_display_unregister(xe); + xe_nvm_fini(xe); + drm_dev_unplug(&xe->drm); + + xe_bo_pci_dev_remove_all(xe); } void xe_device_shutdown(struct xe_device *xe) @@ -972,38 +978,15 @@ void xe_device_wmb(struct xe_device *xe) xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0); } -/** - * xe_device_td_flush() - Flush transient L3 cache entries - * @xe: The device - * - * Display engine has direct access to memory and is never coherent with L3/L4 - * caches (or CPU caches), however KMD is responsible for specifically flushing - * transient L3 GPU cache entries prior to the flip sequence to ensure scanout - * can happen from such a surface without seeing corruption. - * - * Display surfaces can be tagged as transient by mapping it using one of the - * various L3:XD PAT index modes on Xe2. - * - * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed - * at the end of each submission via PIPE_CONTROL for compute/render, since SA - * Media is not coherent with L3 and we want to support render-vs-media - * usescases. 
For other engines like copy/blt the HW internally forces uncached - * behaviour, hence why we can skip the TDF on such platforms. +/* + * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt. */ -void xe_device_td_flush(struct xe_device *xe) +static void tdf_request_sync(struct xe_device *xe) { - struct xe_gt *gt; unsigned int fw_ref; + struct xe_gt *gt; u8 id; - if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) - return; - - if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { - xe_device_l2_flush(xe); - return; - } - for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -1013,6 +996,7 @@ void xe_device_td_flush(struct xe_device *xe) return; xe_mmio_write32(>->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); + /* * FIXME: We can likely do better here with our choice of * timeout. Currently we just assume the worst case, i.e. 150us, @@ -1043,15 +1027,52 @@ void xe_device_l2_flush(struct xe_device *xe) return; spin_lock(>->global_invl_lock); - xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); + xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); if (xe_mmio_wait32(>->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(>->global_invl_lock); xe_force_wake_put(gt_to_fw(gt), fw_ref); } +/** + * xe_device_td_flush() - Flush transient L3 cache entries + * @xe: The device + * + * Display engine has direct access to memory and is never coherent with L3/L4 + * caches (or CPU caches), however KMD is responsible for specifically flushing + * transient L3 GPU cache entries prior to the flip sequence to ensure scanout + * can happen from such a surface without seeing corruption. + * + * Display surfaces can be tagged as transient by mapping it using one of the + * various L3:XD PAT index modes on Xe2. + * + * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed + * at the end of each submission via PIPE_CONTROL for compute/render, since SA + * Media is not coherent with L3 and we want to support render-vs-media + * usescases. For other engines like copy/blt the HW internally forces uncached + * behaviour, hence why we can skip the TDF on such platforms. + */ +void xe_device_td_flush(struct xe_device *xe) +{ + struct xe_gt *root_gt; + + if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) + return; + + root_gt = xe_root_mmio_gt(xe); + if (XE_WA(root_gt, 16023588340)) { + /* A transient flush is not sufficient: flush the L2 */ + xe_device_l2_flush(xe); + } else { + xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc); + tdf_request_sync(xe); + xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc); + } +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? @@ -1154,7 +1175,8 @@ void xe_device_declare_wedged(struct xe_device *xe) /* Notify userspace of wedged device */ drm_dev_wedged_event(&xe->drm, - DRM_WEDGE_RECOVERY_REBIND | DRM_WEDGE_RECOVERY_BUS_RESET); + DRM_WEDGE_RECOVERY_REBIND | DRM_WEDGE_RECOVERY_BUS_RESET, + NULL); } for_each_gt(gt, xe, id) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 0bc3bc8e6803..bc802e066a7d 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -60,35 +60,32 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe) return &xe->tiles[0]; } +/* + * Highest GT/tile count for any platform. Used only for memory allocation + * sizing. 
Any logic looping over GTs or mapping userspace GT IDs into GT + * structures should use the per-platform xe->info.max_gt_per_tile instead. + */ #define XE_MAX_GT_PER_TILE 2 -static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id) -{ - if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE)) - gt_id = 0; - - return gt_id ? tile->media_gt : tile->primary_gt; -} - static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) { - struct xe_tile *root_tile = xe_device_get_root_tile(xe); + struct xe_tile *tile; struct xe_gt *gt; - /* - * FIXME: This only works for now because multi-tile and standalone - * media are mutually exclusive on the platforms we have today. - * - * id => GT mapping may change once we settle on how we want to handle - * our UAPI. - */ - if (MEDIA_VER(xe) >= 13) { - gt = xe_tile_get_gt(root_tile, gt_id); - } else { - if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE)) - gt_id = 0; - - gt = xe->tiles[gt_id].primary_gt; + if (gt_id >= xe->info.tile_count * xe->info.max_gt_per_tile) + return NULL; + + tile = &xe->tiles[gt_id / xe->info.max_gt_per_tile]; + switch (gt_id % xe->info.max_gt_per_tile) { + default: + xe_assert(xe, false); + fallthrough; + case 0: + gt = tile->primary_gt; + break; + case 1: + gt = tile->media_gt; + break; } if (!gt) @@ -130,14 +127,14 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe) for ((id__) = 1; (id__) < (xe__)->info.tile_count; (id__)++) \ for_each_if((tile__) = &(xe__)->tiles[(id__)]) -/* - * FIXME: This only works for now since multi-tile and standalone media - * happen to be mutually exclusive. Future platforms may change this... - */ #define for_each_gt(gt__, xe__, id__) \ - for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \ + for ((id__) = 0; (id__) < (xe__)->info.tile_count * (xe__)->info.max_gt_per_tile; (id__)++) \ for_each_if((gt__) = xe_device_get_gt((xe__), (id__))) +#define for_each_gt_on_tile(gt__, tile__, id__) \ + for_each_gt((gt__), (tile__)->xe, (id__)) \ + for_each_if((gt__)->tile == (tile__)) + static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) { return >->pm.fw; @@ -195,6 +192,8 @@ void xe_device_declare_wedged(struct xe_device *xe); struct xe_file *xe_file_get(struct xe_file *xef); void xe_file_put(struct xe_file *xef); +int xe_is_injection_active(void); + /* * Occasionally it is seen that the G2H worker starts running after a delay of more than * a second even after being queued and activated by the Linux workqueue subsystem. This diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index 7efbd4c52791..bd9015761aa0 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -3,14 +3,16 @@ * Copyright © 2023 Intel Corporation */ +#include <linux/device.h> #include <linux/kobject.h> #include <linux/pci.h> #include <linux/sysfs.h> -#include <drm/drm_managed.h> - #include "xe_device.h" #include "xe_device_sysfs.h" +#include "xe_mmio.h" +#include "xe_pcode_api.h" +#include "xe_pcode.h" #include "xe_pm.h" /** @@ -22,6 +24,12 @@ * * vram_d3cold_threshold - Report/change vram used threshold(in MB) below * which vram save/restore is permissible during runtime D3cold entry/exit. + * + * lb_fan_control_version - Fan control version provisioned by late binding. + * Exposed only if supported by the device. + * + * lb_voltage_regulator_version - Voltage regulator version provisioned by late + * binding. Exposed only if supported by the device. 
*/ static ssize_t @@ -63,11 +71,230 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RW(vram_d3cold_threshold); +static ssize_t +lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap, ver_low = FAN_TABLE, ver_high = FAN_TABLE; + u16 major = 0, minor = 0, hotfix = 0, build = 0; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(V1_FAN_PROVISIONED, cap)) { + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), + &ver_low, NULL); + if (ret) + goto out; + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), + &ver_high, NULL); + if (ret) + goto out; + + major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); + minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); + hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); + build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); + } +out: + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); +} +static DEVICE_ATTR_ADMIN_RO(lb_fan_control_version); + +static ssize_t +lb_voltage_regulator_version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap, ver_low = VR_CONFIG, ver_high = VR_CONFIG; + u16 major = 0, minor = 0, hotfix = 0, build = 0; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(VR_PARAMS_PROVISIONED, cap)) { + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), + &ver_low, NULL); + if (ret) + goto out; + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), + &ver_high, NULL); + if (ret) + goto out; + + major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); + minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); + hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); + build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); + } +out: + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); +} +static DEVICE_ATTR_ADMIN_RO(lb_voltage_regulator_version); + +static int late_bind_create_files(struct device *dev) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) { + if (ret == -ENXIO) { + drm_dbg(&xe->drm, "Late binding not supported by firmware\n"); + ret = 0; + } + goto out; + } + + if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) { + ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); + if (ret) + goto out; + } + + if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) + ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr); +out: + xe_pm_runtime_put(xe); + + return ret; +} + +static void late_bind_remove_files(struct device *dev) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap; + int ret; + + 
xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) + sysfs_remove_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); + + if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) + sysfs_remove_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr); +out: + xe_pm_runtime_put(xe); +} + +/** + * DOC: PCIe Gen5 Limitations + * + * Default link speed of discrete GPUs is determined by configuration parameters + * stored in their flash memory, which are subject to override through user + * initiated firmware updates. It has been observed that devices configured with + * PCIe Gen5 as their default link speed can come across link quality issues due + * to host or motherboard limitations and may have to auto-downgrade their link + * to PCIe Gen4 speed when faced with unstable link at Gen5, which makes + * firmware updates rather risky on such setups. It is required to ensure that + * the device is capable of auto-downgrading its link to PCIe Gen4 speed before + * pushing the firmware image with PCIe Gen5 as default configuration. This can + * be done by reading ``auto_link_downgrade_capable`` sysfs entry, which will + * denote if the device is capable of auto-downgrading its link to PCIe Gen4 + * speed with boolean output value of ``0`` or ``1``, meaning `incapable` or + * `capable` respectively. + * + * .. code-block:: shell + * + * $ cat /sys/bus/pci/devices/<bdf>/auto_link_downgrade_capable + * + * Pushing the firmware image with PCIe Gen5 as default configuration on a auto + * link downgrade incapable device and facing link instability due to host or + * motherboard limitations can result in driver failing to bind to the device, + * making further firmware updates impossible with RMA being the only last + * resort. + * + * Link downgrade status of auto link downgrade capable devices is available + * through ``auto_link_downgrade_status`` sysfs entry with boolean output value + * of ``0`` or ``1``, where ``0`` means no auto-downgrading was required during + * link training (which is the optimal scenario) and ``1`` means the device has + * auto-downgraded its link to PCIe Gen4 speed due to unstable Gen5 link. + * + * .. 
code-block:: shell + * + * $ cat /sys/bus/pci/devices/<bdf>/auto_link_downgrade_status + */ + +static ssize_t +auto_link_downgrade_capable_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + u32 cap, val; + + xe_pm_runtime_get(xe); + val = xe_mmio_read32(xe_root_tile_mmio(xe), BMG_PCIE_CAP); + xe_pm_runtime_put(xe); + + cap = REG_FIELD_GET(LINK_DOWNGRADE, val); + return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE); +} +static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_capable); + +static ssize_t +auto_link_downgrade_status_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + /* default the auto_link_downgrade status to 0 */ + u32 val = 0; + int ret; + + xe_pm_runtime_get(xe); + ret = xe_pcode_read(xe_device_get_root_tile(xe), + PCODE_MBOX(DGFX_PCODE_STATUS, DGFX_GET_INIT_STATUS, 0), + &val, NULL); + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u\n", REG_FIELD_GET(DGFX_LINK_DOWNGRADE_STATUS, val)); +} +static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_status); + +static const struct attribute *auto_link_downgrade_attrs[] = { + &dev_attr_auto_link_downgrade_capable.attr, + &dev_attr_auto_link_downgrade_status.attr, + NULL +}; + static void xe_device_sysfs_fini(void *arg) { struct xe_device *xe = arg; - sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); + if (xe->d3cold.capable) + sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); + + if (xe->info.platform == XE_BATTLEMAGE) { + sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs); + late_bind_remove_files(xe->drm.dev); + } } int xe_device_sysfs_init(struct xe_device *xe) @@ -75,9 +302,21 @@ int xe_device_sysfs_init(struct xe_device *xe) struct device *dev = xe->drm.dev; int ret; - ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); - if (ret) - return ret; + if (xe->d3cold.capable) { + ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); + if (ret) + return ret; + } + + if (xe->info.platform == XE_BATTLEMAGE) { + ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); + if (ret) + return ret; + + ret = late_bind_create_files(dev); + if (ret) + return ret; + } return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe); } diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 0482f26aa480..d4d2c6854790 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -21,7 +21,9 @@ #include "xe_platform_types.h" #include "xe_pmu_types.h" #include "xe_pt_types.h" +#include "xe_sriov_pf_types.h" #include "xe_sriov_types.h" +#include "xe_sriov_vf_types.h" #include "xe_step_types.h" #include "xe_survivability_mode_types.h" #include "xe_ttm_vram_mgr_types.h" @@ -30,13 +32,11 @@ #define TEST_VM_OPS_ERROR #endif -#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) -#include "soc/intel_pch.h" -#include "intel_display_core.h" -#include "intel_display_device.h" -#endif - +struct dram_info; +struct intel_display; +struct intel_dg_nvm_dev; struct xe_ggtt; +struct xe_i2c; struct xe_pat_ops; struct xe_pxp; @@ -107,6 +107,9 @@ struct xe_vram_region { resource_size_t actual_physical_size; /** @mapping: pointer to VRAM mappable space */ void __iomem *mapping; + /** @ttm: VRAM TTM manager */ + struct xe_ttm_vram_mgr ttm; +#if 
IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) /** @pagemap: Used to remap device memory as ZONE_DEVICE */ struct dev_pagemap pagemap; /** @@ -120,8 +123,7 @@ struct xe_vram_region { * This is generated when remap device memory as ZONE_DEVICE */ resource_size_t hpa_base; - /** @ttm: VRAM TTM manager */ - struct xe_ttm_vram_mgr ttm; +#endif }; /** @@ -295,6 +297,8 @@ struct xe_device { u8 vram_flags; /** @info.tile_count: Number of tiles */ u8 tile_count; + /** @info.max_gt_per_tile: Number of GT IDs allocated to each tile */ + u8 max_gt_per_tile; /** @info.gt_count: Total number of GTs for entire device */ u8 gt_count; /** @info.vm_max_level: Max VM level */ @@ -314,14 +318,22 @@ struct xe_device { u8 has_atomic_enable_pte_bit:1; /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ u8 has_device_atomics_on_smem:1; + /** @info.has_fan_control: Device supports fan control */ + u8 has_fan_control:1; /** @info.has_flat_ccs: Whether flat CCS metadata is used */ u8 has_flat_ccs:1; + /** @info.has_gsc_nvm: Device has gsc non-volatile memory */ + u8 has_gsc_nvm:1; /** @info.has_heci_cscfi: device has heci cscfi */ u8 has_heci_cscfi:1; /** @info.has_heci_gscfi: device has heci gscfi */ u8 has_heci_gscfi:1; /** @info.has_llc: Device has a shared CPU+GPU last level cache */ u8 has_llc:1; + /** @info.has_mbx_power_limits: Device has support to manage power limits using + * pcode mailbox commands. + */ + u8 has_mbx_power_limits:1; /** @info.has_pxp: Device has PXP support */ u8 has_pxp:1; /** @info.has_range_tlb_invalidation: Has range based TLB invalidations */ @@ -334,6 +346,8 @@ struct xe_device { u8 has_64bit_timestamp:1; /** @info.is_dgfx: is discrete device */ u8 is_dgfx:1; + /** @info.needs_scratch: needs scratch page for oob prefetch to work */ + u8 needs_scratch:1; /** * @info.probe_display: Probe display hardware. If set to * false, the driver will behave as if there is no display @@ -351,6 +365,19 @@ struct xe_device { u8 skip_pcode:1; } info; + /** @wa_active: keep track of active workarounds */ + struct { + /** @wa_active.oob: bitmap with active OOB workarounds */ + unsigned long *oob; + + /** + * @wa_active.oob_initialized: Mark oob as initialized to help detecting misuse + * of XE_DEVICE_WA() - it can only be called on initialization after + * Device OOB WAs have been processed. + */ + bool oob_initialized; + } wa_active; + /** @survivability: survivability information for device */ struct xe_survivability survivability; @@ -397,10 +424,12 @@ struct xe_device { /** @sriov.__mode: SR-IOV mode (Don't access directly!) 
*/ enum xe_sriov_mode __mode; - /** @sriov.pf: PF specific data */ - struct xe_device_pf pf; - /** @sriov.vf: VF specific data */ - struct xe_device_vf vf; + union { + /** @sriov.pf: PF specific data */ + struct xe_device_pf pf; + /** @sriov.vf: VF specific data */ + struct xe_device_vf vf; + }; /** @sriov.wq: workqueue used by the virtualization workers */ struct workqueue_struct *wq; @@ -420,12 +449,22 @@ struct xe_device { struct { /** @pinned.lock: protected pinned BO list state */ spinlock_t lock; - /** @pinned.kernel_bo_present: pinned kernel BO that are present */ - struct list_head kernel_bo_present; - /** @pinned.evicted: pinned BO that have been evicted */ - struct list_head evicted; - /** @pinned.external_vram: pinned external BO in vram*/ - struct list_head external_vram; + /** @pinned.early: early pinned lists */ + struct { + /** @pinned.early.kernel_bo_present: pinned kernel BO that are present */ + struct list_head kernel_bo_present; + /** @pinned.early.evicted: pinned BO that have been evicted */ + struct list_head evicted; + } early; + /** @pinned.late: late pinned lists */ + struct { + /** @pinned.late.kernel_bo_present: pinned kernel BO that are present */ + struct list_head kernel_bo_present; + /** @pinned.late.evicted: pinned BO that have been evicted */ + struct list_head evicted; + /** @pinned.external: pinned external and dma-buf. */ + struct list_head external; + } late; } pinned; /** @ufence_wq: user fence wait queue */ @@ -483,6 +522,10 @@ struct xe_device { const struct xe_pat_table_entry *table; /** @pat.n_entries: Number of PAT entries */ int n_entries; + /** @pat.ats_entry: PAT entry for PCIe ATS responses */ + const struct xe_pat_table_entry *pat_ats; + /** @pat.pta_entry: PAT entry for page table accesses */ + const struct xe_pat_table_entry *pat_pta; u32 idx[__XE_CACHE_LEVEL_COUNT]; } pat; @@ -508,6 +551,9 @@ struct xe_device { struct mutex lock; } d3cold; + /** @pm_notifier: Our PM notifier to perform actions in response to various PM events. */ + struct notifier_block pm_notifier; + /** @pmt: Support the PMT driver callback interface */ struct { /** @pmt.lock: protect access for telemetry data */ @@ -526,6 +572,9 @@ struct xe_device { /** @heci_gsc: graphics security controller */ struct xe_heci_gsc heci_gsc; + /** @nvm: discrete graphics non-volatile memory */ + struct intel_dg_nvm_dev *nvm; + /** @oa: oa observation subsystem */ struct xe_oa oa; @@ -554,6 +603,12 @@ struct xe_device { /** @pmu: performance monitoring unit */ struct xe_pmu pmu; + /** @i2c: I2C host controller */ + struct xe_i2c *i2c; + + /** @atomic_svm_timeslice_ms: Atomic SVM fault timeslice MS */ + u32 atomic_svm_timeslice_ms; + #ifdef TEST_VM_OPS_ERROR /** * @vm_inject_error_position: inject errors at different places in VM @@ -562,6 +617,14 @@ struct xe_device { u8 vm_inject_error_position; #endif +#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) + /** + * @global_total_pages: global GPU page usage tracked for gpu_mem + * tracepoints + */ + atomic64_t global_total_pages; +#endif + /* private: */ #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) @@ -571,27 +634,9 @@ struct xe_device { * drm_i915_private during build. 
After cleanup these should go away, * migrating to the right sub-structs */ - struct intel_display display; - enum intel_pch pch_type; - - struct dram_info { - bool wm_lv_0_adjust_needed; - u8 num_channels; - bool symmetric_memory; - enum intel_dram_type { - INTEL_DRAM_UNKNOWN, - INTEL_DRAM_DDR3, - INTEL_DRAM_DDR4, - INTEL_DRAM_LPDDR3, - INTEL_DRAM_LPDDR4, - INTEL_DRAM_DDR5, - INTEL_DRAM_LPDDR5, - INTEL_DRAM_GDDR, - INTEL_DRAM_GDDR_ECC, - } type; - u8 num_qgv_points; - u8 num_psf_gv_points; - } dram_info; + struct intel_display *display; + + const struct dram_info *dram_info; /* * edram size in MB. diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules new file mode 100644 index 000000000000..3a0c4ccc4224 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules @@ -0,0 +1,2 @@ +15015404425 PLATFORM(LUNARLAKE) + PLATFORM(PANTHERLAKE) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index f7a20264ea33..346f857f3837 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -233,7 +233,7 @@ static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach) struct drm_gem_object *obj = attach->importer_priv; struct xe_bo *bo = gem_to_xe_bo(obj); - XE_WARN_ON(xe_bo_evict(bo, false)); + XE_WARN_ON(xe_bo_evict(bo)); } static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = { diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 31f688e953d7..f931ff9b1ec0 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -167,7 +167,7 @@ void xe_drm_client_remove_bo(struct xe_bo *bo) static void bo_meminfo(struct xe_bo *bo, struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) { - u64 sz = bo->size; + u64 sz = xe_bo_size(bo); u32 mem_type = bo->ttm.resource->mem_type; xe_bo_assert_held(bo); diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h index d61650d4aa0b..95242a375e54 100644 --- a/drivers/gpu/drm/xe/xe_drv.h +++ b/drivers/gpu/drm/xe/xe_drv.h @@ -9,7 +9,7 @@ #include <drm/drm_drv.h> #define DRIVER_NAME "xe" -#define DRIVER_DESC "Intel Xe Graphics" +#define DRIVER_DESC "Intel Xe2 Graphics" /* Interface history: * diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index e2bb156c71fb..af7916315ac6 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -258,11 +258,13 @@ static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value, static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value, struct eu_stall_open_properties *props) { - if (value >= xe->info.gt_count) { + struct xe_gt *gt = xe_device_get_gt(xe, value); + + if (!gt) { drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value); return -EINVAL; } - props->gt = xe_device_get_gt(xe, value); + props->gt = gt; return 0; } @@ -283,7 +285,7 @@ static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension int err; u32 idx; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -313,7 +315,7 @@ static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension, if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; diff --git a/drivers/gpu/drm/xe/xe_exec.c 
b/drivers/gpu/drm/xe/xe_exec.c index b75adfc99fb7..44364c042ad7 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -176,8 +176,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } if (xe_exec_queue_is_parallel(q)) { - err = __copy_from_user(addresses, addresses_user, sizeof(u64) * - q->width); + err = copy_from_user(addresses, addresses_user, sizeof(u64) * + q->width); if (err) { err = -EFAULT; goto err_syncs; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index cd9b1c32f30f..8991b4aed440 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -114,7 +114,6 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, static int __xe_exec_queue_init(struct xe_exec_queue *q) { - struct xe_vm *vm = q->vm; int i, err; u32 flags = 0; @@ -132,32 +131,20 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q) flags |= XE_LRC_CREATE_RUNALONE; } - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - return err; - } - for (i = 0; i < q->width; ++i) { q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec, flags); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); - goto err_unlock; + goto err_lrc; } } - if (vm) - xe_vm_unlock(vm); - err = q->ops->init(q); if (err) goto err_lrc; return 0; -err_unlock: - if (vm) - xe_vm_unlock(vm); err_lrc: for (i = i - 1; i >= 0; --i) xe_lrc_put(q->lrc[i]); @@ -479,7 +466,7 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, int err; u32 idx; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -518,7 +505,7 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -618,13 +605,12 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) return -EINVAL; - err = __copy_from_user(eci, user_eci, - sizeof(struct drm_xe_engine_class_instance) * - len); + err = copy_from_user(eci, user_eci, + sizeof(struct drm_xe_engine_class_instance) * len); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; - if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) + if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id))) return -EINVAL; if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 4f6784e5abf8..c59a9b330697 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -49,9 +49,6 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) fw->gt = gt; spin_lock_init(&fw->lock); - /* Assuming gen11+ so assert this assumption is correct */ - xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); - if (xe->info.graphics_verx100 >= 1270) { init_domain(fw, XE_FW_DOMAIN_ID_GT, FORCEWAKE_GT, @@ -67,10 +64,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) { int i, j; - /* Assuming gen11+ so assert this assumption is correct */ - xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); - - if (!xe_gt_is_media_type(gt)) + if (xe_gt_is_main_type(gt)) init_domain(fw, XE_FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER, FORCEWAKE_ACK_RENDER); diff --git 
a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c index ed9183599e31..6581cb0f0e59 100644 --- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -18,8 +18,8 @@ " *\n" \ " * This file was generated from rules: %s\n" \ " */\n" \ - "#ifndef _GENERATED_XE_WA_OOB_\n" \ - "#define _GENERATED_XE_WA_OOB_\n" \ + "#ifndef _GENERATED_%s_\n" \ + "#define _GENERATED_%s_\n" \ "\n" \ "enum {\n" @@ -52,7 +52,7 @@ static char *strip(char *line, size_t linelen) } #define MAX_LINE_LEN 4096 -static int parse(FILE *input, FILE *csource, FILE *cheader) +static int parse(FILE *input, FILE *csource, FILE *cheader, char *prefix) { char line[MAX_LINE_LEN + 1]; char *name, *prev_name = NULL, *rules; @@ -96,7 +96,7 @@ static int parse(FILE *input, FILE *csource, FILE *cheader) } if (name) { - fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx); + fprintf(cheader, "\t%s_%s = %u,\n", prefix, name, idx); /* Close previous entry before starting a new one */ if (idx) @@ -118,7 +118,33 @@ static int parse(FILE *input, FILE *csource, FILE *cheader) if (idx) fprintf(csource, ") },\n"); - fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx); + fprintf(cheader, "\t_%s_COUNT = %u\n", prefix, idx); + + return 0; +} + +static int fn_to_prefix(const char *fn, char *prefix, size_t size) +{ + size_t len; + + fn = basename(fn); + len = strlen(fn); + + if (len > size - 1) + return -ENAMETOOLONG; + + memcpy(prefix, fn, len + 1); + + for (char *p = prefix; *p; p++) { + switch (*p) { + case '.': + *p = '\0'; + return 0; + default: + *p = toupper(*p); + break; + } + } return 0; } @@ -141,6 +167,7 @@ int main(int argc, const char *argv[]) [ARGS_CHEADER] = { .fn = argv[3], .mode = "w" }, }; int ret = 1; + char prefix[128]; if (argc < 3) { fprintf(stderr, "ERROR: wrong arguments\n"); @@ -148,6 +175,9 @@ int main(int argc, const char *argv[]) return 1; } + if (fn_to_prefix(args[ARGS_CHEADER].fn, prefix, sizeof(prefix)) < 0) + return 1; + for (int i = 0; i < _ARGS_COUNT; i++) { args[i].f = fopen(args[i].fn, args[i].mode); if (!args[i].f) { @@ -157,9 +187,10 @@ int main(int argc, const char *argv[]) } } - fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn); + fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn, prefix, prefix); + ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f, - args[ARGS_CHEADER].f); + args[ARGS_CHEADER].f, prefix); if (!ret) fprintf(args[ARGS_CHEADER].f, FOOTER); diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 5fcb2b4c2c13..29d4d3f51da1 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -5,6 +5,7 @@ #include "xe_ggtt.h" +#include <kunit/visibility.h> #include <linux/fault-inject.h> #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/sizes.h> @@ -22,12 +23,13 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_printk.h" -#include "xe_gt_sriov_vf.h" #include "xe_gt_tlb_invalidation.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_pm.h" +#include "xe_res_cursor.h" #include "xe_sriov.h" +#include "xe_tile_sriov_vf.h" #include "xe_wa.h" #include "xe_wopcm.h" @@ -64,13 +66,9 @@ * give us the correct placement for free. 
*/ -static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, - u16 pat_index) +static u64 xelp_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index) { - u64 pte; - - pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); - pte |= XE_PAGE_PRESENT; + u64 pte = XE_PAGE_PRESENT; if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) pte |= XE_GGTT_PTE_DM; @@ -78,13 +76,12 @@ static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, return pte; } -static u64 xelpg_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, - u16 pat_index) +static u64 xelpg_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index) { struct xe_device *xe = xe_bo_device(bo); u64 pte; - pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, pat_index); + pte = xelp_ggtt_pte_flags(bo, pat_index); xe_assert(xe, pat_index <= 3); @@ -149,8 +146,9 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) xe_tile_assert(ggtt->tile, start < end); if (ggtt->scratch) - scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, - pat_index); + scratch_pte = xe_bo_addr(ggtt->scratch, 0, XE_PAGE_SIZE) | + ggtt->pt_ops->pte_encode_flags(ggtt->scratch, + pat_index); else scratch_pte = 0; @@ -160,6 +158,22 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) } } +/** + * xe_ggtt_alloc - Allocate a GGTT for a given &xe_tile + * @tile: &xe_tile + * + * Allocates a &xe_ggtt for a given tile. + * + * Return: &xe_ggtt on success, or NULL when out of memory. + */ +struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile) +{ + struct xe_ggtt *ggtt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*ggtt), GFP_KERNEL); + if (ggtt) + ggtt->tile = tile; + return ggtt; +} + static void ggtt_fini_early(struct drm_device *drm, void *arg) { struct xe_ggtt *ggtt = arg; @@ -176,6 +190,13 @@ static void ggtt_fini(void *arg) ggtt->scratch = NULL; } +#ifdef CONFIG_LOCKDEP +void xe_ggtt_might_lock(struct xe_ggtt *ggtt) +{ + might_lock(&ggtt->lock); +} +#endif + static void primelockdep(struct xe_ggtt *ggtt) { if (!IS_ENABLED(CONFIG_LOCKDEP)) @@ -187,20 +208,43 @@ static void primelockdep(struct xe_ggtt *ggtt) } static const struct xe_ggtt_pt_ops xelp_pt_ops = { - .pte_encode_bo = xelp_ggtt_pte_encode_bo, + .pte_encode_flags = xelp_ggtt_pte_flags, .ggtt_set_pte = xe_ggtt_set_pte, }; static const struct xe_ggtt_pt_ops xelpg_pt_ops = { - .pte_encode_bo = xelpg_ggtt_pte_encode_bo, + .pte_encode_flags = xelpg_ggtt_pte_flags, .ggtt_set_pte = xe_ggtt_set_pte, }; static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { - .pte_encode_bo = xelpg_ggtt_pte_encode_bo, + .pte_encode_flags = xelpg_ggtt_pte_flags, .ggtt_set_pte = xe_ggtt_set_pte_and_flush, }; +static void __xe_ggtt_init_early(struct xe_ggtt *ggtt, u32 reserved) +{ + drm_mm_init(&ggtt->mm, reserved, + ggtt->size - reserved); + mutex_init(&ggtt->lock); + primelockdep(ggtt); +} + +int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size) +{ + ggtt->size = size; + __xe_ggtt_init_early(ggtt, reserved); + return 0; +} +EXPORT_SYMBOL_IF_KUNIT(xe_ggtt_init_kunit); + +static void dev_fini_ggtt(void *arg) +{ + struct xe_ggtt *ggtt = arg; + + drain_workqueue(ggtt->wq); +} + /** * xe_ggtt_init_early - Early GGTT initialization * @ggtt: the &xe_ggtt to be initialized @@ -219,7 +263,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) unsigned int gsm_size; int err; - if (IS_SRIOV_VF(xe)) + if (IS_SRIOV_VF(xe) || GRAPHICS_VERx100(xe) >= 1250) gsm_size = SZ_8M; /* GGTT is expected to be 4GiB */ else gsm_size = probe_gsm_size(pdev); @@ -247,18 +291,18 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) 
ggtt->pt_ops = &xelp_pt_ops; ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM); - - drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), - ggtt->size - xe_wopcm_size(xe)); - mutex_init(&ggtt->lock); - primelockdep(ggtt); + __xe_ggtt_init_early(ggtt, xe_wopcm_size(xe)); err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); if (err) return err; + err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); + if (err) + return err; + if (IS_SRIOV_VF(xe)) { - err = xe_gt_sriov_vf_prepare_ggtt(xe_tile_get_gt(ggtt->tile, 0)); + err = xe_tile_sriov_vf_prepare_ggtt(ggtt->tile); if (err) return err; } @@ -365,7 +409,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) * scratch entries, rather keep the scratch page in system memory on * platforms where 64K pages are needed for VRAM. */ - flags = XE_BO_FLAG_PINNED; + flags = 0; if (ggtt->flags & XE_GGTT_FLAGS_64K) flags |= XE_BO_FLAG_SYSTEM; else @@ -377,7 +421,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) goto err; } - xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size); + xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, xe_bo_size(ggtt->scratch)); xe_ggtt_initial_clear(ggtt); @@ -429,16 +473,17 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, } /** - * xe_ggtt_node_insert_balloon - prevent allocation of specified GGTT addresses + * xe_ggtt_node_insert_balloon_locked - prevent allocation of specified GGTT addresses * @node: the &xe_ggtt_node to hold reserved GGTT node * @start: the starting GGTT address of the reserved region * @end: then end GGTT address of the reserved region * - * Use xe_ggtt_node_remove_balloon() to release a reserved GGTT node. + * To be used in cases where ggtt->lock is already taken. + * Use xe_ggtt_node_remove_balloon_locked() to release a reserved GGTT node. * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) +int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, u64 start, u64 end) { struct xe_ggtt *ggtt = node->ggtt; int err; @@ -447,14 +492,13 @@ int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base)); + lockdep_assert_held(&ggtt->lock); node->base.color = 0; node->base.start = start; node->base.size = end - start; - mutex_lock(&ggtt->lock); err = drm_mm_reserve_node(&ggtt->mm, &node->base); - mutex_unlock(&ggtt->lock); if (xe_gt_WARN(ggtt->tile->primary_gt, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n", @@ -466,27 +510,72 @@ int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) } /** - * xe_ggtt_node_remove_balloon - release a reserved GGTT region + * xe_ggtt_node_remove_balloon_locked - release a reserved GGTT region * @node: the &xe_ggtt_node with reserved GGTT region * - * See xe_ggtt_node_insert_balloon() for details. + * To be used in cases where ggtt->lock is already taken. + * See xe_ggtt_node_insert_balloon_locked() for details. 
*/ -void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node) +void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node) { - if (!node || !node->ggtt) + if (!xe_ggtt_node_allocated(node)) return; - if (!drm_mm_node_allocated(&node->base)) - goto free_node; + lockdep_assert_held(&node->ggtt->lock); xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon"); - mutex_lock(&node->ggtt->lock); drm_mm_remove_node(&node->base); - mutex_unlock(&node->ggtt->lock); +} -free_node: - xe_ggtt_node_fini(node); +static void xe_ggtt_assert_fit(struct xe_ggtt *ggtt, u64 start, u64 size) +{ + struct xe_tile *tile = ggtt->tile; + struct xe_device *xe = tile_to_xe(tile); + u64 __maybe_unused wopcm = xe_wopcm_size(xe); + + xe_tile_assert(tile, start >= wopcm); + xe_tile_assert(tile, start + size < ggtt->size - wopcm); +} + +/** + * xe_ggtt_shift_nodes_locked - Shift GGTT nodes to adjust for a change in usable address range. + * @ggtt: the &xe_ggtt struct instance + * @shift: change to the location of area provisioned for current VF + * + * This function moves all nodes from the GGTT VM, to a temp list. These nodes are expected + * to represent allocations in range formerly assigned to current VF, before the range changed. + * When the GGTT VM is completely clear of any nodes, they are re-added with shifted offsets. + * + * The function has no ability of failing - because it shifts existing nodes, without + * any additional processing. If the nodes were successfully existing at the old address, + * they will do the same at the new one. A fail inside this function would indicate that + * the list of nodes was either already damaged, or that the shift brings the address range + * outside of valid bounds. Both cases justify an assert rather than error code. + */ +void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift) +{ + struct xe_tile *tile __maybe_unused = ggtt->tile; + struct drm_mm_node *node, *tmpn; + LIST_HEAD(temp_list_head); + + lockdep_assert_held(&ggtt->lock); + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) + drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) + xe_ggtt_assert_fit(ggtt, node->start + shift, node->size); + + drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) { + drm_mm_remove_node(node); + list_add(&node->node_list, &temp_list_head); + } + + list_for_each_entry_safe(node, tmpn, &temp_list_head, node_list) { + list_del(&node->node_list); + node->start += shift; + drm_mm_reserve_node(&ggtt->mm, node); + xe_tile_assert(tile, drm_mm_node_allocated(node)); + } } /** @@ -537,12 +626,12 @@ int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align) * xe_ggtt_node_init - Initialize %xe_ggtt_node struct * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved. * - * This function will allocated the struct %xe_ggtt_node and return it's pointer. + * This function will allocate the struct %xe_ggtt_node and return its pointer. * This struct will then be freed after the node removal upon xe_ggtt_node_remove() - * or xe_ggtt_node_remove_balloon(). + * or xe_ggtt_node_remove_balloon_locked(). * Having %xe_ggtt_node struct allocated doesn't mean that the node is already allocated * in GGTT. Only the xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), - * xe_ggtt_node_insert_balloon() will ensure the node is inserted or reserved in GGTT. + * xe_ggtt_node_insert_balloon_locked() will ensure the node is inserted or reserved in GGTT. * * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. 
**/ @@ -564,7 +653,7 @@ struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) * @node: the &xe_ggtt_node to be freed * * If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), - * or xe_ggtt_node_insert_balloon(); and this @node is not going to be reused, then, + * or xe_ggtt_node_insert_balloon_locked(); and this @node is not going to be reused, then, * this function needs to be called to free the %xe_ggtt_node struct **/ void xe_ggtt_node_fini(struct xe_ggtt_node *node) @@ -589,26 +678,59 @@ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node) /** * xe_ggtt_map_bo - Map the BO into GGTT * @ggtt: the &xe_ggtt where node will be mapped + * @node: the &xe_ggtt_node where this BO is mapped * @bo: the &xe_bo to be mapped + * @pat_index: Which pat_index to use. */ -void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + struct xe_bo *bo, u16 pat_index) { - u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; - u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; - u64 start; - u64 offset, pte; - if (XE_WARN_ON(!bo->ggtt_node[ggtt->tile->id])) + u64 start, pte, end; + struct xe_res_cursor cur; + + if (XE_WARN_ON(!node)) return; - start = bo->ggtt_node[ggtt->tile->id]->base.start; + start = node->base.start; + end = start + xe_bo_size(bo); - for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { - pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); - ggtt->pt_ops->ggtt_set_pte(ggtt, start + offset, pte); + pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index); + if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { + xe_assert(xe_bo_device(bo), bo->ttm.ttm); + + for (xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &cur); + cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) + ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, + pte | xe_res_dma(&cur)); + } else { + /* Prepend GPU offset */ + pte |= vram_region_gpu_offset(bo->ttm.resource); + + for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); + cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) + ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, + pte + cur.start); } } +/** + * xe_ggtt_map_bo_unlocked - Restore a mapping of a BO into GGTT + * @ggtt: the &xe_ggtt where node will be mapped + * @bo: the &xe_bo to be mapped + * + * This is used to restore a GGTT mapping after suspend. + */ +void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo) +{ + u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? 
XE_CACHE_NONE : XE_CACHE_WB; + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; + + mutex_lock(&ggtt->lock); + xe_ggtt_map_bo(ggtt, bo->ggtt_node[ggtt->tile->id], bo, pat_index); + mutex_unlock(&ggtt->lock); +} + static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end) { @@ -621,7 +743,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (XE_WARN_ON(bo->ggtt_node[tile_id])) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); return 0; } @@ -640,12 +762,15 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, mutex_lock(&ggtt->lock); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, - bo->size, alignment, 0, start, end, 0); + xe_bo_size(bo), alignment, 0, start, end, 0); if (err) { xe_ggtt_node_fini(bo->ggtt_node[tile_id]); bo->ggtt_node[tile_id] = NULL; } else { - xe_ggtt_map_bo(ggtt, bo); + u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; + + xe_ggtt_map_bo(ggtt, bo->ggtt_node[tile_id], bo, pat_index); } mutex_unlock(&ggtt->lock); @@ -698,7 +823,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) return; /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); xe_ggtt_node_remove(bo->ggtt_node[tile_id], bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); @@ -841,3 +966,30 @@ u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer return total; } + +/** + * xe_ggtt_encode_pte_flags - Get PTE encoding flags for BO + * @ggtt: &xe_ggtt + * @bo: &xe_bo + * @pat_index: The pat_index for the PTE. + * + * This function returns the pte_flags for a given BO, without address. + * It's used for DPT to fill a GGTT mapped BO with a linear lookup table. + */ +u64 xe_ggtt_encode_pte_flags(struct xe_ggtt *ggtt, + struct xe_bo *bo, u16 pat_index) +{ + return ggtt->pt_ops->pte_encode_flags(bo, pat_index); +} + +/** + * xe_ggtt_read_pte - Read a PTE from the GGTT + * @ggtt: &xe_ggtt + * @offset: the offset for which the mapping should be read. + * + * Used by testcases, and by display reading out an inherited bios FB. 
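+ *
+ * The GSM backing the GGTT is an array of 64-bit PTEs, one per 4 KiB page,
+ * which is why @offset is scaled by XE_PAGE_SIZE below: for example, offset
+ * 0x10000 reads the PTE at qword index 0x10.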
+ */ +u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset) +{ + return ioread64(ggtt->gsm + (offset / XE_PAGE_SIZE)); +} diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 27e7d67de004..fbe1e397d05d 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -9,22 +9,28 @@ #include "xe_ggtt_types.h" struct drm_printer; +struct xe_tile; +struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile); int xe_ggtt_init_early(struct xe_ggtt *ggtt); +int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size); int xe_ggtt_init(struct xe_ggtt *ggtt); struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt); void xe_ggtt_node_fini(struct xe_ggtt_node *node); -int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, - u64 start, u64 size); -void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node); +int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, + u64 start, u64 size); +void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node); +void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift); int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align); int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, u32 size, u32 align, u32 mm_flags); void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate); bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); -void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + struct xe_bo *bo, u16 pat_index); +void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end); @@ -38,4 +44,14 @@ u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid); #endif +#ifndef CONFIG_LOCKDEP +static inline void xe_ggtt_might_lock(struct xe_ggtt *ggtt) +{ } +#else +void xe_ggtt_might_lock(struct xe_ggtt *ggtt); +#endif + +u64 xe_ggtt_encode_pte_flags(struct xe_ggtt *ggtt, struct xe_bo *bo, u16 pat_index); +u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset); + #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index cb02b7994a9a..c5e999d58ff2 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -74,8 +74,8 @@ struct xe_ggtt_node { * Which can vary from platform to platform. 
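 *
 * With the pte_encode_flags() form below, the callback returns only the flag
 * bits for a BO and PAT index; the caller ORs in the DMA or VRAM address
 * itself before writing the entry, roughly (addresses are placeholders):
 *
 *	pte = pt_ops->pte_encode_flags(bo, pat_index);
 *	pt_ops->ggtt_set_pte(ggtt, ggtt_addr, pte | addr);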
*/ struct xe_ggtt_pt_ops { - /** @pte_encode_bo: Encode PTE address for a given BO */ - u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); + /** @pte_encode_flags: Encode PTE flags for a given BO */ + u64 (*pte_encode_flags)(struct xe_bo *bo, u16 pat_index); /** @ggtt_set_pte: Directly write into GGTT's PTE */ void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); }; diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index c250ea773491..308061f0cf37 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -51,7 +51,15 @@ static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched) static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched) { - drm_sched_resubmit_jobs(&sched->base); + struct drm_sched_job *s_job; + + list_for_each_entry(s_job, &sched->base.pending_list, list) { + struct drm_sched_fence *s_fence = s_job->s_fence; + struct dma_fence *hw_fence = s_fence->parent; + + if (hw_fence && !dma_fence_is_signaled(hw_fence)) + sched->base.ops->run_job(s_job); + } } static inline bool diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 0bcf97063ff6..1d84bf2f2cef 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -59,7 +59,8 @@ static int memcpy_fw(struct xe_gsc *gsc) xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size); xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size); - xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size); + xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, + xe_bo_size(gsc->private) - fw_size); kfree(storage); @@ -82,7 +83,8 @@ static int emit_gsc_upload(struct xe_gsc *gsc) bb->cs[bb->len++] = GSC_FW_LOAD; bb->cs[bb->len++] = lower_32_bits(offset); bb->cs[bb->len++] = upper_32_bits(offset); - bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID; + bb->cs[bb->len++] = (xe_bo_size(gsc->private) / SZ_4K) | + GSC_FW_LOAD_LIMIT_VALID; job = xe_bb_create_job(gsc->q, bb); if (IS_ERR(job)) { diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index d0519cd6704a..464282a89eef 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -23,6 +23,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_pm.h" +#include "xe_tile.h" /* * GSC proxy: @@ -483,7 +484,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc) } /* no multi-tile devices with this feature yet */ - if (tile->id > 0) { + if (!xe_tile_is_root(tile)) { xe_gt_err(gt, "unexpected GSC proxy init on tile %u\n", tile->id); return -EINVAL; } diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 66198cf2662c..c8eda36546d3 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -12,8 +12,10 @@ #include <generated/xe_wa_oob.h> +#include "instructions/xe_alu_commands.h" #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_mi_commands.h" +#include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_bb.h" @@ -110,13 +112,13 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) if (!fw_ref) return; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg |= CG_DIS_CNTLBUS; xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); } - xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 
0xF); xe_force_wake_put(gt_to_fw(gt), fw_ref); } @@ -144,30 +146,23 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) static void gt_reset_worker(struct work_struct *w); -static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) +static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb, + long timeout_jiffies) { struct xe_sched_job *job; - struct xe_bb *bb; struct dma_fence *fence; long timeout; - bb = xe_bb_new(gt, 4, false); - if (IS_ERR(bb)) - return PTR_ERR(bb); - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); + if (IS_ERR(job)) return PTR_ERR(job); - } xe_sched_job_arm(job); fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); - timeout = dma_fence_wait_timeout(fence, false, HZ); + timeout = dma_fence_wait_timeout(fence, false, timeout_jiffies); dma_fence_put(fence); - xe_bb_free(bb, NULL); if (timeout < 0) return timeout; else if (!timeout) @@ -176,90 +171,143 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) return 0; } -/* - * Convert back from encoded value to type-safe, only to be used when reg.mcr - * is true - */ -static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg) +static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) { - return (const struct xe_reg_mcr){.__reg.raw = reg.raw }; + struct xe_bb *bb; + int ret; + + bb = xe_bb_new(gt, 4, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + ret = emit_job_sync(q, bb, HZ); + xe_bb_free(bb, NULL); + + return ret; } static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { struct xe_reg_sr *sr = &q->hwe->reg_lrc; struct xe_reg_sr_entry *entry; + int count_rmw = 0, count = 0, ret; unsigned long idx; - struct xe_sched_job *job; struct xe_bb *bb; - struct dma_fence *fence; - long timeout; - int count = 0; + size_t bb_len = 0; + u32 *cs; + + /* count RMW registers as those will be handled separately */ + xa_for_each(&sr->xa, idx, entry) { + if (entry->reg.masked || entry->clr_bits == ~0) + ++count; + else + ++count_rmw; + } + + if (count) + bb_len += count * 2 + 1; + + if (count_rmw) + bb_len += count_rmw * 20 + 7; if (q->hwe->class == XE_ENGINE_CLASS_RENDER) - /* Big enough to emit all of the context's 3DSTATE */ - bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false); - else - /* Just pick a large BB size */ - bb = xe_bb_new(gt, SZ_4K, false); + /* + * Big enough to emit all of the context's 3DSTATE via + * xe_lrc_emit_hwe_state_instructions() + */ + bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32); + + xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len); + bb = xe_bb_new(gt, bb_len, false); if (IS_ERR(bb)) return PTR_ERR(bb); - xa_for_each(&sr->xa, idx, entry) - ++count; + cs = bb->cs; if (count) { - xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name); + /* + * Emit single LRI with all non RMW regs: 1 leading dw + 2dw per + * reg + 1 + */ - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); xa_for_each(&sr->xa, idx, entry) { struct xe_reg reg = entry->reg; - struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg); u32 val; - /* - * Skip reading the register if it's not really needed - */ if (reg.masked) val = entry->clr_bits << 16; - else if (entry->clr_bits + 1) - val = (reg.mcr ? 
- xe_gt_mcr_unicast_read_any(gt, reg_mcr) : - xe_mmio_read32(>->mmio, reg)) & (~entry->clr_bits); - else + else if (entry->clr_bits == ~0) val = 0; + else + continue; val |= entry->set_bits; - bb->cs[bb->len++] = reg.addr; - bb->cs[bb->len++] = val; + *cs++ = reg.addr; + *cs++ = val; xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val); } } - xe_lrc_emit_hwe_state_instructions(q, bb); + if (count_rmw) { + /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */ - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); - return PTR_ERR(job); + xa_for_each(&sr->xa, idx, entry) { + if (entry->reg.masked || entry->clr_bits == ~0) + continue; + + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; + *cs++ = entry->reg.addr; + *cs++ = CS_GPR_REG(0, 0).addr; + + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | + MI_LRI_LRM_CS_MMIO; + *cs++ = CS_GPR_REG(0, 1).addr; + *cs++ = entry->clr_bits; + *cs++ = CS_GPR_REG(0, 2).addr; + *cs++ = entry->set_bits; + + *cs++ = MI_MATH(8); + *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0); + *cs++ = CS_ALU_INSTR_LOADINV(SRCB, REG1); + *cs++ = CS_ALU_INSTR_AND; + *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU); + *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0); + *cs++ = CS_ALU_INSTR_LOAD(SRCB, REG2); + *cs++ = CS_ALU_INSTR_OR; + *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU); + + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO; + *cs++ = CS_GPR_REG(0, 0).addr; + *cs++ = entry->reg.addr; + + xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", + entry->reg.addr, entry->clr_bits, entry->set_bits); + } + + /* reset used GPR */ + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | + MI_LRI_LRM_CS_MMIO; + *cs++ = CS_GPR_REG(0, 0).addr; + *cs++ = 0; + *cs++ = CS_GPR_REG(0, 1).addr; + *cs++ = 0; + *cs++ = CS_GPR_REG(0, 2).addr; + *cs++ = 0; } - xe_sched_job_arm(job); - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); + cs = xe_lrc_emit_hwe_state_instructions(q, cs); + + bb->len = cs - bb->cs; + + ret = emit_job_sync(q, bb, HZ); - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); xe_bb_free(bb, NULL); - if (timeout < 0) - return timeout; - else if (!timeout) - return -ETIME; - return 0; + return ret; } int xe_gt_record_default_lrcs(struct xe_gt *gt) @@ -321,14 +369,6 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) goto put_nop_q; } - /* Reload golden LRC to record the effect of any indirect W/A */ - err = emit_nop_job(gt, q); - if (err) { - xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), q->guc->id); - goto put_nop_q; - } - xe_map_memcpy_from(xe, default_lrc, &q->lrc[0]->bo->vmap, xe_lrc_pphwsp_offset(q->lrc[0]), @@ -348,6 +388,7 @@ put_exec_queue: int xe_gt_init_early(struct xe_gt *gt) { + unsigned int fw_ref; int err; if (IS_SRIOV_PF(gt_to_xe(gt))) { @@ -375,6 +416,27 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; + xe_mocs_init_early(gt); + + /* + * Only after this point can GT-specific MMIO operations + * (including things like communication with the GuC) + * be performed. 
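+ * Here "this point" means the xe_gt_mmio_init() call just below; the MCR and
+ * PAT setup that follows already goes through the GT's MMIO view.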
+ */ + xe_gt_mmio_init(gt); + + err = xe_uc_init_noalloc(>->uc); + if (err) + return err; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; + + xe_gt_mcr_init_early(gt); + xe_pat_init(gt); + xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } @@ -389,7 +451,7 @@ static void dump_pat_on_error(struct xe_gt *gt) xe_pat_dump(gt, &p); } -static int gt_fw_domain_init(struct xe_gt *gt) +static int gt_init_with_gt_forcewake(struct xe_gt *gt) { unsigned int fw_ref; int err; @@ -398,7 +460,15 @@ static int gt_fw_domain_init(struct xe_gt *gt) if (!fw_ref) return -ETIMEDOUT; - if (!xe_gt_is_media_type(gt)) { + err = xe_uc_init(>->uc); + if (err) + goto err_force_wake; + + xe_gt_topology_init(gt); + xe_gt_mcr_init(gt); + xe_gt_enable_host_l2_vram(gt); + + if (xe_gt_is_main_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); if (err) goto err_force_wake; @@ -413,8 +483,10 @@ static int gt_fw_domain_init(struct xe_gt *gt) xe_gt_mcr_init(gt); err = xe_hw_engines_init_early(gt); - if (err) + if (err) { + dump_pat_on_error(gt); goto err_force_wake; + } err = xe_hw_engine_class_sysfs_init(gt); if (err) @@ -435,13 +507,12 @@ static int gt_fw_domain_init(struct xe_gt *gt) return 0; err_force_wake: - dump_pat_on_error(gt); xe_force_wake_put(gt_to_fw(gt), fw_ref); return err; } -static int all_fw_domain_init(struct xe_gt *gt) +static int gt_init_with_all_forcewake(struct xe_gt *gt) { unsigned int fw_ref; int err; @@ -474,7 +545,7 @@ static int all_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { /* * USM has its only SA pool to non-block behind user operations */ @@ -490,7 +561,7 @@ static int all_fw_domain_init(struct xe_gt *gt) } } - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { struct xe_tile *tile = gt_to_tile(gt); tile->migrate = xe_migrate_init(tile); @@ -500,7 +571,7 @@ static int all_fw_domain_init(struct xe_gt *gt) } } - err = xe_uc_init_hw(>->uc); + err = xe_uc_load_hw(>->uc); if (err) goto err_force_wake; @@ -510,7 +581,7 @@ static int all_fw_domain_init(struct xe_gt *gt) xe_gt_apply_ccs_mode(gt); } - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); if (IS_SRIOV_PF(gt_to_xe(gt))) { @@ -528,39 +599,6 @@ err_force_wake: return err; } -/* - * Initialize enough GT to be able to load GuC in order to obtain hwconfig and - * enable CTB communication. 
- */ -int xe_gt_init_hwconfig(struct xe_gt *gt) -{ - unsigned int fw_ref; - int err; - - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) - return -ETIMEDOUT; - - xe_gt_mcr_init_early(gt); - xe_pat_init(gt); - - err = xe_uc_init(>->uc); - if (err) - goto out_fw; - - err = xe_uc_init_hwconfig(>->uc); - if (err) - goto out_fw; - - xe_gt_topology_init(gt); - xe_gt_mcr_init(gt); - xe_gt_enable_host_l2_vram(gt); - -out_fw: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return err; -} - static void xe_gt_fini(void *arg) { struct xe_gt *gt = arg; @@ -588,17 +626,15 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; - err = xe_gt_pagefault_init(gt); + err = xe_gt_sysfs_init(gt); if (err) return err; - xe_mocs_init_early(gt); - - err = xe_gt_sysfs_init(gt); + err = gt_init_with_gt_forcewake(gt); if (err) return err; - err = gt_fw_domain_init(gt); + err = xe_gt_pagefault_init(gt); if (err) return err; @@ -612,7 +648,7 @@ int xe_gt_init(struct xe_gt *gt) xe_force_wake_init_engines(gt, gt_to_fw(gt)); - err = all_fw_domain_init(gt); + err = gt_init_with_all_forcewake(gt); if (err) return err; @@ -700,7 +736,7 @@ static int vf_gt_restart(struct xe_gt *gt) if (err) return err; - err = xe_uc_init_hw(>->uc); + err = xe_uc_load_hw(>->uc); if (err) return err; @@ -738,11 +774,11 @@ static int do_gt_restart(struct xe_gt *gt) if (err) return err; - err = xe_uc_init_hw(>->uc); + err = xe_uc_load_hw(>->uc); if (err) return err; - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); if (IS_SRIOV_PF(gt_to_xe(gt))) @@ -797,6 +833,9 @@ static int gt_reset(struct xe_gt *gt) goto err_out; } + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_stop_prepare(gt); + xe_uc_gucrc_disable(>->uc); xe_uc_stop_prepare(>->uc); xe_gt_pagefault_reset(gt); diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 187fa6490eaf..41880979f4de 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -24,11 +24,10 @@ extern struct fault_attr gt_reset_failure; static inline bool xe_fault_inject_gt_reset(void) { - return should_fail(>_reset_failure, 1); + return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(>_reset_failure, 1); } struct xe_gt *xe_gt_alloc(struct xe_tile *tile); -int xe_gt_init_hwconfig(struct xe_gt *gt); int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); void xe_gt_mmio_init(struct xe_gt *gt); @@ -107,6 +106,11 @@ static inline bool xe_gt_has_indirect_ring_state(struct xe_gt *gt) xe_device_uc_enabled(gt_to_xe(gt)); } +static inline bool xe_gt_is_main_type(struct xe_gt *gt) +{ + return gt->info.type == XE_GT_TYPE_MAIN; +} + static inline bool xe_gt_is_media_type(struct xe_gt *gt) { return gt->info.type == XE_GT_TYPE_MEDIA; diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index f7005a3643e6..848618acdca8 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -122,24 +122,6 @@ static int powergate_info(struct xe_gt *gt, struct drm_printer *p) return ret; } -static int force_reset(struct xe_gt *gt, struct drm_printer *p) -{ - xe_pm_runtime_get(gt_to_xe(gt)); - xe_gt_reset_async(gt); - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - -static int force_reset_sync(struct xe_gt *gt, struct drm_printer *p) -{ - xe_pm_runtime_get(gt_to_xe(gt)); - xe_gt_reset(gt); - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - static int sa_info(struct xe_gt *gt, struct drm_printer 
*p) { struct xe_tile *tile = gt_to_tile(gt); @@ -300,20 +282,18 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p) return 0; } -static const struct drm_info_list debugfs_list[] = { - {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, - {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset}, - {"force_reset_sync", .show = xe_gt_debugfs_simple_show, .data = force_reset_sync}, +/* + * only for GT debugfs files which can be safely used on the VF as well: + * - without access to the GT privileged registers + * - without access to the PF specific data + */ +static const struct drm_info_list vf_safe_debugfs_list[] = { {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info}, {"topology", .show = xe_gt_debugfs_simple_show, .data = topology}, - {"steering", .show = xe_gt_debugfs_simple_show, .data = steering}, {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt}, - {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info}, {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds}, {"tunings", .show = xe_gt_debugfs_simple_show, .data = tunings}, - {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, - {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs}, {"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc}, {"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc}, {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, @@ -323,6 +303,87 @@ static const struct drm_info_list debugfs_list[] = { {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig}, }; +/* everything else should be added here */ +static const struct drm_info_list pf_only_debugfs_list[] = { + {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, + {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs}, + {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, + {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info}, + {"steering", .show = xe_gt_debugfs_simple_show, .data = steering}, +}; + +static ssize_t write_to_gt_call(const char __user *userbuf, size_t count, loff_t *ppos, + void (*call)(struct xe_gt *), struct xe_gt *gt) +{ + bool yes; + int ret; + + if (*ppos) + return -EINVAL; + ret = kstrtobool_from_user(userbuf, count, &yes); + if (ret < 0) + return ret; + if (yes) + call(gt); + return count; +} + +static void force_reset(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_pm_runtime_get(xe); + xe_gt_reset_async(gt); + xe_pm_runtime_put(xe); +} + +static ssize_t force_reset_write(struct file *file, + const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct seq_file *s = file->private_data; + struct xe_gt *gt = s->private; + + return write_to_gt_call(userbuf, count, ppos, force_reset, gt); +} + +static int force_reset_show(struct seq_file *s, void *unused) +{ + struct xe_gt *gt = s->private; + + force_reset(gt); /* to be deprecated! 
*/ + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(force_reset); + +static void force_reset_sync(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_pm_runtime_get(xe); + xe_gt_reset(gt); + xe_pm_runtime_put(xe); +} + +static ssize_t force_reset_sync_write(struct file *file, + const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct seq_file *s = file->private_data; + struct xe_gt *gt = s->private; + + return write_to_gt_call(userbuf, count, ppos, force_reset_sync, gt); +} + +static int force_reset_sync_show(struct seq_file *s, void *unused) +{ + struct xe_gt *gt = s->private; + + force_reset_sync(gt); /* to be deprecated! */ + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(force_reset_sync); + void xe_gt_debugfs_register(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -346,10 +407,19 @@ void xe_gt_debugfs_register(struct xe_gt *gt) */ root->d_inode->i_private = gt; - drm_debugfs_create_files(debugfs_list, - ARRAY_SIZE(debugfs_list), + /* VF safe */ + debugfs_create_file("force_reset", 0600, root, gt, &force_reset_fops); + debugfs_create_file("force_reset_sync", 0600, root, gt, &force_reset_sync_fops); + + drm_debugfs_create_files(vf_safe_debugfs_list, + ARRAY_SIZE(vf_safe_debugfs_list), root, minor); + if (!IS_SRIOV_VF(xe)) + drm_debugfs_create_files(pf_only_debugfs_list, + ARRAY_SIZE(pf_only_debugfs_list), + root, minor); + xe_uc_debugfs_register(>->uc, root); if (IS_SRIOV_PF(xe)) diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 604bdc7c8173..60d9354e7dbf 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -32,13 +32,18 @@ * Xe's Freq provides a sysfs API for frequency management: * * device/tile#/gt#/freq0/<item>_freq *read-only* files: + * * - act_freq: The actual resolved frequency decided by PCODE. * - cur_freq: The current one requested by GuC PC to the PCODE. * - rpn_freq: The Render Performance (RP) N level, which is the minimal one. + * - rpa_freq: The Render Performance (RP) A level, which is the achiveable one. + * Calculated by PCODE at runtime based on multiple running conditions * - rpe_freq: The Render Performance (RP) E level, which is the efficient one. + * Calculated by PCODE at runtime based on multiple running conditions * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one. * * device/tile#/gt#/freq0/<item>_freq *read-write* files: + * * - min_freq: Min frequency request. * - max_freq: Max frequency request. * If max <= min, then freq_min becomes a fixed frequency request. 
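As a quick illustration of the sysfs interface documented in the hunk above (a userspace sketch, not taken from the driver sources; the card0/tile0/gt0 path components are assumptions for the example, and the reported values are MHz on current platforms), the freq0 attributes can be read and written like any other integer sysfs file:

#include <stdio.h>

/* Read one of the freq0 attributes documented above, e.g. "act_freq". */
static int read_freq(const char *name, unsigned int *val)
{
	char path[128];
	FILE *f;
	int ok;

	snprintf(path, sizeof(path),
		 "/sys/class/drm/card0/device/tile0/gt0/freq0/%s", name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	ok = fscanf(f, "%u", val) == 1;
	fclose(f);
	return ok ? 0 : -1;
}

int main(void)
{
	unsigned int act, cur, rp0;

	if (!read_freq("act_freq", &act) &&
	    !read_freq("cur_freq", &cur) &&
	    !read_freq("rp0_freq", &rp0))
		printf("act=%u cur=%u rp0=%u\n", act, cur, rp0);

	/*
	 * Capping the GT works the same way in reverse: write an integer to
	 * max_freq (or min_freq), given sufficient privileges.
	 */
	return 0;
}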
@@ -56,9 +61,10 @@ dev_to_xe(struct device *dev) return gt_to_xe(kobj_to_gt(dev->kobj.parent)); } -static ssize_t act_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t act_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; @@ -68,11 +74,12 @@ static ssize_t act_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(act_freq); +static struct kobj_attribute attr_act_freq = __ATTR_RO(act_freq); -static ssize_t cur_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t cur_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -85,11 +92,12 @@ static ssize_t cur_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(cur_freq); +static struct kobj_attribute attr_cur_freq = __ATTR_RO(cur_freq); -static ssize_t rp0_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t rp0_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; @@ -99,11 +107,12 @@ static ssize_t rp0_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(rp0_freq); +static struct kobj_attribute attr_rp0_freq = __ATTR_RO(rp0_freq); -static ssize_t rpe_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t rpe_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; @@ -113,11 +122,12 @@ static ssize_t rpe_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(rpe_freq); +static struct kobj_attribute attr_rpe_freq = __ATTR_RO(rpe_freq); -static ssize_t rpa_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t rpa_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; @@ -127,20 +137,22 @@ static ssize_t rpa_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static DEVICE_ATTR_RO(rpa_freq); +static struct kobj_attribute attr_rpa_freq = __ATTR_RO(rpa_freq); -static ssize_t rpn_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t rpn_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpn_freq(pc)); } -static DEVICE_ATTR_RO(rpn_freq); +static struct kobj_attribute attr_rpn_freq = __ATTR_RO(rpn_freq); -static ssize_t min_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t min_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -154,9 +166,10 @@ static ssize_t min_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, - const char *buff, size_t count) +static ssize_t 
min_freq_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buff, size_t count) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -173,11 +186,12 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, return count; } -static DEVICE_ATTR_RW(min_freq); +static struct kobj_attribute attr_min_freq = __ATTR_RW(min_freq); -static ssize_t max_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t max_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -191,9 +205,10 @@ static ssize_t max_freq_show(struct device *dev, return sysfs_emit(buf, "%d\n", freq); } -static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, - const char *buff, size_t count) +static ssize_t max_freq_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buff, size_t count) { + struct device *dev = kobj_to_dev(kobj); struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; ssize_t ret; @@ -210,17 +225,17 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, return count; } -static DEVICE_ATTR_RW(max_freq); +static struct kobj_attribute attr_max_freq = __ATTR_RW(max_freq); static const struct attribute *freq_attrs[] = { - &dev_attr_act_freq.attr, - &dev_attr_cur_freq.attr, - &dev_attr_rp0_freq.attr, - &dev_attr_rpa_freq.attr, - &dev_attr_rpe_freq.attr, - &dev_attr_rpn_freq.attr, - &dev_attr_min_freq.attr, - &dev_attr_max_freq.attr, + &attr_act_freq.attr, + &attr_cur_freq.attr, + &attr_rp0_freq.attr, + &attr_rpa_freq.attr, + &attr_rpe_freq.attr, + &attr_rpn_freq.attr, + &attr_min_freq.attr, + &attr_max_freq.attr, NULL }; diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index fbbace7b0b12..ffb210216aa9 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -121,7 +121,7 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) if (vcs_mask || vecs_mask) gtidle->powergate_enable = MEDIA_POWERGATE_ENABLE; - if (!xe_gt_is_media_type(gt)) + if (xe_gt_is_main_type(gt)) gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; if (xe->info.platform != XE_DG1) { @@ -249,9 +249,10 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) return 0; } -static ssize_t name_show(struct device *dev, - struct device_attribute *attr, char *buff) +static ssize_t name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt_idle *gtidle = dev_to_gtidle(dev); struct xe_guc_pc *pc = gtidle_to_pc(gtidle); ssize_t ret; @@ -262,11 +263,12 @@ static ssize_t name_show(struct device *dev, return ret; } -static DEVICE_ATTR_RO(name); +static struct kobj_attribute name_attr = __ATTR_RO(name); -static ssize_t idle_status_show(struct device *dev, - struct device_attribute *attr, char *buff) +static ssize_t idle_status_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt_idle *gtidle = dev_to_gtidle(dev); struct xe_guc_pc *pc = gtidle_to_pc(gtidle); enum xe_gt_idle_state state; @@ -277,6 +279,7 @@ static ssize_t idle_status_show(struct device *dev, return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state)); } +static struct kobj_attribute idle_status_attr = __ATTR_RO(idle_status); u64 xe_gt_idle_residency_msec(struct xe_gt_idle 
*gtidle) { @@ -291,10 +294,11 @@ u64 xe_gt_idle_residency_msec(struct xe_gt_idle *gtidle) return residency; } -static DEVICE_ATTR_RO(idle_status); -static ssize_t idle_residency_ms_show(struct device *dev, - struct device_attribute *attr, char *buff) + +static ssize_t idle_residency_ms_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt_idle *gtidle = dev_to_gtidle(dev); struct xe_guc_pc *pc = gtidle_to_pc(gtidle); u64 residency; @@ -305,12 +309,12 @@ static ssize_t idle_residency_ms_show(struct device *dev, return sysfs_emit(buff, "%llu\n", residency); } -static DEVICE_ATTR_RO(idle_residency_ms); +static struct kobj_attribute idle_residency_attr = __ATTR_RO(idle_residency_ms); static const struct attribute *gt_idle_attrs[] = { - &dev_attr_name.attr, - &dev_attr_idle_status.attr, - &dev_attr_idle_residency_ms.attr, + &name_attr.attr, + &idle_status_attr.attr, + &idle_residency_attr.attr, NULL, }; diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 605aad3554e7..64a2f0d6aaf9 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -345,7 +345,8 @@ fallback: * Some older platforms don't have tables or don't have complete tables. * Newer platforms should always have the required info. */ - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 2000) + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 2000 && + !gt_to_xe(gt)->info.force_execlist) xe_gt_err(gt, "Slice/Subslice counts missing from hwconfig table; using typical fallback values\n"); if (gt_to_xe(gt)->info.platform == XE_PVC) @@ -419,12 +420,6 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt) gt->steering[SQIDI_PSMI].instance_target = select & 0x1; } -static void init_steering_inst0(struct xe_gt *gt) -{ - gt->steering[INSTANCE0].group_target = 0; /* unused */ - gt->steering[INSTANCE0].instance_target = 0; /* unused */ -} - static const struct { const char *name; void (*init)(struct xe_gt *gt); @@ -435,7 +430,7 @@ static const struct { [DSS] = { "DSS", init_steering_dss }, [OADDRM] = { "OADDRM / GPMXMT", init_steering_oaddrm }, [SQIDI_PSMI] = { "SQIDI_PSMI", init_steering_sqidi_psmi }, - [INSTANCE0] = { "INSTANCE 0", init_steering_inst0 }, + [INSTANCE0] = { "INSTANCE 0", NULL }, [IMPLICIT_STEERING] = { "IMPLICIT", NULL }, }; @@ -445,25 +440,17 @@ static const struct { * * Perform early software only initialization of the MCR lock to allow * the synchronization on accessing the STEER_SEMAPHORE register and - * use the xe_gt_mcr_multicast_write() function. + * use the xe_gt_mcr_multicast_write() function, plus the minimum + * safe MCR registers required for VRAM/CCS probing. */ void xe_gt_mcr_init_early(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); + BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES); BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES); spin_lock_init(>->mcr_lock); -} - -/** - * xe_gt_mcr_init - Normal initialization of the MCR support - * @gt: GT structure - * - * Perform normal initialization of the MCR for all usages. - */ -void xe_gt_mcr_init(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); if (IS_SRIOV_VF(xe)) return; @@ -504,10 +491,27 @@ void xe_gt_mcr_init(struct xe_gt *gt) } } + /* Mark instance 0 as initialized, we need this early for VRAM and CCS probe. 
*/ + gt->steering[INSTANCE0].initialized = true; +} + +/** + * xe_gt_mcr_init - Normal initialization of the MCR support + * @gt: GT structure + * + * Perform normal initialization of the MCR for all usages. + */ +void xe_gt_mcr_init(struct xe_gt *gt) +{ + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + /* Select non-terminated steering target for each type */ - for (int i = 0; i < NUM_STEERING_TYPES; i++) + for (int i = 0; i < NUM_STEERING_TYPES; i++) { + gt->steering[i].initialized = true; if (gt->steering[i].ranges && xe_steering_types[i].init) xe_steering_types[i].init(gt); + } } /** @@ -569,6 +573,10 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) { if (xe_mmio_in_range(>->mmio, >->steering[type].ranges[i], reg)) { + drm_WARN(>_to_xe(gt)->drm, !gt->steering[type].initialized, + "Uninitialized usage of MCR register %s/%#x\n", + xe_steering_types[type].name, reg.addr); + *group = gt->steering[type].group_target; *instance = gt->steering[type].instance_target; return true; diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 0c22b3a36655..5a75d56d8558 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -14,6 +14,7 @@ #include "abi/guc_actions_abi.h" #include "xe_bo.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_gt_stats.h" #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" @@ -68,31 +69,8 @@ static bool access_is_atomic(enum access_type access_type) static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) { - return BIT(tile->id) & vma->tile_present && - !(BIT(tile->id) & vma->tile_invalidated); -} - -static bool vma_matches(struct xe_vma *vma, u64 page_addr) -{ - if (page_addr > xe_vma_end(vma) - 1 || - page_addr + SZ_4K - 1 < xe_vma_start(vma)) - return false; - - return true; -} - -static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr) -{ - struct xe_vma *vma = NULL; - - if (vm->usm.last_fault_vma) { /* Fast lookup */ - if (vma_matches(vm->usm.last_fault_vma, page_addr)) - vma = vm->usm.last_fault_vma; - } - if (!vma) - vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); - - return vma; + return xe_vm_has_valid_gpu_mapping(tile, vma->tile_present, + vma->tile_invalidated); } static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, @@ -143,7 +121,7 @@ static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, trace_xe_vma_pagefault(vma); - /* Check if VMA is valid */ + /* Check if VMA is valid, opportunistic check only */ if (vma_is_valid(tile, vma) && !atomic) return 0; @@ -180,7 +158,6 @@ retry_userptr: dma_fence_wait(fence, false); dma_fence_put(fence); - vma->tile_invalidated &= ~BIT(tile->id); unlock_dma_resv: drm_exec_fini(&exec); @@ -231,7 +208,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) goto unlock_vm; } - vma = lookup_vma(vm, pf->page_addr); + vma = xe_vm_find_vma_by_addr(vm, pf->page_addr); if (!vma) { err = -EINVAL; goto unlock_vm; @@ -240,7 +217,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) atomic = access_is_atomic(pf->access_type); if (xe_vma_is_cpu_addr_mirror(vma)) - err = xe_svm_handle_pagefault(vm, vma, gt_to_tile(gt), + err = xe_svm_handle_pagefault(vm, vma, gt, pf->page_addr, atomic); else err = handle_vma_pagefault(gt, vma, atomic); @@ -266,22 +243,22 @@ static int send_pagefault_reply(struct xe_guc *guc, return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); } -static void 
print_pagefault(struct xe_device *xe, struct pagefault *pf) +static void print_pagefault(struct xe_gt *gt, struct pagefault *pf) { - drm_dbg(&xe->drm, "\n\tASID: %d\n" - "\tVFID: %d\n" - "\tPDATA: 0x%04x\n" - "\tFaulted Address: 0x%08x%08x\n" - "\tFaultType: %d\n" - "\tAccessType: %d\n" - "\tFaultLevel: %d\n" - "\tEngineClass: %d %s\n" - "\tEngineInstance: %d\n", - pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), - lower_32_bits(pf->page_addr), - pf->fault_type, pf->access_type, pf->fault_level, - pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class), - pf->engine_instance); + xe_gt_dbg(gt, "\n\tASID: %d\n" + "\tVFID: %d\n" + "\tPDATA: 0x%04x\n" + "\tFaulted Address: 0x%08x%08x\n" + "\tFaultType: %d\n" + "\tAccessType: %d\n" + "\tFaultLevel: %d\n" + "\tEngineClass: %d %s\n" + "\tEngineInstance: %d\n", + pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), + lower_32_bits(pf->page_addr), + pf->fault_type, pf->access_type, pf->fault_level, + pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class), + pf->engine_instance); } #define PF_MSG_LEN_DW 4 @@ -333,7 +310,6 @@ static bool pf_queue_full(struct pf_queue *pf_queue) int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = gt_to_xe(gt); struct pf_queue *pf_queue; unsigned long flags; u32 asid; @@ -358,7 +334,7 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) pf_queue->num_dw; queue_work(gt->usm.pf_wq, &pf_queue->worker); } else { - drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); + xe_gt_warn(gt, "PageFault Queue full, shouldn't be possible\n"); } spin_unlock_irqrestore(&pf_queue->lock, flags); @@ -371,7 +347,6 @@ static void pf_queue_work_func(struct work_struct *w) { struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker); struct xe_gt *gt = pf_queue->gt; - struct xe_device *xe = gt_to_xe(gt); struct xe_guc_pagefault_reply reply = {}; struct pagefault pf = {}; unsigned long threshold; @@ -382,9 +357,9 @@ static void pf_queue_work_func(struct work_struct *w) while (get_pagefault(pf_queue, &pf)) { ret = handle_pagefault(gt, &pf); if (unlikely(ret)) { - print_pagefault(xe, &pf); + print_pagefault(gt, &pf); pf.fault_unsuccessful = 1; - drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret); + xe_gt_dbg(gt, "Fault response: Unsuccessful %pe\n", ERR_PTR(ret)); } reply.dw0 = FIELD_PREP(PFR_VALID, 1) | @@ -444,6 +419,7 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) #define PF_MULTIPLIER 8 pf_queue->num_dw = (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; + pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw); #undef PF_MULTIPLIER pf_queue->gt = gt; @@ -537,21 +513,21 @@ static int sub_granularity_in_byte(int val) return (granularity_in_byte(val) / 32); } -static void print_acc(struct xe_device *xe, struct acc *acc) +static void print_acc(struct xe_gt *gt, struct acc *acc) { - drm_warn(&xe->drm, "Access counter request:\n" - "\tType: %s\n" - "\tASID: %d\n" - "\tVFID: %d\n" - "\tEngine: %d:%d\n" - "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" - "\tSub_Granularity Vector: 0x%08x\n" - "\tVA Range base: 0x%016llx\n", - acc->access_type ? 
"AC_NTFY_VAL" : "AC_TRIG_VAL", - acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, - granularity_in_byte(acc->granularity) / SZ_1K, - sub_granularity_in_byte(acc->granularity) / SZ_1K, - acc->sub_granularity, acc->va_range_base); + xe_gt_warn(gt, "Access counter request:\n" + "\tType: %s\n" + "\tASID: %d\n" + "\tVFID: %d\n" + "\tEngine: %d:%d\n" + "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" + "\tSub_Granularity Vector: 0x%08x\n" + "\tVA Range base: 0x%016llx\n", + acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL", + acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, + granularity_in_byte(acc->granularity) / SZ_1K, + sub_granularity_in_byte(acc->granularity) / SZ_1K, + acc->sub_granularity, acc->va_range_base); } static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) @@ -649,7 +625,6 @@ static void acc_queue_work_func(struct work_struct *w) { struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker); struct xe_gt *gt = acc_queue->gt; - struct xe_device *xe = gt_to_xe(gt); struct acc acc = {}; unsigned long threshold; int ret; @@ -659,8 +634,8 @@ static void acc_queue_work_func(struct work_struct *w) while (get_acc(acc_queue, &acc)) { ret = handle_acc(gt, &acc); if (unlikely(ret)) { - print_acc(xe, &acc); - drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret); + print_acc(gt, &acc); + xe_gt_warn(gt, "ACC: Unsuccessful %pe\n", ERR_PTR(ret)); } if (time_after(jiffies, threshold) && @@ -705,7 +680,7 @@ int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len) acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW; queue_work(gt->usm.acc_wq, &acc_queue->worker); } else { - drm_warn(>_to_xe(gt)->drm, "ACC Queue full, dropping ACC"); + xe_gt_warn(gt, "ACC Queue full, dropping ACC\n"); } spin_unlock(&acc_queue->lock); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index c08efca6420e..35489fa81825 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -172,6 +172,25 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) pf_clear_vf_scratch_regs(gt, vfid); } +static void pf_cancel_restart(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + if (cancel_work_sync(>->sriov.pf.workers.restart)) + xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n"); +} + +/** + * xe_gt_sriov_pf_stop_prepare() - Prepare to stop SR-IOV support. + * @gt: the &xe_gt + * + * This function can only be called on the PF. 
+ */ +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ + pf_cancel_restart(gt); +} + static void pf_restart(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index f474509411c0..e2b2ff8132dc 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -13,6 +13,7 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt); int xe_gt_sriov_pf_init(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt); void xe_gt_sriov_pf_restart(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) @@ -29,6 +30,10 @@ static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { } +static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ +} + static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt) { } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 10be109bf357..494909f74eb2 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -104,13 +104,13 @@ static int pf_push_vf_buf_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs } if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); void *klvs = xe_guc_buf_cpu_ptr(buf); char name[8]; - xe_gt_sriov_info(gt, "pushed %s config with %u KLV%s:\n", - xe_sriov_function_name(vfid, name, sizeof(name)), - num_klvs, str_plural(num_klvs)); + xe_gt_sriov_dbg(gt, "pushed %s config with %u KLV%s:\n", + xe_sriov_function_name(vfid, name, sizeof(name)), + num_klvs, str_plural(num_klvs)); xe_guc_klv_print(klvs, num_dwords, &p); } @@ -238,26 +238,35 @@ static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned i } /* Return: number of configuration dwords written */ -static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) +static u32 encode_ggtt(u32 *cfg, u64 start, u64 size, bool details) { u32 n = 0; - if (xe_ggtt_node_allocated(config->ggtt_region)) { - if (details) { - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); - cfg[n++] = lower_32_bits(config->ggtt_region->base.start); - cfg[n++] = upper_32_bits(config->ggtt_region->base.start); - } - - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); - cfg[n++] = lower_32_bits(config->ggtt_region->base.size); - cfg[n++] = upper_32_bits(config->ggtt_region->base.size); + if (details) { + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); + cfg[n++] = lower_32_bits(start); + cfg[n++] = upper_32_bits(start); } + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); + cfg[n++] = lower_32_bits(size); + cfg[n++] = upper_32_bits(size); + return n; } /* Return: number of configuration dwords written */ +static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) +{ + struct xe_ggtt_node *node = config->ggtt_region; + + if (!xe_ggtt_node_allocated(node)) + return 0; + + return encode_ggtt(cfg, node->base.start, node->base.size, details); +} + +/* Return: number of configuration dwords written */ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) { u32 n = 0; @@ -282,8 +291,8 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool if (config->lmem_obj) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_LMEM_SIZE); 
- cfg[n++] = lower_32_bits(config->lmem_obj->size); - cfg[n++] = upper_32_bits(config->lmem_obj->size); + cfg[n++] = lower_32_bits(xe_bo_size(config->lmem_obj)); + cfg[n++] = upper_32_bits(xe_bo_size(config->lmem_obj)); } cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM); @@ -332,6 +341,17 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) } xe_gt_assert(gt, num_dwords <= max_cfg_dwords); + if (vfid == PFID) { + u64 ggtt_start = xe_wopcm_size(gt_to_xe(gt)); + u64 ggtt_size = gt_to_tile(gt)->mem.ggtt->size - ggtt_start; + + /* plain PF config data will never include a real GGTT region */ + xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config, true)); + + /* fake PF GGTT config covers full GGTT range except reserved WOPCM */ + num_dwords += encode_ggtt(cfg + num_dwords, ggtt_start, ggtt_size, true); + } + num_klvs = xe_guc_klv_count(cfg, num_dwords); err = pf_push_vf_buf_klvs(gt, vfid, num_klvs, buf, num_dwords); @@ -376,7 +396,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt) { u64 spare; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -388,7 +408,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt) static int pf_set_spare_ggtt(struct xe_gt *gt, u64 size) { - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -443,7 +463,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) int err; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); size = round_up(size, alignment); @@ -492,7 +512,7 @@ static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); struct xe_ggtt_node *node = config->ggtt_region; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); return xe_ggtt_node_allocated(node) ? node->base.size : 0; } @@ -560,7 +580,7 @@ int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size { int err; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); if (vfid) @@ -622,7 +642,7 @@ int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid, int err = 0; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!num_vfs) return 0; @@ -693,7 +713,7 @@ int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); fair = pf_estimate_fair_ggtt(gt, num_vfs); @@ -1299,7 +1319,7 @@ static u64 pf_get_vf_config_lmem(struct xe_gt *gt, unsigned int vfid) struct xe_bo *bo; bo = config->lmem_obj; - return bo ? bo->size : 0; + return bo ? 
xe_bo_size(bo) : 0; } static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) @@ -1327,7 +1347,17 @@ static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 si static void pf_force_lmtt_invalidate(struct xe_device *xe) { - /* TODO */ + struct xe_lmtt *lmtt; + struct xe_tile *tile; + unsigned int tid; + + xe_assert(xe, xe_device_has_lmtt(xe)); + xe_assert(xe, IS_SRIOV_PF(xe)); + + for_each_tile(tile, xe, tid) { + lmtt = &tile->sriov.pf.lmtt; + xe_lmtt_invalidate_hw(lmtt); + } } static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid) @@ -1388,7 +1418,7 @@ static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid) err = xe_lmtt_populate_pages(lmtt, vfid, bo, offset); if (err) goto fail; - offset += bo->size; + offset += xe_bo_size(bo); } } @@ -1406,7 +1436,7 @@ fail: static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) { xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt))); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); if (config->lmem_obj) { @@ -1425,7 +1455,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) xe_gt_assert(gt, vfid); xe_gt_assert(gt, IS_DGFX(xe)); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); size = round_up(size, pf_get_lmem_alignment(gt)); @@ -1444,15 +1474,23 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) return 0; xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M); - bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_NEEDS_2M | - XE_BO_FLAG_PINNED); + bo = xe_bo_create_locked(xe, tile, NULL, + ALIGN(size, PAGE_SIZE), + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_NEEDS_2M | + XE_BO_FLAG_PINNED | + XE_BO_FLAG_PINNED_LATE_RESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); + err = xe_bo_pin(bo); + xe_bo_unlock(bo); + if (unlikely(err)) { + xe_bo_put(bo); + return err; + } + config->lmem_obj = bo; if (xe_device_has_lmtt(xe)) { @@ -1461,12 +1499,12 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) goto release; } - err = pf_push_vf_cfg_lmem(gt, vfid, bo->size); + err = pf_push_vf_cfg_lmem(gt, vfid, xe_bo_size(bo)); if (unlikely(err)) goto reset_lmtt; xe_gt_sriov_dbg_verbose(gt, "VF%u LMEM %zu (%zuM)\n", - vfid, bo->size, bo->size / SZ_1M); + vfid, xe_bo_size(bo), xe_bo_size(bo) / SZ_1M); return 0; reset_lmtt: @@ -1512,6 +1550,8 @@ int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size { int err; + xe_gt_assert(gt, xe_device_has_lmtt(gt_to_xe(gt))); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); if (vfid) err = pf_provision_vf_lmem(gt, vfid, size); @@ -1542,7 +1582,7 @@ int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, int err = 0; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!num_vfs) return 0; @@ -1619,9 +1659,9 @@ int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); - if (!IS_DGFX(gt_to_xe(gt))) + if (!xe_device_has_lmtt(gt_to_xe(gt))) return 0; mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); @@ -1653,7 +1693,7 @@ int xe_gt_sriov_pf_config_set_fair(struct 
xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { err = xe_gt_sriov_pf_config_set_fair_ggtt(gt, vfid, num_vfs); result = result ?: err; err = xe_gt_sriov_pf_config_set_fair_lmem(gt, vfid, num_vfs); @@ -1981,7 +2021,7 @@ static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); struct xe_device *xe = gt_to_xe(gt); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { pf_release_vf_config_ggtt(gt, config); if (IS_DGFX(xe)) { pf_release_vf_config_lmem(gt, config); @@ -2072,7 +2112,7 @@ static int pf_sanitize_vf_resources(struct xe_gt *gt, u32 vfid, long timeout) * Only GGTT and LMEM requires to be cleared by the PF. * GuC doorbell IDs and context IDs do not need any clearing. */ - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { pf_sanitize_ggtt(config->ggtt_region, vfid); if (IS_DGFX(xe)) err = pf_sanitize_lmem(tile, config->lmem_obj, timeout); @@ -2139,7 +2179,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); - bool is_primary = !xe_gt_is_media_type(gt); + bool is_primary = xe_gt_is_main_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; @@ -2155,7 +2195,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_all = valid_all && valid_ggtt; valid_any = valid_any || (valid_ggtt && is_primary); - if (IS_DGFX(xe)) { + if (xe_device_has_lmtt(xe)) { bool valid_lmem = pf_get_vf_config_lmem(primary_gt, vfid); valid_any = valid_any || (valid_lmem && is_primary); @@ -2339,7 +2379,7 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return -EINVAL; if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); drm_printf(&p, "restoring VF%u config:\n", vfid); xe_guc_klv_print(buf, size / sizeof(u32), &p); @@ -2356,6 +2396,35 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return err; } +static void pf_prepare_self_config(struct xe_gt *gt) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, PFID); + + /* + * We want PF to be allowed to use all of context ID, doorbells IDs + * and whole usable GGTT area. While we can store ctxs/dbs numbers + * directly in the config structure, can't do the same with the GGTT + * configuration, so let it be prepared on demand while pushing KLVs. 
+ */ + config->num_ctxs = GUC_ID_MAX; + config->num_dbs = GUC_NUM_DOORBELLS; +} + +static int pf_push_self_config(struct xe_gt *gt) +{ + int err; + + err = pf_push_full_vf_config(gt, PFID); + if (err) { + xe_gt_sriov_err(gt, "Failed to push self configuration (%pe)\n", + ERR_PTR(err)); + return err; + } + + xe_gt_sriov_dbg_verbose(gt, "self configuration completed\n"); + return 0; +} + static void fini_config(void *arg) { struct xe_gt *gt = arg; @@ -2379,9 +2448,18 @@ static void fini_config(void *arg) int xe_gt_sriov_pf_config_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + int err; xe_gt_assert(gt, IS_SRIOV_PF(xe)); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_prepare_self_config(gt); + err = pf_push_self_config(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (err) + return err; + return devm_add_action_or_reset(xe->drm.dev, fini_config, gt); } @@ -2399,6 +2477,10 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt) unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); unsigned int fail = 0, skip = 0; + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_push_self_config(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = 1; n <= total_vfs; n++) { if (xe_gt_sriov_pf_config_is_empty(gt, n)) skip++; @@ -2542,10 +2624,10 @@ int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p) if (!config->lmem_obj) continue; - string_get_size(config->lmem_obj->size, 1, STRING_UNITS_2, + string_get_size(xe_bo_size(config->lmem_obj), 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "VF%u:\t%zu\t(%s)\n", - n, config->lmem_obj->size, buf); + n, xe_bo_size(config->lmem_obj), buf); } mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 1f50aec3a059..4f7fff892bc0 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -15,10 +15,11 @@ #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_monitor.h" -#include "xe_gt_sriov_pf_service.h" #include "xe_gt_sriov_printk.h" #include "xe_guc_ct.h" #include "xe_sriov.h" +#include "xe_sriov_pf_service.h" +#include "xe_tile.h" static const char *control_cmd_to_string(u32 cmd) { @@ -1064,7 +1065,9 @@ static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid) if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA)) return false; - xe_gt_sriov_pf_service_reset(gt, vfid); + if (xe_tile_is_root(gt->tile) && xe_gt_is_main_type(gt)) + xe_sriov_pf_service_reset_vf(gt_to_xe(gt), vfid); + xe_gt_sriov_pf_monitor_flr(gt, vfid); pf_enter_vf_flr_reset_mmio(gt, vfid); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index b2521dd6ec42..bf679b21f485 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -51,27 +51,18 @@ static unsigned int extract_vfid(struct dentry *d) * /sys/kernel/debug/dri/0/ * ├── gt0 * │  ├── pf - * │  │  ├── ggtt_available - * │  │  ├── ggtt_provisioned * │  │  ├── contexts_provisioned * │  │  ├── doorbells_provisioned * │  │  ├── runtime_registers * │  │  ├── negotiated_versions * │  │  ├── adverse_events + * ├── gt1 + * │  ├── pf + * │  │  ├── ... 
*/ static const struct drm_info_list pf_info[] = { { - "ggtt_available", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_config_print_available_ggtt, - }, - { - "ggtt_provisioned", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_config_print_ggtt, - }, - { "contexts_provisioned", .show = xe_gt_debugfs_simple_show, .data = xe_gt_sriov_pf_config_print_ctxs, @@ -82,24 +73,50 @@ static const struct drm_info_list pf_info[] = { .data = xe_gt_sriov_pf_config_print_dbs, }, { - "lmem_provisioned", + "runtime_registers", .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_config_print_lmem, + .data = xe_gt_sriov_pf_service_print_runtime, }, { - "runtime_registers", + "adverse_events", .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_service_print_runtime, + .data = xe_gt_sriov_pf_monitor_print_events, + }, +}; + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │  ├── pf + * │  │  ├── ggtt_available + * │  │  ├── ggtt_provisioned + */ + +static const struct drm_info_list pf_ggtt_info[] = { + { + "ggtt_available", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_available_ggtt, }, { - "negotiated_versions", + "ggtt_provisioned", .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_service_print_version, + .data = xe_gt_sriov_pf_config_print_ggtt, }, +}; + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │  ├── pf + * │  │  ├── lmem_provisioned + */ + +static const struct drm_info_list pf_lmem_info[] = { { - "adverse_events", + "lmem_provisioned", .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_monitor_print_events, + .data = xe_gt_sriov_pf_config_print_lmem, }, }; @@ -283,10 +300,10 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne xe_gt_assert(gt, gt == extract_gt(parent)); xe_gt_assert(gt, vfid == extract_vfid(parent)); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare", 0644, parent, parent, &ggtt_fops); - if (IS_DGFX(gt_to_xe(gt))) + if (xe_device_has_lmtt(gt_to_xe(gt))) debugfs_create_file_unsafe(vfid ? 
"lmem_quota" : "lmem_spare", 0644, parent, parent, &lmem_fops); } @@ -532,6 +549,16 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) pfdentry->d_inode->i_private = gt; drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor); + if (xe_gt_is_main_type(gt)) { + drm_debugfs_create_files(pf_ggtt_info, + ARRAY_SIZE(pf_ggtt_info), + pfdentry, minor); + if (xe_device_has_lmtt(gt_to_xe(gt))) + drm_debugfs_create_files(pf_lmem_info, + ARRAY_SIZE(pf_lmem_info), + pfdentry, minor); + } + pf_add_policy_attrs(gt, pfdentry); pf_add_config_attrs(gt, pfdentry, PFID); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index 4efde5f46b43..76dd9233ef9f 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -19,91 +19,7 @@ #include "xe_gt_sriov_pf_service_types.h" #include "xe_guc_ct.h" #include "xe_guc_hxg_helpers.h" - -static void pf_init_versions(struct xe_gt *gt) -{ - BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); - BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); - - /* base versions may differ between platforms */ - gt->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; - gt->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; - - /* latest version is same for all platforms */ - gt->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; - gt->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; -} - -/* Return: 0 on success or a negative error code on failure. */ -static int pf_negotiate_version(struct xe_gt *gt, - u32 wanted_major, u32 wanted_minor, - u32 *major, u32 *minor) -{ - struct xe_gt_sriov_pf_service_version base = gt->sriov.pf.service.version.base; - struct xe_gt_sriov_pf_service_version latest = gt->sriov.pf.service.version.latest; - - xe_gt_assert(gt, base.major); - xe_gt_assert(gt, base.major <= latest.major); - xe_gt_assert(gt, (base.major < latest.major) || (base.minor <= latest.minor)); - - /* VF doesn't care - return our latest */ - if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && - wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { - *major = latest.major; - *minor = latest.minor; - return 0; - } - - /* VF wants newer than our - return our latest */ - if (wanted_major > latest.major) { - *major = latest.major; - *minor = latest.minor; - return 0; - } - - /* VF wants older than min required - reject */ - if (wanted_major < base.major || - (wanted_major == base.major && wanted_minor < base.minor)) { - return -EPERM; - } - - /* previous major - return wanted, as we should still support it */ - if (wanted_major < latest.major) { - /* XXX: we are not prepared for multi-versions yet */ - xe_gt_assert(gt, base.major == latest.major); - return -ENOPKG; - } - - /* same major - return common minor */ - *major = wanted_major; - *minor = min_t(u32, latest.minor, wanted_minor); - return 0; -} - -static void pf_connect(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - xe_gt_assert(gt, major || minor); - - gt->sriov.pf.vfs[vfid].version.major = major; - gt->sriov.pf.vfs[vfid].version.minor = minor; -} - -static void pf_disconnect(struct xe_gt *gt, u32 vfid) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - - gt->sriov.pf.vfs[vfid].version.major = 0; - gt->sriov.pf.vfs[vfid].version.minor = 0; -} - -static bool pf_is_negotiated(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) -{ - 
xe_gt_sriov_pf_assert_vfid(gt, vfid); - - return major == gt->sriov.pf.vfs[vfid].version.major && - minor <= gt->sriov.pf.vfs[vfid].version.minor; -} +#include "xe_sriov_pf_service.h" static const struct xe_reg tgl_runtime_regs[] = { RPM_CONFIG0, /* _MMIO(0x0d00) */ @@ -112,7 +28,6 @@ static const struct xe_reg tgl_runtime_regs[] = { XELP_GT_SLICE_ENABLE, /* _MMIO(0x9138) */ XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ - CTC_MODE, /* _MMIO(0xa26c) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ }; @@ -124,7 +39,6 @@ static const struct xe_reg ats_m_runtime_regs[] = { XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ - CTC_MODE, /* _MMIO(0xa26c) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ }; @@ -136,7 +50,6 @@ static const struct xe_reg pvc_runtime_regs[] = { GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ - CTC_MODE, /* _MMIO(0xA26C) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ }; @@ -150,7 +63,6 @@ static const struct xe_reg ver_1270_runtime_regs[] = { GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ - CTC_MODE, /* _MMIO(0xa26c) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ }; @@ -167,7 +79,6 @@ static const struct xe_reg ver_2000_runtime_regs[] = { XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */ XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */ XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ - CTC_MODE, /* _MMIO(0xa26c) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ }; @@ -185,7 +96,6 @@ static const struct xe_reg ver_3000_runtime_regs[] = { XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */ XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */ XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ - CTC_MODE, /* _MMIO(0xa26c) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ }; @@ -272,7 +182,7 @@ static void pf_prepare_runtime_info(struct xe_gt *gt) read_many(gt, size, regs, values); if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); xe_gt_sriov_pf_service_print_runtime(gt, &p); } @@ -291,8 +201,6 @@ int xe_gt_sriov_pf_service_init(struct xe_gt *gt) { int err; - pf_init_versions(gt); - err = pf_alloc_runtime_info(gt); if (unlikely(err)) goto failed; @@ -317,47 +225,6 @@ void xe_gt_sriov_pf_service_update(struct xe_gt *gt) pf_prepare_runtime_info(gt); } -/** - * xe_gt_sriov_pf_service_reset - Reset a connection with the VF. - * @gt: the &xe_gt - * @vfid: the VF identifier - * - * Reset a VF driver negotiated VF/PF ABI version. - * After that point, the VF driver will have to perform new version handshake - * to continue use of the PF services again. - * - * This function can only be called on PF. - */ -void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid) -{ - pf_disconnect(gt, vfid); -} - -/* Return: 0 on success or a negative error code on failure. 
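
Note: the per-GT pf_negotiate_version() removed above is relocated to the device-level xe_sriov_pf_service code, not dropped; the decision rules stay the same. The standalone sketch below only restates those rules for reference. Function and constant names here (negotiate_abi, WANTED_ANY) are illustrative stand-ins, not the relocated implementation; the real code compares against VF2PF_HANDSHAKE_{MAJOR,MINOR}_ANY.

	#define WANTED_ANY 0	/* stand-in for VF2PF_HANDSHAKE_{MAJOR,MINOR}_ANY */

	/* Illustration of the negotiation rules kept by the relocated helper. */
	static int negotiate_abi(u32 base_major, u32 base_minor,
				 u32 latest_major, u32 latest_minor,
				 u32 wanted_major, u32 wanted_minor,
				 u32 *major, u32 *minor)
	{
		/* VF doesn't care, or asks for newer than we have: offer our latest */
		if ((wanted_major == WANTED_ANY && wanted_minor == WANTED_ANY) ||
		    wanted_major > latest_major) {
			*major = latest_major;
			*minor = latest_minor;
			return 0;
		}

		/* older than the minimum we still support: reject */
		if (wanted_major < base_major ||
		    (wanted_major == base_major && wanted_minor < base_minor))
			return -EPERM;

		/* an older-but-once-supported major would need multi-version support */
		if (wanted_major < latest_major)
			return -ENOPKG;

		/* same major: settle on the common minor */
		*major = wanted_major;
		*minor = min_t(u32, latest_minor, wanted_minor);
		return 0;
	}
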
*/ -static int pf_process_handshake(struct xe_gt *gt, u32 vfid, - u32 wanted_major, u32 wanted_minor, - u32 *major, u32 *minor) -{ - int err; - - xe_gt_sriov_dbg_verbose(gt, "VF%u wants ABI version %u.%u\n", - vfid, wanted_major, wanted_minor); - - err = pf_negotiate_version(gt, wanted_major, wanted_minor, major, minor); - - if (err < 0) { - xe_gt_sriov_notice(gt, "VF%u failed to negotiate ABI %u.%u (%pe)\n", - vfid, wanted_major, wanted_minor, ERR_PTR(err)); - pf_disconnect(gt, vfid); - } else { - xe_gt_sriov_dbg(gt, "VF%u negotiated ABI version %u.%u\n", - vfid, *major, *minor); - pf_connect(gt, vfid, *major, *minor); - } - - return 0; -} - /* Return: length of the response message or a negative error code on failure. */ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, const u32 *request, u32 len, u32 *response, u32 size) @@ -377,7 +244,8 @@ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, wanted_major = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, request[1]); wanted_minor = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, request[1]); - err = pf_process_handshake(gt, origin, wanted_major, wanted_minor, &major, &minor); + err = xe_sriov_pf_service_handshake_vf(gt_to_xe(gt), origin, wanted_major, wanted_minor, + &major, &minor); if (err < 0) return err; @@ -436,8 +304,10 @@ static int pf_process_runtime_query_msg(struct xe_gt *gt, u32 origin, u32 remaining = 0; int ret; - if (!pf_is_negotiated(gt, origin, 1, 0)) + /* this action is available from ABI 1.0 */ + if (!xe_sriov_pf_service_is_negotiated(gt_to_xe(gt), origin, 1, 0)) return -EACCES; + if (unlikely(msg_len > VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) return -EMSGSIZE; if (unlikely(msg_len < VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) @@ -534,33 +404,3 @@ int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p return 0; } - -/** - * xe_gt_sriov_pf_service_print_version - Print ABI versions negotiated with VFs. - * @gt: the &xe_gt - * @p: the &drm_printer - * - * This function is for PF use only. 
- */ -int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p) -{ - struct xe_device *xe = gt_to_xe(gt); - unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); - struct xe_gt_sriov_pf_service_version *version; - - xe_gt_assert(gt, IS_SRIOV_PF(xe)); - - for (n = 1; n <= total_vfs; n++) { - version = >->sriov.pf.vfs[n].version; - if (!version->major && !version->minor) - continue; - - drm_printf(p, "VF%u:\t%u.%u\n", n, version->major, version->minor); - } - - return 0; -} - -#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) -#include "tests/xe_gt_sriov_pf_service_test.c" -#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h index 56aaadf0360d..10b02c9b651c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h @@ -14,9 +14,7 @@ struct xe_gt; int xe_gt_sriov_pf_service_init(struct xe_gt *gt); void xe_gt_sriov_pf_service_update(struct xe_gt *gt); -void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid); -int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p); #ifdef CONFIG_PCI_IOV diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index a439261bf4d7..b282838d59e6 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -82,17 +82,17 @@ int xe_gt_sriov_vf_reset(struct xe_gt *gt) } static int guc_action_match_version(struct xe_guc *guc, - u32 wanted_branch, u32 wanted_major, u32 wanted_minor, - u32 *branch, u32 *major, u32 *minor, u32 *patch) + struct xe_uc_fw_version *wanted, + struct xe_uc_fw_version *found) { u32 request[VF2GUC_MATCH_VERSION_REQUEST_MSG_LEN] = { FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_MATCH_VERSION), - FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted_branch) | - FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted_major) | - FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted_minor), + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted->branch) | + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted->major) | + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted->minor), }; u32 response[GUC_MAX_MMIO_MSG_LEN]; int ret; @@ -106,120 +106,138 @@ static int guc_action_match_version(struct xe_guc *guc, if (unlikely(FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_0_MBZ, response[0]))) return -EPROTO; - *branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]); - *major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]); - *minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]); - *patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]); + memset(found, 0, sizeof(struct xe_uc_fw_version)); + found->branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]); + found->major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]); + found->minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]); + found->patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]); return 0; } -static void vf_minimum_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor) +static int guc_action_match_version_any(struct xe_guc *guc, + struct xe_uc_fw_version *found) 
+{ + struct xe_uc_fw_version wanted = { + .branch = GUC_VERSION_BRANCH_ANY, + .major = GUC_VERSION_MAJOR_ANY, + .minor = GUC_VERSION_MINOR_ANY, + .patch = 0 + }; + + return guc_action_match_version(guc, &wanted, found); +} + +static void vf_minimum_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver) { struct xe_device *xe = gt_to_xe(gt); + memset(ver, 0, sizeof(struct xe_uc_fw_version)); + switch (xe->info.platform) { case XE_TIGERLAKE ... XE_PVC: /* 1.1 this is current baseline for Xe driver */ - *branch = 0; - *major = 1; - *minor = 1; + ver->branch = 0; + ver->major = 1; + ver->minor = 1; break; default: /* 1.2 has support for the GMD_ID KLV */ - *branch = 0; - *major = 1; - *minor = 2; + ver->branch = 0; + ver->major = 1; + ver->minor = 2; break; } } -static void vf_wanted_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor) +static void vf_wanted_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver) { /* for now it's the same as minimum */ - return vf_minimum_guc_version(gt, branch, major, minor); + return vf_minimum_guc_version(gt, ver); } static int vf_handshake_with_guc(struct xe_gt *gt) { - struct xe_gt_sriov_vf_guc_version *guc_version = >->sriov.vf.guc_version; + struct xe_uc_fw_version *guc_version = >->sriov.vf.guc_version; + struct xe_uc_fw_version wanted = {0}; struct xe_guc *guc = >->uc.guc; - u32 wanted_branch, wanted_major, wanted_minor; - u32 branch, major, minor, patch; + bool old = false; int err; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); /* select wanted version - prefer previous (if any) */ if (guc_version->major || guc_version->minor) { - wanted_branch = guc_version->branch; - wanted_major = guc_version->major; - wanted_minor = guc_version->minor; + wanted = *guc_version; + old = true; } else { - vf_wanted_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor); - xe_gt_assert(gt, wanted_major != GUC_VERSION_MAJOR_ANY); + vf_wanted_guc_version(gt, &wanted); + xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY); + + /* First time we handshake, so record the minimum wanted */ + gt->sriov.vf.wanted_guc_version = wanted; } - err = guc_action_match_version(guc, wanted_branch, wanted_major, wanted_minor, - &branch, &major, &minor, &patch); + err = guc_action_match_version(guc, &wanted, guc_version); if (unlikely(err)) goto fail; - /* we don't support interface version change */ - if ((guc_version->major || guc_version->minor) && - (guc_version->branch != branch || guc_version->major != major || - guc_version->minor != minor)) { - xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n", - branch, major, minor, patch); - xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n", - guc_version->branch, guc_version->major, - guc_version->minor, guc_version->patch); - err = -EREMCHG; - goto fail; + if (old) { + /* we don't support interface version change */ + if (MAKE_GUC_VER_STRUCT(*guc_version) != MAKE_GUC_VER_STRUCT(wanted)) { + xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n", + guc_version->branch, guc_version->major, + guc_version->minor, guc_version->patch); + xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n", + wanted.branch, wanted.major, + wanted.minor, wanted.patch); + err = -EREMCHG; + goto fail; + } else { + /* version is unchanged, no need to re-verify it */ + return 0; + } } /* illegal */ - if (major > wanted_major) { + if (guc_version->major > wanted.major) { err = -EPROTO; goto unsupported; } /* there's no fallback on major version. 
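
Note: the rewritten handshake above compares whole ABI versions with MAKE_GUC_VER_STRUCT() instead of field-by-field checks. Packing major/minor/patch into one integer makes versions totally ordered, so "is the found version at least the minimum" becomes a single comparison; branch is not part of the packed value. A minimal sketch of that idea follows; PACK_VER and struct ver are local stand-ins that assume the byte layout used by MAKE_GUC_VER() in xe_guc.h.

	/* assumed layout, mirroring MAKE_GUC_VER()/MAKE_GUC_VER_STRUCT() */
	#define PACK_VER(maj, min, pat)	(((maj) << 16) | ((min) << 8) | (pat))

	struct ver { u8 branch, major, minor, patch; };

	static bool ver_at_least(const struct ver *found, const struct ver *minimum)
	{
		/* branch is checked separately; only major.minor.patch is ordered */
		return PACK_VER(found->major, found->minor, found->patch) >=
		       PACK_VER(minimum->major, minimum->minor, minimum->patch);
	}

	/* e.g. 1.18.4 packs to 0x011204 and 1.7.0 to 0x010700, so 1.18.4 >= 1.7.0 */
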
*/ - if (major != wanted_major) { + if (guc_version->major != wanted.major) { err = -ENOPKG; goto unsupported; } /* check against minimum version supported by us */ - vf_minimum_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor); - xe_gt_assert(gt, major != GUC_VERSION_MAJOR_ANY); - if (major < wanted_major || (major == wanted_major && minor < wanted_minor)) { + vf_minimum_guc_version(gt, &wanted); + xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY); + if (MAKE_GUC_VER_STRUCT(*guc_version) < MAKE_GUC_VER_STRUCT(wanted)) { err = -ENOKEY; goto unsupported; } xe_gt_sriov_dbg(gt, "using GuC interface version %u.%u.%u.%u\n", - branch, major, minor, patch); + guc_version->branch, guc_version->major, + guc_version->minor, guc_version->patch); - guc_version->branch = branch; - guc_version->major = major; - guc_version->minor = minor; - guc_version->patch = patch; return 0; unsupported: xe_gt_sriov_err(gt, "Unsupported GuC version %u.%u.%u.%u (%pe)\n", - branch, major, minor, patch, ERR_PTR(err)); + guc_version->branch, guc_version->major, + guc_version->minor, guc_version->patch, + ERR_PTR(err)); fail: xe_gt_sriov_err(gt, "Unable to confirm GuC version %u.%u (%pe)\n", - wanted_major, wanted_minor, ERR_PTR(err)); + wanted.major, wanted.minor, ERR_PTR(err)); /* try again with *any* just to query which version is supported */ - if (!guc_action_match_version(guc, GUC_VERSION_BRANCH_ANY, - GUC_VERSION_MAJOR_ANY, GUC_VERSION_MINOR_ANY, - &branch, &major, &minor, &patch)) + if (!guc_action_match_version_any(guc, &wanted)) xe_gt_sriov_notice(gt, "GuC reports interface version %u.%u.%u.%u\n", - branch, major, minor, patch); + wanted.branch, wanted.major, wanted.minor, wanted.patch); return err; } @@ -250,6 +268,29 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt) return 0; } +/** + * xe_gt_sriov_vf_guc_versions - Minimum required and found GuC ABI versions + * @gt: the &xe_gt + * @wanted: pointer to the xe_uc_fw_version to be filled with the wanted version + * @found: pointer to the xe_uc_fw_version to be filled with the found version + * + * This function is for VF use only and it can only be used after successful + * version handshake with the GuC. 
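
Note: xe_gt_sriov_vf_guc_versions() introduced below fills either or both outputs, and callers may pass NULL for the slot they do not need (the function checks each pointer). A minimal usage sketch; the caller name print_guc_abi is hypothetical.

	/* hypothetical caller: report only the negotiated (found) version */
	static void print_guc_abi(struct xe_gt *gt, struct drm_printer *p)
	{
		struct xe_uc_fw_version found;

		/* pass NULL for the "wanted" version we don't need */
		xe_gt_sriov_vf_guc_versions(gt, NULL, &found);
		drm_printf(p, "GuC ABI %u.%u.%u.%u\n",
			   found.branch, found.major, found.minor, found.patch);
	}
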
+ */ +void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt, + struct xe_uc_fw_version *wanted, + struct xe_uc_fw_version *found) +{ + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_gt_assert(gt, gt->sriov.vf.guc_version.major); + + if (wanted) + *wanted = gt->sriov.vf.wanted_guc_version; + + if (found) + *found = gt->sriov.vf.guc_version; +} + static int guc_action_vf_notify_resfix_done(struct xe_guc *guc) { u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { @@ -415,6 +456,7 @@ static int vf_get_ggtt_info(struct xe_gt *gt) xe_gt_sriov_dbg_verbose(gt, "GGTT %#llx-%#llx = %lluK\n", start, start + size - 1, size / SZ_1K); + config->ggtt_shift = start - (s64)config->ggtt_base; config->ggtt_base = start; config->ggtt_size = size; @@ -510,7 +552,7 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt) if (unlikely(err)) return err; - if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) { err = vf_get_lmem_info(gt); if (unlikely(err)) return err; @@ -560,106 +602,56 @@ u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt) return gt->sriov.vf.self_config.lmem_size; } -static struct xe_ggtt_node * -vf_balloon_ggtt_node(struct xe_ggtt *ggtt, u64 start, u64 end) -{ - struct xe_ggtt_node *node; - int err; - - node = xe_ggtt_node_init(ggtt); - if (IS_ERR(node)) - return node; - - err = xe_ggtt_node_insert_balloon(node, start, end); - if (err) { - xe_ggtt_node_fini(node); - return ERR_PTR(err); - } - - return node; -} - -static int vf_balloon_ggtt(struct xe_gt *gt) +/** + * xe_gt_sriov_vf_ggtt - VF GGTT configuration. + * @gt: the &xe_gt + * + * This function is for VF use only. + * + * Return: size of the GGTT assigned to VF. + */ +u64 xe_gt_sriov_vf_ggtt(struct xe_gt *gt) { - struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; - struct xe_tile *tile = gt_to_tile(gt); - struct xe_ggtt *ggtt = tile->mem.ggtt; - struct xe_device *xe = gt_to_xe(gt); - u64 start, end; - - xe_gt_assert(gt, IS_SRIOV_VF(xe)); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); - - if (!config->ggtt_size) - return -ENODATA; - - /* - * VF can only use part of the GGTT as allocated by the PF: - * - * WOPCM GUC_GGTT_TOP - * |<------------ Total GGTT size ------------------>| - * - * VF GGTT base -->|<- size ->| - * - * +--------------------+----------+-----------------+ - * |////////////////////| block |\\\\\\\\\\\\\\\\\| - * +--------------------+----------+-----------------+ - * - * |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->| - */ - - start = xe_wopcm_size(xe); - end = config->ggtt_base; - if (end != start) { - tile->sriov.vf.ggtt_balloon[0] = vf_balloon_ggtt_node(ggtt, start, end); - if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) - return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); - } - - start = config->ggtt_base + config->ggtt_size; - end = GUC_GGTT_TOP; - if (end != start) { - tile->sriov.vf.ggtt_balloon[1] = vf_balloon_ggtt_node(ggtt, start, end); - if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { - xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); - return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); - } - } + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_gt_assert(gt, gt->sriov.vf.guc_version.major); + xe_gt_assert(gt, gt->sriov.vf.self_config.ggtt_size); - return 0; + return gt->sriov.vf.self_config.ggtt_size; } -static void deballoon_ggtt(struct drm_device *drm, void *arg) +/** + * xe_gt_sriov_vf_ggtt_base - VF GGTT base offset. + * @gt: the &xe_gt + * + * This function is for VF use only. + * + * Return: base offset of the GGTT assigned to VF. 
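
Note: the ggtt_shift recorded in vf_get_ggtt_info() above is simply the new GGTT base minus the previous one, kept signed so a downward move is representable; the intent is to let post-migration fixups rebase previously stored GGTT offsets by that delta. A small worked example with made-up addresses (illustration only, not driver code):

	static s64 example_ggtt_shift(void)
	{
		u64 old_base = 0x00180000;	/* VF GGTT base before migration */
		u64 new_base = 0x00200000;	/* base reported after restore */
		s64 shift = new_base - (s64)old_base;	/* = +0x80000 */

		/* a GGTT address captured earlier is rebased as addr + shift */
		return shift;
	}
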
+ */ +u64 xe_gt_sriov_vf_ggtt_base(struct xe_gt *gt) { - struct xe_tile *tile = arg; + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_gt_assert(gt, gt->sriov.vf.guc_version.major); + xe_gt_assert(gt, gt->sriov.vf.self_config.ggtt_size); - xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[1]); - xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); + return gt->sriov.vf.self_config.ggtt_base; } /** - * xe_gt_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration. - * @gt: the &xe_gt + * xe_gt_sriov_vf_ggtt_shift - Return shift in GGTT range due to VF migration + * @gt: the &xe_gt struct instance * * This function is for VF use only. * - * Return: 0 on success or a negative error code on failure. + * Return: The shift value; could be negative */ -int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt) +s64 xe_gt_sriov_vf_ggtt_shift(struct xe_gt *gt) { - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = tile_to_xe(tile); - int err; - - if (xe_gt_is_media_type(gt)) - return 0; + struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; - err = vf_balloon_ggtt(gt); - if (err) - return err; + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); - return drmm_add_action_or_reset(&xe->drm, deballoon_ggtt, tile); + return config->ggtt_shift; } static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor) @@ -694,21 +686,22 @@ static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor) return 0; } -static void vf_connect_pf(struct xe_gt *gt, u16 major, u16 minor) +static void vf_connect_pf(struct xe_device *xe, u16 major, u16 minor) { - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_assert(xe, IS_SRIOV_VF(xe)); - gt->sriov.vf.pf_version.major = major; - gt->sriov.vf.pf_version.minor = minor; + xe->sriov.vf.pf_version.major = major; + xe->sriov.vf.pf_version.minor = minor; } -static void vf_disconnect_pf(struct xe_gt *gt) +static void vf_disconnect_pf(struct xe_device *xe) { - vf_connect_pf(gt, 0, 0); + vf_connect_pf(xe, 0, 0); } static int vf_handshake_with_pf(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); u32 major_wanted = GUC_RELAY_VERSION_LATEST_MAJOR; u32 minor_wanted = GUC_RELAY_VERSION_LATEST_MINOR; u32 major = major_wanted, minor = minor_wanted; @@ -724,13 +717,13 @@ static int vf_handshake_with_pf(struct xe_gt *gt) } xe_gt_sriov_dbg(gt, "using VF/PF ABI %u.%u\n", major, minor); - vf_connect_pf(gt, major, minor); + vf_connect_pf(xe, major, minor); return 0; failed: xe_gt_sriov_err(gt, "Unable to confirm VF/PF ABI version %u.%u (%pe)\n", major, minor, ERR_PTR(err)); - vf_disconnect_pf(gt); + vf_disconnect_pf(xe); return err; } @@ -783,10 +776,12 @@ void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt) static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor) { - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + struct xe_device *xe = gt_to_xe(gt); - return major == gt->sriov.vf.pf_version.major && - minor <= gt->sriov.vf.pf_version.minor; + xe_gt_assert(gt, IS_SRIOV_VF(xe)); + + return major == xe->sriov.vf.pf_version.major && + minor <= xe->sriov.vf.pf_version.minor; } static int vf_prepare_runtime_info(struct xe_gt *gt, unsigned int num_regs) @@ -974,7 +969,6 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg) struct vf_runtime_reg *rr; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - xe_gt_assert(gt, gt->sriov.vf.pf_version.major); xe_gt_assert(gt, !reg.vf); if (reg.addr == GMD_ID.addr) { @@ 
-1043,7 +1037,9 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) string_get_size(config->ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "GGTT size:\t%llu (%s)\n", config->ggtt_size, buf); - if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + drm_printf(p, "GGTT shift on last restore:\t%lld\n", config->ggtt_shift); + + if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) { string_get_size(config->lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "LMEM size:\t%llu (%s)\n", config->lmem_size, buf); } @@ -1079,19 +1075,21 @@ void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) */ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt_sriov_vf_guc_version *guc_version = >->sriov.vf.guc_version; - struct xe_gt_sriov_vf_relay_version *pf_version = >->sriov.vf.pf_version; - u32 branch, major, minor; + struct xe_device *xe = gt_to_xe(gt); + struct xe_uc_fw_version *guc_version = >->sriov.vf.guc_version; + struct xe_uc_fw_version *wanted = >->sriov.vf.wanted_guc_version; + struct xe_sriov_vf_relay_version *pf_version = &xe->sriov.vf.pf_version; + struct xe_uc_fw_version ver; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); drm_printf(p, "GuC ABI:\n"); - vf_minimum_guc_version(gt, &branch, &major, &minor); - drm_printf(p, "\tbase:\t%u.%u.%u.*\n", branch, major, minor); + vf_minimum_guc_version(gt, &ver); + drm_printf(p, "\tbase:\t%u.%u.%u.*\n", ver.branch, ver.major, ver.minor); - vf_wanted_guc_version(gt, &branch, &major, &minor); - drm_printf(p, "\twanted:\t%u.%u.%u.*\n", branch, major, minor); + drm_printf(p, "\twanted:\t%u.%u.%u.*\n", + wanted->branch, wanted->major, wanted->minor); drm_printf(p, "\thandshake:\t%u.%u.%u.%u\n", guc_version->branch, guc_version->major, diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index ba6c5d74e326..e0357f341a2d 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -11,19 +11,26 @@ struct drm_printer; struct xe_gt; struct xe_reg; +struct xe_uc_fw_version; int xe_gt_sriov_vf_reset(struct xe_gt *gt); int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt); +void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt, + struct xe_uc_fw_version *wanted, + struct xe_uc_fw_version *found); int xe_gt_sriov_vf_query_config(struct xe_gt *gt); int xe_gt_sriov_vf_connect(struct xe_gt *gt); int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); -int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt); int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt); void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt); u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt); +u64 xe_gt_sriov_vf_ggtt(struct xe_gt *gt); +u64 xe_gt_sriov_vf_ggtt_base(struct xe_gt *gt); +s64 xe_gt_sriov_vf_ggtt_shift(struct xe_gt *gt); + u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg); void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index a57f13b5afcd..298dedf4b009 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -7,30 +7,7 @@ #define _XE_GT_SRIOV_VF_TYPES_H_ #include <linux/types.h> - -/** - * struct xe_gt_sriov_vf_guc_version - GuC ABI version details. - */ -struct xe_gt_sriov_vf_guc_version { - /** @branch: branch version. */ - u8 branch; - /** @major: major version. 
*/ - u8 major; - /** @minor: minor version. */ - u8 minor; - /** @patch: patch version. */ - u8 patch; -}; - -/** - * struct xe_gt_sriov_vf_relay_version - PF ABI version details. - */ -struct xe_gt_sriov_vf_relay_version { - /** @major: major version. */ - u16 major; - /** @minor: minor version. */ - u16 minor; -}; +#include "xe_uc_fw_types.h" /** * struct xe_gt_sriov_vf_selfconfig - VF configuration data. @@ -40,6 +17,8 @@ struct xe_gt_sriov_vf_selfconfig { u64 ggtt_base; /** @ggtt_size: assigned size of the GGTT region. */ u64 ggtt_size; + /** @ggtt_shift: difference in ggtt_base on last migration */ + s64 ggtt_shift; /** @lmem_size: assigned size of the LMEM. */ u64 lmem_size; /** @num_ctxs: assigned number of GuC submission context IDs. */ @@ -71,12 +50,12 @@ struct xe_gt_sriov_vf_runtime { * struct xe_gt_sriov_vf - GT level VF virtualization data. */ struct xe_gt_sriov_vf { + /** @wanted_guc_version: minimum wanted GuC ABI version. */ + struct xe_uc_fw_version wanted_guc_version; /** @guc_version: negotiated GuC ABI version. */ - struct xe_gt_sriov_vf_guc_version guc_version; + struct xe_uc_fw_version guc_version; /** @self_config: resource configurations. */ struct xe_gt_sriov_vf_selfconfig self_config; - /** @pf_version: negotiated VF/PF ABI version. */ - struct xe_gt_sriov_vf_relay_version pf_version; /** @runtime: runtime data retrieved from the PF. */ struct xe_gt_sriov_vf_runtime runtime; }; diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index 6155ea354432..30f942671c2b 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -27,6 +27,7 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) } static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { + "svm_pagefault_count", "tlb_inval_count", "vma_pagefault_count", "vma_pagefault_kb", diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h index d556771f99d6..be3244d7133c 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -7,6 +7,7 @@ #define _XE_GT_STATS_TYPES_H_ enum xe_gt_stats_id { + XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, XE_GT_STATS_ID_TLB_INVAL, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c index 8db78d616b6f..aa962c783cdf 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle.c @@ -114,115 +114,115 @@ static u32 read_reason_vr_tdc(struct xe_gt *gt) return tdc; } -static ssize_t status_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t status_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool status = !!read_status(gt); return sysfs_emit(buff, "%u\n", status); } -static DEVICE_ATTR_RO(status); +static struct kobj_attribute attr_status = __ATTR_RO(status); -static ssize_t reason_pl1_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_pl1_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool pl1 = !!read_reason_pl1(gt); return sysfs_emit(buff, "%u\n", pl1); } -static DEVICE_ATTR_RO(reason_pl1); +static struct kobj_attribute attr_reason_pl1 = __ATTR_RO(reason_pl1); -static ssize_t reason_pl2_show(struct device *dev, - 
struct device_attribute *attr, - char *buff) +static ssize_t reason_pl2_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool pl2 = !!read_reason_pl2(gt); return sysfs_emit(buff, "%u\n", pl2); } -static DEVICE_ATTR_RO(reason_pl2); +static struct kobj_attribute attr_reason_pl2 = __ATTR_RO(reason_pl2); -static ssize_t reason_pl4_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_pl4_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool pl4 = !!read_reason_pl4(gt); return sysfs_emit(buff, "%u\n", pl4); } -static DEVICE_ATTR_RO(reason_pl4); +static struct kobj_attribute attr_reason_pl4 = __ATTR_RO(reason_pl4); -static ssize_t reason_thermal_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_thermal_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool thermal = !!read_reason_thermal(gt); return sysfs_emit(buff, "%u\n", thermal); } -static DEVICE_ATTR_RO(reason_thermal); +static struct kobj_attribute attr_reason_thermal = __ATTR_RO(reason_thermal); -static ssize_t reason_prochot_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_prochot_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool prochot = !!read_reason_prochot(gt); return sysfs_emit(buff, "%u\n", prochot); } -static DEVICE_ATTR_RO(reason_prochot); +static struct kobj_attribute attr_reason_prochot = __ATTR_RO(reason_prochot); -static ssize_t reason_ratl_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_ratl_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool ratl = !!read_reason_ratl(gt); return sysfs_emit(buff, "%u\n", ratl); } -static DEVICE_ATTR_RO(reason_ratl); +static struct kobj_attribute attr_reason_ratl = __ATTR_RO(reason_ratl); -static ssize_t reason_vr_thermalert_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_vr_thermalert_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool thermalert = !!read_reason_vr_thermalert(gt); return sysfs_emit(buff, "%u\n", thermalert); } -static DEVICE_ATTR_RO(reason_vr_thermalert); +static struct kobj_attribute attr_reason_vr_thermalert = __ATTR_RO(reason_vr_thermalert); -static ssize_t reason_vr_tdc_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t reason_vr_tdc_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { + struct device *dev = kobj_to_dev(kobj); struct xe_gt *gt = dev_to_gt(dev); bool tdc = !!read_reason_vr_tdc(gt); return sysfs_emit(buff, "%u\n", tdc); } -static DEVICE_ATTR_RO(reason_vr_tdc); +static struct kobj_attribute attr_reason_vr_tdc = __ATTR_RO(reason_vr_tdc); static struct attribute *throttle_attrs[] = { - &dev_attr_status.attr, - &dev_attr_reason_pl1.attr, - &dev_attr_reason_pl2.attr, - &dev_attr_reason_pl4.attr, - &dev_attr_reason_thermal.attr, - &dev_attr_reason_prochot.attr, - 
&dev_attr_reason_ratl.attr, - &dev_attr_reason_vr_thermalert.attr, - &dev_attr_reason_vr_tdc.attr, + &attr_status.attr, + &attr_reason_pl1.attr, + &attr_reason_pl2.attr, + &attr_reason_pl4.attr, + &attr_reason_thermal.attr, + &attr_reason_prochot.attr, + &attr_reason_ratl.attr, + &attr_reason_vr_thermalert.attr, + &attr_reason_vr_tdc.attr, NULL }; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 084cbdeba8ea..086c12ee3d9d 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -138,6 +138,14 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) int pending_seqno; /* + * we can get here before the CTs are even initialized if we're wedging + * very early, in which case there are not going to be any pending + * fences so we can bail immediately. + */ + if (!xe_guc_ct_initialized(>->uc.guc.ct)) + return; + + /* * CT channel is already disabled at this point. No new TLB requests can * appear. */ @@ -322,6 +330,40 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) return 0; } +static int send_tlb_invalidation_all(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence) +{ + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION_ALL, + 0, /* seqno, replaced in send_tlb_invalidation */ + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL), + }; + + return send_tlb_invalidation(>->uc.guc, fence, action, ARRAY_SIZE(action)); +} + +/** + * xe_gt_tlb_invalidation_all - Invalidate all TLBs across PF and all VFs. + * @gt: the &xe_gt structure + * @fence: the &xe_gt_tlb_invalidation_fence to be signaled on completion + * + * Send a request to invalidate all TLBs across PF and all VFs. + * + * Return: 0 on success, negative error code on error + */ +int xe_gt_tlb_invalidation_all(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence) +{ + int err; + + xe_gt_assert(gt, gt == fence->gt); + + err = send_tlb_invalidation_all(gt, fence); + if (err) + xe_gt_err(gt, "TLB invalidation request failed (%pe)", ERR_PTR(err)); + + return err; +} + /* * Ensure that roundup_pow_of_two(length) doesn't overflow. * Note that roundup_pow_of_two() operates on unsigned long, @@ -441,30 +483,6 @@ void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm) } /** - * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA - * @gt: GT structure - * @fence: invalidation fence which will be signal on TLB invalidation - * completion, can be NULL - * @vma: VMA to invalidate - * - * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can use - * the invalidation fence to wait for completion. 
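
Note: xe_gt_tlb_invalidation_all() added above takes a caller-provided fence, like the other invalidation entry points, and completion is asynchronous. A minimal usage sketch, assuming the existing stack-fence helpers declared in xe_gt_tlb_invalidation.h (xe_gt_tlb_invalidation_fence_init() and xe_gt_tlb_invalidation_fence_wait()); error handling is elided for brevity.

	static void invalidate_everything(struct xe_gt *gt)
	{
		struct xe_gt_tlb_invalidation_fence fence;

		/* stack-allocated fence; the last argument marks it as such */
		xe_gt_tlb_invalidation_fence_init(gt, &fence, true);

		/* wait only if the request was actually sent */
		if (!xe_gt_tlb_invalidation_all(gt, &fence))
			xe_gt_tlb_invalidation_fence_wait(&fence);
	}
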
- * - * Return: Negative error code on error, 0 on success - */ -int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - struct xe_vma *vma) -{ - xe_gt_assert(gt, vma); - - return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); -} - -/** * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler * @guc: guc * @msg: message indicating TLB invalidation done diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index abe9b03d543e..f7f0f2eaf4b5 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -19,10 +19,8 @@ int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt); void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); -int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - struct xe_vma *vma); void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm); +int xe_gt_tlb_invalidation_all(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence); int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, u64 start, u64 end, u32 asid); diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 516c81e3b8dd..8c63e3263643 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -12,23 +12,20 @@ #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_mmio.h" #include "xe_wa.h" -static void -load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...) +static void load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, + const struct xe_reg regs[]) { - va_list argp; u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {}; int i; - if (drm_WARN_ON(>_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS)) - numregs = XE_MAX_DSS_FUSE_REGS; + xe_gt_assert(gt, numregs <= ARRAY_SIZE(fuse_val)); - va_start(argp, numregs); for (i = 0; i < numregs; i++) - fuse_val[i] = xe_mmio_read32(>->mmio, va_arg(argp, struct xe_reg)); - va_end(argp); + fuse_val[i] = xe_mmio_read32(>->mmio, regs[i]); bitmap_from_arr32(mask, fuse_val, numregs * 32); } @@ -218,9 +215,19 @@ get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs) void xe_gt_topology_init(struct xe_gt *gt) { + static const struct xe_reg geometry_regs[] = { + XELP_GT_GEOMETRY_DSS_ENABLE, + XE2_GT_GEOMETRY_DSS_1, + XE2_GT_GEOMETRY_DSS_2, + }; + static const struct xe_reg compute_regs[] = { + XEHP_GT_COMPUTE_DSS_ENABLE, + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, + XE2_GT_COMPUTE_DSS_2, + }; + int num_geometry_regs, num_compute_regs; struct xe_device *xe = gt_to_xe(gt); struct drm_printer p; - int num_geometry_regs, num_compute_regs; get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs); @@ -228,23 +235,18 @@ xe_gt_topology_init(struct xe_gt *gt) * Register counts returned shouldn't exceed the number of registers * passed as parameters below. 
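
Note: load_dss_mask() below now takes a fixed register array but still funnels the raw fuse dwords through bitmap_from_arr32(), which concatenates 32-bit words into one DSS bitmap: word 0 supplies bits 0-31, word 1 bits 32-63, and so on. A tiny standalone illustration of that concatenation (values are arbitrary):

	#include <linux/bitmap.h>

	/* two fuse dwords become one 64-bit mask; bit 0 of word 1 lands at bit 32 */
	static void demo_dss_concat(void)
	{
		DECLARE_BITMAP(mask, 64);
		u32 fuse_val[2] = { 0x0000000f, 0x00000001 };

		bitmap_from_arr32(mask, fuse_val, 64);
		/* mask now has bits 0-3 and bit 32 set */
	}
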
*/ - drm_WARN_ON(&xe->drm, num_geometry_regs > 3); - drm_WARN_ON(&xe->drm, num_compute_regs > 3); + xe_gt_assert(gt, num_geometry_regs <= ARRAY_SIZE(geometry_regs)); + xe_gt_assert(gt, num_compute_regs <= ARRAY_SIZE(compute_regs)); load_dss_mask(gt, gt->fuse_topo.g_dss_mask, - num_geometry_regs, - XELP_GT_GEOMETRY_DSS_ENABLE, - XE2_GT_GEOMETRY_DSS_1, - XE2_GT_GEOMETRY_DSS_2); - load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs, - XEHP_GT_COMPUTE_DSS_ENABLE, - XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, - XE2_GT_COMPUTE_DSS_2); + num_geometry_regs, geometry_regs); + load_dss_mask(gt, gt->fuse_topo.c_dss_mask, + num_compute_regs, compute_regs); + load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, >->fuse_topo.eu_type); load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask); - p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology"); - + p = xe_gt_dbg_printer(gt); xe_gt_topology_dump(gt, &p); } @@ -288,11 +290,6 @@ xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum) return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); } -bool xe_dss_mask_empty(const xe_dss_mask_t mask) -{ - return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS); -} - /** * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant * @gt: GT to check diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index a72d26ba0653..c8140704ad4c 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -41,8 +41,6 @@ xe_gt_topology_mask_last_dss(const xe_dss_mask_t mask) unsigned int xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum); -bool xe_dss_mask_empty(const xe_dss_mask_t mask); - bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 7def0959da35..96344c604726 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -377,6 +377,8 @@ struct xe_gt { u16 group_target; /** @steering.instance_target: instance to steer accesses to */ u16 instance_target; + /** @steering.initialized: Whether this steering range is initialized */ + bool initialized; } steering[NUM_STEERING_TYPES]; /** diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index bc5714a5b36b..b1d1d6da3758 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -29,6 +29,7 @@ #include "xe_guc_db_mgr.h" #include "xe_guc_engine_activity.h" #include "xe_guc_hwconfig.h" +#include "xe_guc_klv_helpers.h" #include "xe_guc_log.h" #include "xe_guc_pc.h" #include "xe_guc_relay.h" @@ -59,7 +60,7 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc, /* GuC addresses above GUC_GGTT_TOP don't map through the GTT */ xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc))); xe_assert(xe, addr < GUC_GGTT_TOP); - xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr); + xe_assert(xe, xe_bo_size(bo) <= GUC_GGTT_TOP - addr); return addr; } @@ -420,7 +421,7 @@ static int guc_g2g_register(struct xe_guc *near_guc, struct xe_gt *far_gt, u32 t buf = base + G2G_DESC_AREA_SIZE + slot * G2G_BUFFER_SIZE; xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); - xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= g2g_bo->size); + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(g2g_bo)); return guc_action_register_g2g_buffer(near_guc, type, far_tile, far_dev, desc, buf, G2G_BUFFER_SIZE); @@ -483,7 +484,8 @@ static int guc_g2g_alloc(struct xe_guc *guc) XE_BO_FLAG_VRAM_IF_DGFX(tile) | 
XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_ALL | - XE_BO_FLAG_GGTT_INVALIDATE); + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -569,6 +571,86 @@ err_deregister: return err; } +static int __guc_opt_in_features_enable(struct xe_guc *guc, u64 addr, u32 num_dwords) +{ + u32 action[] = { + XE_GUC_ACTION_OPT_IN_FEATURE_KLV, + lower_32_bits(addr), + upper_32_bits(addr), + num_dwords + }; + + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +static bool supports_dynamic_ics(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + /* Dynamic ICS is available for PVC and Xe2 and newer platforms. */ + if (xe->info.platform != XE_PVC && GRAPHICS_VER(xe) < 20) + return false; + + /* + * The feature is currently not compatible with multi-lrc, so the GuC + * does not support it at all on the media engines (which are the main + * users of mlrc). On the primary GT side, to avoid it being used in + * conjunction with mlrc, we only enable it if we are in single CCS + * mode. + */ + if (xe_gt_is_media_type(gt) || gt->ccs_mode > 1) + return false; + + /* + * Dynamic ICS requires GuC v70.40.1, which maps to compatibility + * version v1.18.4. + */ + return GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 18, 4); +} + +#define OPT_IN_MAX_DWORDS 16 +int xe_guc_opt_in_features_enable(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + CLASS(xe_guc_buf, buf)(&guc->buf, OPT_IN_MAX_DWORDS); + u32 count = 0; + u32 *klvs; + int ret; + + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + klvs = xe_guc_buf_cpu_ptr(buf); + + /* + * The extra CAT error type opt-in was added in GuC v70.17.0, which maps + * to compatibility version v1.7.0. + * Note that the GuC allows enabling this KLV even on platforms that do + * not support the extra type; in such case the returned type variable + * will be set to a known invalid value which we can check against. 
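
Note: the opt-in path below streams KLV tag dwords into a borrowed GuC buffer and hands the GGTT address plus dword count to XE_GUC_ACTION_OPT_IN_FEATURE_KLV. Each PREP_GUC_KLV_TAG() entry is a single header dword, with the key in the upper half and a zero length since these opt-ins carry no payload. A hedged sketch of that encoding; KLV_KEY/KLV_LEN here are local stand-ins assumed to mirror the generic GuC KLV header fields (16-bit key, 16-bit length).

	#include <linux/bitfield.h>

	#define KLV_KEY	GENMASK(31, 16)	/* assumed: mirrors GUC_KLV_0_KEY */
	#define KLV_LEN	GENMASK(15, 0)	/* assumed: mirrors GUC_KLV_0_LEN */

	/* a zero-length "opt in" KLV is just its header dword */
	static u32 make_opt_in_klv(u16 key)
	{
		return FIELD_PREP(KLV_KEY, key) | FIELD_PREP(KLV_LEN, 0);
	}
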
+ */ + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 7, 0)) + klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_EXT_CAT_ERR_TYPE); + + if (supports_dynamic_ics(guc)) + klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH); + + if (count) { + xe_assert(xe, count <= OPT_IN_MAX_DWORDS); + + ret = __guc_opt_in_features_enable(guc, xe_guc_buf_flush(buf), count); + if (ret < 0) { + xe_gt_err(guc_to_gt(guc), + "failed to enable GuC opt-in features: %pe\n", + ERR_PTR(ret)); + return ret; + } + } + + return 0; +} + static void guc_fini_hw(void *arg) { struct xe_guc *guc = arg; @@ -576,7 +658,7 @@ static void guc_fini_hw(void *arg) unsigned int fw_ref; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_uc_fini_hw(&guc_to_gt(guc)->uc); + xe_uc_sanitize_reset(&guc_to_gt(guc)->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); guc_g2g_fini(guc); @@ -626,23 +708,51 @@ static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc) return 0; } -static int vf_guc_init(struct xe_guc *guc) +static int vf_guc_init_noalloc(struct xe_guc *guc) { + struct xe_gt *gt = guc_to_gt(guc); int err; - xe_guc_comm_init_early(guc); - - err = xe_guc_ct_init(&guc->ct); + err = xe_gt_sriov_vf_bootstrap(gt); if (err) return err; - err = xe_guc_relay_init(&guc->relay); + err = xe_gt_sriov_vf_query_config(gt); if (err) return err; return 0; } +int xe_guc_init_noalloc(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + xe_guc_comm_init_early(guc); + + ret = xe_guc_ct_init_noalloc(&guc->ct); + if (ret) + goto out; + + ret = xe_guc_relay_init(&guc->relay); + if (ret) + goto out; + + if (IS_SRIOV_VF(xe)) { + ret = vf_guc_init_noalloc(guc); + if (ret) + goto out; + } + + return 0; + +out: + xe_gt_err(gt, "GuC init failed with %pe\n", ERR_PTR(ret)); + return ret; +} + int xe_guc_init(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); @@ -652,13 +762,13 @@ int xe_guc_init(struct xe_guc *guc) guc->fw.type = XE_UC_FW_TYPE_GUC; ret = xe_uc_fw_init(&guc->fw); if (ret) - goto out; + return ret; if (!xe_uc_fw_is_enabled(&guc->fw)) return 0; if (IS_SRIOV_VF(xe)) { - ret = vf_guc_init(guc); + ret = xe_guc_ct_init(&guc->ct); if (ret) goto out; return 0; @@ -680,10 +790,6 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; - ret = xe_guc_relay_init(&guc->relay); - if (ret) - goto out; - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc); @@ -692,8 +798,6 @@ int xe_guc_init(struct xe_guc *guc) guc_init_params(guc); - xe_guc_comm_init_early(guc); - return 0; out: @@ -709,6 +813,10 @@ static int vf_guc_init_post_hwconfig(struct xe_guc *guc) if (err) return err; + err = xe_guc_buf_cache_init(&guc->buf); + if (err) + return err; + /* XXX xe_guc_db_mgr_init not needed for now */ return 0; @@ -762,6 +870,10 @@ int xe_guc_post_load_init(struct xe_guc *guc) xe_guc_ads_populate_post_load(&guc->ads); + ret = xe_guc_opt_in_features_enable(guc); + if (ret) + return ret; + if (xe_guc_g2g_wanted(guc_to_xe(guc))) { ret = guc_g2g_start(guc); if (ret) @@ -1097,14 +1209,6 @@ static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) struct xe_gt *gt = guc_to_gt(guc); int ret; - ret = xe_gt_sriov_vf_bootstrap(gt); - if (ret) - return ret; - - ret = xe_gt_sriov_vf_query_config(gt); - if (ret) - return ret; - ret = xe_guc_hwconfig_init(guc); if (ret) return ret; @@ -1115,13 +1219,17 @@ static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) ret = xe_gt_sriov_vf_connect(gt); if (ret) - 
return ret; + goto err_out; ret = xe_gt_sriov_vf_query_runtime(gt); if (ret) - return ret; + goto err_out; return 0; + +err_out: + xe_guc_sanitize(guc); + return ret; } /** @@ -1284,6 +1392,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, struct xe_reg reply_reg = xe_gt_is_media_type(gt) ? MED_VF_SW_FLAG(0) : VF_SW_FLAG(0); const u32 LAST_INDEX = VF_SW_FLAG_COUNT - 1; + bool lost = false; int ret; int i; @@ -1317,6 +1426,12 @@ retry: FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC), 50000, &reply, false); if (ret) { + /* scratch registers might be cleared during FLR, try once more */ + if (!reply && !lost) { + xe_gt_dbg(gt, "GuC mmio request %#x: lost, trying again\n", request[0]); + lost = true; + goto retry; + } timeout: xe_gt_err(gt, "GuC mmio request %#x: no reply %#x\n", request[0], reply); @@ -1393,6 +1508,7 @@ proto: /* Use data from the GuC response as our return value */ return FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header); } +ALLOW_ERROR_INJECTION(xe_guc_mmio_send_recv, ERRNO); int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len) { @@ -1508,30 +1624,32 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) xe_uc_fw_print(&guc->fw, p); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) - return; + if (!IS_SRIOV_VF(gt_to_xe(gt))) { + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return; + + status = xe_mmio_read32(>->mmio, GUC_STATUS); + + drm_printf(p, "\nGuC status 0x%08x:\n", status); + drm_printf(p, "\tBootrom status = 0x%x\n", + REG_FIELD_GET(GS_BOOTROM_MASK, status)); + drm_printf(p, "\tuKernel status = 0x%x\n", + REG_FIELD_GET(GS_UKERNEL_MASK, status)); + drm_printf(p, "\tMIA Core status = 0x%x\n", + REG_FIELD_GET(GS_MIA_MASK, status)); + drm_printf(p, "\tLog level = %d\n", + xe_guc_log_get_level(&guc->log)); + + drm_puts(p, "\nScratch registers:\n"); + for (i = 0; i < SOFT_SCRATCH_COUNT; i++) { + drm_printf(p, "\t%2d: \t0x%x\n", + i, xe_mmio_read32(>->mmio, SOFT_SCRATCH(i))); + } - status = xe_mmio_read32(>->mmio, GUC_STATUS); - - drm_printf(p, "\nGuC status 0x%08x:\n", status); - drm_printf(p, "\tBootrom status = 0x%x\n", - REG_FIELD_GET(GS_BOOTROM_MASK, status)); - drm_printf(p, "\tuKernel status = 0x%x\n", - REG_FIELD_GET(GS_UKERNEL_MASK, status)); - drm_printf(p, "\tMIA Core status = 0x%x\n", - REG_FIELD_GET(GS_MIA_MASK, status)); - drm_printf(p, "\tLog level = %d\n", - xe_guc_log_get_level(&guc->log)); - - drm_puts(p, "\nScratch registers:\n"); - for (i = 0; i < SOFT_SCRATCH_COUNT; i++) { - drm_printf(p, "\t%2d: \t0x%x\n", - i, xe_mmio_read32(>->mmio, SOFT_SCRATCH(i))); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } - xe_force_wake_put(gt_to_fw(gt), fw_ref); - drm_puts(p, "\n"); xe_guc_ct_print(&guc->ct, p, false); diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 58338be44558..22cf019a11bf 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -26,6 +26,7 @@ struct drm_printer; void xe_guc_comm_init_early(struct xe_guc *guc); +int xe_guc_init_noalloc(struct xe_guc *guc); int xe_guc_init(struct xe_guc *guc); int xe_guc_init_post_hwconfig(struct xe_guc *guc); int xe_guc_post_load_init(struct xe_guc *guc); @@ -33,6 +34,7 @@ int xe_guc_reset(struct xe_guc *guc); int xe_guc_upload(struct xe_guc *guc); int xe_guc_min_load_for_hwconfig(struct xe_guc *guc); int xe_guc_enable_communication(struct xe_guc *guc); +int xe_guc_opt_in_features_enable(struct xe_guc *guc); int xe_guc_suspend(struct xe_guc *guc); void xe_guc_notify(struct 
xe_guc *guc); int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 7031542a70ce..131cfc56be00 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -20,6 +20,7 @@ #include "xe_gt_ccs_mode.h" #include "xe_gt_printk.h" #include "xe_guc.h" +#include "xe_guc_buf.h" #include "xe_guc_capture.h" #include "xe_guc_ct.h" #include "xe_hw_engine.h" @@ -376,6 +377,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads) GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET, &offset, &remain); + if (GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_WA(gt, 16026508708)) + guc_waklv_enable_simple(ads, + GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH, + &offset, &remain); + size = guc_ads_waklv_size(ads) - remain; if (!size) return; @@ -414,7 +420,8 @@ int xe_guc_ads_init(struct xe_guc_ads *ads) bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -710,8 +717,8 @@ static int guc_capture_prep_lists(struct xe_guc_ads *ads) } if (ads->capture_size != PAGE_ALIGN(total_size)) - xe_gt_dbg(gt, "ADS capture alloc size changed from %d to %d\n", - ads->capture_size, PAGE_ALIGN(total_size)); + xe_gt_dbg(gt, "Updated ADS capture size %d (was %d)\n", + PAGE_ALIGN(total_size), ads->capture_size); return PAGE_ALIGN(total_size); } @@ -883,7 +890,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) xe_gt_assert(gt, ads->bo); - xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); guc_policies_init(ads); guc_golden_lrc_init(ads); guc_mapping_table_init_invalid(gt, &info_map); @@ -907,7 +914,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) xe_gt_assert(gt, ads->bo); - xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); guc_policies_init(ads); fill_engine_enable_masks(gt, &info_map); guc_mmio_reg_state_init(ads); @@ -998,16 +1005,16 @@ static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_off */ int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) { - struct xe_device *xe = ads_to_xe(ads); - struct xe_gt *gt = ads_to_gt(ads); - struct xe_tile *tile = gt_to_tile(gt); struct guc_policies *policies; - struct xe_bo *bo; - int ret = 0; + struct xe_guc *guc = ads_to_guc(ads); + struct xe_device *xe = ads_to_xe(ads); + CLASS(xe_guc_buf, buf)(&guc->buf, sizeof(*policies)); - policies = kmalloc(sizeof(*policies), GFP_KERNEL); - if (!policies) - return -ENOMEM; + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + policies = xe_guc_buf_cpu_ptr(buf); + memset(policies, 0, sizeof(*policies)); policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time); policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items); @@ -1017,16 +1024,5 @@ int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) else policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET; - bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies), - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT); - if (IS_ERR(bo)) { - ret = PTR_ERR(bo); - goto out; - } - - ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo)); -out: - 
kfree(policies); - return ret; + return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf)); } diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c index 0193c94dd6a0..14a07dca48e7 100644 --- a/drivers/gpu/drm/xe/xe_guc_buf.c +++ b/drivers/gpu/drm/xe/xe_guc_buf.c @@ -37,10 +37,6 @@ int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache) struct xe_gt *gt = cache_to_gt(cache); struct xe_sa_manager *sam; - /* XXX: currently it's useful only for the PF actions */ - if (!IS_SRIOV_PF(gt_to_xe(gt))) - return 0; - sam = __xe_sa_bo_manager_init(gt_to_tile(gt), SZ_8K, 0, sizeof(u32)); if (IS_ERR(sam)) return PTR_ERR(sam); diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 9095618648bc..859a3ba91be5 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -105,49 +105,49 @@ struct __guc_capture_parsed_output { * 3. Incorrect order will trigger XE_WARN. */ #define COMMON_XELP_BASE_GLOBAL \ - { FORCEWAKE_GT, REG_32BIT, 0, 0, "FORCEWAKE_GT"} + { FORCEWAKE_GT, REG_32BIT, 0, 0, 0, "FORCEWAKE_GT"} #define COMMON_BASE_ENGINE_INSTANCE \ - { RING_HWSTAM(0), REG_32BIT, 0, 0, "HWSTAM"}, \ - { RING_HWS_PGA(0), REG_32BIT, 0, 0, "RING_HWS_PGA"}, \ - { RING_HEAD(0), REG_32BIT, 0, 0, "RING_HEAD"}, \ - { RING_TAIL(0), REG_32BIT, 0, 0, "RING_TAIL"}, \ - { RING_CTL(0), REG_32BIT, 0, 0, "RING_CTL"}, \ - { RING_MI_MODE(0), REG_32BIT, 0, 0, "RING_MI_MODE"}, \ - { RING_MODE(0), REG_32BIT, 0, 0, "RING_MODE"}, \ - { RING_ESR(0), REG_32BIT, 0, 0, "RING_ESR"}, \ - { RING_EMR(0), REG_32BIT, 0, 0, "RING_EMR"}, \ - { RING_EIR(0), REG_32BIT, 0, 0, "RING_EIR"}, \ - { RING_IMR(0), REG_32BIT, 0, 0, "RING_IMR"}, \ - { RING_IPEHR(0), REG_32BIT, 0, 0, "IPEHR"}, \ - { RING_INSTDONE(0), REG_32BIT, 0, 0, "RING_INSTDONE"}, \ - { INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, "INDIRECT_RING_STATE"}, \ - { RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, "ACTHD"}, \ - { RING_BBADDR(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_BBADDR_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_BBADDR"}, \ - { RING_START(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_START_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_START"}, \ - { RING_DMA_FADD(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_DMA_FADD_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_DMA_FADD"}, \ - { RING_EXECLIST_STATUS_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_EXECLIST_STATUS_HI(0), REG_64BIT_HI_DW, 0, 0, "RING_EXECLIST_STATUS"}, \ - { RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \ - { RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0, 0, "RING_EXECLIST_SQ_CONTENTS"} + { RING_HWSTAM(0), REG_32BIT, 0, 0, 0, "HWSTAM"}, \ + { RING_HWS_PGA(0), REG_32BIT, 0, 0, 0, "RING_HWS_PGA"}, \ + { RING_HEAD(0), REG_32BIT, 0, 0, 0, "RING_HEAD"}, \ + { RING_TAIL(0), REG_32BIT, 0, 0, 0, "RING_TAIL"}, \ + { RING_CTL(0), REG_32BIT, 0, 0, 0, "RING_CTL"}, \ + { RING_MI_MODE(0), REG_32BIT, 0, 0, 0, "RING_MI_MODE"}, \ + { RING_MODE(0), REG_32BIT, 0, 0, 0, "RING_MODE"}, \ + { RING_ESR(0), REG_32BIT, 0, 0, 0, "RING_ESR"}, \ + { RING_EMR(0), REG_32BIT, 0, 0, 0, "RING_EMR"}, \ + { RING_EIR(0), REG_32BIT, 0, 0, 0, "RING_EIR"}, \ + { RING_IMR(0), REG_32BIT, 0, 0, 0, "RING_IMR"}, \ + { RING_IPEHR(0), REG_32BIT, 0, 0, 0, "IPEHR"}, \ + { RING_INSTDONE(0), REG_32BIT, 0, 0, 0, "RING_INSTDONE"}, \ + { INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, 0, "INDIRECT_RING_STATE"}, \ + { RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, 
"ACTHD"}, \ + { RING_BBADDR(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_BBADDR_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_BBADDR"}, \ + { RING_START(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_START_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_START"}, \ + { RING_DMA_FADD(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_DMA_FADD_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_DMA_FADD"}, \ + { RING_EXECLIST_STATUS_LO(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_EXECLIST_STATUS_HI(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_EXECLIST_STATUS"}, \ + { RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \ + { RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_EXECLIST_SQ_CONTENTS"} #define COMMON_XELP_RC_CLASS \ - { RCU_MODE, REG_32BIT, 0, 0, "RCU_MODE"} + { RCU_MODE, REG_32BIT, 0, 0, 0, "RCU_MODE"} #define COMMON_XELP_RC_CLASS_INSTDONE \ - { SC_INSTDONE, REG_32BIT, 0, 0, "SC_INSTDONE"}, \ - { SC_INSTDONE_EXTRA, REG_32BIT, 0, 0, "SC_INSTDONE_EXTRA"}, \ - { SC_INSTDONE_EXTRA2, REG_32BIT, 0, 0, "SC_INSTDONE_EXTRA2"} + { SC_INSTDONE, REG_32BIT, 0, 0, 0, "SC_INSTDONE"}, \ + { SC_INSTDONE_EXTRA, REG_32BIT, 0, 0, 0, "SC_INSTDONE_EXTRA"}, \ + { SC_INSTDONE_EXTRA2, REG_32BIT, 0, 0, 0, "SC_INSTDONE_EXTRA2"} #define XELP_VEC_CLASS_REGS \ - { SFC_DONE(0), 0, 0, 0, "SFC_DONE[0]"}, \ - { SFC_DONE(1), 0, 0, 0, "SFC_DONE[1]"}, \ - { SFC_DONE(2), 0, 0, 0, "SFC_DONE[2]"}, \ - { SFC_DONE(3), 0, 0, 0, "SFC_DONE[3]"} + { SFC_DONE(0), 0, 0, 0, 0, "SFC_DONE[0]"}, \ + { SFC_DONE(1), 0, 0, 0, 0, "SFC_DONE[1]"}, \ + { SFC_DONE(2), 0, 0, 0, 0, "SFC_DONE[2]"}, \ + { SFC_DONE(3), 0, 0, 0, 0, "SFC_DONE[3]"} /* XE_LP Global */ static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = { @@ -352,7 +352,7 @@ static const struct __ext_steer_reg xehpg_extregs[] = { static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, const struct __ext_steer_reg *extlist, - int slice_id, int subslice_id) + u32 dss_id, u16 slice_id, u16 subslice_id) { if (!ext || !extlist) return; @@ -361,6 +361,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id); + ext->dss_id = dss_id; ext->regname = extlist->name; } @@ -397,7 +398,7 @@ static void guc_capture_alloc_steered_lists(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); u16 slice, subslice; - int iter, i, total = 0; + int dss, i, total = 0; const struct __guc_mmio_reg_descr_group *lists = guc->capture->reglists; const struct __guc_mmio_reg_descr_group *list; struct __guc_mmio_reg_descr_group *extlists; @@ -454,15 +455,15 @@ static void guc_capture_alloc_steered_lists(struct xe_guc *guc) /* For steering registers, the list is generated at run-time */ extarray = (struct __guc_mmio_reg_descr *)extlists[0].list; - for_each_dss_steering(iter, gt, slice, subslice) { + for_each_dss_steering(dss, gt, slice, subslice) { for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) { - __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); + __fill_ext_reg(extarray, &xe_extregs[i], dss, slice, subslice); ++extarray; } if (has_xehpg_extregs) for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) { - __fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice); + __fill_ext_reg(extarray, &xehpg_extregs[i], dss, slice, subslice); ++extarray; } } @@ -1672,18 +1673,16 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_ { struct xe_gt *gt = snapshot->hwe->gt; struct 
xe_device *xe = gt_to_xe(gt); - struct xe_guc *guc = >->uc.guc; struct xe_devcoredump *devcoredump = &xe->devcoredump; struct xe_devcoredump_snapshot *devcore_snapshot = &devcoredump->snapshot; struct gcap_reg_list_info *reginfo = NULL; u32 i, last_value = 0; - bool is_ext, low32_ready = false; + bool low32_ready = false; if (!list || !list->list || list->num_regs == 0) return; XE_WARN_ON(!devcore_snapshot->matched_node); - is_ext = list == guc->capture->extlists; reginfo = &devcore_snapshot->matched_node->reginfo[type]; /* @@ -1749,17 +1748,12 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_ */ XE_WARN_ON(low32_ready); - if (is_ext) { - int dss, group, instance; - - group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags); - instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags); - dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance); - - drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value); - } else { + if (FIELD_GET(GUC_REGSET_STEERING_NEEDED, reg_desc->flags)) + drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, + reg_desc->dss_id, value); + else drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value); - } + break; } } diff --git a/drivers/gpu/drm/xe/xe_guc_capture_types.h b/drivers/gpu/drm/xe/xe_guc_capture_types.h index ca2d390ccbee..6cb439115597 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture_types.h +++ b/drivers/gpu/drm/xe/xe_guc_capture_types.h @@ -39,6 +39,8 @@ struct __guc_mmio_reg_descr { u32 flags; /** @mask: The mask to apply */ u32 mask; + /** @dss_id: Cached index for steered registers */ + u32 dss_id; /** @regname: Name of the register */ const char *regname; }; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 72ad576fc18e..3f4e6a46ff16 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -25,6 +25,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_monitor.h" +#include "xe_gt_sriov_printk.h" #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_log.h" @@ -34,6 +35,11 @@ #include "xe_pm.h" #include "xe_trace_guc.h" +static void receive_g2h(struct xe_guc_ct *ct); +static void g2h_worker_func(struct work_struct *w); +static void safe_mode_worker_func(struct work_struct *w); +static void ct_exit_safe_mode(struct xe_guc_ct *ct); + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) enum { /* Internal states, not error conditions */ @@ -79,22 +85,28 @@ struct g2h_fence { u16 error; u16 hint; u16 reason; + bool cancel; bool retry; bool fail; bool done; }; +#define make_u64(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) + static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) { + memset(g2h_fence, 0, sizeof(*g2h_fence)); g2h_fence->response_buffer = response_buffer; - g2h_fence->response_data = 0; - g2h_fence->response_len = 0; - g2h_fence->fail = false; - g2h_fence->retry = false; - g2h_fence->done = false; g2h_fence->seqno = ~0x0; } +static void g2h_fence_cancel(struct g2h_fence *g2h_fence) +{ + g2h_fence->cancel = true; + g2h_fence->fail = true; + g2h_fence->done = true; +} + static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) { return g2h_fence->seqno == ~0x0; @@ -186,14 +198,11 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; + ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); } -static void receive_g2h(struct xe_guc_ct *ct); -static void g2h_worker_func(struct work_struct 
*w); -static void safe_mode_worker_func(struct work_struct *w); - static void primelockdep(struct xe_guc_ct *ct) { if (!IS_ENABLED(CONFIG_LOCKDEP)) @@ -204,12 +213,10 @@ static void primelockdep(struct xe_guc_ct *ct) fs_reclaim_release(GFP_KERNEL); } -int xe_guc_ct_init(struct xe_guc_ct *ct) +int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct) { struct xe_device *xe = ct_to_xe(ct); struct xe_gt *gt = ct_to_gt(ct); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_bo *bo; int err; xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); @@ -235,21 +242,32 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) primelockdep(ct); + err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); + if (err) + return err; + + xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); + ct->state = XE_GUC_CT_STATE_DISABLED; + return 0; +} +ALLOW_ERROR_INJECTION(xe_guc_ct_init_noalloc, ERRNO); /* See xe_pci_probe() */ + +int xe_guc_ct_init(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *bo; + bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); ct->bo = bo; - - err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); - if (err) - return err; - - xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); - ct->state = XE_GUC_CT_STATE_DISABLED; return 0; } ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */ @@ -370,9 +388,13 @@ static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable) return ret > 0 ? -EPROTO : ret; } -static void xe_guc_ct_set_state(struct xe_guc_ct *ct, +static void guc_ct_change_state(struct xe_guc_ct *ct, enum xe_guc_ct_state state) { + struct xe_gt *gt = ct_to_gt(ct); + struct g2h_fence *g2h_fence; + unsigned long idx; + mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */ spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */ @@ -384,8 +406,20 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, ct->g2h_outstanding = 0; ct->state = state; + xe_gt_dbg(gt, "GuC CT communication channel %s\n", + state == XE_GUC_CT_STATE_STOPPED ? "stopped" : + str_enabled_disabled(state == XE_GUC_CT_STATE_ENABLED)); + spin_unlock_irq(&ct->fast_lock); + /* cancel all in-flight send-recv requests */ + xa_for_each(&ct->fence_lookup, idx, g2h_fence) + g2h_fence_cancel(g2h_fence); + + /* make sure guc_ct_send_recv() will see g2h_fence changes */ + smp_mb(); + wake_up_all(&ct->g2h_fence_wq); + /* * Lockdep doesn't like this under the fast lock and he destroy only * needs to be serialized with the send path which ct lock provides. 
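[Editor's aside: the guc_ct_change_state() hunk above introduces cancellation of every in-flight H2G send/receive when the CT channel is stopped or disabled — each tracked g2h_fence is marked cancel/fail/done, a barrier publishes the flags, and wake_up_all() releases the waiters so they return -ECANCELED instead of timing out. Below is a userspace pthread analogue of that wake-all pattern, a sketch only: the types are simplified stand-ins and a condition variable plays the role of wait_event()/wake_up_all().]

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct fence { bool done, fail, cancel; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wq = PTHREAD_COND_INITIALIZER;
static struct fence pending[2];		/* stand-in for ct->fence_lookup */

static void *waiter(void *arg)
{
	struct fence *f = arg;

	pthread_mutex_lock(&lock);
	while (!f->done)		/* wait_event(ct->g2h_fence_wq, done) */
		pthread_cond_wait(&wq, &lock);
	pthread_mutex_unlock(&lock);

	printf("request %s\n", f->cancel ? "cancelled (-ECANCELED)" : "completed");
	return NULL;
}

static void channel_state_change(void)	/* guc_ct_change_state() analogue */
{
	pthread_mutex_lock(&lock);
	for (int i = 0; i < 2; i++) {	/* xa_for_each(&ct->fence_lookup, ...) */
		pending[i].cancel = true;
		pending[i].fail = true;
		pending[i].done = true;
	}
	pthread_cond_broadcast(&wq);	/* wake_up_all(&ct->g2h_fence_wq) */
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t[2];

	for (int i = 0; i < 2; i++)
		pthread_create(&t[i], NULL, waiter, &pending[i]);
	channel_state_change();
	for (int i = 0; i < 2; i++)
		pthread_join(t[i], NULL);
	return 0;
}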
@@ -439,7 +473,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) xe_gt_assert(gt, !xe_guc_ct_enabled(ct)); - xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size); + xe_map_memset(xe, &ct->bo->vmap, 0, 0, xe_bo_size(ct->bo)); guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); @@ -455,11 +489,10 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) if (err) goto err_out; - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_ENABLED); smp_mb(); wake_up_all(&ct->wq); - xe_gt_dbg(gt, "GuC CT communication channel enabled\n"); if (ct_needs_safe_mode(ct)) ct_enter_safe_mode(ct); @@ -500,7 +533,7 @@ static void stop_g2h_handler(struct xe_guc_ct *ct) */ void xe_guc_ct_disable(struct xe_guc_ct *ct) { - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED); ct_exit_safe_mode(ct); stop_g2h_handler(ct); } @@ -513,7 +546,10 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct) */ void xe_guc_ct_stop(struct xe_guc_ct *ct) { - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); + if (!xe_guc_ct_initialized(ct)) + return; + + guc_ct_change_state(ct, XE_GUC_CT_STATE_STOPPED); stop_g2h_handler(ct); } @@ -624,6 +660,47 @@ static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) spin_unlock_irq(&ct->fast_lock); } +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) +{ + unsigned int slot = fence % ARRAY_SIZE(ct->fast_req); +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + unsigned long entries[SZ_32]; + unsigned int n; + + n = stack_trace_save(entries, ARRAY_SIZE(entries), 1); + + /* May be called under spinlock, so avoid sleeping */ + ct->fast_req[slot].stack = stack_depot_save(entries, n, GFP_NOWAIT); +#endif + ct->fast_req[slot].fence = fence; + ct->fast_req[slot].action = action; +} +#else +static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) +{ +} +#endif + +/* + * The CT protocol accepts a 16 bits fence. This field is fully owned by the + * driver, the GuC will just copy it to the reply message. Since we need to + * be able to distinguish between replies to REQUEST and FAST_REQUEST messages, + * we use one bit of the seqno as an indicator for that and a rolling counter + * for the remaining 15 bits. + */ +#define CT_SEQNO_MASK GENMASK(14, 0) +#define CT_SEQNO_UNTRACKED BIT(15) +static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence) +{ + u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK; + + if (!is_g2h_fence) + seqno |= CT_SEQNO_UNTRACKED; + + return seqno; +} + #define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, @@ -700,6 +777,9 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | GUC_HXG_EVENT_MSG_0_DATA0, action[0]); } else { + fast_req_track(ct, ct_fence_value, + FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0])); + cmd[1] = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) | FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | @@ -732,25 +812,6 @@ corrupted: return -EPIPE; } -/* - * The CT protocol accepts a 16 bits fence. This field is fully owned by the - * driver, the GuC will just copy it to the reply message. Since we need to - * be able to distinguish between replies to REQUEST and FAST_REQUEST messages, - * we use one bit of the seqno as an indicator for that and a rolling counter - * for the remaining 15 bits. 
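[Editor's aside: the fence layout described in the comment being moved here is easy to sanity-check in isolation — bit 15 tags FAST_REQUEST (untracked) fences and bits 14:0 carry a rolling counter that silently wraps. A standalone sketch, not driver code, reproducing next_ct_seqno() with plain stdint types and printing a few consecutive values.]

#include <stdint.h>
#include <stdio.h>

#define CT_SEQNO_MASK		0x7fffu	/* GENMASK(14, 0) */
#define CT_SEQNO_UNTRACKED	0x8000u	/* BIT(15) */

static uint16_t next_seqno(uint32_t *counter, int is_g2h_fence)
{
	uint16_t seqno = (*counter)++ & CT_SEQNO_MASK;

	if (!is_g2h_fence)
		seqno |= CT_SEQNO_UNTRACKED;

	return seqno;
}

int main(void)
{
	uint32_t counter = 0x7ffe;

	printf("%#x\n", next_seqno(&counter, 1));	/* tracked REQUEST:        0x7ffe */
	printf("%#x\n", next_seqno(&counter, 0));	/* untracked FAST_REQUEST: 0xffff */
	printf("%#x\n", next_seqno(&counter, 1));	/* counter wraps back to 0 */
	return 0;
}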
- */ -#define CT_SEQNO_MASK GENMASK(14, 0) -#define CT_SEQNO_UNTRACKED BIT(15) -static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence) -{ - u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK; - - if (!is_g2h_fence) - seqno |= CT_SEQNO_UNTRACKED; - - return seqno; -} - static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) @@ -759,7 +820,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u16 seqno; int ret; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); xe_gt_assert(gt, !g2h_len || !g2h_fence); xe_gt_assert(gt, !num_g2h || !g2h_fence); xe_gt_assert(gt, !g2h_len || num_g2h); @@ -1051,6 +1112,11 @@ retry_same_fence: goto retry; } if (g2h_fence.fail) { + if (g2h_fence.cancel) { + xe_gt_dbg(gt, "H2G request %#x canceled!\n", action[0]); + ret = -ECANCELED; + goto unlock; + } xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n", action[0], g2h_fence.error, g2h_fence.hint); ret = -EIO; @@ -1059,6 +1125,7 @@ retry_same_fence: if (ret > 0) ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data; +unlock: mutex_unlock(&ct->lock); return ret; @@ -1088,6 +1155,7 @@ int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, KUNIT_STATIC_STUB_REDIRECT(xe_guc_ct_send_recv, ct, action, len, response_buffer); return guc_ct_send_recv(ct, action, len, response_buffer, false); } +ALLOW_ERROR_INJECTION(xe_guc_ct_send_recv, ERRNO); int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 *response_buffer) @@ -1141,6 +1209,55 @@ static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action) return 0; } +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +static void fast_req_report(struct xe_guc_ct *ct, u16 fence) +{ + u16 fence_min = U16_MAX, fence_max = 0; + struct xe_gt *gt = ct_to_gt(ct); + bool found = false; + unsigned int n; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + char *buf; +#endif + + lockdep_assert_held(&ct->lock); + + for (n = 0; n < ARRAY_SIZE(ct->fast_req); n++) { + if (ct->fast_req[n].fence < fence_min) + fence_min = ct->fast_req[n].fence; + if (ct->fast_req[n].fence > fence_max) + fence_max = ct->fast_req[n].fence; + + if (ct->fast_req[n].fence != fence) + continue; + found = true; + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + buf = kmalloc(SZ_4K, GFP_NOWAIT); + if (buf && stack_depot_snprint(ct->fast_req[n].stack, buf, SZ_4K, 0)) + xe_gt_err(gt, "Fence 0x%x was used by action %#04x sent at:\n%s", + fence, ct->fast_req[n].action, buf); + else + xe_gt_err(gt, "Fence 0x%x was used by action %#04x [failed to retrieve stack]\n", + fence, ct->fast_req[n].action); + kfree(buf); +#else + xe_gt_err(gt, "Fence 0x%x was used by action %#04x\n", + fence, ct->fast_req[n].action); +#endif + break; + } + + if (!found) + xe_gt_warn(gt, "Fence 0x%x not found - tracking buffer wrapped? 
[range = 0x%x -> 0x%x, next = 0x%X]\n", + fence, fence_min, fence_max, ct->fence_seqno); +} +#else +static void fast_req_report(struct xe_guc_ct *ct, u16 fence) +{ +} +#endif + static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_gt *gt = ct_to_gt(ct); @@ -1169,6 +1286,9 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) else xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n", type, fence); + + fast_req_report(ct, fence); + CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE); return -EPROTO; @@ -1342,7 +1462,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) u32 action; u32 *hxg; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); lockdep_assert_held(&ct->fast_lock); if (ct->state == XE_GUC_CT_STATE_DISABLED) @@ -1622,6 +1742,186 @@ static void g2h_worker_func(struct work_struct *w) receive_g2h(ct); } +static void xe_fixup_u64_in_cmds(struct xe_device *xe, struct iosys_map *cmds, + u32 size, u32 idx, s64 shift) +{ + u32 hi, lo; + u64 offset; + + lo = xe_map_rd_ring_u32(xe, cmds, idx, size); + hi = xe_map_rd_ring_u32(xe, cmds, idx + 1, size); + offset = make_u64(hi, lo); + offset += shift; + lo = lower_32_bits(offset); + hi = upper_32_bits(offset); + xe_map_wr_ring_u32(xe, cmds, idx, size, lo); + xe_map_wr_ring_u32(xe, cmds, idx + 1, size, hi); +} + +/* + * Shift any GGTT addresses within a single message left within CTB from + * before post-migration recovery. + * @ct: pointer to CT struct of the target GuC + * @cmds: iomap buffer containing CT messages + * @head: start of the target message within the buffer + * @len: length of the target message + * @size: size of the commands buffer + * @shift: the address shift to be added to each GGTT reference + * Return: true if the message was fixed or needed no fixups, false on failure + */ +static bool ct_fixup_ggtt_in_message(struct xe_guc_ct *ct, + struct iosys_map *cmds, u32 head, + u32 len, u32 size, s64 shift) +{ + struct xe_gt *gt = ct_to_gt(ct); + struct xe_device *xe = ct_to_xe(ct); + u32 msg[GUC_HXG_MSG_MIN_LEN]; + u32 action, i, n; + + xe_gt_assert(gt, len >= GUC_HXG_MSG_MIN_LEN); + + msg[0] = xe_map_rd_ring_u32(xe, cmds, head, size); + action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]); + + xe_gt_sriov_dbg_verbose(gt, "fixing H2G %#x\n", action); + + switch (action) { + case XE_GUC_ACTION_REGISTER_CONTEXT: + if (len != XE_GUC_REGISTER_CONTEXT_MSG_LEN) + goto err_len; + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER, + shift); + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER, + shift); + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR, shift); + break; + case XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC: + if (len < XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN) + goto err_len; + n = xe_map_rd_ring_u32(xe, cmds, head + + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS, size); + if (len != XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN + 2 * n) + goto err_len; + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER, + shift); + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER, + shift); + for (i = 0; i < n; i++) + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR + + 2 * i, shift); + break; + default: + 
break; + } + return true; + +err_len: + xe_gt_err(gt, "Skipped G2G %#x message fixups, unexpected length (%u)\n", action, len); + return false; +} + +/* + * Apply fixups to the next outgoing CT message within given CTB + * @ct: the &xe_guc_ct struct instance representing the target GuC + * @h2g: the &guc_ctb struct instance of the target buffer + * @shift: shift to be added to all GGTT addresses within the CTB + * @mhead: pointer to an integer storing message start position; the + * position is changed to next message before this function return + * @avail: size of the area available for parsing, that is length + * of all remaining messages stored within the CTB + * Return: size of the area available for parsing after one message + * has been parsed, that is length remaining from the updated mhead + */ +static int ct_fixup_ggtt_in_buffer(struct xe_guc_ct *ct, struct guc_ctb *h2g, + s64 shift, u32 *mhead, s32 avail) +{ + struct xe_gt *gt = ct_to_gt(ct); + struct xe_device *xe = ct_to_xe(ct); + u32 msg[GUC_HXG_MSG_MIN_LEN]; + u32 size = h2g->info.size; + u32 head = *mhead; + u32 len; + + xe_gt_assert(gt, avail >= (s32)GUC_CTB_MSG_MIN_LEN); + + /* Read header */ + msg[0] = xe_map_rd_ring_u32(xe, &h2g->cmds, head, size); + len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN; + + if (unlikely(len > (u32)avail)) { + xe_gt_err(gt, "H2G channel broken on read, avail=%d, len=%d, fixups skipped\n", + avail, len); + return 0; + } + + head = (head + GUC_CTB_MSG_MIN_LEN) % size; + if (!ct_fixup_ggtt_in_message(ct, &h2g->cmds, head, msg_len_to_hxg_len(len), size, shift)) + return 0; + *mhead = (head + msg_len_to_hxg_len(len)) % size; + + return avail - len; +} + +/** + * xe_guc_ct_fixup_messages_with_ggtt - Fixup any pending H2G CTB messages + * @ct: pointer to CT struct of the target GuC + * @ggtt_shift: shift to be added to all GGTT addresses within the CTB + * + * Messages in GuC to Host CTB are owned by GuC and any fixups in them + * are made by GuC. But content of the Host to GuC CTB is owned by the + * KMD, so fixups to GGTT references in any pending messages need to be + * applied here. + * This function updates GGTT offsets in payloads of pending H2G CTB + * messages (messages which were not consumed by GuC before the VF got + * paused). 
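[Editor's aside: each individual fixup is the same arithmetic as xe_fixup_u64_in_cmds() above — read a lo/hi dword pair out of the ring, recombine it, add the signed GGTT shift, and split it back. A standalone sketch with made-up addresses; the 1 GiB downward shift is purely illustrative.]

#include <stdint.h>
#include <stdio.h>

static void fixup_u64(uint32_t *lo, uint32_t *hi, int64_t shift)
{
	uint64_t addr = ((uint64_t)*hi << 32) | *lo;	/* make_u64(hi, lo) */

	addr += shift;					/* apply the GGTT delta */
	*lo = (uint32_t)addr;				/* lower_32_bits() */
	*hi = (uint32_t)(addr >> 32);			/* upper_32_bits() */
}

int main(void)
{
	/* hypothetical pre-migration GGTT reference 0x1_8000_1000 */
	uint32_t lo = 0x80001000, hi = 0x1;

	fixup_u64(&lo, &hi, -(int64_t)0x40000000);	/* VF range moved down by 1 GiB */
	printf("fixed address: %#x%08x\n", (unsigned)hi, (unsigned)lo);	/* 0x140001000 */
	return 0;
}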
+ */ +void xe_guc_ct_fixup_messages_with_ggtt(struct xe_guc_ct *ct, s64 ggtt_shift) +{ + struct guc_ctb *h2g = &ct->ctbs.h2g; + struct xe_guc *guc = ct_to_guc(ct); + struct xe_gt *gt = guc_to_gt(guc); + u32 head, tail, size; + s32 avail; + + if (unlikely(h2g->info.broken)) + return; + + h2g->info.head = desc_read(ct_to_xe(ct), h2g, head); + head = h2g->info.head; + tail = READ_ONCE(h2g->info.tail); + size = h2g->info.size; + + if (unlikely(head > size)) + goto corrupted; + + if (unlikely(tail >= size)) + goto corrupted; + + avail = tail - head; + + /* beware of buffer wrap case */ + if (unlikely(avail < 0)) + avail += size; + xe_gt_dbg(gt, "available %d (%u:%u:%u)\n", avail, head, tail, size); + xe_gt_assert(gt, avail >= 0); + + while (avail > 0) + avail = ct_fixup_ggtt_in_buffer(ct, h2g, ggtt_shift, &head, avail); + + return; + +corrupted: + xe_gt_err(gt, "Corrupted H2G descriptor head=%u tail=%u size=%u, fixups not applied\n", + head, tail, size); + h2g->info.broken = true; +} + static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic, bool want_ctb) { @@ -1632,7 +1932,7 @@ static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bo return NULL; if (ct->bo && want_ctb) { - snapshot->ctb_size = ct->bo->size; + snapshot->ctb_size = xe_bo_size(ct->bo); snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL); } @@ -1768,6 +2068,24 @@ void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb) } #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) + +#ifdef CONFIG_FUNCTION_ERROR_INJECTION +/* + * This is a helper function which assists the driver in identifying if a fault + * injection test is currently active, allowing it to reduce unnecessary debug + * output. Typically, the function returns zero, but the fault injection + * framework can alter this to return an error. Since faults are injected + * through this function, it's important to ensure the compiler doesn't optimize + * it into an inline function. To avoid such optimization, the 'noinline' + * attribute is applied. Compiler optimizes the static function defined in the + * header file as an inline function. + */ +noinline int xe_is_injection_active(void) { return 0; } +ALLOW_ERROR_INJECTION(xe_is_injection_active, ERRNO); +#else +int xe_is_injection_active(void) { return 0; } +#endif + static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code) { struct xe_guc_log_snapshot *snapshot_log; @@ -1778,6 +2096,12 @@ static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reaso if (ctb) ctb->info.broken = true; + /* + * Huge dump is getting generated when injecting error for guc CT/MMIO + * functions. So, let us suppress the dump when fault is injected. 
+ */ + if (xe_is_injection_active()) + return; /* Ignore further errors after the first dump until a reset */ if (ct->dead.reported) @@ -1828,10 +2152,9 @@ static void ct_dead_print(struct xe_dead_ct *dead) return; } - drm_printf(&lp, "CTB is dead - reason=0x%X\n", dead->reason); - /* Can't generate a genuine core dump at this point, so just do the good bits */ drm_puts(&lp, "**** Xe Device Coredump ****\n"); + drm_printf(&lp, "Reason: CTB is dead - 0x%X\n", dead->reason); xe_device_snapshot_print(xe, &lp); drm_printf(&lp, "**** GT #%d ****\n", gt->info.id); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 82c4ae458dda..18d4225e6502 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -11,6 +11,7 @@ struct drm_printer; struct xe_device; +int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct); int xe_guc_ct_init(struct xe_guc_ct *ct); int xe_guc_ct_enable(struct xe_guc_ct *ct); void xe_guc_ct_disable(struct xe_guc_ct *ct); @@ -22,6 +23,13 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_pr void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb); +void xe_guc_ct_fixup_messages_with_ggtt(struct xe_guc_ct *ct, s64 ggtt_shift); + +static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct) +{ + return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED; +} + static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct) { return ct->state == XE_GUC_CT_STATE_ENABLED; diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index 8e1b9d981d61..8b03b50313d9 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -9,6 +9,7 @@ #include <linux/interrupt.h> #include <linux/iosys-map.h> #include <linux/spinlock_types.h> +#include <linux/stackdepot.h> #include <linux/wait.h> #include <linux/xarray.h> @@ -104,6 +105,18 @@ struct xe_dead_ct { /** snapshot_log: copy of GuC log at point of error */ struct xe_guc_log_snapshot *snapshot_log; }; + +/** struct xe_fast_req_fence - Used to track FAST_REQ messages by fence to match error responses */ +struct xe_fast_req_fence { + /** @fence: sequence number sent in H2G and return in G2H error */ + u16 fence; + /** @action: H2G action code */ + u16 action; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + /** @stack: call stack from when the H2G was sent */ + depot_stack_handle_t stack; +#endif +}; #endif /** @@ -152,6 +165,8 @@ struct xe_guc_ct { #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) /** @dead: information for debugging dead CTs */ struct xe_dead_ct dead; + /** @fast_req: history of FAST_REQ messages for matching with G2H error responses */ + struct xe_fast_req_fence fast_req[SZ_32]; #endif }; diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index c569ff456e74..0b102ab46c4d 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -17,101 +17,130 @@ #include "xe_macros.h" #include "xe_pm.h" -static struct xe_guc *node_to_guc(struct drm_info_node *node) -{ - return node->info_ent->data; -} - -static int guc_info(struct seq_file *m, void *data) +/* + * guc_debugfs_show - A show callback for struct drm_info_list + * @m: the &seq_file + * @data: data used by the drm debugfs helpers + * + * This callback can be used in struct drm_info_list to describe debugfs + * files that are &xe_guc specific in similar way how we handle &xe_gt + * specific files using 
&xe_gt_debugfs_simple_show. + * + * It is assumed that those debugfs files will be created on directory entry + * which grandparent struct dentry d_inode->i_private points to &xe_gt. + * + * /sys/kernel/debug/dri/0/ + * ├── gt0 # dent->d_parent->d_parent (d_inode->i_private == gt) + * │  ├── uc # dent->d_parent + * │  │  ├── guc_info # dent + * │  │  ├── guc_... + * + * This function assumes that &m->private will be set to the &struct + * drm_info_node corresponding to the instance of the info on a given &struct + * drm_minor (see struct drm_info_list.show for details). + * + * This function also assumes that struct drm_info_list.data will point to the + * function code that will actually print a file content:: + * + * int (*print)(struct xe_guc *, struct drm_printer *) + * + * Example:: + * + * int foo(struct xe_guc *guc, struct drm_printer *p) + * { + * drm_printf(p, "enabled %d\n", guc->submission_state.enabled); + * return 0; + * } + * + * static const struct drm_info_list bar[] = { + * { name = "foo", .show = guc_debugfs_show, .data = foo }, + * }; + * + * parent = debugfs_create_dir("uc", gtdir); + * drm_debugfs_create_files(bar, ARRAY_SIZE(bar), parent, minor); + * + * Return: 0 on success or a negative error code on failure. + */ +static int guc_debugfs_show(struct seq_file *m, void *data) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct dentry *parent = node->dent->d_parent; + struct dentry *grandparent = parent->d_parent; + struct xe_gt *gt = grandparent->d_inode->i_private; + struct xe_device *xe = gt_to_xe(gt); + int (*print)(struct xe_guc *, struct drm_printer *) = node->info_ent->data; + int ret; xe_pm_runtime_get(xe); - xe_guc_print_info(guc, &p); + ret = print(>->uc.guc, &p); xe_pm_runtime_put(xe); - return 0; + return ret; } -static int guc_log(struct seq_file *m, void *data) +static int guc_log(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - struct drm_printer p = drm_seq_file_printer(m); - - xe_pm_runtime_get(xe); - xe_guc_log_print(&guc->log, &p); - xe_pm_runtime_put(xe); - + xe_guc_log_print(&guc->log, p); return 0; } -static int guc_log_dmesg(struct seq_file *m, void *data) +static int guc_log_dmesg(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - - xe_pm_runtime_get(xe); xe_guc_log_print_dmesg(&guc->log); - xe_pm_runtime_put(xe); - return 0; } -static int guc_ctb(struct seq_file *m, void *data) +static int guc_ctb(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - struct drm_printer p = drm_seq_file_printer(m); - - xe_pm_runtime_get(xe); - xe_guc_ct_print(&guc->ct, &p, true); - xe_pm_runtime_put(xe); - + xe_guc_ct_print(&guc->ct, p, true); return 0; } -static int guc_pc(struct seq_file *m, void *data) +static int guc_pc(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - struct drm_printer p = drm_seq_file_printer(m); - - xe_pm_runtime_get(xe); - xe_guc_pc_print(&guc->pc, &p); - xe_pm_runtime_put(xe); - + xe_guc_pc_print(&guc->pc, p); return 0; } -static const struct drm_info_list debugfs_list[] = { - {"guc_info", guc_info, 0}, - {"guc_log", guc_log, 0}, - {"guc_log_dmesg", guc_log_dmesg, 
0}, - {"guc_ctb", guc_ctb, 0}, - {"guc_pc", guc_pc, 0}, +/* + * only for GuC debugfs files which can be safely used on the VF as well: + * - without access to the GuC privileged registers + * - without access to the PF specific GuC objects + */ +static const struct drm_info_list vf_safe_debugfs_list[] = { + { "guc_info", .show = guc_debugfs_show, .data = xe_guc_print_info }, + { "guc_ctb", .show = guc_debugfs_show, .data = guc_ctb }, +}; + +/* For GuC debugfs files that require the SLPC support */ +static const struct drm_info_list slpc_debugfs_list[] = { + { "guc_pc", .show = guc_debugfs_show, .data = guc_pc }, +}; + +/* everything else should be added here */ +static const struct drm_info_list pf_only_debugfs_list[] = { + { "guc_log", .show = guc_debugfs_show, .data = guc_log }, + { "guc_log_dmesg", .show = guc_debugfs_show, .data = guc_log_dmesg }, }; void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) { - struct drm_minor *minor = guc_to_xe(guc)->drm.primary; - struct drm_info_list *local; - int i; - -#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) - local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL); - if (!local) - return; + struct xe_device *xe = guc_to_xe(guc); + struct drm_minor *minor = xe->drm.primary; - memcpy(local, debugfs_list, DEBUGFS_SIZE); -#undef DEBUGFS_SIZE + drm_debugfs_create_files(vf_safe_debugfs_list, + ARRAY_SIZE(vf_safe_debugfs_list), + parent, minor); - for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) - local[i].data = guc; + if (!IS_SRIOV_VF(xe)) { + drm_debugfs_create_files(pf_only_debugfs_list, + ARRAY_SIZE(pf_only_debugfs_list), + parent, minor); - drm_debugfs_create_files(local, - ARRAY_SIZE(debugfs_list), - parent, minor); + if (!xe->info.skip_guc_pc) + drm_debugfs_create_files(slpc_debugfs_list, + ARRAY_SIZE(slpc_debugfs_list), + parent, minor); + } } diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.c b/drivers/gpu/drm/xe/xe_guc_engine_activity.c index 2a457dcf31d5..92e1f9f41b8c 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_activity.c +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.c @@ -17,36 +17,61 @@ #include "xe_hw_engine.h" #include "xe_map.h" #include "xe_mmio.h" +#include "xe_sriov_pf_helpers.h" #include "xe_trace_guc.h" #define TOTAL_QUANTA 0x8000 -static struct iosys_map engine_activity_map(struct xe_guc *guc, struct xe_hw_engine *hwe) +static struct iosys_map engine_activity_map(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int index) { struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; - struct engine_activity_buffer *buffer = &engine_activity->device_buffer; + struct engine_activity_buffer *buffer; u16 guc_class = xe_engine_class_to_guc_class(hwe->class); size_t offset; - offset = offsetof(struct guc_engine_activity_data, + if (engine_activity->num_functions) { + buffer = &engine_activity->function_buffer; + offset = sizeof(struct guc_engine_activity_data) * index; + } else { + buffer = &engine_activity->device_buffer; + offset = 0; + } + + offset += offsetof(struct guc_engine_activity_data, engine_activity[guc_class][hwe->logical_instance]); return IOSYS_MAP_INIT_OFFSET(&buffer->activity_bo->vmap, offset); } -static struct iosys_map engine_metadata_map(struct xe_guc *guc) +static struct iosys_map engine_metadata_map(struct xe_guc *guc, + unsigned int index) { struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; - struct engine_activity_buffer *buffer = &engine_activity->device_buffer; + struct engine_activity_buffer 
*buffer; + size_t offset; - return buffer->metadata_bo->vmap; + if (engine_activity->num_functions) { + buffer = &engine_activity->function_buffer; + offset = sizeof(struct guc_engine_activity_metadata) * index; + } else { + buffer = &engine_activity->device_buffer; + offset = 0; + } + + return IOSYS_MAP_INIT_OFFSET(&buffer->metadata_bo->vmap, offset); } static int allocate_engine_activity_group(struct xe_guc *guc) { struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; struct xe_device *xe = guc_to_xe(guc); - u32 num_activity_group = 1; /* Will be modified for VF */ + u32 num_activity_group; + + /* + * An additional activity group is allocated for PF + */ + num_activity_group = IS_SRIOV_PF(xe) ? xe_sriov_pf_get_totalvfs(xe) + 1 : 1; engine_activity->eag = drmm_kcalloc(&xe->drm, num_activity_group, sizeof(struct engine_activity_group), GFP_KERNEL); @@ -60,10 +85,11 @@ static int allocate_engine_activity_group(struct xe_guc *guc) } static int allocate_engine_activity_buffers(struct xe_guc *guc, - struct engine_activity_buffer *buffer) + struct engine_activity_buffer *buffer, + int count) { - u32 metadata_size = sizeof(struct guc_engine_activity_metadata); - u32 size = sizeof(struct guc_engine_activity_data); + u32 metadata_size = sizeof(struct guc_engine_activity_metadata) * count; + u32 size = sizeof(struct guc_engine_activity_data) * count; struct xe_gt *gt = guc_to_gt(guc); struct xe_tile *tile = gt_to_tile(gt); struct xe_bo *bo, *metadata_bo; @@ -98,7 +124,7 @@ static void free_engine_activity_buffers(struct engine_activity_buffer *buffer) static bool is_engine_activity_supported(struct xe_guc *guc) { struct xe_uc_fw_version *version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; - struct xe_uc_fw_version required = { 1, 14, 1 }; + struct xe_uc_fw_version required = { .major = 1, .minor = 14, .patch = 1 }; struct xe_gt *gt = guc_to_gt(guc); if (IS_SRIOV_VF(gt_to_xe(gt))) { @@ -118,10 +144,11 @@ static bool is_engine_activity_supported(struct xe_guc *guc) return true; } -static struct engine_activity *hw_engine_to_engine_activity(struct xe_hw_engine *hwe) +static struct engine_activity *hw_engine_to_engine_activity(struct xe_hw_engine *hwe, + unsigned int index) { struct xe_guc *guc = &hwe->gt->uc.guc; - struct engine_activity_group *eag = &guc->engine_activity.eag[0]; + struct engine_activity_group *eag = &guc->engine_activity.eag[index]; u16 guc_class = xe_engine_class_to_guc_class(hwe->class); return &eag->engine[guc_class][hwe->logical_instance]; @@ -138,9 +165,10 @@ static u64 cpu_ns_to_guc_tsc_tick(ktime_t ns, u32 freq) #define read_metadata_record(xe_, map_, field_) \ xe_map_rd_field(xe_, map_, 0, struct guc_engine_activity_metadata, field_) -static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int index) { - struct engine_activity *ea = hw_engine_to_engine_activity(hwe); + struct engine_activity *ea = hw_engine_to_engine_activity(hwe, index); struct guc_engine_activity *cached_activity = &ea->activity; struct guc_engine_activity_metadata *cached_metadata = &ea->metadata; struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; @@ -151,8 +179,8 @@ static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) u64 active_ticks, gpm_ts; u16 change_num; - activity_map = engine_activity_map(guc, hwe); - metadata_map = engine_metadata_map(guc); + activity_map = engine_activity_map(guc, hwe, index); + 
metadata_map = engine_metadata_map(guc, index); global_change_num = read_metadata_record(xe, &metadata_map, global_change_num); /* GuC has not initialized activity data yet, return 0 */ @@ -194,9 +222,9 @@ update: return ea->total + ea->active; } -static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, unsigned int index) { - struct engine_activity *ea = hw_engine_to_engine_activity(hwe); + struct engine_activity *ea = hw_engine_to_engine_activity(hwe, index); struct guc_engine_activity_metadata *cached_metadata = &ea->metadata; struct guc_engine_activity *cached_activity = &ea->activity; struct iosys_map activity_map, metadata_map; @@ -205,8 +233,8 @@ static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) u64 numerator; u16 quanta_ratio; - activity_map = engine_activity_map(guc, hwe); - metadata_map = engine_metadata_map(guc); + activity_map = engine_activity_map(guc, hwe, index); + metadata_map = engine_metadata_map(guc, index); if (!cached_metadata->guc_tsc_frequency_hz) cached_metadata->guc_tsc_frequency_hz = read_metadata_record(xe, &metadata_map, @@ -245,12 +273,39 @@ static int enable_engine_activity_stats(struct xe_guc *guc) return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); } -static void engine_activity_set_cpu_ts(struct xe_guc *guc) +static int enable_function_engine_activity_stats(struct xe_guc *guc, bool enable) { struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; - struct engine_activity_group *eag = &engine_activity->eag[0]; + u32 metadata_ggtt_addr = 0, ggtt_addr = 0, num_functions = 0; + struct engine_activity_buffer *buffer = &engine_activity->function_buffer; + u32 action[6]; + int len = 0; + + if (enable) { + metadata_ggtt_addr = xe_bo_ggtt_addr(buffer->metadata_bo); + ggtt_addr = xe_bo_ggtt_addr(buffer->activity_bo); + num_functions = engine_activity->num_functions; + } + + action[len++] = XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER; + action[len++] = num_functions; + action[len++] = metadata_ggtt_addr; + action[len++] = 0; + action[len++] = ggtt_addr; + action[len++] = 0; + + /* Blocking here to ensure the buffers are ready before reading them */ + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +static void engine_activity_set_cpu_ts(struct xe_guc *guc, unsigned int index) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct engine_activity_group *eag = &engine_activity->eag[index]; int i, j; + xe_gt_assert(guc_to_gt(guc), index < engine_activity->num_activity_group); + for (i = 0; i < GUC_MAX_ENGINE_CLASSES; i++) for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; j++) eag->engine[i][j].last_cpu_ts = ktime_get(); @@ -265,34 +320,107 @@ static u32 gpm_timestamp_shift(struct xe_gt *gt) return 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); } +static bool is_function_valid(struct xe_guc *guc, unsigned int fn_id) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + + if (!IS_SRIOV_PF(xe) && fn_id) + return false; + + if (engine_activity->num_functions && fn_id >= engine_activity->num_functions) + return false; + + return true; +} + +static int engine_activity_disable_function_stats(struct xe_guc *guc) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct engine_activity_buffer *buffer = &engine_activity->function_buffer; + int ret; + + if 
(!engine_activity->num_functions) + return 0; + + ret = enable_function_engine_activity_stats(guc, false); + if (ret) + return ret; + + free_engine_activity_buffers(buffer); + engine_activity->num_functions = 0; + + return 0; +} + +static int engine_activity_enable_function_stats(struct xe_guc *guc, int num_vfs) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct engine_activity_buffer *buffer = &engine_activity->function_buffer; + int ret, i; + + if (!num_vfs) + return 0; + + /* This includes 1 PF and num_vfs */ + engine_activity->num_functions = num_vfs + 1; + + ret = allocate_engine_activity_buffers(guc, buffer, engine_activity->num_functions); + if (ret) + return ret; + + ret = enable_function_engine_activity_stats(guc, true); + if (ret) { + free_engine_activity_buffers(buffer); + engine_activity->num_functions = 0; + return ret; + } + + /* skip PF as it was already setup */ + for (i = 1; i < engine_activity->num_functions; i++) + engine_activity_set_cpu_ts(guc, i); + + return 0; +} + /** * xe_guc_engine_activity_active_ticks - Get engine active ticks * @guc: The GuC object * @hwe: The hw_engine object + * @fn_id: function id to report on * * Return: accumulated ticks @hwe was active since engine activity stats were enabled. */ -u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int fn_id) { if (!xe_guc_engine_activity_supported(guc)) return 0; - return get_engine_active_ticks(guc, hwe); + if (!is_function_valid(guc, fn_id)) + return 0; + + return get_engine_active_ticks(guc, hwe, fn_id); } /** * xe_guc_engine_activity_total_ticks - Get engine total ticks * @guc: The GuC object * @hwe: The hw_engine object + * @fn_id: function id to report on * * Return: accumulated quanta of ticks allocated for the engine */ -u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int fn_id) { if (!xe_guc_engine_activity_supported(guc)) return 0; - return get_engine_total_ticks(guc, hwe); + if (!is_function_valid(guc, fn_id)) + return 0; + + return get_engine_total_ticks(guc, hwe, fn_id); } /** @@ -311,6 +439,25 @@ bool xe_guc_engine_activity_supported(struct xe_guc *guc) } /** + * xe_guc_engine_activity_function_stats - Enable/Disable per-function engine activity stats + * @guc: The GuC object + * @num_vfs: number of vfs + * @enable: true to enable, false otherwise + * + * Return: 0 on success, negative error code otherwise + */ +int xe_guc_engine_activity_function_stats(struct xe_guc *guc, int num_vfs, bool enable) +{ + if (!xe_guc_engine_activity_supported(guc)) + return 0; + + if (enable) + return engine_activity_enable_function_stats(guc, num_vfs); + + return engine_activity_disable_function_stats(guc); +} + +/** * xe_guc_engine_activity_enable_stats - Enable engine activity stats * @guc: The GuC object * @@ -327,7 +474,7 @@ void xe_guc_engine_activity_enable_stats(struct xe_guc *guc) if (ret) xe_gt_err(guc_to_gt(guc), "failed to enable activity stats%d\n", ret); else - engine_activity_set_cpu_ts(guc); + engine_activity_set_cpu_ts(guc, 0); } static void engine_activity_fini(void *arg) @@ -360,7 +507,7 @@ int xe_guc_engine_activity_init(struct xe_guc *guc) return ret; } - ret = allocate_engine_activity_buffers(guc, &engine_activity->device_buffer); + ret = allocate_engine_activity_buffers(guc, 
&engine_activity->device_buffer, 1); if (ret) { xe_gt_err(gt, "failed to allocate engine activity buffers (%pe)\n", ERR_PTR(ret)); return ret; diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.h b/drivers/gpu/drm/xe/xe_guc_engine_activity.h index a042d4cb404c..b32926c2d208 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_activity.h +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.h @@ -14,6 +14,9 @@ struct xe_guc; int xe_guc_engine_activity_init(struct xe_guc *guc); bool xe_guc_engine_activity_supported(struct xe_guc *guc); void xe_guc_engine_activity_enable_stats(struct xe_guc *guc); -u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe); -u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe); +int xe_guc_engine_activity_function_stats(struct xe_guc *guc, int num_vfs, bool enable); +u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int fn_id); +u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, + unsigned int fn_id); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h b/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h index 5cdd034b6b70..48f69ddefa36 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h @@ -79,14 +79,24 @@ struct xe_guc_engine_activity { /** @num_activity_group: number of activity groups */ u32 num_activity_group; + /** @num_functions: number of functions */ + u32 num_functions; + /** @supported: indicates support for engine activity stats */ bool supported; - /** @eag: holds the device level engine activity data */ + /** + * @eag: holds the device level engine activity data in native mode. + * In SRIOV mode, points to an array with entries which holds the engine + * activity data for PF and VF's + */ struct engine_activity_group *eag; /** @device_buffer: buffer object for global engine activity */ struct engine_activity_buffer device_buffer; + + /** @function_buffer: buffer object for per-function engine activity */ + struct engine_activity_buffer function_buffer; }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h index 4c39f01e4f52..a3f421e2adc0 100644 --- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h @@ -20,6 +20,8 @@ struct xe_exec_queue; struct xe_guc_exec_queue { /** @q: Backpointer to parent xe_exec_queue */ struct xe_exec_queue *q; + /** @rcu: For safe freeing of exported dma fences */ + struct rcu_head rcu; /** @sched: GPU scheduler for this xe_exec_queue */ struct xe_gpu_scheduler sched; /** @entity: Scheduler entity for this xe_exec_queue */ diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 80514a446ba2..c01ccb35dc75 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -79,7 +79,7 @@ static struct xe_guc_log_snapshot *xe_guc_log_snapshot_alloc(struct xe_guc_log * * Also, can't use vmalloc as might be called from atomic context. So need * to break the buffer up into smaller chunks that can be allocated. 
*/ - snapshot->size = log->bo->size; + snapshot->size = xe_bo_size(log->bo); snapshot->num_chunks = DIV_ROUND_UP(snapshot->size, GUC_LOG_CHUNK_SIZE); snapshot->copy = kcalloc(snapshot->num_chunks, sizeof(*snapshot->copy), @@ -260,7 +260,8 @@ int xe_guc_log_init(struct xe_guc_log *log) bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(), XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h index 5b896f5fafaf..f1e2b0be90a9 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.h +++ b/drivers/gpu/drm/xe/xe_guc_log.h @@ -12,7 +12,7 @@ struct drm_printer; struct xe_device; -#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER) +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) #define CRASH_BUFFER_SIZE SZ_1M #define DEBUG_BUFFER_SIZE SZ_8M #define CAPTURE_BUFFER_SIZE SZ_2M diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 43b1192ba61c..68b192fe3b32 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -5,8 +5,11 @@ #include "xe_guc_pc.h" +#include <linux/cleanup.h> #include <linux/delay.h> +#include <linux/jiffies.h> #include <linux/ktime.h> +#include <linux/wait_bit.h> #include <drm/drm_managed.h> #include <drm/drm_print.h> @@ -51,9 +54,12 @@ #define LNL_MERT_FREQ_CAP 800 #define BMG_MERT_FREQ_CAP 2133 +#define BMG_MIN_FREQ 1200 +#define BMG_MERT_FLUSH_FREQ_CAP 2600 #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ +#define SLPC_ACT_FREQ_TIMEOUT_MS 100 /** * DOC: GuC Power Conservation (PC) @@ -141,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc, return -ETIMEDOUT; } +static int wait_for_flush_complete(struct xe_guc_pc *pc) +{ + const unsigned long timeout = msecs_to_jiffies(30); + + if (!wait_var_event_timeout(&pc->flush_freq_limit, + !atomic_read(&pc->flush_freq_limit), + timeout)) + return -ETIMEDOUT; + + return 0; +} + +static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq) +{ + int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC; + int slept, wait = 10; + + for (slept = 0; slept < timeout_us;) { + if (xe_guc_pc_get_act_freq(pc) <= freq) + return 0; + + usleep_range(wait, wait << 1); + slept += wait; + wait <<= 1; + if (slept + wait > timeout_us) + wait = timeout_us - slept; + } + + return -ETIMEDOUT; +} static int pc_action_reset(struct xe_guc_pc *pc) { struct xe_guc_ct *ct = pc_to_ct(pc); @@ -153,7 +189,7 @@ static int pc_action_reset(struct xe_guc_pc *pc) int ret; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC reset failed: %pe\n", ERR_PTR(ret)); @@ -177,7 +213,7 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc) /* Blocking here to ensure the results are ready before reading them */ ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action)); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC query task state failed: %pe\n", ERR_PTR(ret)); @@ -200,7 +236,7 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) return -EAGAIN; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC set 
param[%u]=%u failed: %pe\n", id, value, ERR_PTR(ret)); @@ -222,7 +258,7 @@ static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id) return -EAGAIN; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC unset param failed: %pe", ERR_PTR(ret)); @@ -239,7 +275,7 @@ static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) int ret; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC RC enable mode=%u failed: %pe\n", mode, ERR_PTR(ret)); return ret; @@ -462,6 +498,21 @@ static u32 get_cur_freq(struct xe_gt *gt) } /** + * xe_guc_pc_get_cur_freq_fw - With fw held, get requested frequency + * @pc: The GuC PC + * + * Returns: the requested frequency for that GT instance + */ +u32 xe_guc_pc_get_cur_freq_fw(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + return get_cur_freq(gt); +} + +/** * xe_guc_pc_get_cur_freq - Get Current requested frequency * @pc: The GuC PC * @freq: A pointer to a u32 where the freq value will be returned @@ -538,6 +589,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) return pc->rpn_freq; } +static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ + int ret; + + lockdep_assert_held(&pc->freq_lock); + + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); + if (ret) + return ret; + + *freq = pc_get_min_freq(pc); + + return 0; +} + /** * xe_guc_pc_get_min_freq - Get the min operational frequency * @pc: The GuC PC @@ -548,26 +618,28 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) */ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - xe_device_assert_mem_access(pc_to_xe(pc)); + lockdep_assert_held(&pc->freq_lock); - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; - ret = pc_action_query_task_state(pc); + ret = pc_set_min_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_min_freq(pc); + pc->user_requested_min = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -581,24 +653,28 @@ out: */ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_set_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_set_min_freq(pc, freq); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); if (ret) - goto out; + return ret; - pc->user_requested_min = freq; + *freq = pc_get_max_freq(pc); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -611,24 +687,28 @@ out: */ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return 
xe_guc_pc_get_max_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_action_query_task_state(pc); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_set_max_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_max_freq(pc); + pc->user_requested_max = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -642,24 +722,14 @@ out: */ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { - int ret; - - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; + if (XE_WA(pc_to_gt(pc), 22019338487)) { + if (wait_for_flush_complete(pc) != 0) + return -EAGAIN; } - ret = pc_set_max_freq(pc, freq); - if (ret) - goto out; - - pc->user_requested_max = freq; + guard(mutex)(&pc->freq_lock); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return xe_guc_pc_set_max_freq_locked(pc, freq); } /** @@ -802,6 +872,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc) static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) { + struct xe_tile *tile = gt_to_tile(pc_to_gt(pc)); int ret; lockdep_assert_held(&pc->freq_lock); @@ -828,6 +899,9 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) if (pc_get_min_freq(pc) > pc->rp0_freq) ret = pc_set_min_freq(pc, pc->rp0_freq); + if (XE_WA(tile->primary_gt, 14022085890)) + ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc))); + out: return ret; } @@ -853,30 +927,117 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } -static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +static bool needs_flush_freq_limit(struct xe_guc_pc *pc) { - int ret = 0; + struct xe_gt *gt = pc_to_gt(pc); - if (XE_WA(pc_to_gt(pc), 22019338487)) { - /* - * Get updated min/max and stash them. - */ - ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); - if (!ret) - ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); - if (ret) - return ret; + return XE_WA(gt, 22019338487) && + pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP; +} + +/** + * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush + * @pc: the xe_guc_pc object + * + * As per the WA, reduce max GT frequency during L2 cache flush + */ +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 max_freq; + int ret; + + if (!needs_flush_freq_limit(pc)) + return; + + guard(mutex)(&pc->freq_lock); + + ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq); + if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) { + ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) { + xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); + return; + } + + atomic_set(&pc->flush_freq_limit, 1); /* - * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. + * If user has previously changed max freq, stash that value to + * restore later, otherwise use the current max. New user + * requests wait on flush. 
*/ - mutex_lock(&pc->freq_lock); - ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); - if (!ret) - ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); - mutex_unlock(&pc->freq_lock); + if (pc->user_requested_max != 0) + pc->stashed_max_freq = pc->user_requested_max; + else + pc->stashed_max_freq = max_freq; } + /* + * Wait for actual freq to go below the flush cap: even if the previous + * max was below cap, the current one might still be above it + */ + ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) + xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); +} + +/** + * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes. + * @pc: the xe_guc_pc object + * + * Retrieve the previous GT max frequency value. + */ +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret = 0; + + if (!needs_flush_freq_limit(pc)) + return; + + if (!atomic_read(&pc->flush_freq_limit)) + return; + + mutex_lock(&pc->freq_lock); + + ret = pc_set_max_freq(>->uc.guc.pc, pc->stashed_max_freq); + if (ret) + xe_gt_err_once(gt, "Failed to restore max freq %u:%d", + pc->stashed_max_freq, ret); + + atomic_set(&pc->flush_freq_limit, 0); + mutex_unlock(&pc->freq_lock); + wake_up_var(&pc->flush_freq_limit); +} + +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +{ + int ret; + + if (!XE_WA(pc_to_gt(pc), 22019338487)) + return 0; + + guard(mutex)(&pc->freq_lock); + + /* + * Get updated min/max and stash them. + */ + ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq); + if (!ret) + ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq); + if (ret) + return ret; + + /* + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. 
+ */ + ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); + if (!ret) + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); + return ret; } @@ -1053,7 +1214,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) goto out; } - memset(pc->bo->vmap.vaddr, 0, size); + xe_map_memset(xe, &pc->bo->vmap, 0, 0, size); slpc_shared_data_write(pc, header.size, size); earlier = ktime_get(); @@ -1170,7 +1331,8 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) bo = xe_managed_bo_create_pin_map(xe, tile, size, XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 39102b79602f..52ecdd5ddbff 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -22,6 +22,7 @@ void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p); u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc); int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq); +u32 xe_guc_pc_get_cur_freq_fw(struct xe_guc_pc *pc); u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc); u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc); u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc); @@ -37,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc); +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 2978ac9a249b..c02053948a57 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -15,6 +15,8 @@ struct xe_guc_pc { /** @bo: GGTT buffer object that is shared with GuC PC */ struct xe_bo *bo; + /** @flush_freq_limit: 1 when max freq changes are limited by driver */ + atomic_t flush_freq_limit; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; /** @rpa_freq: HW RPa frequency - The Achievable one */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 769781d577df..cafb47711e9b 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -229,6 +229,17 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) static void guc_submit_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + ret = wait_event_timeout(guc->submission_state.fini_wq, + xa_empty(&guc->submission_state.exec_queue_lookup), + HZ * 5); + + drain_workqueue(xe->destroy_wq); + + xe_gt_assert(gt, ret); xa_destroy(&guc->submission_state.exec_queue_lookup); } @@ -300,6 +311,8 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) primelockdep(guc); + guc->submission_state.initialized = true; + return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); } @@ -485,6 +498,15 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc, action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); } + /* explicitly checks some fields that we might fixup later */ + xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]); + xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == + 
action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]); + xe_gt_assert(guc_to_gt(guc), q->width == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]); + xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == + action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]); xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE); #undef MAX_MLRC_REG_SIZE @@ -509,6 +531,14 @@ static void __register_exec_queue(struct xe_guc *guc, info->hwlrca_hi, }; + /* explicitly checks some fields that we might fixup later */ + xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == + action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]); + xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == + action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]); + xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == + action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]); + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); } @@ -834,6 +864,13 @@ void xe_guc_submit_wedge(struct xe_guc *guc) xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); + /* + * If device is being wedged even before submission_state is + * initialized, there's nothing to do here. + */ + if (!guc->submission_state.initialized) + return; + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { @@ -871,12 +908,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged; + bool wedged = false; xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); @@ -950,10 +988,7 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) */ xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); - if (ctx_timestamp < ctx_job_timestamp) - diff = ctx_timestamp + U32_MAX - ctx_job_timestamp; - else - diff = ctx_timestamp - ctx_job_timestamp; + diff = ctx_timestamp - ctx_job_timestamp; /* * Ensure timeout is within 5% to account for an GuC scheduling latency @@ -1050,7 +1085,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int err = -ETIME; pid_t pid = -1; int i = 0; - bool wedged, skip_timeout_check; + bool wedged = false, skip_timeout_check; /* * TDR has fired before free job worker. Common if exec queue @@ -1058,12 +1093,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * list so job can be freed and kick scheduler ensuring free job is not * lost. */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - - return DRM_GPU_SCHED_STAT_NOMINAL; - } + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) + return DRM_GPU_SCHED_STAT_NO_HANG; /* Kill the run_job entry point */ xe_sched_submission_stop(sched); @@ -1096,7 +1127,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * doesn't work for SRIOV. For now assuming timeouts in wedged mode are * genuine timeouts. 
*/ - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Engine state now stable, disable scheduling to check timestamp */ if (!wedged && exec_queue_registered(q)) { @@ -1170,9 +1202,12 @@ trigger_reset: process_name = q->vm->xef->process_name; pid = q->vm->xef->pid; } - xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, q->flags, process_name, pid); + + if (!exec_queue_killed(q)) + xe_gt_notice(guc_to_gt(guc), + "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), + q->guc->id, q->flags, process_name, pid); trace_xe_sched_job_timedout(job); @@ -1228,7 +1263,7 @@ trigger_reset: /* Start fence signaling */ xe_hw_fence_irq_start(q->fence_irq); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; sched_enable: enable_scheduling(q); @@ -1238,10 +1273,8 @@ rearm: * but there is not currently an easy way to do in DRM scheduler. With * some thought, do this in a follow up. */ - xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); - - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_NO_HANG; } static void __guc_exec_queue_fini_async(struct work_struct *w) @@ -1262,7 +1295,11 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); - kfree(ge); + /* + * RCU free due sched being exported via DRM scheduler fences + * (timeline name). + */ + kfree_rcu(ge, rcu); xe_exec_queue_fini(q); xe_pm_runtime_put(guc_to_xe(guc)); } @@ -1445,6 +1482,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) q->guc = ge; ge->q = q; + init_rcu_head(&ge->rcu); init_waitqueue_head(&ge->suspend_wait); for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) @@ -1739,6 +1777,9 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc) { int ret; + if (!guc->submission_state.initialized) + return 0; + /* * Using an atomic here rather than submission_state.lock as this * function can be called while holding the CT lock (engine reset @@ -2045,12 +2086,16 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; u32 guc_id; + u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; - if (unlikely(len < 1)) + if (unlikely(!len || len > 2)) return -EPROTO; guc_id = msg[0]; + if (len == 2) + type = msg[1]; + if (guc_id == GUC_ID_UNKNOWN) { /* * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF @@ -2064,8 +2109,19 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, if (unlikely(!q)) return -EPROTO; - xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d", - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + /* + * The type is HW-defined and changes based on platform, so we don't + * decode it in the kernel and only check if it is valid. + * See bspec 54047 and 72187 for details. 
+ */ + if (type != XE_GUC_CAT_ERR_TYPE_INVALID) + xe_gt_dbg(gt, + "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", + type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + else + xe_gt_dbg(gt, + "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", + xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); trace_xe_exec_queue_memory_cat_error(q); diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 63bac64429a5..1fde7614fcc5 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -89,6 +89,11 @@ struct xe_guc { struct mutex lock; /** @submission_state.enabled: submission is enabled */ bool enabled; + /** + * @submission_state.initialized: mark when submission state is + * even initialized - before that not even the lock is valid + */ + bool initialized; /** @submission_state.fini_wq: submit fini wait queue */ wait_queue_head_t fini_wq; } submission_state; diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index 27d11e06a82b..6d7b62724126 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -11,15 +11,12 @@ #include "xe_device_types.h" #include "xe_drv.h" #include "xe_heci_gsc.h" +#include "regs/xe_gsc_regs.h" #include "xe_platform_types.h" #include "xe_survivability_mode.h" #define GSC_BAR_LENGTH 0x00000FFC -#define DG1_GSC_HECI2_BASE 0x259000 -#define PVC_GSC_HECI2_BASE 0x285000 -#define DG2_GSC_HECI2_BASE 0x374000 - static void heci_gsc_irq_mask(struct irq_data *d) { /* generic irq handling */ diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 6a846e4cb221..7e43b2dd6a32 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -171,7 +171,7 @@ static int huc_auth_via_gsccs(struct xe_huc *huc) sizeof(struct pxp43_new_huc_auth_in)); wr_offset = huc_emit_pxp_auth_msg(xe, &pkt->vmap, wr_offset, xe_bo_ggtt_addr(huc->fw.bo), - huc->fw.bo->size); + xe_bo_size(huc->fw.bo)); do { err = xe_gsc_pkt_submit_kernel(>->uc.gsc, ggtt_offset, wr_offset, ggtt_offset + PXP43_HUC_AUTH_INOUT_SIZE, diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 93241fd0a4ba..796ba8c34a16 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -17,6 +17,7 @@ #include "regs/xe_irq_regs.h" #include "xe_assert.h" #include "xe_bo.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_execlist.h" #include "xe_force_wake.h" @@ -693,7 +694,7 @@ static void read_media_fuses(struct xe_gt *gt) if (!(BIT(j) & vdbox_mask)) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "vcs%u fused off\n", j); + xe_gt_info(gt, "vcs%u fused off\n", j); } } @@ -703,7 +704,7 @@ static void read_media_fuses(struct xe_gt *gt) if (!(BIT(j) & vebox_mask)) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "vecs%u fused off\n", j); + xe_gt_info(gt, "vecs%u fused off\n", j); } } } @@ -728,15 +729,13 @@ static void read_copy_fuses(struct xe_gt *gt) if (!(BIT(j / 2) & bcs_mask)) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "bcs%u fused off\n", j); + xe_gt_info(gt, "bcs%u fused off\n", j); } } } static void read_compute_fuses_from_dss(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); - /* * CCS fusing based on DSS masks only applies to platforms that can * have more than one CCS. 
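
Editorial note (not part of the patch): the read_media_fuses()/read_copy_fuses()/read_compute_fuses_from_dss() hunks above all follow the same pattern of pruning gt->info.engine_mask for every engine whose bit is clear in a fuse register, now logging through xe_gt_info() instead of drm_info(). The standalone sketch below only illustrates that pruning pattern; the engine ID values, the 4-bit vdbox fuse value and the user-space harness are made up for illustration and are not taken from the driver.

#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1ULL << (n))

enum { VCS0 = 8, VCS1, VCS2, VCS3 };   /* hypothetical engine IDs */

int main(void)
{
	uint64_t engine_mask = BIT(VCS0) | BIT(VCS1) | BIT(VCS2) | BIT(VCS3);
	uint32_t vdbox_mask = 0x5;         /* fuses report only vcs0 and vcs2 present */
	unsigned int i, j;

	/* Same idea as the driver loop: clear fused-off engines from the mask */
	for (i = VCS0, j = 0; i <= VCS3; i++, j++) {
		if (!(BIT(j) & vdbox_mask)) {
			engine_mask &= ~BIT(i);
			printf("vcs%u fused off\n", j);
		}
	}

	printf("engine_mask = 0x%llx\n", (unsigned long long)engine_mask);
	return 0;
}

With the hypothetical inputs above this prints "vcs1 fused off", "vcs3 fused off" and leaves only the VCS0/VCS2 bits set, which is the same effect the fuse-reading helpers have on the real engine mask.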
@@ -755,14 +754,13 @@ static void read_compute_fuses_from_dss(struct xe_gt *gt) if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "ccs%u fused off\n", j); + xe_gt_info(gt, "ccs%u fused off\n", j); } } } static void read_compute_fuses_from_reg(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); u32 ccs_mask; ccs_mask = xe_mmio_read32(>->mmio, XEHP_FUSE4); @@ -774,7 +772,7 @@ static void read_compute_fuses_from_reg(struct xe_gt *gt) if ((ccs_mask & BIT(j)) == 0) { gt->info.engine_mask &= ~BIT(i); - drm_info(&xe->drm, "ccs%u fused off\n", j); + xe_gt_info(gt, "ccs%u fused off\n", j); } } } @@ -789,8 +787,6 @@ static void read_compute_fuses(struct xe_gt *gt) static void check_gsc_availability(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); - if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))) return; @@ -806,7 +802,25 @@ static void check_gsc_availability(struct xe_gt *gt) xe_mmio_write32(>->mmio, GUNIT_GSC_INTR_ENABLE, 0); xe_mmio_write32(>->mmio, GUNIT_GSC_INTR_MASK, ~0); - drm_dbg(&xe->drm, "GSC FW not used, disabling gsccs\n"); + xe_gt_dbg(gt, "GSC FW not used, disabling gsccs\n"); + } +} + +static void check_sw_disable(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + u64 sw_allowed = xe_configfs_get_engines_allowed(to_pci_dev(xe->drm.dev)); + enum xe_hw_engine_id id; + + for (id = 0; id < XE_NUM_HW_ENGINES; ++id) { + if (!(gt->info.engine_mask & BIT(id))) + continue; + + if (!(sw_allowed & BIT(id))) { + gt->info.engine_mask &= ~BIT(id); + xe_gt_info(gt, "%s disabled via configfs\n", + engine_infos[id].name); + } } } @@ -818,6 +832,7 @@ int xe_hw_engines_init_early(struct xe_gt *gt) read_copy_fuses(gt); read_compute_fuses(gt); check_gsc_availability(gt); + check_sw_disable(gt); BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN); BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX); @@ -1044,12 +1059,13 @@ struct xe_hw_engine * xe_hw_engine_lookup(struct xe_device *xe, struct drm_xe_engine_class_instance eci) { + struct xe_gt *gt = xe_device_get_gt(xe, eci.gt_id); unsigned int idx; if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) return NULL; - if (eci.gt_id >= xe->info.gt_count) + if (!gt) return NULL; idx = array_index_nospec(eci.engine_class, diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index a440442b4d72..640950172088 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -605,6 +605,7 @@ err_object: kobject_put(kobj); return err; } +ALLOW_ERROR_INJECTION(xe_add_hw_engine_class_defaults, ERRNO); /* See xe_pci_probe() */ static void hw_engine_class_sysfs_fini(void *arg) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 2d68c5b5262a..c926f840c87b 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -13,15 +13,6 @@ #include "xe_vm.h" static void -hw_engine_group_free(struct drm_device *drm, void *arg) -{ - struct xe_hw_engine_group *group = arg; - - destroy_workqueue(group->resume_wq); - kfree(group); -} - -static void hw_engine_group_resume_lr_jobs_func(struct work_struct *w) { struct xe_exec_queue *q; @@ -53,7 +44,7 @@ hw_engine_group_alloc(struct xe_device *xe) struct xe_hw_engine_group *group; int err; - group = kzalloc(sizeof(*group), GFP_KERNEL); + group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL); if 
(!group) return ERR_PTR(-ENOMEM); @@ -61,14 +52,14 @@ hw_engine_group_alloc(struct xe_device *xe) if (!group->resume_wq) return ERR_PTR(-ENOMEM); + err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq); + if (err) + return ERR_PTR(err); + init_rwsem(&group->mode_sem); INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); INIT_LIST_HEAD(&group->exec_queue_list); - err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); - if (err) - return ERR_PTR(err); - return group; } @@ -84,25 +75,18 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) enum xe_hw_engine_id id; struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs; struct xe_device *xe = gt_to_xe(gt); - int err; group_rcs_ccs = hw_engine_group_alloc(xe); - if (IS_ERR(group_rcs_ccs)) { - err = PTR_ERR(group_rcs_ccs); - goto err_group_rcs_ccs; - } + if (IS_ERR(group_rcs_ccs)) + return PTR_ERR(group_rcs_ccs); group_bcs = hw_engine_group_alloc(xe); - if (IS_ERR(group_bcs)) { - err = PTR_ERR(group_bcs); - goto err_group_bcs; - } + if (IS_ERR(group_bcs)) + return PTR_ERR(group_bcs); group_vcs_vecs = hw_engine_group_alloc(xe); - if (IS_ERR(group_vcs_vecs)) { - err = PTR_ERR(group_vcs_vecs); - goto err_group_vcs_vecs; - } + if (IS_ERR(group_vcs_vecs)) + return PTR_ERR(group_vcs_vecs); for_each_hw_engine(hwe, gt, id) { switch (hwe->class) { @@ -125,15 +109,6 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) } return 0; - -err_group_vcs_vecs: - kfree(group_vcs_vecs); -err_group_bcs: - kfree(group_bcs); -err_group_rcs_ccs: - kfree(group_rcs_ccs); - - return err; } /** diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 0b4f12be3692..b2a0c46dfcd4 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -100,6 +100,9 @@ void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq) spin_unlock_irqrestore(&irq->lock, flags); dma_fence_end_signalling(tmp); } + + /* Safe release of the irq->lock used in dma_fence_init. 
*/ + synchronize_rcu(); } void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq) @@ -165,7 +168,7 @@ static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); return dma_fence->error || - !__dma_fence_is_later(dma_fence->seqno, seqno, dma_fence->ops); + !__dma_fence_is_later(dma_fence, dma_fence->seqno, seqno); } static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 48d80ffdf7bb..f08fc4377d25 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -5,6 +5,7 @@ #include <linux/hwmon-sysfs.h> #include <linux/hwmon.h> +#include <linux/jiffies.h> #include <linux/types.h> #include <linux/units.h> @@ -19,6 +20,8 @@ #include "xe_pcode_api.h" #include "xe_sriov.h" #include "xe_pm.h" +#include "xe_vsec.h" +#include "regs/xe_pmt.h" enum xe_hwmon_reg { REG_TEMP, @@ -27,6 +30,7 @@ enum xe_hwmon_reg { REG_PKG_POWER_SKU_UNIT, REG_GT_PERF_STATUS, REG_PKG_ENERGY_STATUS, + REG_FAN_SPEED, }; enum xe_hwmon_reg_operation { @@ -42,6 +46,29 @@ enum xe_hwmon_channel { CHANNEL_MAX, }; +enum xe_fan_channel { + FAN_1, + FAN_2, + FAN_3, + FAN_MAX, +}; + +/* Attribute index for powerX_xxx_interval sysfs entries */ +enum sensor_attr_power { + SENSOR_INDEX_PSYS_PL1, + SENSOR_INDEX_PKG_PL1, + SENSOR_INDEX_PSYS_PL2, + SENSOR_INDEX_PKG_PL2, +}; + +/* + * For platforms that support mailbox commands for power limits, REG_PKG_POWER_SKU_UNIT is + * not supported and below are SKU units to be used. + */ +#define PWR_UNIT 0x3 +#define ENERGY_UNIT 0xe +#define TIME_UNIT 0xa + /* * SF_* - scale factors for particular quantities according to hwmon spec. */ @@ -51,6 +78,19 @@ enum xe_hwmon_channel { #define SF_ENERGY 1000000 /* microjoules */ #define SF_TIME 1000 /* milliseconds */ +/* + * PL*_HWMON_ATTR - mapping of hardware power limits to corresponding hwmon power attribute. + */ +#define PL1_HWMON_ATTR hwmon_power_max +#define PL2_HWMON_ATTR hwmon_power_cap + +#define PWR_ATTR_TO_STR(attr) (((attr) == hwmon_power_max) ? "PL1" : "PL2") + +/* + * Timeout for power limit write mailbox command. + */ +#define PL_WRITE_MBX_TIMEOUT_MS (1) + /** * struct xe_hwmon_energy_info - to accumulate energy */ @@ -62,6 +102,16 @@ struct xe_hwmon_energy_info { }; /** + * struct xe_hwmon_fan_info - to cache previous fan reading + */ +struct xe_hwmon_fan_info { + /** @reg_val_prev: previous fan reg val */ + u32 reg_val_prev; + /** @time_prev: previous timestamp */ + u64 time_prev; +}; + +/** * struct xe_hwmon - xe hwmon data structure */ struct xe_hwmon { @@ -79,8 +129,89 @@ struct xe_hwmon { int scl_shift_time; /** @ei: Energy info for energyN_input */ struct xe_hwmon_energy_info ei[CHANNEL_MAX]; + /** @fi: Fan info for fanN_input */ + struct xe_hwmon_fan_info fi[FAN_MAX]; + /** @boot_power_limit_read: is boot power limits read */ + bool boot_power_limit_read; + /** @pl1_on_boot: power limit PL1 on boot */ + u32 pl1_on_boot[CHANNEL_MAX]; + /** @pl2_on_boot: power limit PL2 on boot */ + u32 pl2_on_boot[CHANNEL_MAX]; + }; +static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 attr, int channel, + u32 *uval) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + u32 val0 = 0, val1 = 0; + int ret = 0; + + ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, + (channel == CHANNEL_CARD) ? + READ_PSYSGPU_POWER_LIMIT : + READ_PACKAGE_POWER_LIMIT, + hwmon->boot_power_limit_read ? 
+ READ_PL_FROM_PCODE : READ_PL_FROM_FW), + &val0, &val1); + + if (ret) { + drm_dbg(&hwmon->xe->drm, "read failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n", + channel, val0, val1, ret); + *uval = 0; + return ret; + } + + /* return the value only if limit is enabled */ + if (attr == PL1_HWMON_ATTR) + *uval = (val0 & PWR_LIM_EN) ? val0 : 0; + else if (attr == PL2_HWMON_ATTR) + *uval = (val1 & PWR_LIM_EN) ? val1 : 0; + else if (attr == hwmon_power_label) + *uval = (val0 & PWR_LIM_EN) ? 1 : (val1 & PWR_LIM_EN) ? 1 : 0; + else + *uval = 0; + + return ret; +} + +static int xe_hwmon_pcode_rmw_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, + u32 clr, u32 set) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + u32 val0, val1; + int ret = 0; + + ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, + (channel == CHANNEL_CARD) ? + READ_PSYSGPU_POWER_LIMIT : + READ_PACKAGE_POWER_LIMIT, + hwmon->boot_power_limit_read ? + READ_PL_FROM_PCODE : READ_PL_FROM_FW), + &val0, &val1); + + if (ret) + drm_dbg(&hwmon->xe->drm, "read failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n", + channel, val0, val1, ret); + + if (attr == PL1_HWMON_ATTR) + val0 = (val0 & ~clr) | set; + else if (attr == PL2_HWMON_ATTR) + val1 = (val1 & ~clr) | set; + else + return -EIO; + + ret = xe_pcode_write64_timeout(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, + (channel == CHANNEL_CARD) ? + WRITE_PSYSGPU_POWER_LIMIT : + WRITE_PACKAGE_POWER_LIMIT, 0), + val0, val1, PL_WRITE_MBX_TIMEOUT_MS); + if (ret) + drm_dbg(&hwmon->xe->drm, "write failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n", + channel, val0, val1, ret); + return ret; +} + static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, int channel) { @@ -101,29 +232,19 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg } break; case REG_PKG_RAPL_LIMIT: - if (xe->info.platform == XE_BATTLEMAGE) { - if (channel == CHANNEL_PKG) - return BMG_PACKAGE_RAPL_LIMIT; - else - return BMG_PLATFORM_POWER_LIMIT; - } else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) return PVC_GT0_PACKAGE_RAPL_LIMIT; - } else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) { + else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) return PCU_CR_PACKAGE_RAPL_LIMIT; - } break; case REG_PKG_POWER_SKU: - if (xe->info.platform == XE_BATTLEMAGE) - return BMG_PACKAGE_POWER_SKU; - else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) return PVC_GT0_PACKAGE_POWER_SKU; else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) return PCU_CR_PACKAGE_POWER_SKU; break; case REG_PKG_POWER_SKU_UNIT: - if (xe->info.platform == XE_BATTLEMAGE) - return BMG_PACKAGE_POWER_SKU_UNIT; - else if (xe->info.platform == XE_PVC) + if (xe->info.platform == XE_PVC) return PVC_GT0_PACKAGE_POWER_SKU_UNIT; else if (xe->info.platform == XE_DG2) return PCU_CR_PACKAGE_POWER_SKU_UNIT; @@ -133,17 +254,20 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg return GT_PERF_STATUS; break; case REG_PKG_ENERGY_STATUS: - if (xe->info.platform == XE_BATTLEMAGE) { - if (channel == CHANNEL_PKG) - return BMG_PACKAGE_ENERGY_STATUS; - else - return BMG_PLATFORM_ENERGY_STATUS; - } else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { return PVC_GT0_PLATFORM_ENERGY_STATUS; } else if 
((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) { return PCU_CR_PACKAGE_ENERGY_STATUS; } break; + case REG_FAN_SPEED: + if (channel == FAN_1) + return BMG_FAN_1_SPEED; + else if (channel == FAN_2) + return BMG_FAN_2_SPEED; + else if (channel == FAN_3) + return BMG_FAN_3_SPEED; + break; default: drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); break; @@ -152,7 +276,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg return XE_REG(0); } -#define PL1_DISABLE 0 +#define PL_DISABLE 0 /* * HW allows arbitrary PL1 limits to be set but silently clamps these values to @@ -160,67 +284,73 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg * same pattern for sysfs, allow arbitrary PL1 limits to be set but display * clamped values when read. */ -static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *value) +static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value) { - u64 reg_val, min, max; + u64 reg_val = 0, min, max; struct xe_device *xe = hwmon->xe; struct xe_reg rapl_limit, pkg_power_sku; struct xe_mmio *mmio = xe_root_tile_mmio(xe); - rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); - pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + mutex_lock(&hwmon->hwmon_lock); - /* - * Valid check of REG_PKG_RAPL_LIMIT is already done in xe_hwmon_power_is_visible. - * So not checking it again here. - */ - if (!xe_reg_is_valid(pkg_power_sku)) { - drm_warn(&xe->drm, "pkg_power_sku invalid\n"); - *value = 0; - return; + if (hwmon->xe->info.has_mbx_power_limits) { + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, (u32 *)®_val); + } else { + rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); + pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + reg_val = xe_mmio_read32(mmio, rapl_limit); } - mutex_lock(&hwmon->hwmon_lock); - - reg_val = xe_mmio_read32(mmio, rapl_limit); - /* Check if PL1 limit is disabled */ - if (!(reg_val & PKG_PWR_LIM_1_EN)) { - *value = PL1_DISABLE; + /* Check if PL limits are disabled. */ + if (!(reg_val & PWR_LIM_EN)) { + *value = PL_DISABLE; + drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%016llx\n", + PWR_ATTR_TO_STR(attr), channel, reg_val); goto unlock; } - reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); + reg_val = REG_FIELD_GET(PWR_LIM_VAL, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); - reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku); - min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); - min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); - max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); - max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); - - if (min && max) - *value = clamp_t(u64, *value, min, max); + /* For platforms with mailbox power limit support clamping would be done by pcode. 
*/ + if (!hwmon->xe->info.has_mbx_power_limits) { + reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku); + min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); + max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); + min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); + max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); + if (min && max) + *value = clamp_t(u64, *value, min, max); + } unlock: mutex_unlock(&hwmon->hwmon_lock); } -static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value) +static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channel, long value) { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); int ret = 0; - u64 reg_val; + u32 reg_val, max; struct xe_reg rapl_limit; + mutex_lock(&hwmon->hwmon_lock); + rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); - mutex_lock(&hwmon->hwmon_lock); + /* Disable Power Limit and verify, as limit cannot be disabled on all platforms. */ + if (value == PL_DISABLE) { + if (hwmon->xe->info.has_mbx_power_limits) { + drm_dbg(&hwmon->xe->drm, "disabling %s on channel %d\n", + PWR_ATTR_TO_STR(attr), channel); + xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM_EN, 0); + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, ®_val); + } else { + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN, 0); + reg_val = xe_mmio_read32(mmio, rapl_limit); + } - /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ - if (value == PL1_DISABLE) { - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN, 0); - reg_val = xe_mmio_read32(mmio, rapl_limit); - if (reg_val & PKG_PWR_LIM_1_EN) { - drm_warn(&hwmon->xe->drm, "PL1 disable is not supported!\n"); + if (reg_val & PWR_LIM_EN) { + drm_warn(&hwmon->xe->drm, "Power limit disable is not supported!\n"); ret = -EOPNOTSUPP; } goto unlock; @@ -228,26 +358,54 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va /* Computation in 64-bits to avoid overflow. Round to nearest. */ reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); - reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val); - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); + /* + * Clamp power limit to GPU firmware default as maximum, as an additional protection to + * pcode clamp. + */ + if (hwmon->xe->info.has_mbx_power_limits) { + max = (attr == PL1_HWMON_ATTR) ? + hwmon->pl1_on_boot[channel] : hwmon->pl2_on_boot[channel]; + max = REG_FIELD_PREP(PWR_LIM_VAL, max); + if (reg_val > max) { + reg_val = max; + drm_dbg(&hwmon->xe->drm, + "Clamping power limit to GPU firmware default 0x%x\n", + reg_val); + } + } + + reg_val = PWR_LIM_EN | REG_FIELD_PREP(PWR_LIM_VAL, reg_val); + + if (hwmon->xe->info.has_mbx_power_limits) + ret = xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM, reg_val); + else + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM, reg_val); unlock: mutex_unlock(&hwmon->hwmon_lock); return ret; } -static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value) +static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, + long *value) { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); - u64 reg_val; + u32 reg_val; + + if (hwmon->xe->info.has_mbx_power_limits) { + /* PL1 is rated max if supported. 
*/ + xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, channel, ®_val); + } else { + /* + * This sysfs file won't be visible if REG_PKG_POWER_SKU is invalid, so valid check + * for this register can be skipped. + * See xe_hwmon_power_is_visible. + */ + struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + + reg_val = xe_mmio_read32(mmio, reg); + } - /* - * This sysfs file won't be visible if REG_PKG_POWER_SKU is invalid, so valid check - * for this register can be skipped. - * See xe_hwmon_power_is_visible. - */ - reg_val = xe_mmio_read32(mmio, reg); reg_val = REG_FIELD_GET(PKG_TDP, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); } @@ -277,16 +435,37 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy) { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); struct xe_hwmon_energy_info *ei = &hwmon->ei[channel]; - u64 reg_val; + u32 reg_val; + int ret = 0; - reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, - channel)); + /* Energy is supported only for card and pkg */ + if (channel > CHANNEL_PKG) { + *energy = 0; + return; + } - if (reg_val >= ei->reg_val_prev) - ei->accum_energy += reg_val - ei->reg_val_prev; - else - ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; + if (hwmon->xe->info.platform == XE_BATTLEMAGE) { + u64 pmt_val; + + ret = xe_pmt_telem_read(to_pci_dev(hwmon->xe->drm.dev), + xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID), + &pmt_val, BMG_ENERGY_STATUS_PMT_OFFSET, sizeof(pmt_val)); + if (ret != sizeof(pmt_val)) { + drm_warn(&hwmon->xe->drm, "energy read from pmt failed, ret %d\n", ret); + *energy = 0; + return; + } + + if (channel == CHANNEL_PKG) + reg_val = REG_FIELD_GET64(ENERGY_PKG, pmt_val); + else + reg_val = REG_FIELD_GET64(ENERGY_CARD, pmt_val); + } else { + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, + channel)); + } + ei->accum_energy += reg_val - ei->reg_val_prev; ei->reg_val_prev = reg_val; *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, @@ -301,23 +480,36 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); u32 x, y, x_w = 2; /* 2 bits */ u64 r, tau4, out; - int sensor_index = to_sensor_dev_attr(attr)->index; + int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD; + u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? 
PL2_HWMON_ATTR : PL1_HWMON_ATTR; + + int ret = 0; xe_pm_runtime_get(hwmon->xe); mutex_lock(&hwmon->hwmon_lock); - r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index)); + if (hwmon->xe->info.has_mbx_power_limits) { + ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r); + if (ret) { + drm_err(&hwmon->xe->drm, + "power interval read fail, ch %d, attr %d, r 0%llx, ret %d\n", + channel, power_attr, r, ret); + r = 0; + } + } else { + r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel)); + } mutex_unlock(&hwmon->hwmon_lock); xe_pm_runtime_put(hwmon->xe); - x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r); - y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r); + x = REG_FIELD_GET(PWR_LIM_TIME_X, r); + y = REG_FIELD_GET(PWR_LIM_TIME_Y, r); /* - * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17) + * tau = (1 + (x / 4)) * power(2,y), x = bits(23:22), y = bits(21:17) * = (4 | x) << (y - 2) * * Here (y - 2) ensures a 1.x fixed point representation of 1.x @@ -343,15 +535,16 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a u32 x, y, rxy, x_w = 2; /* 2 bits */ u64 tau4, r, max_win; unsigned long val; + int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD; + u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR; int ret; - int sensor_index = to_sensor_dev_attr(attr)->index; ret = kstrtoul(buf, 0, &val); if (ret) return ret; /* - * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12. + * Max HW supported tau in '(1 + (x / 4)) * power(2,y)' format, x = 0, y = 0x12. * The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds. * * The ideal scenario is for PKG_MAX_WIN to be read from the PKG_PWR_SKU register. 
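
Editorial note (not part of the patch): the comment in xe_hwmon_power_max_interval_show() above encodes the power-limit time window as tau = (1 + x/4) * 2^y, i.e. a 1.x fixed-point value stored as (4 | x) << y, and the store-path comment states that x = 0, y = 0x12 with the default scl_shift_time of 0xa yields a 256 second maximum. The sketch below is a minimal standalone reproduction of that arithmetic; the interpretation that y counts units of 2^-scl_shift_time seconds (hence the shift by scl_shift_time + x_w) is inferred from that stated 256 second result, and the main() harness, headers and variable names outside x, y, x_w, scl_shift_time and SF_TIME are illustrative only.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const unsigned int x_w = 2;              /* width of the x field            */
	const unsigned int x = 0, y = 0x12;      /* max HW-supported encoding       */
	const unsigned int scl_shift_time = 0xa; /* default time unit shift         */
	const uint64_t SF_TIME = 1000;           /* scale to milliseconds for sysfs */

	/* 1.x fixed point: (4 | x) << y, then scale out the extra 4 and the time unit */
	uint64_t tau4 = (uint64_t)((1u << x_w) | x) << y;
	uint64_t interval_ms = (tau4 * SF_TIME) >> (scl_shift_time + x_w);

	printf("power max interval = %llu ms\n",
	       (unsigned long long)interval_ms);  /* prints 256000, i.e. 256 s */
	return 0;
}

Running this prints 256000 ms, matching the 256 second limit the driver comment describes for the default time-unit shift.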
@@ -375,7 +568,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a return -EINVAL; /* val in hw units */ - val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME); + val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME) + 1; /* * Convert val to 1.x * power(2,y) @@ -390,14 +583,18 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a x = (val - (1ul << y)) << x_w >> y; } - rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y); + rxy = REG_FIELD_PREP(PWR_LIM_TIME_X, x) | + REG_FIELD_PREP(PWR_LIM_TIME_Y, y); xe_pm_runtime_get(hwmon->xe); mutex_lock(&hwmon->hwmon_lock); - r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index), - PKG_PWR_LIM_1_TIME, rxy); + if (hwmon->xe->info.has_mbx_power_limits) + xe_hwmon_pcode_rmw_power_limit(hwmon, power_attr, channel, PWR_LIM_TIME, rxy); + else + r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel), + PWR_LIM_TIME, rxy); mutex_unlock(&hwmon->hwmon_lock); @@ -406,17 +603,28 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a return count; } +/* PSYS PL1 */ static SENSOR_DEVICE_ATTR(power1_max_interval, 0664, xe_hwmon_power_max_interval_show, - xe_hwmon_power_max_interval_store, CHANNEL_CARD); - + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PSYS_PL1); +/* PKG PL1 */ static SENSOR_DEVICE_ATTR(power2_max_interval, 0664, xe_hwmon_power_max_interval_show, - xe_hwmon_power_max_interval_store, CHANNEL_PKG); + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PKG_PL1); +/* PSYS PL2 */ +static SENSOR_DEVICE_ATTR(power1_cap_interval, 0664, + xe_hwmon_power_max_interval_show, + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PSYS_PL2); +/* PKG PL2 */ +static SENSOR_DEVICE_ATTR(power2_cap_interval, 0664, + xe_hwmon_power_max_interval_show, + xe_hwmon_power_max_interval_store, SENSOR_INDEX_PKG_PL2); static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_power1_max_interval.dev_attr.attr, &sensor_dev_attr_power2_max_interval.dev_attr.attr, + &sensor_dev_attr_power1_cap_interval.dev_attr.attr, + &sensor_dev_attr_power2_cap_interval.dev_attr.attr, NULL }; @@ -426,10 +634,22 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret = 0; + int channel = (index % 2) ? CHANNEL_PKG : CHANNEL_CARD; + u32 power_attr = (index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR; + u32 uval = 0; + struct xe_reg rapl_limit; + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); xe_pm_runtime_get(hwmon->xe); - ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index)) ? attr->mode : 0; + if (hwmon->xe->info.has_mbx_power_limits) { + xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, &uval); + } else if (power_attr != PL2_HWMON_ATTR) { + rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); + if (xe_reg_is_valid(rapl_limit)) + uval = xe_mmio_read32(mmio, rapl_limit); + } + ret = (uval & PWR_LIM_EN) ? 
attr->mode : 0; xe_pm_runtime_put(hwmon->xe); @@ -449,11 +669,13 @@ static const struct attribute_group *hwmon_groups[] = { static const struct hwmon_channel_info * const hwmon_info[] = { HWMON_CHANNEL_INFO(temp, HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL), - HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL, - HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT | HWMON_P_LABEL), + HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT | + HWMON_P_CAP, + HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CAP), HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL), + HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT, HWMON_F_INPUT, HWMON_F_INPUT), NULL }; @@ -480,6 +702,19 @@ static int xe_hwmon_pcode_write_i1(const struct xe_hwmon *hwmon, u32 uval) (uval & POWER_SETUP_I1_DATA_MASK)); } +static int xe_hwmon_pcode_read_fan_control(const struct xe_hwmon *hwmon, u32 subcmd, u32 *uval) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + + /* Platforms that don't return correct value */ + if (hwmon->xe->info.platform == XE_DG2 && subcmd == FSC_READ_NUM_FANS) { + *uval = 2; + return 0; + } + + return xe_pcode_read(root_tile, PCODE_MBOX(FAN_SPEED_CONTROL, subcmd, 0), uval, NULL); +} + static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, long *value, u32 scale_factor) { @@ -557,23 +792,62 @@ xe_hwmon_temp_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) static umode_t xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { - u32 uval; + u32 uval = 0; + struct xe_reg reg; + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); switch (attr) { case hwmon_power_max: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, - channel)) ? 0664 : 0; + case hwmon_power_cap: + if (hwmon->xe->info.has_mbx_power_limits) { + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval); + } else if (attr != PL2_HWMON_ATTR) { + reg = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); + if (xe_reg_is_valid(reg)) + uval = xe_mmio_read32(mmio, reg); + } + if (uval & PWR_LIM_EN) { + drm_info(&hwmon->xe->drm, "%s is supported on channel %d\n", + PWR_ATTR_TO_STR(attr), channel); + return 0664; + } + drm_dbg(&hwmon->xe->drm, "%s is unsupported on channel %d\n", + PWR_ATTR_TO_STR(attr), channel); + return 0; case hwmon_power_rated_max: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, - channel)) ? 0444 : 0; + if (hwmon->xe->info.has_mbx_power_limits) { + return 0; + } else { + reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + if (xe_reg_is_valid(reg)) + uval = xe_mmio_read32(mmio, reg); + return uval ? 0444 : 0; + } case hwmon_power_crit: - if (channel == CHANNEL_PKG) - return (xe_hwmon_pcode_read_i1(hwmon, &uval) || - !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + if (channel == CHANNEL_CARD) { + xe_hwmon_pcode_read_i1(hwmon, &uval); + return (uval & POWER_SETUP_I1_WATTS) ? 0644 : 0; + } break; case hwmon_power_label: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, - channel)) ? 
0444 : 0; + if (hwmon->xe->info.has_mbx_power_limits) { + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval); + } else { + reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); + if (xe_reg_is_valid(reg)) + uval = xe_mmio_read32(mmio, reg); + + if (!uval) { + reg = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); + if (xe_reg_is_valid(reg)) + uval = xe_mmio_read32(mmio, reg); + } + } + if ((!(uval & PWR_LIM_EN)) && channel == CHANNEL_CARD) { + xe_hwmon_pcode_read_i1(hwmon, &uval); + return (uval & POWER_SETUP_I1_WATTS) ? 0444 : 0; + } + return (uval) ? 0444 : 0; default: return 0; } @@ -585,10 +859,11 @@ xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) { switch (attr) { case hwmon_power_max: - xe_hwmon_power_max_read(hwmon, channel, val); + case hwmon_power_cap: + xe_hwmon_power_max_read(hwmon, attr, channel, val); return 0; case hwmon_power_rated_max: - xe_hwmon_power_rated_max_read(hwmon, channel, val); + xe_hwmon_power_rated_max_read(hwmon, attr, channel, val); return 0; case hwmon_power_crit: return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_POWER); @@ -601,8 +876,9 @@ static int xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) { switch (attr) { + case hwmon_power_cap: case hwmon_power_max: - return xe_hwmon_power_max_write(hwmon, channel, val); + return xe_hwmon_power_max_write(hwmon, attr, channel, val); case hwmon_power_crit: return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_POWER); default: @@ -683,11 +959,18 @@ xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) static umode_t xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { + long energy = 0; + switch (attr) { case hwmon_energy_input: case hwmon_energy_label: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, - channel)) ? 0444 : 0; + if (hwmon->xe->info.platform == XE_BATTLEMAGE) { + xe_hwmon_energy_get(hwmon, channel, &energy); + return energy ? 0444 : 0; + } else { + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, + channel)) ? 0444 : 0; + } default: return 0; } @@ -706,6 +989,75 @@ xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) } static umode_t +xe_hwmon_fan_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) +{ + u32 uval; + + if (!hwmon->xe->info.has_fan_control) + return 0; + + switch (attr) { + case hwmon_fan_input: + if (xe_hwmon_pcode_read_fan_control(hwmon, FSC_READ_NUM_FANS, &uval)) + return 0; + + return channel < uval ? 0444 : 0; + default: + return 0; + } +} + +static int +xe_hwmon_fan_input_read(struct xe_hwmon *hwmon, int channel, long *val) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); + struct xe_hwmon_fan_info *fi = &hwmon->fi[channel]; + u64 rotations, time_now, time; + u32 reg_val; + int ret = 0; + + mutex_lock(&hwmon->hwmon_lock); + + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_FAN_SPEED, channel)); + time_now = get_jiffies_64(); + + /* + * HW register value is accumulated count of pulses from PWM fan with the scale + * of 2 pulses per rotation. + */ + rotations = (reg_val - fi->reg_val_prev) / 2; + + time = jiffies_delta_to_msecs(time_now - fi->time_prev); + if (unlikely(!time)) { + ret = -EAGAIN; + goto unlock; + } + + /* + * Calculate fan speed in RPM by time averaging two subsequent readings in minutes. 
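 * (Illustrative numbers, editor's addition: a delta of 120 pulses is 60
 *  rotations; over a 2000 ms window that is 60 * 60000 / 2000 = 1800 RPM,
 *  matching the formula below.)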
+ * RPM = number of rotations * msecs per minute / time in msecs + */ + *val = DIV_ROUND_UP_ULL(rotations * (MSEC_PER_SEC * 60), time); + + fi->reg_val_prev = reg_val; + fi->time_prev = time_now; +unlock: + mutex_unlock(&hwmon->hwmon_lock); + return ret; +} + +static int +xe_hwmon_fan_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) +{ + switch (attr) { + case hwmon_fan_input: + return xe_hwmon_fan_input_read(hwmon, channel, val); + default: + return -EOPNOTSUPP; + } +} + +static umode_t xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int channel) { @@ -730,6 +1082,9 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, case hwmon_energy: ret = xe_hwmon_energy_is_visible(hwmon, attr, channel); break; + case hwmon_fan: + ret = xe_hwmon_fan_is_visible(hwmon, attr, channel); + break; default: ret = 0; break; @@ -765,6 +1120,9 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, case hwmon_energy: ret = xe_hwmon_energy_read(hwmon, attr, channel, val); break; + case hwmon_fan: + ret = xe_hwmon_fan_read(hwmon, attr, channel, val); + break; default: ret = -EOPNOTSUPP; break; @@ -842,23 +1200,57 @@ static void xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - long energy; + long energy, fan_speed; u64 val_sku_unit = 0; int channel; struct xe_reg pkg_power_sku_unit; - /* - * The contents of register PKG_POWER_SKU_UNIT do not change, - * so read it once and store the shift values. - */ - pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0); - if (xe_reg_is_valid(pkg_power_sku_unit)) { - val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit); - hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); - hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); - hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); + if (hwmon->xe->info.has_mbx_power_limits) { + /* Check if GPU firmware support mailbox power limits commands. */ + if (xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_CARD, + &hwmon->pl1_on_boot[CHANNEL_CARD]) | + xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_PKG, + &hwmon->pl1_on_boot[CHANNEL_PKG]) | + xe_hwmon_pcode_read_power_limit(hwmon, PL2_HWMON_ATTR, CHANNEL_CARD, + &hwmon->pl2_on_boot[CHANNEL_CARD]) | + xe_hwmon_pcode_read_power_limit(hwmon, PL2_HWMON_ATTR, CHANNEL_PKG, + &hwmon->pl2_on_boot[CHANNEL_PKG])) { + drm_warn(&hwmon->xe->drm, + "Failed to read power limits, check GPU firmware !\n"); + } else { + drm_info(&hwmon->xe->drm, "Using mailbox commands for power limits\n"); + /* Write default limits to read from pcode from now on. 
*/ + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME, + hwmon->pl1_on_boot[CHANNEL_CARD]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME, + hwmon->pl1_on_boot[CHANNEL_PKG]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL2_HWMON_ATTR, + CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME, + hwmon->pl2_on_boot[CHANNEL_CARD]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL2_HWMON_ATTR, + CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME, + hwmon->pl2_on_boot[CHANNEL_PKG]); + hwmon->scl_shift_power = PWR_UNIT; + hwmon->scl_shift_energy = ENERGY_UNIT; + hwmon->scl_shift_time = TIME_UNIT; + hwmon->boot_power_limit_read = true; + } + } else { + drm_info(&hwmon->xe->drm, "Using register for power limits\n"); + /* + * The contents of register PKG_POWER_SKU_UNIT do not change, + * so read it once and store the shift values. + */ + pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0); + if (xe_reg_is_valid(pkg_power_sku_unit)) { + val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit); + hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); + hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); + hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); + } } - /* * Initialize 'struct xe_hwmon_energy_info', i.e. set fields to the * first value of the energy register read @@ -866,6 +1258,11 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) for (channel = 0; channel < CHANNEL_MAX; channel++) if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, channel)) xe_hwmon_energy_get(hwmon, channel, &energy); + + /* Initialize 'struct xe_hwmon_fan_info' with initial fan register reading. */ + for (channel = 0; channel < FAN_MAX; channel++) + if (xe_hwmon_is_visible(hwmon, hwmon_fan, hwmon_fan_input, channel)) + xe_hwmon_fan_input_read(hwmon, channel, &fan_speed); } static void xe_hwmon_mutex_destroy(void *arg) @@ -918,4 +1315,4 @@ int xe_hwmon_register(struct xe_device *xe) return 0; } - +MODULE_IMPORT_NS("INTEL_PMT_TELEMETRY"); diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c new file mode 100644 index 000000000000..bc7dc2099470 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Intel Xe I2C attached Microcontroller Units (MCU) + * + * Copyright (C) 2025 Intel Corporation. + */ + +#include <linux/array_size.h> +#include <linux/container_of.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/i2c.h> +#include <linux/ioport.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/notifier.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/property.h> +#include <linux/regmap.h> +#include <linux/sprintf.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +#include "regs/xe_i2c_regs.h" +#include "regs/xe_irq_regs.h" + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_i2c.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" + +/** + * DOC: Xe I2C devices + * + * Register a platform device for the I2C host controller (Synpsys DesignWare + * I2C) if the registers of that controller are mapped to the MMIO, and also the + * I2C client device for the Add-In Management Controller (the MCU) attached to + * the host controller. + * + * See drivers/i2c/busses/i2c-designware-* for more information on the I2C host + * controller. 
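 *
 * Editor's note: the "clock-frequency" property in the software node below is
 * I2C_MAX_FAST_MODE_PLUS_FREQ, i.e. the adapter is configured for 1 MHz
 * Fast-mode Plus timing.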
+ */ + +static const char adapter_name[] = "i2c_designware"; + +static const struct property_entry xe_i2c_adapter_properties[] = { + PROPERTY_ENTRY_STRING("compatible", "intel,xe-i2c"), + PROPERTY_ENTRY_U32("clock-frequency", I2C_MAX_FAST_MODE_PLUS_FREQ), + { } +}; + +static inline void xe_i2c_read_endpoint(struct xe_mmio *mmio, void *ep) +{ + u32 *val = ep; + + val[0] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_PREFIX); + val[1] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_POSTFIX); +} + +static void xe_i2c_client_work(struct work_struct *work) +{ + struct xe_i2c *i2c = container_of(work, struct xe_i2c, work); + struct i2c_board_info info = { + .type = "amc", + .flags = I2C_CLIENT_HOST_NOTIFY, + .addr = i2c->ep.addr[1], + }; + + i2c->client[0] = i2c_new_client_device(i2c->adapter, &info); +} + +static int xe_i2c_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + struct xe_i2c *i2c = container_of(nb, struct xe_i2c, bus_notifier); + struct i2c_adapter *adapter = i2c_verify_adapter(data); + struct device *dev = data; + + if (action == BUS_NOTIFY_ADD_DEVICE && + adapter && dev->parent == &i2c->pdev->dev) { + i2c->adapter = adapter; + schedule_work(&i2c->work); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static int xe_i2c_register_adapter(struct xe_i2c *i2c) +{ + struct pci_dev *pci = to_pci_dev(i2c->drm_dev); + struct platform_device *pdev; + struct fwnode_handle *fwnode; + int ret; + + fwnode = fwnode_create_software_node(xe_i2c_adapter_properties, NULL); + if (IS_ERR(fwnode)) + return PTR_ERR(fwnode); + + /* + * Not using platform_device_register_full() here because we don't have + * a handle to the platform_device before it returns. xe_i2c_notifier() + * uses that handle, but it may be called before + * platform_device_register_full() is done. + */ + pdev = platform_device_alloc(adapter_name, pci_dev_id(pci)); + if (!pdev) { + ret = -ENOMEM; + goto err_fwnode_remove; + } + + if (i2c->adapter_irq) { + struct resource res; + + res = DEFINE_RES_IRQ_NAMED(i2c->adapter_irq, "xe_i2c"); + + ret = platform_device_add_resources(pdev, &res, 1); + if (ret) + goto err_pdev_put; + } + + pdev->dev.parent = i2c->drm_dev; + pdev->dev.fwnode = fwnode; + i2c->adapter_node = fwnode; + i2c->pdev = pdev; + + ret = platform_device_add(pdev); + if (ret) + goto err_pdev_put; + + return 0; + +err_pdev_put: + platform_device_put(pdev); +err_fwnode_remove: + fwnode_remove_software_node(fwnode); + + return ret; +} + +static void xe_i2c_unregister_adapter(struct xe_i2c *i2c) +{ + platform_device_unregister(i2c->pdev); + fwnode_remove_software_node(i2c->adapter_node); +} + +/** + * xe_i2c_irq_handler: Handler for I2C interrupts + * @xe: xe device instance + * @master_ctl: interrupt register + * + * Forward interrupts generated by the I2C host adapter to the I2C host adapter + * driver. 
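 * The I2C_IRQ bit in @master_ctl is demultiplexed onto the Linux interrupt
 * created by xe_i2c_create_irq(), so the host controller driver services it
 * like any other per-adapter IRQ.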
+ */ +void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) +{ + if (!xe->i2c || !xe->i2c->adapter_irq) + return; + + if (master_ctl & I2C_IRQ) + generic_handle_irq_safe(xe->i2c->adapter_irq); +} + +static int xe_i2c_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw_irq_num) +{ + irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); + return 0; +} + +static const struct irq_domain_ops xe_i2c_irq_ops = { + .map = xe_i2c_irq_map, +}; + +static int xe_i2c_create_irq(struct xe_i2c *i2c) +{ + struct irq_domain *domain; + + if (!(i2c->ep.capabilities & XE_I2C_EP_CAP_IRQ)) + return 0; + + domain = irq_domain_create_linear(dev_fwnode(i2c->drm_dev), 1, &xe_i2c_irq_ops, NULL); + if (!domain) + return -ENOMEM; + + i2c->adapter_irq = irq_create_mapping(domain, 0); + i2c->irqdomain = domain; + + return 0; +} + +static void xe_i2c_remove_irq(struct xe_i2c *i2c) +{ + if (!i2c->irqdomain) + return; + + irq_dispose_mapping(i2c->adapter_irq); + irq_domain_remove(i2c->irqdomain); +} + +static int xe_i2c_read(void *context, unsigned int reg, unsigned int *val) +{ + struct xe_i2c *i2c = context; + + *val = xe_mmio_read32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET)); + + return 0; +} + +static int xe_i2c_write(void *context, unsigned int reg, unsigned int val) +{ + struct xe_i2c *i2c = context; + + xe_mmio_write32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET), val); + + return 0; +} + +static const struct regmap_config i2c_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_read = xe_i2c_read, + .reg_write = xe_i2c_write, + .fast_io = true, +}; + +void xe_i2c_pm_suspend(struct xe_device *xe) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe->i2c || xe->i2c->ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + return; + + xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D3hot); + drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); +} + +void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe->i2c || xe->i2c->ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + return; + + if (d3cold) + xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY); + + xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D0); + drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); +} + +static void xe_i2c_remove(void *data) +{ + struct xe_i2c *i2c = data; + unsigned int i; + + for (i = 0; i < XE_I2C_MAX_CLIENTS; i++) + i2c_unregister_device(i2c->client[i]); + + bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier); + xe_i2c_unregister_adapter(i2c); + xe_i2c_remove_irq(i2c); +} + +/** + * xe_i2c_probe: Probe the I2C host adapter and the I2C clients attached to it + * @xe: xe device instance + * + * Register all the I2C devices described in the I2C Endpoint data structure. 
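 * Probing is a no-op on anything other than a native (non-VF) Battlemage
 * device whose endpoint cookie reads back as XE_I2C_EP_COOKIE_DEVICE.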
+ * + * Return: 0 on success, error code on failure + */ +int xe_i2c_probe(struct xe_device *xe) +{ + struct device *drm_dev = xe->drm.dev; + struct xe_i2c_endpoint ep; + struct regmap *regmap; + struct xe_i2c *i2c; + int ret; + + if (xe->info.platform != XE_BATTLEMAGE) + return 0; + + if (IS_SRIOV_VF(xe)) + return 0; + + xe_i2c_read_endpoint(xe_root_tile_mmio(xe), &ep); + if (ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + return 0; + + i2c = devm_kzalloc(drm_dev, sizeof(*i2c), GFP_KERNEL); + if (!i2c) + return -ENOMEM; + + INIT_WORK(&i2c->work, xe_i2c_client_work); + i2c->mmio = xe_root_tile_mmio(xe); + i2c->drm_dev = drm_dev; + i2c->ep = ep; + xe->i2c = i2c; + + /* PCI PM isn't aware of this device, bring it up and match it with SGUnit state. */ + xe_i2c_pm_resume(xe, true); + + regmap = devm_regmap_init(drm_dev, NULL, i2c, &i2c_regmap_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + i2c->bus_notifier.notifier_call = xe_i2c_notifier; + ret = bus_register_notifier(&i2c_bus_type, &i2c->bus_notifier); + if (ret) + return ret; + + ret = xe_i2c_create_irq(i2c); + if (ret) + goto err_unregister_notifier; + + ret = xe_i2c_register_adapter(i2c); + if (ret) + goto err_remove_irq; + + return devm_add_action_or_reset(drm_dev, xe_i2c_remove, i2c); + +err_remove_irq: + xe_i2c_remove_irq(i2c); + +err_unregister_notifier: + bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_i2c.h b/drivers/gpu/drm/xe/xe_i2c.h new file mode 100644 index 000000000000..b767ed8ce52b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_i2c.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _XE_I2C_H_ +#define _XE_I2C_H_ + +#include <linux/bits.h> +#include <linux/notifier.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +struct device; +struct fwnode_handle; +struct i2c_adapter; +struct i2c_client; +struct irq_domain; +struct platform_device; +struct xe_device; +struct xe_mmio; + +#define XE_I2C_MAX_CLIENTS 3 + +#define XE_I2C_EP_COOKIE_DEVICE 0xde + +/* Endpoint Capabilities */ +#define XE_I2C_EP_CAP_IRQ BIT(0) + +struct xe_i2c_endpoint { + u8 cookie; + u8 capabilities; + u16 addr[XE_I2C_MAX_CLIENTS]; +}; + +struct xe_i2c { + struct fwnode_handle *adapter_node; + struct platform_device *pdev; + struct i2c_adapter *adapter; + struct i2c_client *client[XE_I2C_MAX_CLIENTS]; + + struct notifier_block bus_notifier; + struct work_struct work; + + struct irq_domain *irqdomain; + int adapter_irq; + + struct xe_i2c_endpoint ep; + struct device *drm_dev; + + struct xe_mmio *mmio; +}; + +#if IS_ENABLED(CONFIG_I2C) +int xe_i2c_probe(struct xe_device *xe); +void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl); +void xe_i2c_pm_suspend(struct xe_device *xe); +void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold); +#else +static inline int xe_i2c_probe(struct xe_device *xe) { return 0; } +static inline void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) { } +static inline void xe_i2c_pm_suspend(struct xe_device *xe) { } +static inline void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) { } +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 5362d3174b06..5df5b8c2a3e4 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -18,10 +18,12 @@ #include "xe_gt.h" #include "xe_guc.h" #include "xe_hw_engine.h" +#include "xe_i2c.h" #include "xe_memirq.h" #include "xe_mmio.h" #include "xe_pxp.h" #include "xe_sriov.h" +#include "xe_tile.h" /* * Interrupt registers for a unit are 
always consecutive and ordered @@ -160,7 +162,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) dmask = irqs << 16 | irqs; smask = irqs << 16; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { /* Enable interrupts for each engine class */ xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask); if (ccs_mask) @@ -260,7 +262,7 @@ gt_engine_identity(struct xe_device *xe, static void gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir) { - if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt)) + if (instance == OTHER_GUC_INSTANCE && xe_gt_is_main_type(gt)) return xe_guc_irq_handler(>->uc.guc, iir); if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt)) return xe_guc_irq_handler(>->uc.guc, iir); @@ -476,6 +478,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) if (xe->info.has_heci_cscfi) xe_heci_csc_irq_handler(xe, master_ctl); xe_display_irq_handler(xe, master_ctl); + xe_i2c_irq_handler(xe, master_ctl); gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); } } @@ -550,7 +553,7 @@ static void xelp_irq_reset(struct xe_tile *tile) static void dg1_irq_reset(struct xe_tile *tile) { - if (tile->id == 0) + if (xe_tile_is_root(tile)) dg1_intr_disable(tile_to_xe(tile)); gt_irq_reset(tile); diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 89393dcb53d9..a2000307d5bf 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -11,6 +11,7 @@ #include "xe_assert.h" #include "xe_bo.h" +#include "xe_gt_tlb_invalidation.h" #include "xe_lmtt.h" #include "xe_map.h" #include "xe_mmio.h" @@ -71,13 +72,16 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level lmtt->ops->lmtt_pte_num(level)), ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | - XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_PINNED); + XE_BO_FLAG_NEEDS_64K); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out_free_pt; } lmtt_assert(lmtt, xe_bo_is_vram(bo)); + lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE)); + + xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, xe_bo_size(bo)); pt->level = level; pt->bo = bo; @@ -91,6 +95,9 @@ out: static void lmtt_pt_free(struct xe_lmtt_pt *pt) { + lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n", + pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE)); + xe_bo_unpin_map_no_vm(pt->bo); kfree(pt); } @@ -216,6 +223,58 @@ void xe_lmtt_init_hw(struct xe_lmtt *lmtt) lmtt_setup_dir_ptr(lmtt); } +static int lmtt_invalidate_hw(struct xe_lmtt *lmtt) +{ + struct xe_gt_tlb_invalidation_fence fences[XE_MAX_GT_PER_TILE]; + struct xe_gt_tlb_invalidation_fence *fence = fences; + struct xe_tile *tile = lmtt_to_tile(lmtt); + struct xe_gt *gt; + int result = 0; + int err; + u8 id; + + for_each_gt_on_tile(gt, tile, id) { + xe_gt_tlb_invalidation_fence_init(gt, fence, true); + err = xe_gt_tlb_invalidation_all(gt, fence); + result = result ?: err; + fence++; + } + + lmtt_debug(lmtt, "num_fences=%d err=%d\n", (int)(fence - fences), result); + + /* + * It is fine to wait for all fences, even for those which covers the + * invalidation request that failed, as such fence should be already + * marked as signaled. + */ + fence = fences; + for_each_gt_on_tile(gt, tile, id) + xe_gt_tlb_invalidation_fence_wait(fence++); + + return result; +} + +/** + * xe_lmtt_invalidate_hw - Invalidate LMTT hardware. + * @lmtt: the &xe_lmtt to invalidate + * + * Send requests to all GuCs on this tile to invalidate all TLBs. 
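 * Internally this issues one TLB invalidation per GT on the tile and then
 * waits on every resulting fence; fences for requests that failed to send are
 * already signaled, so the wait cannot stall on them.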
+ * + * This function should be called only when running as a PF driver. + */ +void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt) +{ + struct xe_device *xe = lmtt_to_xe(lmtt); + int err; + + lmtt_assert(lmtt, IS_SRIOV_PF(xe)); + + err = lmtt_invalidate_hw(lmtt); + if (err) + xe_sriov_warn(xe, "LMTT%u invalidation failed (%pe)", + lmtt_to_tile(lmtt)->id, ERR_PTR(err)); +} + static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, u64 pte, unsigned int idx) { @@ -226,9 +285,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, switch (lmtt->ops->lmtt_pte_size(level)) { case sizeof(u32): + lmtt_assert(lmtt, !overflows_type(pte, u32)); + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte); break; case sizeof(u64): + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte); break; default: @@ -265,6 +329,7 @@ static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid) return; lmtt_write_pte(lmtt, pd, LMTT_PTE_INVALID, vfid); + lmtt_invalidate_hw(lmtt); lmtt_assert(lmtt, pd->level > 0); lmtt_assert(lmtt, pt->level == pd->level - 1); @@ -386,11 +451,11 @@ static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo u64 addr, vram_offset; lmtt_assert(lmtt, IS_ALIGNED(start, page_size)); - lmtt_assert(lmtt, IS_ALIGNED(bo->size, page_size)); + lmtt_assert(lmtt, IS_ALIGNED(xe_bo_size(bo), page_size)); lmtt_assert(lmtt, xe_bo_is_vram(bo)); vram_offset = vram_region_gpu_offset(bo->ttm.resource); - xe_res_first(bo->ttm.resource, 0, bo->size, &cur); + xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); while (cur.remaining) { addr = xe_res_dma(&cur); addr += vram_offset; /* XXX */ diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h index cb10ef994db6..75a234fbf367 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.h +++ b/drivers/gpu/drm/xe/xe_lmtt.h @@ -15,6 +15,7 @@ struct xe_lmtt_ops; #ifdef CONFIG_PCI_IOV int xe_lmtt_init(struct xe_lmtt *lmtt); void xe_lmtt_init_hw(struct xe_lmtt *lmtt); +void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt); int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range); int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset); void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 03bfba696b37..6d38411bdeba 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -38,7 +38,34 @@ #define LRC_ENGINE_CLASS GENMASK_ULL(63, 61) #define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) +#define LRC_PPHWSP_SIZE SZ_4K +#define LRC_INDIRECT_CTX_BO_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K +#define LRC_WA_BB_SIZE SZ_4K + +/* + * Layout of the LRC and associated data allocated as + * lrc->bo: + * + * Region Size + * +============================+=================================+ <- __xe_lrc_ring_offset() + * | Ring | ring_size, see | + * | | xe_lrc_init() | + * +============================+=================================+ <- __xe_lrc_pphwsp_offset() + * | PPHWSP (includes SW state) | 4K | + * +----------------------------+---------------------------------+ <- __xe_lrc_regs_offset() + * | Engine Context Image | n * 4K, see | + * | | xe_gt_lrc_size() | + * +----------------------------+---------------------------------+ <- 
__xe_lrc_indirect_ring_offset() + * | Indirect Ring State Page | 0 or 4k, see | + * | | XE_LRC_FLAG_INDIRECT_RING_STATE | + * +============================+=================================+ <- __xe_lrc_indirect_ctx_offset() + * | Indirect Context Page | 0 or 4k, see | + * | | XE_LRC_FLAG_INDIRECT_CTX | + * +============================+=================================+ <- __xe_lrc_wa_bb_offset() + * | WA BB Per Ctx | 4k | + * +============================+=================================+ <- xe_bo_size(lrc->bo) + */ static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) @@ -46,24 +73,33 @@ lrc_to_xe(struct xe_lrc *lrc) return gt_to_xe(lrc->fence_ctx.gt); } +static bool +gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) +{ + return false; +} + size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) { struct xe_device *xe = gt_to_xe(gt); size_t size; + /* Per-process HW status page (PPHWSP) */ + size = LRC_PPHWSP_SIZE; + + /* Engine context image */ switch (class) { case XE_ENGINE_CLASS_RENDER: if (GRAPHICS_VER(xe) >= 20) - size = 4 * SZ_4K; + size += 3 * SZ_4K; else - size = 14 * SZ_4K; + size += 13 * SZ_4K; break; case XE_ENGINE_CLASS_COMPUTE: - /* 14 pages since graphics_ver == 11 */ if (GRAPHICS_VER(xe) >= 20) - size = 3 * SZ_4K; + size += 2 * SZ_4K; else - size = 14 * SZ_4K; + size += 13 * SZ_4K; break; default: WARN(1, "Unknown engine class: %d", class); @@ -72,7 +108,7 @@ size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) case XE_ENGINE_CLASS_VIDEO_DECODE: case XE_ENGINE_CLASS_VIDEO_ENHANCE: case XE_ENGINE_CLASS_OTHER: - size = 2 * SZ_4K; + size += 1 * SZ_4K; } /* Add indirect ring state page */ @@ -577,8 +613,6 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) if (xe_gt_has_indirect_ring_state(hwe->gt)) regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); - - /* TODO: Timestamp */ } static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) @@ -650,9 +684,8 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) #define LRC_SEQNO_PPHWSP_OFFSET 512 #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) #define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) +#define LRC_ENGINE_ID_PPHWSP_OFFSET 1024 #define LRC_PARALLEL_PPHWSP_OFFSET 2048 -#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096 -#define LRC_PPHWSP_SIZE SZ_4K u32 xe_lrc_regs_offset(struct xe_lrc *lrc) { @@ -713,8 +746,23 @@ static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { - /* Indirect ring state page is at the very end of LRC */ - return lrc->size - LRC_INDIRECT_RING_STATE_SIZE; + u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - + LRC_INDIRECT_RING_STATE_SIZE; + + if (lrc->flags & XE_LRC_FLAG_INDIRECT_CTX) + offset -= LRC_INDIRECT_CTX_BO_SIZE; + + return offset; +} + +static inline u32 __xe_lrc_indirect_ctx_offset(struct xe_lrc *lrc) +{ + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_CTX_BO_SIZE; +} + +static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc) +{ + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE; } #define DECL_MAP_ADDR_HELPERS(elem) \ @@ -906,17 +954,12 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) static void xe_lrc_finish(struct xe_lrc *lrc) { xe_hw_fence_ctx_finish(&lrc->fence_ctx); - xe_bo_lock(lrc->bo, false); - xe_bo_unpin(lrc->bo); - xe_bo_unlock(lrc->bo); - xe_bo_put(lrc->bo); - xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo); + 
xe_bo_unpin_map_no_vm(lrc->bo); } /* - * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active - * context run ticks. - * @lrc: Pointer to the lrc. + * wa_bb_setup_utilization() - Write commands to wa bb to assist + * in calculating active context run ticks. * * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the * context, but only gets updated when the context switches out. In order to @@ -941,11 +984,15 @@ static void xe_lrc_finish(struct xe_lrc *lrc) * store it in the PPHSWP. */ #define CONTEXT_ACTIVE 1ULL -static void xe_lrc_setup_utilization(struct xe_lrc *lrc) +static ssize_t setup_utilization_wa(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, + size_t max_len) { - u32 *cmd; + u32 *cmd = batch; - cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + if (xe_gt_WARN_ON(lrc->gt, max_len < 12)) + return -ENOSPC; *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; *cmd++ = ENGINE_ID(0).addr; @@ -964,86 +1011,228 @@ static void xe_lrc_setup_utilization(struct xe_lrc *lrc) *cmd++ = upper_32_bits(CONTEXT_ACTIVE); } - *cmd++ = MI_BATCH_BUFFER_END; + return cmd - batch; +} + +struct bo_setup { + ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + u32 *batch, size_t max_size); +}; + +struct bo_setup_state { + /* Input: */ + struct xe_lrc *lrc; + struct xe_hw_engine *hwe; + size_t max_size; + size_t reserve_dw; + unsigned int offset; + const struct bo_setup *funcs; + unsigned int num_funcs; + + /* State: */ + u32 *buffer; + u32 *ptr; + unsigned int written; +}; + +static int setup_bo(struct bo_setup_state *state) +{ + ssize_t remain; + + if (state->lrc->bo->vmap.is_iomem) { + state->buffer = kmalloc(state->max_size, GFP_KERNEL); + if (!state->buffer) + return -ENOMEM; + state->ptr = state->buffer; + } else { + state->ptr = state->lrc->bo->vmap.vaddr + state->offset; + state->buffer = NULL; + } + + remain = state->max_size / sizeof(u32); + + for (size_t i = 0; i < state->num_funcs; i++) { + ssize_t len = state->funcs[i].setup(state->lrc, state->hwe, + state->ptr, remain); + + remain -= len; + + /* + * Caller has asked for at least reserve_dw to remain unused. 
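 * For example, setup_wa_bb() passes reserve_dw = 1 so that one dword is
 * guaranteed to remain free for the terminating MI_BATCH_BUFFER_END it
 * appends after setup_bo() returns.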
+ */ + if (len < 0 || + xe_gt_WARN_ON(state->lrc->gt, remain < state->reserve_dw)) + goto fail; + + state->ptr += len; + state->written += len; + } + + return 0; + +fail: + kfree(state->buffer); + return -ENOSPC; +} + +static void finish_bo(struct bo_setup_state *state) +{ + if (!state->buffer) + return; + + xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap, + state->offset, state->buffer, + state->written * sizeof(u32)); + kfree(state->buffer); +} + +static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + static const struct bo_setup funcs[] = { + { .setup = setup_utilization_wa }, + }; + struct bo_setup_state state = { + .lrc = lrc, + .hwe = hwe, + .max_size = LRC_WA_BB_SIZE, + .reserve_dw = 1, + .offset = __xe_lrc_wa_bb_offset(lrc), + .funcs = funcs, + .num_funcs = ARRAY_SIZE(funcs), + }; + int ret; + + ret = setup_bo(&state); + if (ret) + return ret; + + *state.ptr++ = MI_BATCH_BUFFER_END; + state.written++; + + finish_bo(&state); xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, - xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + xe_bo_ggtt_addr(lrc->bo) + state.offset + 1); + return 0; } -#define PVC_CTX_ASID (0x2e + 1) -#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) +static int +setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + static struct bo_setup rcs_funcs[] = { + }; + struct bo_setup_state state = { + .lrc = lrc, + .hwe = hwe, + .max_size = (63 * 64) /* max 63 cachelines */, + .offset = __xe_lrc_indirect_ctx_offset(lrc), + }; + int ret; + + if (!(lrc->flags & XE_LRC_FLAG_INDIRECT_CTX)) + return 0; + + if (hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE) { + state.funcs = rcs_funcs; + state.num_funcs = ARRAY_SIZE(rcs_funcs); + } + + if (xe_gt_WARN_ON(lrc->gt, !state.funcs)) + return 0; + + ret = setup_bo(&state); + if (ret) + return ret; + + /* + * Align to 64B cacheline so there's no garbage at the end for CS to + * execute: size for indirect ctx must be a multiple of 64. + */ + while (state.written & 0xf) { + *state.ptr++ = MI_NOOP; + state.written++; + } + + finish_bo(&state); + + xe_lrc_write_ctx_reg(lrc, + CTX_CS_INDIRECT_CTX, + (xe_bo_ggtt_addr(lrc->bo) + state.offset) | + /* Size in CLs. */ + (state.written * sizeof(u32) / 64)); + xe_lrc_write_ctx_reg(lrc, + CTX_CS_INDIRECT_CTX_OFFSET, + CTX_INDIRECT_CTX_OFFSET_DEFAULT); + + return 0; +} static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_vm *vm, u32 ring_size, u16 msix_vec, u32 init_flags) { struct xe_gt *gt = hwe->gt; + const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class); + u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); struct iosys_map map; - void *init_data = NULL; u32 arb_enable; - u32 lrc_size; u32 bo_flags; int err; kref_init(&lrc->refcount); lrc->gt = gt; + lrc->size = lrc_size; lrc->flags = 0; - lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); + lrc->ring.size = ring_size; + lrc->ring.tail = 0; + + if (gt_engine_needs_indirect_ctx(gt, hwe->class)) { + lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX; + bo_size += LRC_INDIRECT_CTX_BO_SIZE; + } + if (xe_gt_has_indirect_ring_state(gt)) lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE; + if (vm && vm->xef) /* userspace */ + bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; - /* - * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address - * via VM bind calls. 
- */ - lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, + lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size, ttm_bo_type_kernel, bo_flags); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); - lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, - ttm_bo_type_kernel, - bo_flags); - if (IS_ERR(lrc->bb_per_ctx_bo)) { - err = PTR_ERR(lrc->bb_per_ctx_bo); - goto err_lrc_finish; - } - - lrc->size = lrc_size; - lrc->ring.size = ring_size; - lrc->ring.tail = 0; - xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); - if (!gt->default_lrc[hwe->class]) { - init_data = empty_lrc_data(hwe); - if (!init_data) { - err = -ENOMEM; - goto err_lrc_finish; - } - } - /* * Init Per-Process of HW status Page, LRC / context state to known - * values + * values. If there's already a primed default_lrc, just copy it, otherwise + * it's the early submission to record the lrc: build a new empty one from + * scratch. */ map = __xe_lrc_pphwsp_map(lrc); - if (!init_data) { + if (gt->default_lrc[hwe->class]) { xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE); + lrc_size - LRC_PPHWSP_SIZE); } else { - xe_map_memcpy_to(xe, &map, 0, init_data, - xe_gt_lrc_size(gt, hwe->class)); + void *init_data = empty_lrc_data(hwe); + + if (!init_data) { + err = -ENOMEM; + goto err_lrc_finish; + } + + xe_map_memcpy_to(xe, &map, 0, init_data, lrc_size); kfree(init_data); } @@ -1097,7 +1286,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0); if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); + xe_lrc_write_ctx_reg(lrc, CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT); @@ -1123,7 +1312,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, map = __xe_lrc_start_seqno_map(lrc); xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - xe_lrc_setup_utilization(lrc); + err = setup_wa_bb(lrc, hwe); + if (err) + goto err_lrc_finish; + + err = setup_indirect_ctx(lrc, hwe); + if (err) + goto err_lrc_finish; return 0; @@ -1566,6 +1761,7 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH3D(3DSTATE_CLIP_MESH); MATCH3D(3DSTATE_SBE_MESH); MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); + MATCH3D(3DSTATE_COARSE_PIXEL); MATCH3D(3DSTATE_DRAWING_RECTANGLE); MATCH3D(3DSTATE_CHROMA_KEY); @@ -1716,7 +1912,7 @@ static const struct instr_state xe_hpg_svg_state[] = { { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, }; -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) +u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs) { struct xe_gt *gt = q->hwe->gt; struct xe_device *xe = gt_to_xe(gt); @@ -1751,7 +1947,7 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b if (!state_table) { xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); - return; + return cs; } for (int i = 0; i < state_table_size; i++) { @@ -1774,12 +1970,14 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b instr == CMD_3DSTATE_DRAWING_RECTANGLE) instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; - bb->cs[bb->len] = instr; + *cs = instr; if (!is_single_dw) - bb->cs[bb->len] |= (num_dw - 2); + *cs |= (num_dw - 2); - 
bb->len += num_dw; + cs += num_dw; } + + return cs; } struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) @@ -1789,9 +1987,6 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; - if (lrc->bo->vm) - xe_vm_get(lrc->bo->vm); - snapshot->context_desc = xe_lrc_ggtt_addr(lrc); snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc); snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); @@ -1803,7 +1998,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; + snapshot->lrc_size = lrc->size; snapshot->lrc_snapshot = NULL; snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); @@ -1813,14 +2008,12 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) { struct xe_bo *bo; - struct xe_vm *vm; struct iosys_map src; if (!snapshot) return; bo = snapshot->lrc_bo; - vm = bo->vm; snapshot->lrc_bo = NULL; snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); @@ -1840,8 +2033,6 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) xe_bo_unlock(bo); put_bo: xe_bo_put(bo); - if (vm) - xe_vm_put(vm); } void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) @@ -1894,14 +2085,9 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) return; kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) { - struct xe_vm *vm; - - vm = snapshot->lrc_bo->vm; + if (snapshot->lrc_bo) xe_bo_put(snapshot->lrc_bo); - if (vm) - xe_vm_put(vm); - } + kfree(snapshot); } diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index eb6e8de8c939..b6c8053c581b 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -112,7 +112,7 @@ void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, enum xe_engine_class); -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb); +u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs); struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc); void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index ae24cf6f8dd9..e9883706e004 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -22,14 +22,15 @@ struct xe_lrc { */ struct xe_bo *bo; - /** @size: size of lrc including any indirect ring state page */ + /** @size: size of the lrc and optional indirect ring state */ u32 size; /** @gt: gt which this LRC belongs to */ struct xe_gt *gt; /** @flags: LRC flags */ -#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 +#define XE_LRC_FLAG_INDIRECT_CTX 0x1 +#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x2 u32 flags; /** @refcount: ref count of this lrc */ @@ -53,9 +54,6 @@ struct xe_lrc { /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ u64 ctx_timestamp; - - /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */ - struct xe_bo *bb_per_ctx_bo; }; struct xe_lrc_snapshot; diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h index f62e0c8b67ab..8d67f6ba2d95 100644 --- a/drivers/gpu/drm/xe/xe_map.h +++ b/drivers/gpu/drm/xe/xe_map.h @@ -78,6 
+78,24 @@ static inline void xe_map_write32(struct xe_device *xe, struct iosys_map *map, iosys_map_wr(map__, offset__, type__, val__); \ }) +#define xe_map_rd_array(xe__, map__, index__, type__) \ + xe_map_rd(xe__, map__, (index__) * sizeof(type__), type__) + +#define xe_map_wr_array(xe__, map__, index__, type__, val__) \ + xe_map_wr(xe__, map__, (index__) * sizeof(type__), type__, val__) + +#define xe_map_rd_array_u32(xe__, map__, index__) \ + xe_map_rd_array(xe__, map__, index__, u32) + +#define xe_map_wr_array_u32(xe__, map__, index__, val__) \ + xe_map_wr_array(xe__, map__, index__, u32, val__) + +#define xe_map_rd_ring_u32(xe__, map__, index__, size__) \ + xe_map_rd_array_u32(xe__, map__, (index__) % (size__)) + +#define xe_map_wr_ring_u32(xe__, map__, index__, size__, val__) \ + xe_map_wr_array_u32(xe__, map__, (index__) % (size__), val__) + #define xe_map_rd_field(xe__, map__, struct_offset__, struct_type__, field__) ({ \ struct xe_device *__xe = xe__; \ xe_device_assert_mem_access(__xe); \ diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index 404fa2a456d5..49c45ec3e83c 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -86,7 +86,7 @@ static const char *guc_name(struct xe_guc *guc) * This object needs to be 4KiB aligned. * * - _`Interrupt Source Report Page`: this is the equivalent of the - * GEN11_GT_INTR_DWx registers, with each bit in those registers being + * GT_INTR_DWx registers, with each bit in those registers being * mapped to a byte here. The offsets are the same, just bytes instead * of bits. This object needs to be cacheline aligned. * diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 5a3e89022c38..ba1cff2e4cda 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -82,7 +82,7 @@ struct xe_migrate { * of the instruction. Subtracting the instruction header (1 dword) and * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values. */ -#define MAX_PTE_PER_SDI 0x1FE +#define MAX_PTE_PER_SDI 0x1FEU /** * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue. @@ -97,7 +97,7 @@ struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile) return tile->migrate->q; } -static void xe_migrate_fini(struct drm_device *dev, void *arg) +static void xe_migrate_fini(void *arg) { struct xe_migrate *m = arg; @@ -203,19 +203,18 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); /* Need to be sure everything fits in the first PT, or create more */ - xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M); + xe_tile_assert(tile, m->batch_base_ofs + xe_bo_size(batch) < SZ_2M); bo = xe_bo_create_pin_map(vm->xe, tile, vm, num_entries * XE_PAGE_SIZE, ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_PINNED | XE_BO_FLAG_PAGETABLE); if (IS_ERR(bo)) return PTR_ERR(bo); /* PT30 & PT31 reserved for 2M identity map */ - pt29_ofs = bo->size - 3 * XE_PAGE_SIZE; + pt29_ofs = xe_bo_size(bo) - 3 * XE_PAGE_SIZE; entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs, pat_index); xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); @@ -237,7 +236,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (!IS_DGFX(xe)) { /* Write out batch too */ m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; - for (i = 0; i < batch->size; + for (i = 0; i < xe_bo_size(batch); i += vm->flags & XE_VM_FLAG_64K ? 
XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = vm->pt_ops->pte_encode_bo(batch, i, @@ -248,13 +247,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, level++; } if (xe->info.has_usm) { - xe_tile_assert(tile, batch->size == SZ_1M); + xe_tile_assert(tile, xe_bo_size(batch) == SZ_1M); batch = tile->primary_gt->usm.bb_pool->bo; m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M; - xe_tile_assert(tile, batch->size == SZ_512K); + xe_tile_assert(tile, xe_bo_size(batch) == SZ_512K); - for (i = 0; i < batch->size; + for (i = 0; i < xe_bo_size(batch); i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = vm->pt_ops->pte_encode_bo(batch, i, @@ -307,7 +306,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Identity map the entire vram at 256GiB offset */ if (IS_DGFX(xe)) { - u64 pt30_ofs = bo->size - 2 * XE_PAGE_SIZE; + u64 pt30_ofs = xe_bo_size(bo) - 2 * XE_PAGE_SIZE; xe_migrate_program_identity(xe, vm, bo, map_ofs, IDENTITY_OFFSET, pat_index, pt30_ofs); @@ -322,7 +321,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION]; u64 vram_offset = IDENTITY_OFFSET + DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); - u64 pt31_ofs = bo->size - XE_PAGE_SIZE; + u64 pt31_ofs = xe_bo_size(bo) - XE_PAGE_SIZE; xe_assert(xe, xe->mem.vram.actual_physical_size <= (MAX_NUM_PTE - IDENTITY_OFFSET - IDENTITY_OFFSET / 2) * SZ_1G); @@ -401,7 +400,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) struct xe_vm *vm; int err; - m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL); + m = devm_kzalloc(xe->drm.dev, sizeof(*m), GFP_KERNEL); if (!m) return ERR_PTR(-ENOMEM); @@ -455,7 +454,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) might_lock(&m->job_mutex); fs_reclaim_release(GFP_KERNEL); - err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m); + err = devm_add_action_or_reset(xe->drm.dev, xe_migrate_fini, m); if (err) return ERR_PTR(err); @@ -670,6 +669,7 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, u32 mocs = 0; u32 tile_y = 0; + xe_gt_assert(gt, !(pitch & 3)); xe_gt_assert(gt, size / pitch <= S16_MAX); xe_gt_assert(gt, pitch / 4 <= S16_MAX); xe_gt_assert(gt, pitch <= U16_MAX); @@ -768,7 +768,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; - u64 size = src_bo->size; + u64 size = xe_bo_size(src_bo); struct xe_res_cursor src_it, dst_it, ccs_it; u64 src_L0_ofs, dst_L0_ofs; u32 src_L0_pt, dst_L0_pt; @@ -779,17 +779,19 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, bool dst_is_pltt = dst->mem_type == XE_PL_TT; bool src_is_vram = mem_type_is_vram(src->mem_type); bool dst_is_vram = mem_type_is_vram(dst->mem_type); + bool type_device = src_bo->ttm.type == ttm_bo_type_device; + bool needs_ccs_emit = type_device && xe_migrate_needs_ccs_emit(xe); bool copy_ccs = xe_device_has_flat_ccs(xe) && xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo); bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram); - bool use_comp_pat = xe_device_has_flat_ccs(xe) && + bool use_comp_pat = type_device && xe_device_has_flat_ccs(xe) && GRAPHICS_VER(xe) >= 20 && src_is_vram && !dst_is_vram; /* Copying CCS between two different BOs is not supported yet. 
*/ if (XE_WARN_ON(copy_ccs && src_bo != dst_bo)) return ERR_PTR(-EINVAL); - if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size)) + if (src_bo != dst_bo && XE_WARN_ON(xe_bo_size(src_bo) != xe_bo_size(dst_bo))) return ERR_PTR(-EINVAL); if (!src_is_vram) @@ -839,6 +841,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, avail_pts, avail_pts); if (copy_system_ccs) { + xe_assert(xe, type_device); ccs_size = xe_device_ccs_bytes(xe, src_L0); batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs, &ccs_pt, 0, @@ -849,7 +852,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, /* Add copy commands size here */ batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) + - ((xe_migrate_needs_ccs_emit(xe) ? EMIT_COPY_CCS_DW : 0)); + ((needs_ccs_emit ? EMIT_COPY_CCS_DW : 0)); bb = xe_bb_new(gt, batch_size, usm); if (IS_ERR(bb)) { @@ -860,7 +863,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); else - emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat, &src_it, src_L0, src); if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) @@ -878,7 +881,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (!copy_only_ccs) emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE); - if (xe_migrate_needs_ccs_emit(xe)) + if (needs_ccs_emit) flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, IS_DGFX(xe) ? src_is_vram : src_is_pltt, dst_L0_ofs, @@ -1061,7 +1064,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_device *xe = gt_to_xe(gt); bool clear_only_system_ccs = false; struct dma_fence *fence = NULL; - u64 size = bo->size; + u64 size = xe_bo_size(bo); struct xe_res_cursor src_it; struct ttm_resource *src = dst; int err; @@ -1073,9 +1076,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, clear_only_system_ccs = true; if (!clear_vram) - xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); + xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &src_it); else - xe_res_first(src, 0, bo->size, &src_it); + xe_res_first(src, 0, xe_bo_size(bo), &src_it); while (size) { u64 clear_L0_ofs; @@ -1404,7 +1407,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m, if (idx == chunk) goto next_cmd; - xe_tile_assert(tile, pt_bo->size == SZ_4K); + xe_tile_assert(tile, xe_bo_size(pt_bo) == SZ_4K); /* Map a PT at most once */ if (pt_bo->update_index < 0) @@ -1550,15 +1553,17 @@ static u32 pte_update_cmd_size(u64 size) u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE); XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER); + /* * MI_STORE_DATA_IMM command is used to update page table. Each - * instruction can update maximumly 0x1ff pte entries. To update - * n (n <= 0x1ff) pte entries, we need: - * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) - * 2 dword for the page table's physical location - * 2*n dword for value of pte to fill (each pte entry is 2 dwords) + * instruction can update maximumly MAX_PTE_PER_SDI pte entries. 
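 * (Worked example, editor's addition: with 4 KiB pages a 2 MiB update covers
 *  512 PTEs, i.e. DIV_ROUND_UP(512, 510) = 2 MI_STORE_DATA_IMM packets, for a
 *  total of (1 + 2) * 2 + 2 * 512 = 1030 dwords, per the accounting below.)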
To + * update n (n <= MAX_PTE_PER_SDI) pte entries, we need: + * + * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) + * - 2 dword for the page table's physical location + * - 2*n dword for value of pte to fill (each pte entry is 2 dwords) */ - num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff); + num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI); num_dword += entries * 2; return num_dword; @@ -1574,7 +1579,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { - u32 chunk = min(0x1ffU, ptes); + u32 chunk = min(MAX_PTE_PER_SDI, ptes); bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = pt_offset; @@ -1601,55 +1606,63 @@ enum xe_migrate_copy_dir { XE_MIGRATE_COPY_TO_SRAM, }; +#define XE_CACHELINE_BYTES 64ull +#define XE_CACHELINE_MASK (XE_CACHELINE_BYTES - 1) + static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, - unsigned long npages, + unsigned long len, + unsigned long sram_offset, dma_addr_t *sram_addr, u64 vram_addr, const enum xe_migrate_copy_dir dir) { struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); + bool use_usm_batch = xe->info.has_usm; struct dma_fence *fence = NULL; u32 batch_size = 2; u64 src_L0_ofs, dst_L0_ofs; - u64 round_update_size; struct xe_sched_job *job; struct xe_bb *bb; u32 update_idx, pt_slot = 0; + unsigned long npages = DIV_ROUND_UP(len + sram_offset, PAGE_SIZE); + unsigned int pitch = len >= PAGE_SIZE && !(len & ~PAGE_MASK) ? + PAGE_SIZE : 4; int err; - if (npages * PAGE_SIZE > MAX_PREEMPTDISABLE_TRANSFER) - return ERR_PTR(-EINVAL); + if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) || + (sram_offset | vram_addr) & XE_CACHELINE_MASK)) + return ERR_PTR(-EOPNOTSUPP); - round_update_size = npages * PAGE_SIZE; - batch_size += pte_update_cmd_size(round_update_size); + xe_assert(xe, npages * PAGE_SIZE <= MAX_PREEMPTDISABLE_TRANSFER); + + batch_size += pte_update_cmd_size(len); batch_size += EMIT_COPY_DW; - bb = xe_bb_new(gt, batch_size, true); + bb = xe_bb_new(gt, batch_size, use_usm_batch); if (IS_ERR(bb)) { err = PTR_ERR(bb); return ERR_PTR(err); } build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE, - sram_addr, round_update_size); + sram_addr, len + sram_offset); if (dir == XE_MIGRATE_COPY_TO_VRAM) { - src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0); + src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false); } else { src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false); - dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0); + dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; } bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, round_update_size, - XE_PAGE_SIZE); + emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, len, pitch); job = xe_bb_create_migration_job(m->q, bb, - xe_migrate_batch_base(m, true), + xe_migrate_batch_base(m, use_usm_batch), update_idx); if (IS_ERR(job)) { err = PTR_ERR(job); @@ -1694,7 +1707,7 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, dma_addr_t *src_addr, u64 dst_addr) { - return xe_migrate_vram(m, npages, src_addr, dst_addr, + return xe_migrate_vram(m, npages * PAGE_SIZE, 0, src_addr, dst_addr, XE_MIGRATE_COPY_TO_VRAM); } @@ -1715,10 +1728,193 @@ struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, u64 src_addr, dma_addr_t *dst_addr) { - return xe_migrate_vram(m, npages, dst_addr, src_addr, + return xe_migrate_vram(m, npages * PAGE_SIZE, 
0, dst_addr, src_addr, XE_MIGRATE_COPY_TO_SRAM); } +static void xe_migrate_dma_unmap(struct xe_device *xe, dma_addr_t *dma_addr, + int len, int write) +{ + unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE); + + for (i = 0; i < npages; ++i) { + if (!dma_addr[i]) + break; + + dma_unmap_page(xe->drm.dev, dma_addr[i], PAGE_SIZE, + write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); + } + kfree(dma_addr); +} + +static dma_addr_t *xe_migrate_dma_map(struct xe_device *xe, + void *buf, int len, int write) +{ + dma_addr_t *dma_addr; + unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE); + + dma_addr = kcalloc(npages, sizeof(*dma_addr), GFP_KERNEL); + if (!dma_addr) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < npages; ++i) { + dma_addr_t addr; + struct page *page; + + if (is_vmalloc_addr(buf)) + page = vmalloc_to_page(buf); + else + page = virt_to_page(buf); + + addr = dma_map_page(xe->drm.dev, + page, 0, PAGE_SIZE, + write ? DMA_TO_DEVICE : + DMA_FROM_DEVICE); + if (dma_mapping_error(xe->drm.dev, addr)) + goto err_fault; + + dma_addr[i] = addr; + buf += PAGE_SIZE; + } + + return dma_addr; + +err_fault: + xe_migrate_dma_unmap(xe, dma_addr, len, write); + return ERR_PTR(-EFAULT); +} + +/** + * xe_migrate_access_memory - Access memory of a BO via GPU + * + * @m: The migration context. + * @bo: buffer object + * @offset: access offset into buffer object + * @buf: pointer to caller memory to read into or write from + * @len: length of access + * @write: write access + * + * Access memory of a BO via GPU either reading in or writing from a passed in + * pointer. Pointer is dma mapped for GPU access and GPU commands are issued to + * read to or write from pointer. + * + * Returns: + * 0 if successful, negative error code on failure. + */ +int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, + unsigned long offset, void *buf, int len, + int write) +{ + struct xe_tile *tile = m->tile; + struct xe_device *xe = tile_to_xe(tile); + struct xe_res_cursor cursor; + struct dma_fence *fence = NULL; + dma_addr_t *dma_addr; + unsigned long page_offset = (unsigned long)buf & ~PAGE_MASK; + int bytes_left = len, current_page = 0; + void *orig_buf = buf; + + xe_bo_assert_held(bo); + + /* Use bounce buffer for small access and unaligned access */ + if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) || + !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) { + int buf_offset = 0; + + /* + * Less than ideal for large unaligned access but this should be + * fairly rare, can fixup if this becomes common. 
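 * The loop below is a cacheline-granular read-modify-write: the aligned
 * cacheline covering the target bytes is first read into a bounce buffer,
 * the caller's bytes are then patched in (for writes) or copied out (for
 * reads), and the loop advances until the request is exhausted.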
+ */ + do { + u8 bounce[XE_CACHELINE_BYTES]; + void *ptr = (void *)bounce; + int err; + int copy_bytes = min_t(int, bytes_left, + XE_CACHELINE_BYTES - + (offset & XE_CACHELINE_MASK)); + int ptr_offset = offset & XE_CACHELINE_MASK; + + err = xe_migrate_access_memory(m, bo, + offset & + ~XE_CACHELINE_MASK, + (void *)ptr, + sizeof(bounce), 0); + if (err) + return err; + + if (write) { + memcpy(ptr + ptr_offset, buf + buf_offset, copy_bytes); + + err = xe_migrate_access_memory(m, bo, + offset & ~XE_CACHELINE_MASK, + (void *)ptr, + sizeof(bounce), write); + if (err) + return err; + } else { + memcpy(buf + buf_offset, ptr + ptr_offset, + copy_bytes); + } + + bytes_left -= copy_bytes; + buf_offset += copy_bytes; + offset += copy_bytes; + } while (bytes_left); + + return 0; + } + + dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write); + if (IS_ERR(dma_addr)) + return PTR_ERR(dma_addr); + + xe_res_first(bo->ttm.resource, offset, xe_bo_size(bo) - offset, &cursor); + + do { + struct dma_fence *__fence; + u64 vram_addr = vram_region_gpu_offset(bo->ttm.resource) + + cursor.start; + int current_bytes; + + if (cursor.size > MAX_PREEMPTDISABLE_TRANSFER) + current_bytes = min_t(int, bytes_left, + MAX_PREEMPTDISABLE_TRANSFER); + else + current_bytes = min_t(int, bytes_left, cursor.size); + + if (fence) + dma_fence_put(fence); + + __fence = xe_migrate_vram(m, current_bytes, + (unsigned long)buf & ~PAGE_MASK, + dma_addr + current_page, + vram_addr, write ? + XE_MIGRATE_COPY_TO_VRAM : + XE_MIGRATE_COPY_TO_SRAM); + if (IS_ERR(__fence)) { + if (fence) + dma_fence_wait(fence, false); + fence = __fence; + goto out_err; + } + fence = __fence; + + buf += current_bytes; + offset += current_bytes; + current_page = (int)(buf - orig_buf) / PAGE_SIZE; + bytes_left -= current_bytes; + if (bytes_left) + xe_res_next(&cursor, current_bytes); + } while (bytes_left); + + dma_fence_wait(fence, false); + dma_fence_put(fence); + +out_err: + xe_migrate_dma_unmap(xe, dma_addr, len + page_offset, write); + return IS_ERR(fence) ? 
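/*
 * [Editorial usage sketch, not part of the patch.] A hypothetical
 * caller reading the first 256 bytes of a VRAM-placed BO into a kernel
 * buffer via xe_migrate_access_memory(). The BO lock must be held per
 * the xe_bo_assert_held() above; 256 is a multiple of
 * XE_CACHELINE_BYTES and a kmalloc'ed buffer is used, so the aligned
 * fast path is normally taken instead of the bounce-buffer path.
 */
static int sketch_peek_bo(struct xe_migrate *m, struct xe_bo *bo)
{
	void *buf = kzalloc(256, GFP_KERNEL);
	int err;

	if (!buf)
		return -ENOMEM;

	err = xe_bo_lock(bo, false);
	if (!err) {
		/* write == 0: copy from the BO (VRAM) into buf */
		err = xe_migrate_access_memory(m, bo, 0, buf, 256, 0);
		xe_bo_unlock(bo);
	}

	kfree(buf);
	return err;
}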
PTR_ERR(fence) : 0; +} + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) #include "tests/xe_migrate.c" #endif diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 6ff9a963425c..fb9839c1bae0 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -112,6 +112,10 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct ttm_resource *dst, bool copy_only_ccs); +int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, + unsigned long offset, void *buf, int len, + int write); + #define XE_MIGRATE_CLEAR_FLAG_BO_DATA BIT(0) #define XE_MIGRATE_CLEAR_FLAG_CCS_DATA BIT(1) #define XE_MIGRATE_CLEAR_FLAG_FULL (XE_MIGRATE_CLEAR_FLAG_BO_DATA | \ diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 46301f341773..e4db8d58ea2d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -22,6 +22,9 @@ #include "xe_macros.h" #include "xe_sriov.h" #include "xe_trace.h" +#include "xe_wa.h" + +#include "generated/xe_device_wa_oob.h" static void tiles_fini(void *arg) { @@ -55,6 +58,7 @@ static void tiles_fini(void *arg) static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) { struct xe_tile *tile; + struct xe_gt *gt; u8 id; /* @@ -67,7 +71,7 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) /* Possibly override number of tile based on configuration register */ if (!xe->info.skip_mtcfg) { struct xe_mmio *mmio = xe_root_tile_mmio(xe); - u8 tile_count; + u8 tile_count, gt_count; u32 mtcfg; /* @@ -84,12 +88,15 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) xe->info.tile_count = tile_count; /* - * FIXME: Needs some work for standalone media, but - * should be impossible with multi-tile for now: - * multi-tile platform with standalone media doesn't - * exist + * We've already setup gt_count according to the full + * tile count. Re-calculate it to only include the GTs + * that belong to the remaining tile(s). 
*/ - xe->info.gt_count = xe->info.tile_count; + gt_count = 0; + for_each_gt(gt, xe, id) + if (gt->info.id < tile_count * xe->info.max_gt_per_tile) + gt_count++; + xe->info.gt_count = gt_count; } } @@ -138,6 +145,7 @@ int xe_mmio_probe_early(struct xe_device *xe) return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe); } +ALLOW_ERROR_INJECTION(xe_mmio_probe_early, ERRNO); /* See xe_pci_probe() */ /** * xe_mmio_init() - Initialize an MMIO instance @@ -162,7 +170,7 @@ static void mmio_flush_pending_writes(struct xe_mmio *mmio) #define DUMMY_REG_OFFSET 0x130030 int i; - if (mmio->tile->xe->info.platform != XE_LUNARLAKE) + if (!XE_DEVICE_WA(mmio->tile->xe, 15015404425)) return; /* 4 dummy writes */ @@ -175,7 +183,6 @@ u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u8 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); val = readb(mmio->regs + addr); @@ -189,7 +196,6 @@ u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u16 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); val = readw(mmio->regs + addr); @@ -204,8 +210,9 @@ void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val) trace_xe_reg_rw(mmio, true, addr, val, sizeof(val)); - if (!reg.vf && mmio->sriov_vf_gt) - xe_gt_sriov_vf_write32(mmio->sriov_vf_gt, reg, val); + if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe)) + xe_gt_sriov_vf_write32(mmio->sriov_vf_gt ?: + mmio->tile->primary_gt, reg, val); else writel(val, mmio->regs + addr); } @@ -215,11 +222,11 @@ u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u32 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); - if (!reg.vf && mmio->sriov_vf_gt) - val = xe_gt_sriov_vf_read32(mmio->sriov_vf_gt, reg); + if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe)) + val = xe_gt_sriov_vf_read32(mmio->sriov_vf_gt ?: + mmio->tile->primary_gt, reg); else val = readl(mmio->regs + addr); diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e861c694f336..107ffe87808c 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -11,35 +11,52 @@ #include <drm/drm_module.h> #include "xe_drv.h" +#include "xe_configfs.h" #include "xe_hw_fence.h" #include "xe_pci.h" #include "xe_pm.h" #include "xe_observation.h" #include "xe_sched_job.h" +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define DEFAULT_GUC_LOG_LEVEL 3 +#else +#define DEFAULT_GUC_LOG_LEVEL 1 +#endif + +#define DEFAULT_PROBE_DISPLAY true +#define DEFAULT_VRAM_BAR_SIZE 0 +#define DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE +#define DEFAULT_WEDGED_MODE 1 +#define DEFAULT_SVM_NOTIFIER_SIZE 512 + struct xe_modparam xe_modparam = { - .probe_display = true, - .guc_log_level = 3, - .force_probe = CONFIG_DRM_XE_FORCE_PROBE, - .wedged_mode = 1, - .svm_notifier_size = 512, + .probe_display = DEFAULT_PROBE_DISPLAY, + .guc_log_level = DEFAULT_GUC_LOG_LEVEL, + .force_probe = DEFAULT_FORCE_PROBE, + .wedged_mode = DEFAULT_WEDGED_MODE, + .svm_notifier_size = DEFAULT_SVM_NOTIFIER_SIZE, /* the rest are 0 by default */ }; module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600); -MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2"); +MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size in MiB, must be power of 2 " + "[default=" __stringify(DEFAULT_SVM_NOTIFIER_SIZE) "]"); module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, 
bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); -MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)"); +MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched " + "[default=" __stringify(DEFAULT_PROBE_DISPLAY) "])"); -module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600); -MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)"); +module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, int, 0600); +MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size in MiB (<0=disable-resize, 0=max-needed-size, >0=force-size " + "[default=" __stringify(DEFAULT_VRAM_BAR_SIZE) "])"); module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600); -MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)"); +MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1=normal, 2..5=verbose-levels " + "[default=" __stringify(DEFAULT_GUC_LOG_LEVEL) "])"); module_param_named_unsafe(guc_firmware_path, xe_modparam.guc_firmware_path, charp, 0400); MODULE_PARM_DESC(guc_firmware_path, @@ -55,7 +72,8 @@ MODULE_PARM_DESC(gsc_firmware_path, module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400); MODULE_PARM_DESC(force_probe, - "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details."); + "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details " + "[default=" DEFAULT_FORCE_PROBE "])"); #ifdef CONFIG_PCI_IOV module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400); @@ -66,7 +84,8 @@ MODULE_PARM_DESC(max_vfs, module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); MODULE_PARM_DESC(wedged_mode, - "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); + "Module's default policy for the wedged mode (0=never, 1=upon-critical-errors, 2=upon-any-hang " + "[default=" __stringify(DEFAULT_WEDGED_MODE) "])"); static int xe_check_nomodeset(void) { @@ -86,6 +105,10 @@ static const struct init_funcs init_funcs[] = { .init = xe_check_nomodeset, }, { + .init = xe_configfs_init, + .exit = xe_configfs_exit, + }, + { .init = xe_hw_fence_module_init, .exit = xe_hw_fence_module_exit, }, diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c new file mode 100644 index 000000000000..61b0a1531a53 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2019-2025, Intel Corporation. All rights reserved. 
+ */ + +#include <linux/intel_dg_nvm_aux.h> +#include <linux/pci.h> + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_mmio.h" +#include "xe_nvm.h" +#include "regs/xe_gsc_regs.h" +#include "xe_sriov.h" + +#define GEN12_GUNIT_NVM_BASE 0x00102040 +#define GEN12_DEBUG_NVM_BASE 0x00101018 + +#define GEN12_CNTL_PROTECTED_NVM_REG 0x0010100C + +#define GEN12_GUNIT_NVM_SIZE 0x80 +#define GEN12_DEBUG_NVM_SIZE 0x4 + +#define NVM_NON_POSTED_ERASE_CHICKEN_BIT BIT(13) + +#define HECI_FW_STATUS_2_NVM_ACCESS_MODE BIT(3) + +static const struct intel_dg_nvm_region regions[INTEL_DG_NVM_REGIONS] = { + [0] = { .name = "DESCRIPTOR", }, + [2] = { .name = "GSC", }, + [9] = { .name = "PADDING", }, + [11] = { .name = "OptionROM", }, + [12] = { .name = "DAM", }, +}; + +static void xe_nvm_release_dev(struct device *dev) +{ +} + +static bool xe_nvm_non_posted_erase(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + + if (xe->info.platform != XE_BATTLEMAGE) + return false; + return !(xe_mmio_read32(>->mmio, XE_REG(GEN12_CNTL_PROTECTED_NVM_REG)) & + NVM_NON_POSTED_ERASE_CHICKEN_BIT); +} + +static bool xe_nvm_writable_override(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + bool writable_override; + resource_size_t base; + + switch (xe->info.platform) { + case XE_BATTLEMAGE: + base = DG2_GSC_HECI2_BASE; + break; + case XE_PVC: + base = PVC_GSC_HECI2_BASE; + break; + case XE_DG2: + base = DG2_GSC_HECI2_BASE; + break; + case XE_DG1: + base = DG1_GSC_HECI2_BASE; + break; + default: + drm_err(&xe->drm, "Unknown platform\n"); + return true; + } + + writable_override = + !(xe_mmio_read32(>->mmio, HECI_FWSTS2(base)) & + HECI_FW_STATUS_2_NVM_ACCESS_MODE); + if (writable_override) + drm_info(&xe->drm, "NVM access overridden by jumper\n"); + return writable_override; +} + +int xe_nvm_init(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct auxiliary_device *aux_dev; + struct intel_dg_nvm_dev *nvm; + int ret; + + if (!xe->info.has_gsc_nvm) + return 0; + + /* No access to internal NVM from VFs */ + if (IS_SRIOV_VF(xe)) + return 0; + + /* Nvm pointer should be NULL here */ + if (WARN_ON(xe->nvm)) + return -EFAULT; + + xe->nvm = kzalloc(sizeof(*nvm), GFP_KERNEL); + if (!xe->nvm) + return -ENOMEM; + + nvm = xe->nvm; + + nvm->writable_override = xe_nvm_writable_override(xe); + nvm->non_posted_erase = xe_nvm_non_posted_erase(xe); + nvm->bar.parent = &pdev->resource[0]; + nvm->bar.start = GEN12_GUNIT_NVM_BASE + pdev->resource[0].start; + nvm->bar.end = nvm->bar.start + GEN12_GUNIT_NVM_SIZE - 1; + nvm->bar.flags = IORESOURCE_MEM; + nvm->bar.desc = IORES_DESC_NONE; + nvm->regions = regions; + + nvm->bar2.parent = &pdev->resource[0]; + nvm->bar2.start = GEN12_DEBUG_NVM_BASE + pdev->resource[0].start; + nvm->bar2.end = nvm->bar2.start + GEN12_DEBUG_NVM_SIZE - 1; + nvm->bar2.flags = IORESOURCE_MEM; + nvm->bar2.desc = IORES_DESC_NONE; + + aux_dev = &nvm->aux_dev; + + aux_dev->name = "nvm"; + aux_dev->id = (pci_domain_nr(pdev->bus) << 16) | pci_dev_id(pdev); + aux_dev->dev.parent = &pdev->dev; + aux_dev->dev.release = xe_nvm_release_dev; + + ret = auxiliary_device_init(aux_dev); + if (ret) { + drm_err(&xe->drm, "xe-nvm aux init failed %d\n", ret); + goto err; + } + + ret = auxiliary_device_add(aux_dev); + if (ret) { + drm_err(&xe->drm, "xe-nvm aux add failed %d\n", ret); + auxiliary_device_uninit(aux_dev); + goto err; + } + return 0; + +err: + kfree(nvm); + xe->nvm = NULL; + return ret; +} + +void xe_nvm_fini(struct xe_device *xe) +{ + struct 
intel_dg_nvm_dev *nvm = xe->nvm; + + if (!xe->info.has_gsc_nvm) + return; + + /* No access to internal NVM from VFs */ + if (IS_SRIOV_VF(xe)) + return; + + /* Nvm pointer should not be NULL here */ + if (WARN_ON(!nvm)) + return; + + auxiliary_device_delete(&nvm->aux_dev); + auxiliary_device_uninit(&nvm->aux_dev); + kfree(nvm); + xe->nvm = NULL; +} diff --git a/drivers/gpu/drm/xe/xe_nvm.h b/drivers/gpu/drm/xe/xe_nvm.h new file mode 100644 index 000000000000..7f3d5f57bed0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_nvm.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2019-2025 Intel Corporation. All rights reserved. + */ + +#ifndef __XE_NVM_H__ +#define __XE_NVM_H__ + +struct xe_device; + +int xe_nvm_init(struct xe_device *xe); + +void xe_nvm_fini(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 7ffc98f67e69..5729e7d3e335 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -43,6 +43,12 @@ #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) #define XE_OA_UNIT_INVALID U32_MAX +enum xe_oam_unit_type { + XE_OAM_UNIT_SAG, + XE_OAM_UNIT_SCMI_0, + XE_OAM_UNIT_SCMI_1, +}; + enum xe_oa_submit_deps { XE_OA_SUBMIT_NO_DEPS, XE_OA_SUBMIT_ADD_DEPS, @@ -77,7 +83,7 @@ struct xe_oa_config { struct xe_oa_open_param { struct xe_file *xef; - u32 oa_unit_id; + struct xe_oa_unit *oa_unit; bool sample; u32 metric_set; enum xe_oa_format_name oa_format; @@ -194,7 +200,7 @@ static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *l static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) { - return &stream->hwe->oa_unit->regs; + return &stream->oa_unit->regs; } static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) @@ -397,7 +403,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - int size_exponent = __ffs(stream->oa_buffer.bo->size); + int size_exponent = __ffs(xe_bo_size(stream->oa_buffer.bo)); u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; struct xe_mmio *mmio = &stream->gt->mmio; unsigned long flags; @@ -429,7 +435,7 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ - memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); + memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo)); } static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) @@ -454,7 +460,7 @@ static u32 __oa_ccs_select(struct xe_oa_stream *stream) static u32 __oactrl_used_bits(struct xe_oa_stream *stream) { - return stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? + return stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? OAG_OACONTROL_USED_BITS : OAM_OACONTROL_USED_BITS; } @@ -475,7 +481,7 @@ static void xe_oa_enable(struct xe_oa_stream *stream) __oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE; if (GRAPHICS_VER(stream->oa->xe) >= 20 && - stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) val |= OAG_OACONTROL_OA_PES_DISAG_EN; xe_mmio_rmw32(&stream->gt->mmio, regs->oa_ctrl, __oactrl_used_bits(stream), val); @@ -838,11 +844,16 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) /* Reset PMON Enable to save power. 
*/ xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0); + + if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM || + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) && + GRAPHICS_VER(stream->oa->xe) >= 30) + xe_mmio_rmw32(mmio, OAM_COMPRESSION_T3_CONTROL, OAM_LAT_MEASURE_ENABLE, 0); } static void xe_oa_stream_destroy(struct xe_oa_stream *stream) { - struct xe_oa_unit *u = stream->hwe->oa_unit; + struct xe_oa_unit *u = stream->oa_unit; struct xe_gt *gt = stream->hwe->gt; if (WARN_ON(stream != u->exclusive_stream)) @@ -1054,7 +1065,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) static u32 oag_buf_size_select(const struct xe_oa_stream *stream) { return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, - stream->oa_buffer.bo->size > SZ_16M ? + xe_bo_size(stream->oa_buffer.bo) > SZ_16M ? OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); } @@ -1105,9 +1116,13 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) */ sqcnt1 = SQCNT1_PMON_ENABLE | (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); - xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1); + if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM || + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) && + GRAPHICS_VER(stream->oa->xe) >= 30) + xe_mmio_rmw32(mmio, OAM_COMPRESSION_T3_CONTROL, 0, OAM_LAT_MEASURE_ENABLE); + /* Configure OAR/OAC */ if (stream->exec_q) { ret = xe_oa_configure_oa_context(stream, true); @@ -1139,14 +1154,31 @@ static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *n return -EINVAL; } +static struct xe_oa_unit *xe_oa_lookup_oa_unit(struct xe_oa *oa, u32 oa_unit_id) +{ + struct xe_gt *gt; + int gt_id, i; + + for_each_gt(gt, oa->xe, gt_id) { + for (i = 0; i < gt->oa.num_oa_units; i++) { + struct xe_oa_unit *u = >->oa.oa_unit[i]; + + if (u->oa_unit_id == oa_unit_id) + return u; + } + } + + return NULL; +} + static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, struct xe_oa_open_param *param) { - if (value >= oa->oa_unit_ids) { + param->oa_unit = xe_oa_lookup_oa_unit(oa, value); + if (!param->oa_unit) { drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); return -EINVAL; } - param->oa_unit_id = value; return 0; } @@ -1301,7 +1333,7 @@ static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_fr int err; u32 idx; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(oa->xe, err)) return -EFAULT; @@ -1338,7 +1370,7 @@ static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from fro if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(oa->xe, err)) return -EFAULT; @@ -1550,7 +1582,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) { - struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, }; + struct drm_xe_oa_stream_info info = { .oa_buf_size = xe_bo_size(stream->oa_buffer.bo), }; void __user *uaddr = (void __user *)arg; if (copy_to_user(uaddr, &info, sizeof(info))) @@ -1636,7 +1668,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) } /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ - if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) { + if (vma->vm_end - vma->vm_start != 
xe_bo_size(stream->oa_buffer.bo)) { drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); return -EINVAL; } @@ -1677,13 +1709,13 @@ static const struct file_operations xe_oa_fops = { static int xe_oa_stream_init(struct xe_oa_stream *stream, struct xe_oa_open_param *param) { - struct xe_oa_unit *u = param->hwe->oa_unit; struct xe_gt *gt = param->hwe->gt; unsigned int fw_ref; int ret; stream->exec_q = param->exec_q; stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS; + stream->oa_unit = param->oa_unit; stream->hwe = param->hwe; stream->gt = stream->hwe->gt; stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; @@ -1704,7 +1736,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, * buffer whose size, circ_size, is a multiple of the report size */ if (GRAPHICS_VER(stream->oa->xe) >= 20 && - stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) stream->oa_buffer.circ_size = param->oa_buffer_size - param->oa_buffer_size % stream->oa_buffer.format->size; @@ -1762,7 +1794,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n", stream->oa_config->uuid); - WRITE_ONCE(u->exclusive_stream, stream); + WRITE_ONCE(stream->oa_unit->exclusive_stream, stream); hrtimer_setup(&stream->poll_check_timer, xe_oa_poll_check_timer_cb, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -1798,7 +1830,7 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, int ret; /* We currently only allow exclusive access */ - if (param->hwe->oa_unit->exclusive_stream) { + if (param->oa_unit->exclusive_stream) { drm_dbg(&oa->xe->drm, "OA unit already in use\n"); ret = -EBUSY; goto exit; @@ -1874,13 +1906,14 @@ static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent) return div_u64(nom + den - 1, den); } -static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) +static bool oa_unit_supports_oa_format(struct xe_oa_open_param *param, int type) { - switch (hwe->oa_unit->type) { + switch (param->oa_unit->type) { case DRM_XE_OA_UNIT_TYPE_OAG: return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR || type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; case DRM_XE_OA_UNIT_TYPE_OAM: + case DRM_XE_OA_UNIT_TYPE_OAM_SAG: return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; default: return false; @@ -1899,37 +1932,48 @@ u16 xe_oa_unit_id(struct xe_hw_engine *hwe) hwe->oa_unit->oa_unit_id : U16_MAX; } +/* A hwe must be assigned to stream/oa_unit for batch submissions */ static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) { - struct xe_gt *gt; - int i, ret = 0; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int ret = 0; + + /* If not provided, OA unit defaults to OA unit 0 as per uapi */ + if (!param->oa_unit) + param->oa_unit = &xe_root_mmio_gt(oa->xe)->oa.oa_unit[0]; + /* When we have an exec_q, get hwe from the exec_q */ if (param->exec_q) { - /* When we have an exec_q, get hwe from the exec_q */ param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class, param->engine_instance, true); - } else { - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - - /* Else just get the first hwe attached to the oa unit */ - for_each_gt(gt, oa->xe, i) { - for_each_hw_engine(hwe, gt, id) { - if (xe_oa_unit_id(hwe) == param->oa_unit_id) { - param->hwe = hwe; - goto out; - } - } - } + if (!param->hwe || param->hwe->oa_unit != 
param->oa_unit) + goto err; + goto out; } -out: - if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) { - drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", - param->exec_q ? param->exec_q->class : -1, - param->engine_instance, param->oa_unit_id); - ret = -EINVAL; + + /* Else just get the first hwe attached to the oa unit */ + for_each_hw_engine(hwe, param->oa_unit->gt, id) { + if (hwe->oa_unit == param->oa_unit) { + param->hwe = hwe; + goto out; + } } + /* If we still didn't find a hwe, just get one with a valid oa_unit from the same gt */ + for_each_hw_engine(hwe, param->oa_unit->gt, id) { + if (!hwe->oa_unit) + continue; + + param->hwe = hwe; + goto out; + } +err: + drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", + param->exec_q ? param->exec_q->class : -1, + param->engine_instance, param->oa_unit->oa_unit_id); + ret = -EINVAL; +out: return ret; } @@ -2007,7 +2051,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f f = &oa->oa_formats[param.oa_format]; if (!param.oa_format || !f->size || - !engine_supports_oa_format(param.hwe, f->type)) { + !oa_unit_supports_oa_format(¶m, f->type)) { drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n", param.oa_format, f->type, f->size, param.hwe->class); ret = -EINVAL; @@ -2155,6 +2199,7 @@ static const struct xe_mmio_range gen12_oa_mux_regs[] = { static const struct xe_mmio_range xe2_oa_mux_regs[] = { { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ + { .start = 0xB01C, .end = 0xB01C }, /* LNCF_MISC_CONFIG_REGISTER0 */ { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ { .start = 0xD0E0, .end = 0xD0F4 }, /* VISACTL */ { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ @@ -2221,6 +2266,7 @@ addr_err: kfree(oa_regs); return ERR_PTR(err); } +ALLOW_ERROR_INJECTION(xe_oa_alloc_regs, ERRNO); static ssize_t show_dynamic_id(struct kobject *kobj, struct kobj_attribute *attr, @@ -2280,7 +2326,7 @@ int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *fi return -EACCES; } - err = __copy_from_user(¶m, u64_to_user_ptr(data), sizeof(param)); + err = copy_from_user(¶m, u64_to_user_ptr(data), sizeof(param)); if (XE_IOCTL_DBG(oa->xe, err)) return -EFAULT; @@ -2447,20 +2493,38 @@ int xe_oa_register(struct xe_device *xe) static u32 num_oa_units_per_gt(struct xe_gt *gt) { - return 1; + if (xe_gt_is_main_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) + return 1; + else if (!IS_DGFX(gt_to_xe(gt))) + return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */ + else + return XE_OAM_UNIT_SCMI_1 + 1; /* SAG + SCMI_0 + SCMI_1 */ } static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) { - if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) { - /* - * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices - * within the gt use the same OAM. All MTL/LNL SKUs list 1 SA MEDIA - */ - xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA); + if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) < 1270) + return XE_OA_UNIT_INVALID; + + xe_gt_WARN_ON(hwe->gt, xe_gt_is_main_type(hwe->gt)); + if (GRAPHICS_VER(gt_to_xe(hwe->gt)) < 20) return 0; - } + /* + * XE_OAM_UNIT_SAG has only GSCCS attached to it, but only on some platforms. Also + * GSCCS cannot be used to submit batches to program the OAM unit. Therefore we don't + * assign an OA unit to GSCCS. This means that XE_OAM_UNIT_SAG is exposed as an OA + * unit without attached engines. 
Fused off engines can also result in oa_unit's with + * num_engines == 0. OA streams can be opened on all OA units. + */ + else if (hwe->engine_id == XE_HW_ENGINE_GSCCS0) + return XE_OA_UNIT_INVALID; + else if (!IS_DGFX(gt_to_xe(hwe->gt))) + return XE_OAM_UNIT_SCMI_0; + else if (hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE) + return (hwe->instance / 2 & 0x1) + 1; + else if (hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE) + return (hwe->instance & 0x1) + 1; return XE_OA_UNIT_INVALID; } @@ -2474,6 +2538,7 @@ static u32 __hwe_oa_unit(struct xe_hw_engine *hwe) case XE_ENGINE_CLASS_VIDEO_DECODE: case XE_ENGINE_CLASS_VIDEO_ENHANCE: + case XE_ENGINE_CLASS_OTHER: return __hwe_oam_unit(hwe); default: @@ -2513,20 +2578,29 @@ static struct xe_oa_regs __oag_regs(void) static void __xe_oa_init_oa_units(struct xe_gt *gt) { - const u32 mtl_oa_base[] = { 0x13000 }; + /* Actual address is MEDIA_GT_GSI_OFFSET + oam_base_addr[i] */ + const u32 oam_base_addr[] = { + [XE_OAM_UNIT_SAG] = 0x13000, + [XE_OAM_UNIT_SCMI_0] = 0x14000, + [XE_OAM_UNIT_SCMI_1] = 0x14800, + }; int i, num_units = gt->oa.num_oa_units; for (i = 0; i < num_units; i++) { struct xe_oa_unit *u = >->oa.oa_unit[i]; - if (gt->info.type != XE_GT_TYPE_MEDIA) { + if (xe_gt_is_main_type(gt)) { u->regs = __oag_regs(); u->type = DRM_XE_OA_UNIT_TYPE_OAG; - } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { - u->regs = __oam_regs(mtl_oa_base[i]); - u->type = DRM_XE_OA_UNIT_TYPE_OAM; + } else { + xe_gt_assert(gt, GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270); + u->regs = __oam_regs(oam_base_addr[i]); + u->type = i == XE_OAM_UNIT_SAG && GRAPHICS_VER(gt_to_xe(gt)) >= 20 ? + DRM_XE_OA_UNIT_TYPE_OAM_SAG : DRM_XE_OA_UNIT_TYPE_OAM; } + u->gt = gt; + xe_mmio_write32(>->mmio, u->regs.oa_ctrl, 0); /* Ensure MMIO trigger remains disabled till there is a stream */ @@ -2559,10 +2633,6 @@ static int xe_oa_init_gt(struct xe_gt *gt) } } - /* - * Fused off engines can result in oa_unit's with num_engines == 0. These units - * will appear in OA unit query, but no OA streams can be opened on them. 
- */ gt->oa.num_oa_units = num_oa_units; gt->oa.oa_unit = u; @@ -2573,17 +2643,54 @@ static int xe_oa_init_gt(struct xe_gt *gt) return 0; } +static void xe_oa_print_gt_oa_units(struct xe_gt *gt) +{ + enum xe_hw_engine_id hwe_id; + struct xe_hw_engine *hwe; + struct xe_oa_unit *u; + char buf[256]; + int i, n; + + for (i = 0; i < gt->oa.num_oa_units; i++) { + u = >->oa.oa_unit[i]; + buf[0] = '\0'; + n = 0; + + for_each_hw_engine(hwe, gt, hwe_id) + if (xe_oa_unit_id(hwe) == u->oa_unit_id) + n += scnprintf(buf + n, sizeof(buf) - n, "%s ", hwe->name); + + xe_gt_dbg(gt, "oa_unit %d, type %d, Engines: %s\n", u->oa_unit_id, u->type, buf); + } +} + +static void xe_oa_print_oa_units(struct xe_oa *oa) +{ + struct xe_gt *gt; + int gt_id; + + for_each_gt(gt, oa->xe, gt_id) + xe_oa_print_gt_oa_units(gt); +} + static int xe_oa_init_oa_units(struct xe_oa *oa) { struct xe_gt *gt; int i, ret; + /* Needed for OAM implementation here */ + BUILD_BUG_ON(XE_OAM_UNIT_SAG != 0); + BUILD_BUG_ON(XE_OAM_UNIT_SCMI_0 != 1); + BUILD_BUG_ON(XE_OAM_UNIT_SCMI_1 != 2); + for_each_gt(gt, oa->xe, i) { ret = xe_oa_init_gt(gt); if (ret) return ret; } + xe_oa_print_oa_units(oa); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 52e33c37d5ee..2628f78c4e8d 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -95,6 +95,9 @@ struct xe_oa_unit { /** @oa_unit_id: identifier for the OA unit */ u16 oa_unit_id; + /** @gt: gt associated with the OA unit */ + struct xe_gt *gt; + /** @type: Type of OA unit - OAM, OAG etc. */ enum drm_xe_oa_unit_type type; @@ -182,6 +185,9 @@ struct xe_oa_stream { /** @gt: gt associated with the oa stream */ struct xe_gt *gt; + /** @oa_unit: oa unit for this stream */ + struct xe_oa_unit *oa_unit; + /** @hwe: hardware engine associated with this oa stream */ struct xe_hw_engine *hwe; diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 30fdbdb9341e..2e7cb99ae87a 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -103,7 +103,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { * * Note: There is an implicit assumption in the driver that compression and * coh_1way+ are mutually exclusive. If this is ever not true then userptr - * and imported dma-buf from external device will have uncleared ccs state. + * and imported dma-buf from external device will have uncleared ccs state. See + * also xe_bo_needs_ccs_pages(). 
*/ #define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \ { \ @@ -162,21 +163,35 @@ u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index) static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], int n_entries) { + struct xe_device *xe = gt_to_xe(gt); + for (int i = 0; i < n_entries; i++) { struct xe_reg reg = XE_REG(_PAT_INDEX(i)); xe_mmio_write32(>->mmio, reg, table[i].value); } + + if (xe->pat.pat_ats) + xe_mmio_write32(>->mmio, XE_REG(_PAT_ATS), xe->pat.pat_ats->value); + if (xe->pat.pat_pta) + xe_mmio_write32(>->mmio, XE_REG(_PAT_PTA), xe->pat.pat_pta->value); } static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry table[], int n_entries) { + struct xe_device *xe = gt_to_xe(gt); + for (int i = 0; i < n_entries; i++) { struct xe_reg_mcr reg_mcr = XE_REG_MCR(_PAT_INDEX(i)); xe_gt_mcr_multicast_write(gt, reg_mcr, table[i].value); } + + if (xe->pat.pat_ats) + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe->pat.pat_ats->value); + if (xe->pat.pat_pta) + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe->pat.pat_pta->value); } static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) @@ -303,26 +318,6 @@ static const struct xe_pat_ops xelpg_pat_ops = { .dump = xelpg_dump, }; -static void xe2lpg_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], - int n_entries) -{ - program_pat_mcr(gt, table, n_entries); - xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe2_pat_ats.value); - - if (IS_DGFX(gt_to_xe(gt))) - xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe2_pat_pta.value); -} - -static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], - int n_entries) -{ - program_pat(gt, table, n_entries); - xe_mmio_write32(>->mmio, XE_REG(_PAT_ATS), xe2_pat_ats.value); - - if (IS_DGFX(gt_to_xe(gt))) - xe_mmio_write32(>->mmio, XE_REG(_PAT_PTA), xe2_pat_pta.value); -} - static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); @@ -375,8 +370,8 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) } static const struct xe_pat_ops xe2_pat_ops = { - .program_graphics = xe2lpg_program_pat, - .program_media = xe2lpm_program_pat, + .program_graphics = program_pat_mcr, + .program_media = program_pat, .dump = xe2_dump, }; @@ -385,6 +380,9 @@ void xe_pat_init_early(struct xe_device *xe) if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) { xe->pat.ops = &xe2_pat_ops; xe->pat.table = xe2_pat_table; + xe->pat.pat_ats = &xe2_pat_ats; + if (IS_DGFX(xe)) + xe->pat.pat_pta = &xe2_pat_pta; /* Wa_16023588340. 
XXX: Should use XE_WA */ if (GRAPHICS_VERx100(xe) == 2001) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index f4d108dc49b1..3c40ef426f0c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -38,40 +38,6 @@ enum toggle_d3cold { D3COLD_ENABLE, }; -struct xe_subplatform_desc { - enum xe_subplatform subplatform; - const char *name; - const u16 *pciidlist; -}; - -struct xe_device_desc { - /* Should only ever be set for platforms without GMD_ID */ - const struct xe_ip *pre_gmdid_graphics_ip; - /* Should only ever be set for platforms without GMD_ID */ - const struct xe_ip *pre_gmdid_media_ip; - - const char *platform_name; - const struct xe_subplatform_desc *subplatforms; - - enum xe_platform platform; - - u8 dma_mask_size; - u8 max_remote_tiles:2; - - u8 require_force_probe:1; - u8 is_dgfx:1; - - u8 has_display:1; - u8 has_heci_gscfi:1; - u8 has_heci_cscfi:1; - u8 has_llc:1; - u8 has_pxp:1; - u8 has_sriov:1; - u8 skip_guc_pc:1; - u8 skip_mtcfg:1; - u8 skip_pcode:1; -}; - __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); @@ -137,7 +103,6 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ - .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ .has_64bit_timestamp = 1, \ @@ -177,9 +142,11 @@ static const struct xe_ip graphics_ips[] = { { 1271, "Xe_LPG", &graphics_xelpg }, { 1274, "Xe_LPG+", &graphics_xelpg }, { 2001, "Xe2_HPG", &graphics_xe2 }, + { 2002, "Xe2_HPG", &graphics_xe2 }, { 2004, "Xe2_LPG", &graphics_xe2 }, { 3000, "Xe3_LPG", &graphics_xe2 }, { 3001, "Xe3_LPG", &graphics_xe2 }, + { 3003, "Xe3_LPG", &graphics_xe2 }, }; /* Pre-GMDID Media IPs */ @@ -192,6 +159,7 @@ static const struct xe_ip media_ips[] = { { 1301, "Xe2_HPM", &media_xelpmp }, { 2000, "Xe2_LPM", &media_xelpmp }, { 3000, "Xe3_LPM", &media_xelpmp }, + { 3002, "Xe3_LPM", &media_xelpmp }, }; static const struct xe_device_desc tgl_desc = { @@ -201,6 +169,7 @@ static const struct xe_device_desc tgl_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -211,6 +180,7 @@ static const struct xe_device_desc rkl_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -223,6 +193,7 @@ static const struct xe_device_desc adl_s_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, @@ -239,6 +210,7 @@ static const struct xe_device_desc adl_p_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, @@ -253,6 +225,7 @@ static const struct xe_device_desc adl_n_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -266,7 +239,9 @@ static const struct xe_device_desc dg1_desc = { PLATFORM(DG1), .dma_mask_size = 39, .has_display = true, + .has_gsc_nvm = 1, .has_heci_gscfi = 1, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -277,6 +252,7 @@ static const u16 dg2_g12_ids[] = { INTEL_DG2_G12_IDS(NOP), 0 }; #define DG2_FEATURES \ DGFX_FEATURES, \ PLATFORM(DG2), \ + .has_gsc_nvm = 
1, \ .has_heci_gscfi = 1, \ .subplatforms = (const struct xe_subplatform_desc[]) { \ { XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \ @@ -289,6 +265,7 @@ static const struct xe_device_desc ats_m_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, + .max_gt_per_tile = 1, .require_force_probe = true, DG2_FEATURES, @@ -299,10 +276,13 @@ static const struct xe_device_desc dg2_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, + .max_gt_per_tile = 1, .require_force_probe = true, DG2_FEATURES, .has_display = true, + .has_fan_control = true, + .has_mbx_power_limits = false, }; static const __maybe_unused struct xe_device_desc pvc_desc = { @@ -311,9 +291,12 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { PLATFORM(PVC), .dma_mask_size = 52, .has_display = false, + .has_gsc_nvm = 1, .has_heci_gscfi = 1, + .max_gt_per_tile = 1, .max_remote_tiles = 1, .require_force_probe = true, + .has_mbx_power_limits = false, }; static const struct xe_device_desc mtl_desc = { @@ -323,6 +306,7 @@ static const struct xe_device_desc mtl_desc = { .dma_mask_size = 46, .has_display = true, .has_pxp = true, + .max_gt_per_tile = 2, }; static const struct xe_device_desc lnl_desc = { @@ -330,6 +314,8 @@ static const struct xe_device_desc lnl_desc = { .dma_mask_size = 46, .has_display = true, .has_pxp = true, + .max_gt_per_tile = 2, + .needs_scratch = true, }; static const struct xe_device_desc bmg_desc = { @@ -337,7 +323,13 @@ static const struct xe_device_desc bmg_desc = { PLATFORM(BATTLEMAGE), .dma_mask_size = 46, .has_display = true, + .has_fan_control = true, + .has_mbx_power_limits = true, + .has_gsc_nvm = 1, .has_heci_cscfi = 1, + .has_sriov = true, + .max_gt_per_tile = 2, + .needs_scratch = true, }; static const struct xe_device_desc ptl_desc = { @@ -345,7 +337,8 @@ static const struct xe_device_desc ptl_desc = { .dma_mask_size = 46, .has_display = true, .has_sriov = true, - .require_force_probe = true, + .max_gt_per_tile = 2, + .needs_scratch = true, }; #undef PLATFORM @@ -576,6 +569,9 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.dma_mask_size = desc->dma_mask_size; xe->info.is_dgfx = desc->is_dgfx; + xe->info.has_fan_control = desc->has_fan_control; + xe->info.has_mbx_power_limits = desc->has_mbx_power_limits; + xe->info.has_gsc_nvm = desc->has_gsc_nvm; xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; xe->info.has_llc = desc->has_llc; @@ -584,10 +580,15 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.skip_guc_pc = desc->skip_guc_pc; xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; + xe->info.needs_scratch = desc->needs_scratch; xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.probe_display && desc->has_display; + + xe_assert(xe, desc->max_gt_per_tile > 0); + xe_assert(xe, desc->max_gt_per_tile <= XE_MAX_GT_PER_TILE); + xe->info.max_gt_per_tile = desc->max_gt_per_tile; xe->info.tile_count = 1 + desc->max_remote_tiles; err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); @@ -687,10 +688,11 @@ static int xe_info_init(struct xe_device *xe, */ for_each_tile(tile, xe, id) { gt = tile->primary_gt; - gt->info.id = xe->info.gt_count++; gt->info.type = XE_GT_TYPE_MAIN; + gt->info.id = tile->id * xe->info.max_gt_per_tile; gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; gt->info.engine_mask = 
graphics_desc->hw_engine_mask; + xe->info.gt_count++; if (MEDIA_VER(xe) < 13 && media_desc) gt->info.engine_mask |= media_desc->hw_engine_mask; @@ -708,17 +710,10 @@ static int xe_info_init(struct xe_device *xe, gt = tile->media_gt; gt->info.type = XE_GT_TYPE_MEDIA; + gt->info.id = tile->id * xe->info.max_gt_per_tile + 1; gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; gt->info.engine_mask = media_desc->hw_engine_mask; - - /* - * FIXME: At the moment multi-tile and standalone media are - * mutually exclusive on current platforms. We'll need to - * come up with a better way to number GTs if we ever wind - * up with platforms that support both together. - */ - drm_WARN_ON(&xe->drm, id != 0); - gt->info.id = xe->info.gt_count++; + xe->info.gt_count++; } return 0; @@ -735,7 +730,7 @@ static void xe_pci_remove(struct pci_dev *pdev) return; xe_device_remove(xe); - xe_pm_runtime_fini(xe); + xe_pm_fini(xe); } /* @@ -805,18 +800,17 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return err; err = xe_device_probe_early(xe); - if (err) { - /* - * In Boot Survivability mode, no drm card is exposed and driver - * is loaded with bare minimum to allow for firmware to be - * flashed through mei. If early probe failed, but it managed to - * enable survivability mode, return success. - */ - if (xe_survivability_mode_is_enabled(xe)) - return 0; + /* + * In Boot Survivability mode, no drm card is exposed and driver + * is loaded with bare minimum to allow for firmware to be + * flashed through mei. Return success, if survivability mode + * is enabled due to pcode failure or configfs being set + */ + if (xe_survivability_mode_is_enabled(xe)) + return 0; + if (err) return err; - } err = xe_info_init(xe, desc); if (err) @@ -922,6 +916,7 @@ static int xe_pci_suspend(struct device *dev) pci_save_state(pdev); pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3cold); return 0; } diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 09ee8a06fe2e..8813efdcafbb 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -7,6 +7,8 @@ #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_control.h" +#include "xe_gt_sriov_printk.h" +#include "xe_guc_engine_activity.h" #include "xe_pci_sriov.h" #include "xe_pm.h" #include "xe_sriov.h" @@ -111,6 +113,20 @@ static void pf_link_vfs(struct xe_device *xe, int num_vfs) } } +static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs, bool enable) +{ + struct xe_gt *gt; + unsigned int id; + int ret = 0; + + for_each_gt(gt, xe, id) { + ret = xe_guc_engine_activity_function_stats(>->uc.guc, num_vfs, enable); + if (ret) + xe_gt_sriov_info(gt, "Failed to %s engine activity function stats (%pe)\n", + str_enable_disable(enable), ERR_PTR(ret)); + } +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -145,6 +161,9 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) xe_sriov_info(xe, "Enabled %u of %u VF%s\n", num_vfs, total_vfs, str_plural(total_vfs)); + + pf_engine_activity_stats(xe, num_vfs, true); + return num_vfs; failed: @@ -168,6 +187,8 @@ static int pf_disable_vfs(struct xe_device *xe) if (!num_vfs) return 0; + pf_engine_activity_stats(xe, num_vfs, false); + pci_disable_sriov(pdev); pf_reset_vfs(xe, num_vfs); diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index ca6b10d35573..4de6f69ed975 
100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -8,6 +8,47 @@ #include <linux/types.h> +#include "xe_platform_types.h" + +struct xe_subplatform_desc { + enum xe_subplatform subplatform; + const char *name; + const u16 *pciidlist; +}; + +struct xe_device_desc { + /* Should only ever be set for platforms without GMD_ID */ + const struct xe_ip *pre_gmdid_graphics_ip; + /* Should only ever be set for platforms without GMD_ID */ + const struct xe_ip *pre_gmdid_media_ip; + + const char *platform_name; + const struct xe_subplatform_desc *subplatforms; + + enum xe_platform platform; + + u8 dma_mask_size; + u8 max_remote_tiles:2; + u8 max_gt_per_tile:2; + + u8 require_force_probe:1; + u8 is_dgfx:1; + + u8 has_display:1; + u8 has_fan_control:1; + u8 has_gsc_nvm:1; + u8 has_heci_gscfi:1; + u8 has_heci_cscfi:1; + u8 has_llc:1; + u8 has_mbx_power_limits:1; + u8 has_pxp:1; + u8 has_sriov:1; + u8 needs_scratch:1; + u8 skip_guc_pc:1; + u8 skip_mtcfg:1; + u8 skip_pcode:1; +}; + struct xe_graphics_desc { u8 va_bits; u8 vm_max_level; diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 9333ce776a6e..6a7ddb9005f9 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -7,6 +7,7 @@ #include <linux/delay.h> #include <linux/errno.h> +#include <linux/error-injection.h> #include <drm/drm_managed.h> @@ -108,6 +109,17 @@ int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 data, int timeout return err; } +int xe_pcode_write64_timeout(struct xe_tile *tile, u32 mbox, u32 data0, u32 data1, int timeout) +{ + int err; + + mutex_lock(&tile->pcode.lock); + err = pcode_mailbox_rw(tile, mbox, &data0, &data1, timeout, false, false); + mutex_unlock(&tile->pcode.lock); + + return err; +} + int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1) { int err; @@ -323,3 +335,34 @@ int xe_pcode_probe_early(struct xe_device *xe) { return xe_pcode_ready(xe, false); } +ALLOW_ERROR_INJECTION(xe_pcode_probe_early, ERRNO); /* See xe_pci_probe */ + +/* Helpers with drm device. 
These should only be called by the display side */ +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) + +int intel_pcode_read(struct drm_device *drm, u32 mbox, u32 *val, u32 *val1) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_read(tile, mbox, val, val1); +} + +int intel_pcode_write_timeout(struct drm_device *drm, u32 mbox, u32 val, int timeout_ms) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_write_timeout(tile, mbox, val, timeout_ms); +} + +int intel_pcode_request(struct drm_device *drm, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_request(tile, mbox, request, reply_mask, reply, timeout_base_ms); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h index ba33991d72a7..a5584c1c75f9 100644 --- a/drivers/gpu/drm/xe/xe_pcode.h +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -7,8 +7,10 @@ #define _XE_PCODE_H_ #include <linux/types.h> -struct xe_tile; + +struct drm_device; struct xe_device; +struct xe_tile; void xe_pcode_init(struct xe_tile *tile); int xe_pcode_probe_early(struct xe_device *xe); @@ -18,6 +20,9 @@ int xe_pcode_init_min_freq_table(struct xe_tile *tile, u32 min_gt_freq, int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1); int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 val, int timeout_ms); +int xe_pcode_write64_timeout(struct xe_tile *tile, u32 mbox, u32 data0, + u32 data1, int timeout); + #define xe_pcode_write(tile, mbox, val) \ xe_pcode_write_timeout(tile, mbox, val, 1) @@ -29,4 +34,12 @@ int xe_pcode_request(struct xe_tile *tile, u32 mbox, u32 request, | FIELD_PREP(PCODE_MB_PARAM1, param1)\ | FIELD_PREP(PCODE_MB_PARAM2, param2)) +/* Helpers with drm device */ +int intel_pcode_read(struct drm_device *drm, u32 mbox, u32 *val, u32 *val1); +int intel_pcode_write_timeout(struct drm_device *drm, u32 mbox, u32 val, int timeout_ms); +#define intel_pcode_write(drm, mbox, val) \ + intel_pcode_write_timeout((drm), (mbox), (val), 1) +int intel_pcode_request(struct drm_device *drm, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms); + #endif diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 2bae9afdbd35..92bfcba51e19 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -34,6 +34,7 @@ #define DGFX_PCODE_STATUS 0x7E #define DGFX_GET_INIT_STATUS 0x0 #define DGFX_INIT_STATUS_COMPLETE 0x1 +#define DGFX_LINK_DOWNGRADE_STATUS REG_BIT(31) #define PCODE_POWER_SETUP 0x7C #define POWER_SETUP_SUBCOMMAND_READ_I1 0x4 @@ -42,6 +43,28 @@ #define POWER_SETUP_I1_SHIFT 6 /* 10.6 fixed point format */ #define POWER_SETUP_I1_DATA_MASK REG_GENMASK(15, 0) +#define READ_PSYSGPU_POWER_LIMIT 0x6 +#define WRITE_PSYSGPU_POWER_LIMIT 0x7 +#define READ_PACKAGE_POWER_LIMIT 0x8 +#define WRITE_PACKAGE_POWER_LIMIT 0x9 +#define READ_PL_FROM_FW 0x1 +#define READ_PL_FROM_PCODE 0x0 + +#define PCODE_LATE_BINDING 0x5C +#define GET_CAPABILITY_STATUS 0x0 +#define V1_FAN_SUPPORTED REG_BIT(0) +#define VR_PARAMS_SUPPORTED REG_BIT(3) +#define V1_FAN_PROVISIONED REG_BIT(16) +#define VR_PARAMS_PROVISIONED REG_BIT(19) +#define GET_VERSION_LOW 0x1 +#define GET_VERSION_HIGH 0x2 +#define MAJOR_VERSION_MASK REG_GENMASK(31, 16) +#define MINOR_VERSION_MASK REG_GENMASK(15, 0) +#define HOTFIX_VERSION_MASK 
REG_GENMASK(31, 16) +#define BUILD_VERSION_MASK REG_GENMASK(15, 0) +#define FAN_TABLE 1 +#define VR_CONFIG 2 + #define PCODE_FREQUENCY_CONFIG 0x6e /* Frequency Config Sub Commands (param1) */ #define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0 @@ -49,6 +72,9 @@ /* Domain IDs (param2) */ #define PCODE_MBOX_DOMAIN_HBM 0x2 +#define FAN_SPEED_CONTROL 0x7D +#define FSC_READ_NUM_FANS 0x4 + #define PCODE_SCRATCH(x) XE_REG(0x138320 + ((x) * 4)) /* PCODE_SCRATCH0 */ #define AUXINFO_REG_OFFSET REG_GENMASK(17, 15) @@ -63,6 +89,10 @@ /* Auxiliary info bits */ #define AUXINFO_HISTORY_OFFSET REG_GENMASK(31, 29) +#define BMG_PCIE_CAP XE_REG(0x138340) +#define LINK_DOWNGRADE REG_GENMASK(1, 0) +#define DOWNGRADE_CAPABLE 2 + struct pcode_err_decode { int errno; const char *str; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 7b6b754ad6eb..e279b47ba03b 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -16,10 +16,10 @@ #include "xe_bo.h" #include "xe_bo_evict.h" #include "xe_device.h" -#include "xe_device_sysfs.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_guc.h" +#include "xe_i2c.h" #include "xe_irq.h" #include "xe_pcode.h" #include "xe_pxp.h" @@ -135,7 +135,7 @@ int xe_pm_suspend(struct xe_device *xe) /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); if (err) - goto err_pxp; + goto err_display; for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); @@ -147,12 +147,13 @@ int xe_pm_suspend(struct xe_device *xe) xe_display_pm_suspend_late(xe); + xe_i2c_pm_suspend(xe); + drm_dbg(&xe->drm, "Device suspended\n"); return 0; err_display: xe_display_pm_resume(xe); -err_pxp: xe_pxp_pm_resume(xe->pxp); err: drm_dbg(&xe->drm, "Device suspend failed %d\n", err); @@ -188,10 +189,12 @@ int xe_pm_resume(struct xe_device *xe) * This only restores pinned memory which is the memory required for the * GT(s) to resume. 
*/ - err = xe_bo_restore_kernel(xe); + err = xe_bo_restore_early(xe); if (err) goto err; + xe_i2c_pm_resume(xe, xe->d3cold.allowed); + xe_irq_resume(xe); for_each_gt(gt, xe, id) @@ -199,7 +202,7 @@ int xe_pm_resume(struct xe_device *xe) xe_display_pm_resume(xe); - err = xe_bo_restore_user(xe); + err = xe_bo_restore_late(xe); if (err) goto err; @@ -273,6 +276,7 @@ int xe_pm_init_early(struct xe_device *xe) if (err) return err; + xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe); return 0; } ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */ @@ -286,6 +290,42 @@ static u32 vram_threshold_value(struct xe_device *xe) return DEFAULT_VRAM_THRESHOLD; } +static int xe_pm_notifier_callback(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier); + int err = 0; + + switch (action) { + case PM_HIBERNATION_PREPARE: + case PM_SUSPEND_PREPARE: + xe_pm_runtime_get(xe); + err = xe_bo_evict_all_user(xe); + if (err) { + drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err); + xe_pm_runtime_put(xe); + break; + } + + err = xe_bo_notifier_prepare_all_pinned(xe); + if (err) { + drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err); + xe_pm_runtime_put(xe); + } + break; + case PM_POST_HIBERNATION: + case PM_POST_SUSPEND: + xe_bo_notifier_unprepare_all_pinned(xe); + xe_pm_runtime_put(xe); + break; + } + + if (err) + return NOTIFY_BAD; + + return NOTIFY_DONE; +} + /** * xe_pm_init - Initialize Xe Power Management * @xe: xe device instance @@ -299,33 +339,31 @@ int xe_pm_init(struct xe_device *xe) u32 vram_threshold; int err; + xe->pm_notifier.notifier_call = xe_pm_notifier_callback; + err = register_pm_notifier(&xe->pm_notifier); + if (err) + return err; + /* For now suspend/resume is only allowed with GuC */ if (!xe_device_uc_enabled(xe)) return 0; - xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe); - if (xe->d3cold.capable) { - err = xe_device_sysfs_init(xe); - if (err) - return err; - vram_threshold = vram_threshold_value(xe); err = xe_pm_set_vram_threshold(xe, vram_threshold); if (err) - return err; + goto err_unregister; } xe_pm_runtime_init(xe); - return 0; + +err_unregister: + unregister_pm_notifier(&xe->pm_notifier); + return err; } -/** - * xe_pm_runtime_fini - Finalize Runtime PM - * @xe: xe device instance - */ -void xe_pm_runtime_fini(struct xe_device *xe) +static void xe_pm_runtime_fini(struct xe_device *xe) { struct device *dev = xe->drm.dev; @@ -333,6 +371,18 @@ void xe_pm_runtime_fini(struct xe_device *xe) pm_runtime_forbid(dev); } +/** + * xe_pm_fini - Finalize PM + * @xe: xe device instance + */ +void xe_pm_fini(struct xe_device *xe) +{ + if (xe_device_uc_enabled(xe)) + xe_pm_runtime_fini(xe); + + unregister_pm_notifier(&xe->pm_notifier); +} + static void xe_pm_write_callback_task(struct xe_device *xe, struct task_struct *task) { @@ -442,6 +492,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_display_pm_runtime_suspend_late(xe); + xe_i2c_pm_suspend(xe); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return 0; @@ -484,11 +536,13 @@ int xe_pm_runtime_resume(struct xe_device *xe) * This only restores pinned memory which is the memory * required for the GT(s) to resume. 
*/ - err = xe_bo_restore_kernel(xe); + err = xe_bo_restore_early(xe); if (err) goto out; } + xe_i2c_pm_resume(xe, xe->d3cold.allowed); + xe_irq_resume(xe); for_each_gt(gt, xe, id) @@ -497,7 +551,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) xe_display_pm_runtime_resume(xe); if (xe->d3cold.allowed) { - err = xe_bo_restore_user(xe); + err = xe_bo_restore_late(xe); if (err) goto out; } @@ -641,7 +695,7 @@ static bool xe_pm_suspending_or_resuming(struct xe_device *xe) return dev->power.runtime_status == RPM_SUSPENDING || dev->power.runtime_status == RPM_RESUMING || - pm_suspend_target_state != PM_SUSPEND_ON; + pm_suspend_in_progress(); #else return false; #endif @@ -707,11 +761,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) } /** - * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold + * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold * @xe: xe device instance - * @threshold: VRAM size in bites for the D3cold threshold + * @threshold: VRAM size in MiB for the D3cold threshold * - * Returns 0 for success, negative error code otherwise. + * Return: + * * 0 - success + * * -EINVAL - invalid argument */ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) { diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 998d1ed64556..59678b310e55 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -17,7 +17,7 @@ int xe_pm_resume(struct xe_device *xe); int xe_pm_init_early(struct xe_device *xe); int xe_pm_init(struct xe_device *xe); -void xe_pm_runtime_fini(struct xe_device *xe); +void xe_pm_fini(struct xe_device *xe); bool xe_pm_runtime_suspended(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); int xe_pm_runtime_resume(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c index 4f62a6e515d6..cab51d826345 100644 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -10,9 +10,11 @@ #include "xe_force_wake.h" #include "xe_gt_idle.h" #include "xe_guc_engine_activity.h" +#include "xe_guc_pc.h" #include "xe_hw_engine.h" #include "xe_pm.h" #include "xe_pmu.h" +#include "xe_sriov_pf_helpers.h" /** * DOC: Xe PMU (Performance Monitoring Unit) @@ -32,9 +34,10 @@ * gt[60:63] Selects gt for the event * engine_class[20:27] Selects engine-class for event * engine_instance[12:19] Selects the engine-instance for the event + * function[44:59] Selects the function of the event (SRIOV enabled) * * For engine specific events (engine-*), gt, engine_class and engine_instance parameters must be - * set as populated by DRM_XE_DEVICE_QUERY_ENGINES. + * set as populated by DRM_XE_DEVICE_QUERY_ENGINES and function if SRIOV is enabled. * * For gt specific events (gt-*) gt parameter must be passed. All other parameters will be 0. 
* @@ -49,6 +52,7 @@ */ #define XE_PMU_EVENT_GT_MASK GENMASK_ULL(63, 60) +#define XE_PMU_EVENT_FUNCTION_MASK GENMASK_ULL(59, 44) #define XE_PMU_EVENT_ENGINE_CLASS_MASK GENMASK_ULL(27, 20) #define XE_PMU_EVENT_ENGINE_INSTANCE_MASK GENMASK_ULL(19, 12) #define XE_PMU_EVENT_ID_MASK GENMASK_ULL(11, 0) @@ -58,6 +62,11 @@ static unsigned int config_to_event_id(u64 config) return FIELD_GET(XE_PMU_EVENT_ID_MASK, config); } +static unsigned int config_to_function_id(u64 config) +{ + return FIELD_GET(XE_PMU_EVENT_FUNCTION_MASK, config); +} + static unsigned int config_to_engine_class(u64 config) { return FIELD_GET(XE_PMU_EVENT_ENGINE_CLASS_MASK, config); @@ -76,6 +85,8 @@ static unsigned int config_to_gt_id(u64 config) #define XE_PMU_EVENT_GT_C6_RESIDENCY 0x01 #define XE_PMU_EVENT_ENGINE_ACTIVE_TICKS 0x02 #define XE_PMU_EVENT_ENGINE_TOTAL_TICKS 0x03 +#define XE_PMU_EVENT_GT_ACTUAL_FREQUENCY 0x04 +#define XE_PMU_EVENT_GT_REQUESTED_FREQUENCY 0x05 static struct xe_gt *event_to_gt(struct perf_event *event) { @@ -111,6 +122,14 @@ static bool is_engine_event(u64 config) event_id == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS); } +static bool is_gt_frequency_event(struct perf_event *event) +{ + u32 id = config_to_event_id(event->attr.config); + + return id == XE_PMU_EVENT_GT_ACTUAL_FREQUENCY || + id == XE_PMU_EVENT_GT_REQUESTED_FREQUENCY; +} + static bool event_gt_forcewake(struct perf_event *event) { struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); @@ -118,7 +137,7 @@ static bool event_gt_forcewake(struct perf_event *event) struct xe_gt *gt; unsigned int *fw_ref; - if (!is_engine_event(config)) + if (!is_engine_event(config) && !is_gt_frequency_event(event)) return true; gt = xe_device_get_gt(xe, config_to_gt_id(config)); @@ -138,10 +157,13 @@ static bool event_gt_forcewake(struct perf_event *event) return true; } -static bool event_supported(struct xe_pmu *pmu, unsigned int gt, +static bool event_supported(struct xe_pmu *pmu, unsigned int gt_id, unsigned int id) { - if (gt >= XE_MAX_GT_PER_TILE) + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + struct xe_gt *gt = xe_device_get_gt(xe, gt_id); + + if (!gt) return false; return id < sizeof(pmu->supported_events) * BITS_PER_BYTE && @@ -151,7 +173,7 @@ static bool event_supported(struct xe_pmu *pmu, unsigned int gt, static bool event_param_valid(struct perf_event *event) { struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); - unsigned int engine_class, engine_instance; + unsigned int engine_class, engine_instance, function_id; u64 config = event->attr.config; struct xe_gt *gt; @@ -161,16 +183,28 @@ static bool event_param_valid(struct perf_event *event) engine_class = config_to_engine_class(config); engine_instance = config_to_engine_instance(config); + function_id = config_to_function_id(config); switch (config_to_event_id(config)) { case XE_PMU_EVENT_GT_C6_RESIDENCY: - if (engine_class || engine_instance) + case XE_PMU_EVENT_GT_ACTUAL_FREQUENCY: + case XE_PMU_EVENT_GT_REQUESTED_FREQUENCY: + if (engine_class || engine_instance || function_id) return false; break; case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS: case XE_PMU_EVENT_ENGINE_TOTAL_TICKS: if (!event_to_hwe(event)) return false; + + /* PF(0) and total vfs when SRIOV is enabled */ + if (IS_SRIOV_PF(xe)) { + if (function_id > xe_sriov_pf_get_totalvfs(xe)) + return false; + } else if (function_id) { + return false; + } + break; } @@ -242,13 +276,17 @@ static int xe_pmu_event_init(struct perf_event *event) static u64 read_engine_events(struct xe_gt *gt, struct 
perf_event *event) { struct xe_hw_engine *hwe; - u64 val = 0; + unsigned int function_id; + u64 config, val = 0; + + config = event->attr.config; + function_id = config_to_function_id(config); hwe = event_to_hwe(event); - if (config_to_event_id(event->attr.config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS) - val = xe_guc_engine_activity_active_ticks(>->uc.guc, hwe); + if (config_to_event_id(config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS) + val = xe_guc_engine_activity_active_ticks(>->uc.guc, hwe, function_id); else - val = xe_guc_engine_activity_total_ticks(>->uc.guc, hwe); + val = xe_guc_engine_activity_total_ticks(>->uc.guc, hwe, function_id); return val; } @@ -266,6 +304,10 @@ static u64 __xe_pmu_event_read(struct perf_event *event) case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS: case XE_PMU_EVENT_ENGINE_TOTAL_TICKS: return read_engine_events(gt, event); + case XE_PMU_EVENT_GT_ACTUAL_FREQUENCY: + return xe_guc_pc_get_act_freq(>->uc.guc.pc); + case XE_PMU_EVENT_GT_REQUESTED_FREQUENCY: + return xe_guc_pc_get_cur_freq_fw(>->uc.guc.pc); } return 0; @@ -281,7 +323,14 @@ static void xe_pmu_event_update(struct perf_event *event) new = __xe_pmu_event_read(event); } while (!local64_try_cmpxchg(&hwc->prev_count, &prev, new)); - local64_add(new - prev, &event->count); + /* + * GT frequency is not a monotonically increasing counter, so add the + * instantaneous value instead. + */ + if (is_gt_frequency_event(event)) + local64_add(new, &event->count); + else + local64_add(new - prev, &event->count); } static void xe_pmu_event_read(struct perf_event *event) @@ -351,6 +400,7 @@ static void xe_pmu_event_del(struct perf_event *event, int flags) } PMU_FORMAT_ATTR(gt, "config:60-63"); +PMU_FORMAT_ATTR(function, "config:44-59"); PMU_FORMAT_ATTR(engine_class, "config:20-27"); PMU_FORMAT_ATTR(engine_instance, "config:12-19"); PMU_FORMAT_ATTR(event, "config:0-11"); @@ -359,6 +409,7 @@ static struct attribute *pmu_format_attrs[] = { &format_attr_event.attr, &format_attr_engine_class.attr, &format_attr_engine_instance.attr, + &format_attr_function.attr, &format_attr_gt.attr, NULL, }; @@ -419,6 +470,10 @@ static ssize_t event_attr_show(struct device *dev, XE_EVENT_ATTR_SIMPLE(gt-c6-residency, gt_c6_residency, XE_PMU_EVENT_GT_C6_RESIDENCY, "ms"); XE_EVENT_ATTR_NOUNIT(engine-active-ticks, engine_active_ticks, XE_PMU_EVENT_ENGINE_ACTIVE_TICKS); XE_EVENT_ATTR_NOUNIT(engine-total-ticks, engine_total_ticks, XE_PMU_EVENT_ENGINE_TOTAL_TICKS); +XE_EVENT_ATTR_SIMPLE(gt-actual-frequency, gt_actual_frequency, + XE_PMU_EVENT_GT_ACTUAL_FREQUENCY, "MHz"); +XE_EVENT_ATTR_SIMPLE(gt-requested-frequency, gt_requested_frequency, + XE_PMU_EVENT_GT_REQUESTED_FREQUENCY, "MHz"); static struct attribute *pmu_empty_event_attrs[] = { /* Empty - all events are added as groups with .attr_update() */ @@ -434,6 +489,8 @@ static const struct attribute_group *pmu_events_attr_update[] = { &pmu_group_gt_c6_residency, &pmu_group_engine_active_ticks, &pmu_group_engine_total_ticks, + &pmu_group_gt_actual_frequency, + &pmu_group_gt_requested_frequency, NULL, }; @@ -442,8 +499,11 @@ static void set_supported_events(struct xe_pmu *pmu) struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); struct xe_gt *gt = xe_device_get_gt(xe, 0); - if (!xe->info.skip_guc_pc) + if (!xe->info.skip_guc_pc) { pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_C6_RESIDENCY); + pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_ACTUAL_FREQUENCY); + pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_REQUESTED_FREQUENCY); + } if (xe_guc_engine_activity_supported(>->uc.guc)) { 
pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_ACTIVE_TICKS); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 856038553b81..c8e63bd23300 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -103,6 +103,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, { struct xe_pt *pt; struct xe_bo *bo; + u32 bo_flags; int err; if (level) { @@ -115,14 +116,16 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, if (!pt) return ERR_PTR(-ENOMEM); + bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | + XE_BO_FLAG_NO_RESV_EVICT | XE_BO_FLAG_PAGETABLE; + if (vm->xef) /* userspace */ + bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; + pt->level = level; bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); + bo_flags); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto err_kfree; @@ -269,8 +272,11 @@ struct xe_pt_update { bool preexisting; }; +/** + * struct xe_pt_stage_bind_walk - Walk state for the stage_bind walk. + */ struct xe_pt_stage_bind_walk { - /** base: The base class. */ + /** @base: The base class. */ struct xe_pt_walk base; /* Input parameters for the walk */ @@ -278,15 +284,19 @@ struct xe_pt_stage_bind_walk { struct xe_vm *vm; /** @tile: The tile we're building for. */ struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; + /** @default_vram_pte: PTE flag only template for VRAM. No address is associated */ + u64 default_vram_pte; + /** @default_system_pte: PTE flag only template for System. No address is associated */ + u64 default_system_pte; /** @dma_offset: DMA offset to add to the PTE. */ u64 dma_offset; /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. + * @needs_64K: This address range enforces 64K alignment and + * granularity on VRAM. */ bool needs_64K; + /** @clear_pt: clear page table entries during the bind walk */ + bool clear_pt; /** * @vma: VMA being mapped */ @@ -299,6 +309,7 @@ struct xe_pt_stage_bind_walk { u64 va_curs_start; /* Output */ + /** @wupd: Walk output data for page-table updates. */ struct xe_walk_update { /** @wupd.entries: Caller provided storage. */ struct xe_vm_pgtable_update *entries; @@ -316,7 +327,7 @@ struct xe_pt_stage_bind_walk { u64 l0_end_addr; /** @addr_64K: The start address of the current 64K chunk. */ u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. */ + /** @found_64K: Whether @add_64K actually points to a 64K chunk. */ bool found_64K; }; @@ -436,6 +447,10 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, if (xe_vma_is_null(xe_walk->vma)) return true; + /* if we are clearing page table, no dma addresses*/ + if (xe_walk->clear_pt) + return true; + /* Is the DMA address huge PTE size aligned? */ size = next - addr; dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); @@ -515,24 +530,35 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { struct xe_res_cursor *curs = xe_walk->curs; bool is_null = xe_vma_is_null(xe_walk->vma); + bool is_vram = is_null ? false : xe_res_is_vram(curs); XE_WARN_ON(xe_walk->va_curs_start != addr); - pte = vm->pt_ops->pte_encode_vma(is_null ? 
0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; + if (xe_walk->clear_pt) { + pte = 0; + } else { + pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : + xe_res_dma(curs) + + xe_walk->dma_offset, + xe_walk->vma, + pat_index, level); + if (!is_null) + pte |= is_vram ? xe_walk->default_vram_pte : + xe_walk->default_system_pte; - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail. - */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; + /* + * Set the XE_PTE_PS64 hint if possible, otherwise if + * this device *requires* 64K PTE size for VRAM, fail. + */ + if (level == 0 && !xe_parent->is_compact) { + if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { + xe_walk->vma->gpuva.flags |= + XE_VMA_PTE_64K; + pte |= XE_PTE_PS64; + } else if (XE_WARN_ON(xe_walk->needs_64K && + is_vram)) { + return -EINVAL; + } } } @@ -540,7 +566,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, if (unlikely(ret)) return ret; - if (!is_null) + if (!is_null && !xe_walk->clear_pt) xe_res_next(curs, next - addr); xe_walk->va_curs_start = next; xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); @@ -603,6 +629,44 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { .pt_entry = xe_pt_stage_bind_entry, }; +/* + * Default atomic expectations for different allocation scenarios are as follows: + * + * 1. Traditional API: When the VM is not in LR mode: + * - Device atomics are expected to function with all allocations. + * + * 2. Compute/SVM API: When the VM is in LR mode: + * - Device atomics are the default behavior when the bo is placed in a single region. + * - In all other cases device atomics will be disabled with AE=0 until an application + * request differently using a ioctl like madvise. + */ +static bool xe_atomic_for_vram(struct xe_vm *vm) +{ + return true; +} + +static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_bo *bo) +{ + struct xe_device *xe = vm->xe; + + if (!xe->info.has_device_atomics_on_smem) + return false; + + /* + * If a SMEM+LMEM allocation is backed by SMEM, a device + * atomics will cause a gpu page fault and which then + * gets migrated to LMEM, bind such allocations with + * device atomics enabled. + * + * TODO: Revisit this. Perhaps add something like a + * fault_on_atomics_in_system UAPI flag. + * Note that this also prohibits GPU atomics in LR mode for + * userptr and system memory on DGFX. + */ + return (!IS_DGFX(xe) || (!xe_vm_in_lr_mode(vm) || + (bo && xe_bo_has_single_placement(bo)))); +} + /** * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address * range. @@ -612,6 +676,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { * @entries: Storage for the update entries used for connecting the tree to * the main tree at commit time. * @num_entries: On output contains the number of @entries used. + * @clear_pt: Clear the page table entries. * * This function builds a disconnected page-table tree for a given address * range. 
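For orientation, the atomic-access (AE) defaults encoded by xe_atomic_for_vram() and xe_atomic_for_system() above can be read as one predicate. The helper below is an illustrative restatement under the same conditions, not driver code (driver headers such as xe_vm.h and xe_bo.h are assumed), and it only applies to VMAs carrying XE_VMA_ATOMIC_PTE_BIT.

    /* Sketch: when does the bind walk seed XE_USM_PPGTT_PTE_AE into a PTE? */
    static bool example_pte_gets_ae(struct xe_vm *vm, struct xe_bo *bo,
                                    bool backed_by_vram)
    {
        struct xe_device *xe = vm->xe;

        if (backed_by_vram)
            return true;                    /* xe_atomic_for_vram() */

        if (!xe->info.has_device_atomics_on_smem)
            return false;                   /* platform cannot do SMEM atomics */

        /* integrated, or dGPU outside LR mode, or a single-placement BO */
        return !IS_DGFX(xe) || !xe_vm_in_lr_mode(vm) ||
               (bo && xe_bo_has_single_placement(bo));
    }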
The tree is connected to the main vm tree for the gpu using @@ -625,13 +690,13 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { static int xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_svm_range *range, - struct xe_vm_pgtable_update *entries, u32 *num_entries) + struct xe_vm_pgtable_update *entries, + u32 *num_entries, bool clear_pt) { struct xe_device *xe = tile_to_xe(tile); struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); struct xe_res_cursor curs; + struct xe_vm *vm = xe_vma_vm(vma); struct xe_pt_stage_bind_walk xe_walk = { .base = { .ops = &xe_pt_stage_bind_ops, @@ -639,34 +704,31 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .max_level = XE_PT_HIGHEST_LEVEL, .staging = true, }, - .vm = xe_vma_vm(vma), + .vm = vm, .tile = tile, .curs = &curs, .va_curs_start = range ? range->base.itree.start : xe_vma_start(vma), .vma = vma, .wupd.entries = entries, + .clear_pt = clear_pt, }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; + struct xe_pt *pt = vm->pt_root[tile->id]; int ret; if (range) { /* Move this entire thing to xe_svm.c? */ - xe_svm_notifier_lock(xe_vma_vm(vma)); + xe_svm_notifier_lock(vm); if (!xe_svm_range_pages_valid(range)) { xe_svm_range_debug(range, "BIND PREPARE - RETRY"); - xe_svm_notifier_unlock(xe_vma_vm(vma)); + xe_svm_notifier_unlock(vm); return -EAGAIN; } if (xe_svm_range_has_dma_mapping(range)) { xe_res_first_dma(range->base.dma_addr, 0, range->base.itree.last + 1 - range->base.itree.start, &curs); - is_devmem = xe_res_is_vram(&curs); - if (is_devmem) - xe_svm_range_debug(range, "BIND PREPARE - DMA VRAM"); - else - xe_svm_range_debug(range, "BIND PREPARE - DMA"); + xe_svm_range_debug(range, "BIND PREPARE - MIXED"); } else { xe_assert(xe, false); } @@ -674,54 +736,21 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, * Note, when unlocking the resource cursor dma addresses may become * stale, but the bind will be aborted anyway at commit time. */ - xe_svm_notifier_unlock(xe_vma_vm(vma)); + xe_svm_notifier_unlock(vm); } - xe_walk.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem; + xe_walk.needs_64K = (vm->flags & XE_VM_FLAG_64K); + if (clear_pt) + goto walk_pt; - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. 
- */ - else if (is_devmem) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0; + xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0; + xe_walk.default_system_pte = xe_atomic_for_system(vm, bo) ? + XE_USM_PPGTT_PTE_AE : 0; } - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - + xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM; + xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0; if (!range) xe_bo_assert_held(bo); @@ -739,6 +768,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, curs.size = xe_vma_size(vma); } +walk_pt: ret = xe_pt_walk_range(&pt->base, pt->level, range ? range->base.itree.start : xe_vma_start(vma), range ? range->base.itree.last + 1 : xe_vma_end(vma), @@ -877,6 +907,11 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); + if (xe_vma_bo(vma)) + xe_bo_assert_held(xe_vma_bo(vma)); + else if (xe_vma_is_userptr(vma)) + lockdep_assert_held(&xe_vma_vm(vma)->userptr.notifier_lock); + if (!(pt_mask & BIT(tile->id))) return false; @@ -1103,12 +1138,14 @@ static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries, static int xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_svm_range *range, - struct xe_vm_pgtable_update *entries, u32 *num_entries) + struct xe_vm_pgtable_update *entries, + u32 *num_entries, bool invalidate_on_bind) { int err; *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, range, entries, num_entries); + err = xe_pt_stage_bind(tile, vma, range, entries, num_entries, + invalidate_on_bind); if (!err) xe_tile_assert(tile, *num_entries); @@ -1420,11 +1457,13 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) return err; } +#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) { struct xe_vm *vm = pt_update->vops->vm; struct xe_vma_ops *vops = pt_update->vops; struct xe_vma_op *op; + unsigned long i; int err; err = xe_pt_pre_commit(pt_update); @@ -1434,25 +1473,41 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) xe_svm_notifier_lock(vm); list_for_each_entry(op, &vops->list, link) { - struct xe_svm_range *range = op->map_range.range; + struct xe_svm_range *range = NULL; if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) continue; - xe_svm_range_debug(range, "PRE-COMMIT"); + if (op->base.op == DRM_GPUVA_OP_PREFETCH) { + xe_assert(vm->xe, + xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))); + xa_for_each(&op->prefetch_range.range, i, range) { + xe_svm_range_debug(range, "PRE-COMMIT"); + + if (!xe_svm_range_pages_valid(range)) { + xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); + xe_svm_notifier_unlock(vm); + return -ENODATA; + } + } + } else { + xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma)); + xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE); + range = op->map_range.range; - xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma)); - xe_assert(vm->xe, op->subop == 
XE_VMA_SUBOP_MAP_RANGE); + xe_svm_range_debug(range, "PRE-COMMIT"); - if (!xe_svm_range_pages_valid(range)) { - xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); - xe_svm_notifier_unlock(vm); - return -EAGAIN; + if (!xe_svm_range_pages_valid(range)) { + xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); + xe_svm_notifier_unlock(vm); + return -EAGAIN; + } } } return 0; } +#endif struct invalidation_fence { struct xe_gt_tlb_invalidation_fence base; @@ -1791,7 +1846,7 @@ static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) + struct xe_vma *vma, bool invalidate_on_bind) { u32 current_op = pt_update_ops->current_op; struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; @@ -1813,7 +1868,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, return err; err = xe_pt_prepare_bind(tile, vma, NULL, pt_op->entries, - &pt_op->num_entries); + &pt_op->num_entries, invalidate_on_bind); if (!err) { xe_tile_assert(tile, pt_op->num_entries <= ARRAY_SIZE(pt_op->entries)); @@ -1835,11 +1890,11 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, * If !rebind, and scratch enabled VMs, there is a chance the scratch * PTE is already cached in the TLB so it needs to be invalidated. * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. + * LR, in particular on user-space batch buffer chaining, it needs to + * be done here. */ if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) + xe_vm_in_lr_mode(vm))) pt_update_ops->needs_invalidation = true; else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) /* We bump also if batch_invalidate_tlb is true */ @@ -1875,7 +1930,7 @@ static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile, pt_op->rebind = BIT(tile->id) & range->tile_present; err = xe_pt_prepare_bind(tile, vma, range, pt_op->entries, - &pt_op->num_entries); + &pt_op->num_entries, false); if (!err) { xe_tile_assert(tile, pt_op->num_entries <= ARRAY_SIZE(pt_op->entries)); @@ -1940,6 +1995,32 @@ static int unbind_op_prepare(struct xe_tile *tile, return 0; } +static bool +xe_pt_op_check_range_skip_invalidation(struct xe_vm_pgtable_update_op *pt_op, + struct xe_svm_range *range) +{ + struct xe_vm_pgtable_update *update = pt_op->entries; + + XE_WARN_ON(!pt_op->num_entries); + + /* + * We can't skip the invalidation if we are removing PTEs that span more + * than the range, do some checks to ensure we are removing PTEs that + * are invalid. 
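A short worked example of the heuristic the checks just below implement, assuming 4KiB pages and 2MiB level-1 entries as elsewhere in this file:

    /*
     * Unbinding a 64KiB SVM range through one level-0 update removes at
     * most sixteen 4KiB PTEs, all inside the range, so the invalidation
     * can be skipped. Unbinding that same 64KiB range through one level-1
     * update would clear a full 2MiB entry that may still map neighbouring
     * ranges, so xe_svm_range_size(range) < SZ_2M and the invalidation
     * stays.
     */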
+ */ + + if (pt_op->num_entries > 1) + return false; + + if (update->pt->level == 0) + return true; + + if (update->pt->level == 1) + return xe_svm_range_size(range) >= SZ_2M; + + return false; +} + static int unbind_range_prepare(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, @@ -1968,7 +2049,10 @@ static int unbind_range_prepare(struct xe_vm *vm, range->base.itree.last + 1); ++pt_update_ops->current_op; pt_update_ops->needs_svm_lock = true; - pt_update_ops->needs_invalidation = true; + pt_update_ops->needs_invalidation |= xe_vm_has_scratch(vm) || + xe_vm_has_valid_gpu_mapping(tile, range->tile_present, + range->tile_invalidated) || + !xe_pt_op_check_range_skip_invalidation(pt_op, range); xe_pt_commit_prepare_unbind(XE_INVALID_VMA, pt_op->entries, pt_op->num_entries); @@ -1987,11 +2071,13 @@ static int op_prepare(struct xe_vm *vm, switch (op->base.op) { case DRM_GPUVA_OP_MAP: - if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) || + if ((!op->map.immediate && xe_vm_in_fault_mode(vm) && + !op->map.invalidate_on_bind) || op->map.is_cpu_addr_mirror) break; - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); + err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma, + op->map.invalidate_on_bind); pt_update_ops->wait_vm_kernel = true; break; case DRM_GPUVA_OP_REMAP: @@ -2005,12 +2091,12 @@ static int op_prepare(struct xe_vm *vm, if (!err && op->remap.prev) { err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); + op->remap.prev, false); pt_update_ops->wait_vm_bookkeep = true; } if (!err && op->remap.next) { err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); + op->remap.next, false); pt_update_ops->wait_vm_bookkeep = true; } break; @@ -2029,11 +2115,20 @@ static int op_prepare(struct xe_vm *vm, { struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); - if (xe_vma_is_cpu_addr_mirror(vma)) - break; + if (xe_vma_is_cpu_addr_mirror(vma)) { + struct xe_svm_range *range; + unsigned long i; - err = bind_op_prepare(vm, tile, pt_update_ops, vma); - pt_update_ops->wait_vm_kernel = true; + xa_for_each(&op->prefetch_range.range, i, range) { + err = bind_range_prepare(vm, tile, pt_update_ops, + vma, range); + if (err) + return err; + } + } else { + err = bind_op_prepare(vm, tile, pt_update_ops, vma, false); + pt_update_ops->wait_vm_kernel = true; + } break; } case DRM_GPUVA_OP_DRIVER: @@ -2115,7 +2210,7 @@ ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO); static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) + struct dma_fence *fence2, bool invalidate_on_bind) { xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma)); @@ -2130,7 +2225,14 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); } - vma->tile_present |= BIT(tile->id); + /* All WRITE_ONCE pair with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ + WRITE_ONCE(vma->tile_present, vma->tile_present | BIT(tile->id)); + if (invalidate_on_bind) + WRITE_ONCE(vma->tile_invalidated, + vma->tile_invalidated | BIT(tile->id)); + else + WRITE_ONCE(vma->tile_invalidated, + vma->tile_invalidated & ~BIT(tile->id)); vma->tile_staged &= ~BIT(tile->id); if (xe_vma_is_userptr(vma)) { lockdep_assert_held_read(&vm->userptr.notifier_lock); @@ -2178,6 +2280,18 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, } } +static void range_present_and_invalidated_tile(struct 
xe_vm *vm, + struct xe_svm_range *range, + u8 tile_id) +{ + /* All WRITE_ONCE pair with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ + + lockdep_assert_held(&vm->svm.gpusvm.notifier_lock); + + WRITE_ONCE(range->tile_present, range->tile_present | BIT(tile_id)); + WRITE_ONCE(range->tile_invalidated, range->tile_invalidated & ~BIT(tile_id)); +} + static void op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, @@ -2193,7 +2307,7 @@ static void op_commit(struct xe_vm *vm, break; bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, - fence2); + fence2, op->map.invalidate_on_bind); break; case DRM_GPUVA_OP_REMAP: { @@ -2206,10 +2320,10 @@ static void op_commit(struct xe_vm *vm, if (op->remap.prev) bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence, fence2); + fence, fence2, false); if (op->remap.next) bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence, fence2); + fence, fence2, false); break; } case DRM_GPUVA_OP_UNMAP: @@ -2225,27 +2339,28 @@ static void op_commit(struct xe_vm *vm, { struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); - if (!xe_vma_is_cpu_addr_mirror(vma)) + if (xe_vma_is_cpu_addr_mirror(vma)) { + struct xe_svm_range *range = NULL; + unsigned long i; + + xa_for_each(&op->prefetch_range.range, i, range) + range_present_and_invalidated_tile(vm, range, tile->id); + } else { bind_op_commit(vm, tile, pt_update_ops, vma, fence, - fence2); + fence2, false); + } break; } case DRM_GPUVA_OP_DRIVER: { - /* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */ - - if (op->subop == XE_VMA_SUBOP_MAP_RANGE) { - WRITE_ONCE(op->map_range.range->tile_present, - op->map_range.range->tile_present | - BIT(tile->id)); - WRITE_ONCE(op->map_range.range->tile_invalidated, - op->map_range.range->tile_invalidated & - ~BIT(tile->id)); - } else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) { + /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ + if (op->subop == XE_VMA_SUBOP_MAP_RANGE) + range_present_and_invalidated_tile(vm, op->map_range.range, tile->id); + else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) WRITE_ONCE(op->unmap_range.range->tile_present, op->unmap_range.range->tile_present & ~BIT(tile->id)); - } + break; } default: @@ -2265,11 +2380,15 @@ static const struct xe_migrate_pt_update_ops userptr_migrate_ops = { .pre_commit = xe_pt_userptr_pre_commit, }; +#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) static const struct xe_migrate_pt_update_ops svm_migrate_ops = { .populate = xe_vm_populate_pgtable, .clear = xe_migrate_clear_pgtable_callback, .pre_commit = xe_pt_svm_pre_commit, }; +#else +static const struct xe_migrate_pt_update_ops svm_migrate_ops; +#endif /** * xe_pt_update_ops_run() - Run PT update operations @@ -2434,7 +2553,7 @@ free_ifence: kfree(mfence); kfree(ifence); kill_vm_tile1: - if (err != -EAGAIN && tile->id) + if (err != -EAGAIN && err != -ENODATA && tile->id) xe_vm_kill(vops->vm, false); return ERR_PTR(err); diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c index 454ea7dc08ac..3d62008c99f1 100644 --- a/drivers/gpu/drm/xe/xe_pxp.c +++ b/drivers/gpu/drm/xe/xe_pxp.c @@ -504,65 +504,62 @@ int xe_pxp_exec_queue_set_type(struct xe_pxp *pxp, struct xe_exec_queue *q, u8 t return 0; } -static void __exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) +static int __exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) { - spin_lock_irq(&pxp->queues.lock); - list_add_tail(&q->pxp.link, &pxp->queues.list); - spin_unlock_irq(&pxp->queues.lock); + int ret = 0; + + /* + * A 
queue can be added to the list only if the PXP is in active status, + * otherwise the termination might not handle it correctly. + */ + mutex_lock(&pxp->mutex); + + if (pxp->status == XE_PXP_ACTIVE) { + spin_lock_irq(&pxp->queues.lock); + list_add_tail(&q->pxp.link, &pxp->queues.list); + spin_unlock_irq(&pxp->queues.lock); + } else if (pxp->status == XE_PXP_ERROR || pxp->status == XE_PXP_SUSPENDED) { + ret = -EIO; + } else { + ret = -EBUSY; /* try again later */ + } + + mutex_unlock(&pxp->mutex); + + return ret; } -/** - * xe_pxp_exec_queue_add - add a queue to the PXP list - * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) - * @q: the queue to add to the list - * - * If PXP is enabled and the prerequisites are done, start the PXP ARB - * session (if not already running) and add the queue to the PXP list. Note - * that the queue must have previously been marked as using PXP with - * xe_pxp_exec_queue_set_type. - * - * Returns 0 if the PXP ARB session is running and the queue is in the list, - * -ENODEV if PXP is disabled, -EBUSY if the PXP prerequisites are not done, - * other errno value if something goes wrong during the session start. - */ -int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) +static int pxp_start(struct xe_pxp *pxp, u8 type) { int ret = 0; + bool restart = false; if (!xe_pxp_is_enabled(pxp)) return -ENODEV; /* we only support HWDRM sessions right now */ - xe_assert(pxp->xe, q->pxp.type == DRM_XE_PXP_TYPE_HWDRM); + xe_assert(pxp->xe, type == DRM_XE_PXP_TYPE_HWDRM); - /* - * Runtime suspend kills PXP, so we take a reference to prevent it from - * happening while we have active queues that use PXP - */ - xe_pm_runtime_get(pxp->xe); + /* get_readiness_status() returns 0 for in-progress and 1 for done */ + ret = xe_pxp_get_readiness_status(pxp); + if (ret <= 0) + return ret ?: -EBUSY; - if (!pxp_prerequisites_done(pxp)) { - ret = -EBUSY; - goto out; - } + ret = 0; wait_for_idle: /* * if there is an action in progress, wait for it. We need to wait * outside the lock because the completion is done from within the lock. - * Note that the two action should never be pending at the same time. + * Note that the two actions should never be pending at the same time. 
*/ if (!wait_for_completion_timeout(&pxp->termination, - msecs_to_jiffies(PXP_TERMINATION_TIMEOUT_MS))) { - ret = -ETIMEDOUT; - goto out; - } + msecs_to_jiffies(PXP_TERMINATION_TIMEOUT_MS))) + return -ETIMEDOUT; if (!wait_for_completion_timeout(&pxp->activation, - msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS))) { - ret = -ETIMEDOUT; - goto out; - } + msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS))) + return -ETIMEDOUT; mutex_lock(&pxp->mutex); @@ -570,11 +567,9 @@ wait_for_idle: switch (pxp->status) { case XE_PXP_ERROR: ret = -EIO; - break; + goto out_unlock; case XE_PXP_ACTIVE: - __exec_queue_add(pxp, q); - mutex_unlock(&pxp->mutex); - goto out; + goto out_unlock; case XE_PXP_READY_TO_START: pxp->status = XE_PXP_START_IN_PROGRESS; reinit_completion(&pxp->activation); @@ -582,8 +577,8 @@ wait_for_idle: case XE_PXP_START_IN_PROGRESS: /* If a start is in progress then the completion must not be done */ XE_WARN_ON(completion_done(&pxp->activation)); - mutex_unlock(&pxp->mutex); - goto wait_for_idle; + restart = true; + goto out_unlock; case XE_PXP_NEEDS_TERMINATION: mark_termination_in_progress(pxp); break; @@ -591,29 +586,25 @@ wait_for_idle: case XE_PXP_NEEDS_ADDITIONAL_TERMINATION: /* If a termination is in progress then the completion must not be done */ XE_WARN_ON(completion_done(&pxp->termination)); - mutex_unlock(&pxp->mutex); - goto wait_for_idle; + restart = true; + goto out_unlock; case XE_PXP_SUSPENDED: default: drm_err(&pxp->xe->drm, "unexpected state during PXP start: %u\n", pxp->status); ret = -EIO; - break; + goto out_unlock; } mutex_unlock(&pxp->mutex); - if (ret) - goto out; - if (!completion_done(&pxp->termination)) { ret = pxp_terminate_hw(pxp); if (ret) { drm_err(&pxp->xe->drm, "PXP termination failed before start\n"); mutex_lock(&pxp->mutex); pxp->status = XE_PXP_ERROR; - mutex_unlock(&pxp->mutex); - goto out; + goto out_unlock; } goto wait_for_idle; @@ -635,21 +626,59 @@ wait_for_idle: if (pxp->status != XE_PXP_START_IN_PROGRESS) { drm_err(&pxp->xe->drm, "unexpected state after PXP start: %u\n", pxp->status); pxp->status = XE_PXP_NEEDS_TERMINATION; - mutex_unlock(&pxp->mutex); - goto wait_for_idle; + restart = true; + goto out_unlock; } /* If everything went ok, update the status and add the queue to the list */ - if (!ret) { + if (!ret) pxp->status = XE_PXP_ACTIVE; - __exec_queue_add(pxp, q); - } else { + else pxp->status = XE_PXP_ERROR; - } +out_unlock: mutex_unlock(&pxp->mutex); -out: + if (restart) + goto wait_for_idle; + + return ret; +} + +/** + * xe_pxp_exec_queue_add - add a queue to the PXP list + * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) + * @q: the queue to add to the list + * + * If PXP is enabled and the prerequisites are done, start the PXP default + * session (if not already running) and add the queue to the PXP list. + * + * Returns 0 if the PXP session is running and the queue is in the list, + * -ENODEV if PXP is disabled, -EBUSY if the PXP prerequisites are not done, + * other errno value if something goes wrong during the session start. 
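A minimal sketch of how a caller might consume the return codes documented above; the call site and the error-handling policy are hypothetical, only the meaning of the codes comes from the kernel-doc.

    #include "xe_pxp.h"   /* declares xe_pxp_exec_queue_add() */

    /* Hypothetical caller, illustrating the documented return codes only. */
    static int example_attach_pxp_queue(struct xe_pxp *pxp, struct xe_exec_queue *q)
    {
        int err = xe_pxp_exec_queue_add(pxp, q);

        if (err == -ENODEV)
            return 0;       /* PXP disabled on this device: nothing to attach */
        if (err == -EBUSY)
            return -EAGAIN; /* prerequisites not done yet: retry later */

        return err;         /* 0: session running and the queue is listed */
    }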
+ */ +int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) +{ + int ret; + + if (!xe_pxp_is_enabled(pxp)) + return -ENODEV; + + /* + * Runtime suspend kills PXP, so we take a reference to prevent it from + * happening while we have active queues that use PXP + */ + xe_pm_runtime_get(pxp->xe); + +start: + ret = pxp_start(pxp, q->pxp.type); + + if (!ret) { + ret = __exec_queue_add(pxp, q); + if (ret == -EBUSY) + goto start; + } + /* * in the successful case the PM ref is released from * xe_pxp_exec_queue_remove diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 5e65830dad25..d517ec9ddcbf 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -141,7 +141,7 @@ query_engine_cycles(struct xe_device *xe, return -EINVAL; eci = &resp.eci; - if (eci->gt_id >= XE_MAX_GT_PER_TILE) + if (eci->gt_id >= xe->info.max_gt_per_tile) return -EINVAL; gt = xe_device_get_gt(xe, eci->gt_id); @@ -340,7 +340,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) if (xe_device_get_root_tile(xe)->mem.vram.usable_size) config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM; - if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_GPUSVM)) + if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM)) config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR; config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= @@ -368,6 +368,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query struct drm_xe_query_gt_list __user *query_ptr = u64_to_user_ptr(query->data); struct drm_xe_query_gt_list *gt_list; + int iter = 0; u8 id; if (query->size == 0) { @@ -385,12 +386,12 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) - gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA; + gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MEDIA; else - gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN; - gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id; - gt_list->gt_list[id].gt_id = gt->info.id; - gt_list->gt_list[id].reference_clock = gt->info.reference_clock; + gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MAIN; + gt_list->gt_list[iter].tile_id = gt_to_tile(gt)->id; + gt_list->gt_list[iter].gt_id = gt->info.id; + gt_list->gt_list[iter].reference_clock = gt->info.reference_clock; /* * The mem_regions indexes in the mask below need to * directly identify the struct @@ -406,19 +407,21 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query * assumption. 
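The near/far memory-region encoding produced by the query code right below is easier to follow with numbers. This helper is an illustrative restatement; the two-tile, mask-0x7 figures in the comment are assumptions for the example (bit 0 is system memory, bit 1 + tile_id is that tile's VRAM).

    #include <linux/bits.h>
    #include <linux/types.h>

    /* Illustrative restatement of the near/far region computation below. */
    static void example_mem_regions(bool is_dgfx, u8 tile_id, u64 mem_region_mask,
                                    u64 *near, u64 *far)
    {
        *near = is_dgfx ? BIT_ULL(tile_id) << 1 : 0x1;
        *far = mem_region_mask ^ *near;
        /* e.g. dGPU, two tiles, mask 0x7: tile 0 gives near 0x2 / far 0x5,
         * tile 1 gives near 0x4 / far 0x3; integrated gives near 0x1.
         */
    }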
*/ if (!IS_DGFX(xe)) - gt_list->gt_list[id].near_mem_regions = 0x1; + gt_list->gt_list[iter].near_mem_regions = 0x1; else - gt_list->gt_list[id].near_mem_regions = + gt_list->gt_list[iter].near_mem_regions = BIT(gt_to_tile(gt)->id) << 1; - gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^ - gt_list->gt_list[id].near_mem_regions; + gt_list->gt_list[iter].far_mem_regions = xe->info.mem_region_mask ^ + gt_list->gt_list[iter].near_mem_regions; - gt_list->gt_list[id].ip_ver_major = + gt_list->gt_list[iter].ip_ver_major = REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid); - gt_list->gt_list[id].ip_ver_minor = + gt_list->gt_list[iter].ip_ver_minor = REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid); - gt_list->gt_list[id].ip_ver_rev = + gt_list->gt_list[iter].ip_ver_rev = REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid); + + iter++; } if (copy_to_user(query_ptr, gt_list, size)) { @@ -683,8 +686,8 @@ static int query_oa_units(struct xe_device *xe, du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | DRM_XE_OA_CAPS_OA_BUFFER_SIZE | - DRM_XE_OA_CAPS_WAIT_NUM_REPORTS; - + DRM_XE_OA_CAPS_WAIT_NUM_REPORTS | + DRM_XE_OA_CAPS_OAM; j = 0; for_each_hw_engine(hwe, gt, hwe_id) { if (!xe_hw_engine_is_reserved(hwe) && diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 9475e3f74958..fc8447a838c4 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -173,6 +173,9 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) if (xa_empty(&sr->xa)) return; + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name); fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index bc1689db4cd7..7b50c7c1ee21 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -110,13 +110,14 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) return i; } -static int emit_flush_invalidate(u32 *dw, int i) +static int emit_flush_invalidate(u32 addr, u32 val, u32 *dw, int i) { dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | - MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX; - dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR; - dw[i++] = 0; + MI_FLUSH_IMM_DW; + + dw[i++] = addr | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; + dw[i++] = val; return i; } @@ -397,23 +398,20 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, static void emit_migration_job_gen12(struct xe_sched_job *job, struct xe_lrc *lrc, u32 seqno) { + u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc); u32 dw[MAX_JOB_SIZE_DW], i = 0; i = emit_copy_timestamp(lrc, dw, i); - i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), - seqno, dw, i); + i = emit_store_imm_ggtt(saddr, seqno, dw, i); dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */ i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i); - if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) { - /* XXX: Do we need this? Leaving for now. 
*/ - dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(dw, i); - dw[i++] = preparser_disable(false); - } + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(saddr, seqno, dw, i); + dw[i++] = preparser_disable(false); i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i); diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h index 1ae56e2ee7b4..d7e3e150a9a5 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops_types.h +++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h @@ -8,7 +8,7 @@ struct xe_sched_job; -#define MAX_JOB_SIZE_DW 48 +#define MAX_JOB_SIZE_DW 58 #define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4) /** diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 13bb62d3e615..95571b87aa73 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -56,37 +56,61 @@ static bool rule_matches(const struct xe_device *xe, xe->info.subplatform == r->subplatform; break; case XE_RTP_MATCH_GRAPHICS_VERSION: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 == r->ver_start && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 >= r->ver_start && xe->info.graphics_verx100 <= r->ver_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 == r->ver_start; break; case XE_RTP_MATCH_GRAPHICS_STEP: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.step.graphics >= r->step_start && xe->info.step.graphics < r->step_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 == r->ver_start && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_RANGE: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 >= r->ver_start && xe->info.media_verx100 <= r->ver_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_STEP: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.step.media >= r->step_start && xe->info.step.media < r->step_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_ANY_GT: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 == r->ver_start; break; case XE_RTP_MATCH_INTEGRATED: @@ -108,6 +132,9 @@ static bool rule_matches(const struct xe_device *xe, match = hwe->class != r->engine_class; break; case XE_RTP_MATCH_FUNC: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = r->match_func(gt, hwe); break; default: @@ -186,6 +213,11 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx, struct xe_device **xe) { switch (ctx->type) { + case XE_RTP_PROCESS_TYPE_DEVICE: + *hwe = NULL; + *gt = NULL; + *xe = ctx->xe; + break; case XE_RTP_PROCESS_TYPE_GT: *hwe = NULL; *gt = ctx->gt; @@ -258,9 +290,6 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, rtp_get_context(ctx, &hwe, >, &xe); - if (IS_SRIOV_VF(xe)) - return; - xe_assert(xe, entries); for (entry = entries; entry - entries < n_entries; entry++) { @@ -329,21 +358,6 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, hwe->engine_id == __ffs(render_compute_mask); } -bool xe_rtp_match_first_gslice_fused_off(const 
struct xe_gt *gt, - const struct xe_hw_engine *hwe) -{ - unsigned int dss_per_gslice = 4; - unsigned int dss; - - if (drm_WARN(>_to_xe(gt)->drm, xe_dss_mask_empty(gt->fuse_topo.g_dss_mask), - "Checking gslice for platform without geometry pipeline\n")) - return false; - - dss = xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0); - - return dss >= dss_per_gslice; -} - bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, const struct xe_hw_engine *hwe) { diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 4fe736a11c42..5ed6c14b9ae3 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -422,7 +422,8 @@ struct xe_reg_sr; #define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__), \ struct xe_hw_engine * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \ - struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }) + struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }, \ + struct xe_device * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_DEVICE }) void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, unsigned long *active_entries, @@ -466,17 +467,6 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, const struct xe_hw_engine *hwe); /* - * xe_rtp_match_first_gslice_fused_off - Match when first gslice is fused off - * - * @gt: GT structure - * @hwe: Engine instance - * - * Returns: true if first gslice is fused off, false otherwise. - */ -bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, - const struct xe_hw_engine *hwe); - -/* * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device * * @gt: GT structure diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 1b76b947c706..f4cf30e298cf 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -110,12 +110,14 @@ struct xe_rtp_entry { }; enum xe_rtp_process_type { + XE_RTP_PROCESS_TYPE_DEVICE, XE_RTP_PROCESS_TYPE_GT, XE_RTP_PROCESS_TYPE_ENGINE, }; struct xe_rtp_process_ctx { union { + struct xe_device *xe; struct xe_gt *gt; struct xe_hw_engine *hwe; }; diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index f8fe61e25518..1d43e183ca21 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -60,7 +60,8 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3 bo = xe_managed_bo_create_pin_map(xe, tile, size, XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); if (IS_ERR(bo)) { drm_err(&xe->drm, "Failed to prepare %uKiB BO for SA manager (%pe)\n", size / SZ_1K, bo); diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 1905ca590965..d21bf8f26964 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -113,7 +113,8 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, kref_init(&job->refcount); xe_exec_queue_get(job->q); - err = drm_sched_job_init(&job->drm, q->entity, 1, NULL); + err = drm_sched_job_init(&job->drm, q->entity, 1, NULL, + q->xef ? 
q->xef->drm->client_id : 0); if (err) goto err_free; @@ -216,15 +217,17 @@ void xe_sched_job_set_error(struct xe_sched_job *job, int error) bool xe_sched_job_started(struct xe_sched_job *job) { + struct dma_fence *fence = dma_fence_chain_contained(job->fence); struct xe_lrc *lrc = job->q->lrc[0]; - return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job), - xe_lrc_start_seqno(lrc), - dma_fence_chain_contained(job->fence)->ops); + return !__dma_fence_is_later(fence, + xe_sched_job_lrc_seqno(job), + xe_lrc_start_seqno(lrc)); } bool xe_sched_job_completed(struct xe_sched_job *job) { + struct dma_fence *fence = dma_fence_chain_contained(job->fence); struct xe_lrc *lrc = job->q->lrc[0]; /* @@ -232,9 +235,9 @@ bool xe_sched_job_completed(struct xe_sched_job *job) * parallel handshake is done. */ - return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job), - xe_lrc_seqno(lrc), - dma_fence_chain_contained(job->fence)->ops); + return !__dma_fence_is_later(fence, + xe_sched_job_lrc_seqno(job), + xe_lrc_seqno(lrc)); } void xe_sched_job_arm(struct xe_sched_job *job) diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 86d47aaf0358..1c3c04d52f55 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -5,6 +5,7 @@ #include <linux/shrinker.h> +#include <drm/drm_managed.h> #include <drm/ttm/ttm_backup.h> #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_tt.h> @@ -65,11 +66,15 @@ static s64 xe_shrinker_walk(struct xe_device *xe, struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, mem_type); struct ttm_bo_lru_cursor curs; struct ttm_buffer_object *ttm_bo; + struct ttm_lru_walk_arg arg = { + .ctx = ctx, + .trylock_only = true, + }; if (!man || !man->use_tt) continue; - ttm_bo_lru_for_each_reserved_guarded(&curs, man, ctx, ttm_bo) { + ttm_bo_lru_for_each_reserved_guarded(&curs, man, &arg, ttm_bo) { if (!ttm_bo_shrink_suitable(ttm_bo, ctx)) continue; @@ -81,6 +86,8 @@ static s64 xe_shrinker_walk(struct xe_device *xe, if (*scanned >= to_scan) break; } + /* Trylocks should never error, just fail. */ + xe_assert(xe, !IS_ERR(ttm_bo)); } return freed; @@ -213,24 +220,34 @@ static void xe_shrinker_pm(struct work_struct *work) xe_pm_runtime_put(shrinker->xe); } +static void xe_shrinker_fini(struct drm_device *drm, void *arg) +{ + struct xe_shrinker *shrinker = arg; + + xe_assert(shrinker->xe, !shrinker->shrinkable_pages); + xe_assert(shrinker->xe, !shrinker->purgeable_pages); + shrinker_free(shrinker->shrink); + flush_work(&shrinker->pm_worker); + kfree(shrinker); +} + /** * xe_shrinker_create() - Create an xe per-device shrinker * @xe: Pointer to the xe device. * - * Returns: A pointer to the created shrinker on success, - * Negative error code on failure. + * Return: %0 on success. Negative error code on failure. 
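The shrinker rework below switches to drm_managed teardown instead of an exported destroy function. A minimal, self-contained sketch of that pattern, with illustrative names rather than driver code:

    #include <linux/slab.h>
    #include <drm/drm_managed.h>

    struct example_state {
        int placeholder;
    };

    static void example_fini(struct drm_device *drm, void *arg)
    {
        kfree(arg);     /* undo whatever example_create() allocated */
    }

    static int example_create(struct drm_device *drm)
    {
        struct example_state *s = kzalloc(sizeof(*s), GFP_KERNEL);

        if (!s)
            return -ENOMEM;

        /* on failure this immediately calls example_fini(drm, s) */
        return drmm_add_action_or_reset(drm, example_fini, s);
    }

The release action runs automatically on final drm_device teardown, which is why the explicit xe_shrinker_destroy() export can be dropped.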
*/ -struct xe_shrinker *xe_shrinker_create(struct xe_device *xe) +int xe_shrinker_create(struct xe_device *xe) { struct xe_shrinker *shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL); if (!shrinker) - return ERR_PTR(-ENOMEM); + return -ENOMEM; shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique); if (!shrinker->shrink) { kfree(shrinker); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } INIT_WORK(&shrinker->pm_worker, xe_shrinker_pm); @@ -240,19 +257,7 @@ struct xe_shrinker *xe_shrinker_create(struct xe_device *xe) shrinker->shrink->scan_objects = xe_shrinker_scan; shrinker->shrink->private_data = shrinker; shrinker_register(shrinker->shrink); + xe->mem.shrinker = shrinker; - return shrinker; -} - -/** - * xe_shrinker_destroy() - Destroy an xe per-device shrinker - * @shrinker: Pointer to the shrinker to destroy. - */ -void xe_shrinker_destroy(struct xe_shrinker *shrinker) -{ - xe_assert(shrinker->xe, !shrinker->shrinkable_pages); - xe_assert(shrinker->xe, !shrinker->purgeable_pages); - shrinker_free(shrinker->shrink); - flush_work(&shrinker->pm_worker); - kfree(shrinker); + return drmm_add_action_or_reset(&xe->drm, xe_shrinker_fini, shrinker); } diff --git a/drivers/gpu/drm/xe/xe_shrinker.h b/drivers/gpu/drm/xe/xe_shrinker.h index 28a038f4fcbf..5132ae5192e1 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.h +++ b/drivers/gpu/drm/xe/xe_shrinker.h @@ -11,8 +11,6 @@ struct xe_device; void xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgeable); -struct xe_shrinker *xe_shrinker_create(struct xe_device *xe); - -void xe_shrinker_destroy(struct xe_shrinker *shrinker); +int xe_shrinker_create(struct xe_device *xe); #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 0f721ae17b26..afbdd894bd6e 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -3,6 +3,8 @@ * Copyright © 2023-2024 Intel Corporation */ +#include <linux/debugfs.h> +#include <drm/drm_debugfs.h> #include <drm/drm_managed.h> #include "xe_assert.h" @@ -10,6 +12,8 @@ #include "xe_module.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_service.h" #include "xe_sriov_printk.h" static unsigned int wanted_max_vfs(struct xe_device *xe) @@ -80,9 +84,22 @@ bool xe_sriov_pf_readiness(struct xe_device *xe) */ int xe_sriov_pf_init_early(struct xe_device *xe) { + int err; + xe_assert(xe, IS_SRIOV_PF(xe)); - return drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); + xe->sriov.pf.vfs = drmm_kcalloc(&xe->drm, 1 + xe_sriov_pf_get_totalvfs(xe), + sizeof(*xe->sriov.pf.vfs), GFP_KERNEL); + if (!xe->sriov.pf.vfs) + return -ENOMEM; + + err = drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); + if (err) + return err; + + xe_sriov_pf_service_init(xe); + + return 0; } /** @@ -102,3 +119,45 @@ void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p) drm_printf(p, "supported: %u\n", xe->sriov.pf.driver_max_vfs); drm_printf(p, "enabled: %u\n", pci_num_vf(pdev)); } + +static int simple_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct dentry *parent = node->dent->d_parent; + struct xe_device *xe = parent->d_inode->i_private; + void (*print)(struct xe_device *, struct drm_printer *) = node->info_ent->data; + + print(xe, &p); + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + { .name = "vfs", .show = simple_show, .data = xe_sriov_pf_print_vfs_summary }, + 
{ .name = "versions", .show = simple_show, .data = xe_sriov_pf_service_print_versions }, +}; + +/** + * xe_sriov_pf_debugfs_register - Register PF debugfs attributes. + * @xe: the &xe_device + * @root: the root &dentry + * + * Prepare debugfs attributes exposed by the PF. + */ +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ + struct drm_minor *minor = xe->drm.primary; + struct dentry *parent; + + /* + * /sys/kernel/debug/dri/0/ + * ├── pf + * │  ├── ... + */ + parent = debugfs_create_dir("pf", root); + if (IS_ERR(parent)) + return; + parent->d_inode->i_private = xe; + + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), parent, minor); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h index d1220e70e1c0..c392c3fcf085 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf.h @@ -8,12 +8,14 @@ #include <linux/types.h> +struct dentry; struct drm_printer; struct xe_device; #ifdef CONFIG_PCI_IOV bool xe_sriov_pf_readiness(struct xe_device *xe); int xe_sriov_pf_init_early(struct xe_device *xe); +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root); void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); #else static inline bool xe_sriov_pf_readiness(struct xe_device *xe) @@ -25,6 +27,10 @@ static inline int xe_sriov_pf_init_early(struct xe_device *xe) { return 0; } + +static inline void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ +} #endif #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_sriov_pf_service.c new file mode 100644 index 000000000000..eee3b2a1ba41 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#include "abi/guc_relay_actions_abi.h" + +#include "xe_device_types.h" +#include "xe_sriov.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_printk.h" + +#include "xe_sriov_pf_service.h" +#include "xe_sriov_pf_service_types.h" + +/** + * xe_sriov_pf_service_init - Early initialization of the SR-IOV PF service. + * @xe: the &xe_device to initialize + * + * Performs early initialization of the SR-IOV PF service. + * + * This function can only be called on PF. + */ +void xe_sriov_pf_service_init(struct xe_device *xe) +{ + BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); + BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); + + xe_assert(xe, IS_SRIOV_PF(xe)); + + /* base versions may differ between platforms */ + xe->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; + xe->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; + + /* latest version is same for all platforms */ + xe->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; + xe->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; +} + +/* Return: 0 on success or a negative error code on failure. 
*/ +static int pf_negotiate_version(struct xe_device *xe, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + struct xe_sriov_pf_service_version base = xe->sriov.pf.service.version.base; + struct xe_sriov_pf_service_version latest = xe->sriov.pf.service.version.latest; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, base.major); + xe_assert(xe, base.major <= latest.major); + xe_assert(xe, (base.major < latest.major) || (base.minor <= latest.minor)); + + /* VF doesn't care - return our latest */ + if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && + wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants newer than ours - return our latest */ + if (wanted_major > latest.major) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants older than min required - reject */ + if (wanted_major < base.major || + (wanted_major == base.major && wanted_minor < base.minor)) { + return -EPERM; + } + + /* previous major - return wanted, as we should still support it */ + if (wanted_major < latest.major) { + /* XXX: we are not prepared for multi-versions yet */ + xe_assert(xe, base.major == latest.major); + return -ENOPKG; + } + + /* same major - return common minor */ + *major = wanted_major; + *minor = min_t(u32, latest.minor, wanted_minor); + return 0; +} + +static void pf_connect(struct xe_device *xe, u32 vfid, u32 major, u32 minor) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + xe_assert(xe, major || minor); + + xe->sriov.pf.vfs[vfid].version.major = major; + xe->sriov.pf.vfs[vfid].version.minor = minor; +} + +static void pf_disconnect(struct xe_device *xe, u32 vfid) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + + xe->sriov.pf.vfs[vfid].version.major = 0; + xe->sriov.pf.vfs[vfid].version.minor = 0; +} + +/** + * xe_sriov_pf_service_is_negotiated - Check if VF has negotiated given ABI version. + * @xe: the &xe_device + * @vfid: the VF identifier + * @major: the major version to check + * @minor: the minor version to check + * + * Checks whether the VF driver has negotiated the given VF/PF ABI version. + * + * This function can only be called on PF. + * + * Returns: true if VF can use given ABI version functionality. + */ +bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + + return major == xe->sriov.pf.vfs[vfid].version.major && + minor <= xe->sriov.pf.vfs[vfid].version.minor; +} + +/** + * xe_sriov_pf_service_handshake_vf - Confirm a connection with the VF. + * @xe: the &xe_device + * @vfid: the VF identifier + * @wanted_major: the major service version expected by the VF + * @wanted_minor: the minor service version expected by the VF + * @major: the major service version to be used by the VF + * @minor: the minor service version to be used by the VF + * + * Negotiate a VF/PF ABI version to allow the VF to use the PF services. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + int err; + + xe_sriov_dbg_verbose(xe, "VF%u wants ABI version %u.%u\n", + vfid, wanted_major, wanted_minor); + + err = pf_negotiate_version(xe, wanted_major, wanted_minor, major, minor); + + if (err < 0) { + xe_sriov_notice(xe, "VF%u failed to negotiate ABI %u.%u (%pe)\n", + vfid, wanted_major, wanted_minor, ERR_PTR(err)); + pf_disconnect(xe, vfid); + } else { + xe_sriov_dbg(xe, "VF%u negotiated ABI version %u.%u\n", + vfid, *major, *minor); + pf_connect(xe, vfid, *major, *minor); + } + + return err; +} + +/** + * xe_sriov_pf_service_reset_vf - Reset a connection with the VF. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * Reset a VF driver negotiated VF/PF ABI version. + * + * After that point, the VF driver will have to perform new version handshake + * to continue use of the PF services again. + * + * This function can only be called on PF. + */ +void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid) +{ + pf_disconnect(xe, vfid); +} + +static void print_pf_version(struct drm_printer *p, const char *name, + const struct xe_sriov_pf_service_version *version) +{ + drm_printf(p, "%s:\t%u.%u\n", name, version->major, version->minor); +} + +/** + * xe_sriov_pf_service_print_versions - Print ABI versions negotiated with VFs. + * @xe: the &xe_device + * @p: the &drm_printer + * + * This function is for PF use only. + */ +void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); + struct xe_sriov_pf_service_version *version; + char name[8]; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + print_pf_version(p, "base", &xe->sriov.pf.service.version.base); + print_pf_version(p, "latest", &xe->sriov.pf.service.version.latest); + + for (n = 1; n <= total_vfs; n++) { + version = &xe->sriov.pf.vfs[n].version; + if (!version->major && !version->minor) + continue; + + print_pf_version(p, xe_sriov_function_name(n, name, sizeof(name)), version); + } +} + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_sriov_pf_service_kunit.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_sriov_pf_service.h new file mode 100644 index 000000000000..d38c18f5ed10 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SERVICE_H_ +#define _XE_SRIOV_PF_SERVICE_H_ + +#include <linux/types.h> + +struct drm_printer; +struct xe_device; + +void xe_sriov_pf_service_init(struct xe_device *xe); +void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p); + +int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor); +bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor); +void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h new file mode 100644 index 000000000000..0835dde358c1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SERVICE_TYPES_H_ +#define _XE_SRIOV_PF_SERVICE_TYPES_H_ + 
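/*
 * Editor's note: the sketch below is not part of the patch. It is a
 * self-contained, hedged illustration of the negotiation policy implemented
 * by pf_negotiate_version() in xe_sriov_pf_service.c above, assuming a base
 * version of 1.0 and a latest version of 1.15 purely for demonstration. The
 * multi-major (-ENOPKG) branch of the kernel code is omitted here.
 */
#include <stdio.h>

struct ver { unsigned int major, minor; };

static int negotiate(struct ver base, struct ver latest,
		     unsigned int want_major, unsigned int want_minor,
		     struct ver *out)
{
	/* 0.0 means "any": hand out the latest version we support */
	if (!want_major && !want_minor) {
		*out = latest;
		return 0;
	}
	/* VF asks for a newer major than we have: offer our latest */
	if (want_major > latest.major) {
		*out = latest;
		return 0;
	}
	/* older than the minimum we still support: reject */
	if (want_major < base.major ||
	    (want_major == base.major && want_minor < base.minor))
		return -1;
	/* same major: agree on the common minor */
	out->major = want_major;
	out->minor = want_minor < latest.minor ? want_minor : latest.minor;
	return 0;
}

int main(void)
{
	struct ver base = { 1, 0 }, latest = { 1, 15 }, got;

	if (!negotiate(base, latest, 0, 0, &got))
		printf("ANY  -> %u.%u\n", got.major, got.minor);	/* 1.15 */
	if (!negotiate(base, latest, 1, 99, &got))
		printf("1.99 -> %u.%u\n", got.major, got.minor);	/* 1.15 */
	if (!negotiate(base, latest, 2, 0, &got))
		printf("2.0  -> %u.%u\n", got.major, got.minor);	/* 1.15 */
	return 0;
}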
+#include <linux/types.h> + +/** + * struct xe_sriov_pf_service_version - VF/PF ABI Version. + * @major: the major version of the VF/PF ABI + * @minor: the minor version of the VF/PF ABI + * + * See `GuC Relay Communication`_. + */ +struct xe_sriov_pf_service_version { + u16 major; + u16 minor; +}; + +/** + * struct xe_sriov_pf_service - Data used by the PF service. + * @version: information about VF/PF ABI versions for current platform. + * @version.base: lowest VF/PF ABI version that could be negotiated with VF. + * @version.latest: latest VF/PF ABI version supported by the PF driver. + */ +struct xe_sriov_pf_service { + struct { + struct xe_sriov_pf_service_version base; + struct xe_sriov_pf_service_version latest; + } version; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h new file mode 100644 index 000000000000..956a88f9f213 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_TYPES_H_ +#define _XE_SRIOV_PF_TYPES_H_ + +#include <linux/mutex.h> +#include <linux/types.h> + +#include "xe_sriov_pf_service_types.h" + +/** + * struct xe_sriov_metadata - per-VF device level metadata + */ +struct xe_sriov_metadata { + /** @version: negotiated VF/PF ABI version */ + struct xe_sriov_pf_service_version version; +}; + +/** + * struct xe_device_pf - Xe PF related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_PF mode. + */ +struct xe_device_pf { + /** @device_total_vfs: Maximum number of VFs supported by the device. */ + u16 device_total_vfs; + + /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ + u16 driver_max_vfs; + + /** @master_lock: protects all VFs configurations across GTs */ + struct mutex master_lock; + + /** @service: device level service data. */ + struct xe_sriov_pf_service service; + + /** @vfs: metadata for all VFs. */ + struct xe_sriov_metadata *vfs; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index ca94382a721e..1a138108d139 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -7,9 +7,6 @@ #define _XE_SRIOV_TYPES_H_ #include <linux/build_bug.h> -#include <linux/mutex.h> -#include <linux/types.h> -#include <linux/workqueue_types.h> /** * VFID - Virtual Function Identifier @@ -40,37 +37,4 @@ enum xe_sriov_mode { }; static_assert(XE_SRIOV_MODE_NONE); -/** - * struct xe_device_pf - Xe PF related data - * - * The data in this structure is valid only if driver is running in the - * @XE_SRIOV_MODE_PF mode. - */ -struct xe_device_pf { - /** @device_total_vfs: Maximum number of VFs supported by the device. */ - u16 device_total_vfs; - - /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ - u16 driver_max_vfs; - - /** @master_lock: protects all VFs configurations across GTs */ - struct mutex master_lock; -}; - -/** - * struct xe_device_vf - Xe Virtual Function related data - * - * The data in this structure is valid only if driver is running in the - * @XE_SRIOV_MODE_VF mode. 
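/*
 * Editor's note: illustrative sketch, not part of the patch. It shows why
 * xe_sriov_pf_init_early() above allocates 1 + totalvfs metadata entries:
 * the array is indexed directly by VFID, where VFID 0 denotes the PF itself
 * and 1..totalvfs denote the VFs. Plain C with a stand-in metadata type.
 */
#include <stdio.h>
#include <stdlib.h>

struct metadata { unsigned int major, minor; };

int main(void)
{
	unsigned int totalvfs = 4;	/* example value */
	struct metadata *vfs = calloc(1 + totalvfs, sizeof(*vfs));
	unsigned int vfid;

	if (!vfs)
		return 1;

	/* a successful handshake with VF2 would record its negotiated version */
	vfs[2].major = 1;
	vfs[2].minor = 9;

	for (vfid = 1; vfid <= totalvfs; vfid++)
		printf("VF%u: %u.%u\n", vfid, vfs[vfid].major, vfs[vfid].minor);

	free(vfs);
	return 0;
}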
- */ -struct xe_device_vf { - /** @migration: VF Migration state data */ - struct { - /** @migration.worker: VF migration recovery worker */ - struct work_struct worker; - /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ - unsigned long gt_flags; - } migration; -}; - #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index c1275e64aa9c..26e243c28994 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -7,12 +7,15 @@ #include "xe_assert.h" #include "xe_device.h" +#include "xe_gt.h" #include "xe_gt_sriov_printk.h" #include "xe_gt_sriov_vf.h" +#include "xe_guc_ct.h" #include "xe_pm.h" #include "xe_sriov.h" #include "xe_sriov_printk.h" #include "xe_sriov_vf.h" +#include "xe_tile_sriov_vf.h" /** * DOC: VF restore procedure in PF KMD and VF KMD @@ -121,6 +124,15 @@ * | | | */ +static bool vf_migration_supported(struct xe_device *xe) +{ + /* + * TODO: Add conditions to allow specific platforms, when they're + * supported at production quality. + */ + return IS_ENABLED(CONFIG_DRM_XE_DEBUG); +} + static void migration_worker_func(struct work_struct *w); /** @@ -130,86 +142,118 @@ static void migration_worker_func(struct work_struct *w); void xe_sriov_vf_init_early(struct xe_device *xe) { INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); + + if (!vf_migration_supported(xe)) + xe_sriov_info(xe, "migration not supported by this module version\n"); } -/** - * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning. +static bool gt_vf_post_migration_needed(struct xe_gt *gt) +{ + return test_bit(gt->info.id, >_to_xe(gt)->sriov.vf.migration.gt_flags); +} + +/* + * Notify GuCs marked in flags about resource fixups apply finished. * @xe: the &xe_device struct instance - * - * After migration, we need to re-query all VF configuration to make sure - * they match previous provisioning. Note that most of VF provisioning - * shall be the same, except GGTT range, since GGTT is not virtualized per-VF. - * - * Returns: 0 if the operation completed successfully, or a negative error - * code otherwise. + * @gt_flags: flags marking to which GTs the notification shall be sent */ -static int vf_post_migration_requery_guc(struct xe_device *xe) +static int vf_post_migration_notify_resfix_done(struct xe_device *xe, unsigned long gt_flags) { struct xe_gt *gt; unsigned int id; - int err, ret = 0; + int err = 0; for_each_gt(gt, xe, id) { - err = xe_gt_sriov_vf_query_config(gt); - ret = ret ?: err; + if (!test_bit(id, >_flags)) + continue; + /* skip asking GuC for RESFIX exit if new recovery request arrived */ + if (gt_vf_post_migration_needed(gt)) + continue; + err = xe_gt_sriov_vf_notify_resfix_done(gt); + if (err) + break; + clear_bit(id, >_flags); } - return ret; -} - -/* - * vf_post_migration_imminent - Check if post-restore recovery is coming. - * @xe: the &xe_device struct instance - * - * Return: True if migration recovery worker will soon be running. Any worker currently - * executing does not affect the result. - */ -static bool vf_post_migration_imminent(struct xe_device *xe) -{ - return xe->sriov.vf.migration.gt_flags != 0 || - work_pending(&xe->sriov.vf.migration.worker); + if (gt_flags && !err) + drm_dbg(&xe->drm, "another recovery imminent, skipped some notifications\n"); + return err; } -/* - * Notify all GuCs about resource fixups apply finished. 
- */ -static void vf_post_migration_notify_resfix_done(struct xe_device *xe) +static int vf_get_next_migrated_gt_id(struct xe_device *xe) { struct xe_gt *gt; unsigned int id; for_each_gt(gt, xe, id) { - if (vf_post_migration_imminent(xe)) - goto skip; - xe_gt_sriov_vf_notify_resfix_done(gt); + if (test_and_clear_bit(id, &xe->sriov.vf.migration.gt_flags)) + return id; } - return; + return -1; +} -skip: - drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n"); +/** + * Perform post-migration fixups on a single GT. + * + * After migration, GuC needs to be re-queried for VF configuration to check + * if it matches previous provisioning. Most of VF provisioning shall be the + * same, except GGTT range, since GGTT is not virtualized per-VF. If GGTT + * range has changed, we have to perform fixups - shift all GGTT references + * used anywhere within the driver. After the fixups in this function succeed, + * it is allowed to ask the GuC bound to this GT to continue normal operation. + * + * Returns: 0 if the operation completed successfully, or a negative error + * code otherwise. + */ +static int gt_vf_post_migration_fixups(struct xe_gt *gt) +{ + s64 shift; + int err; + + err = xe_gt_sriov_vf_query_config(gt); + if (err) + return err; + + shift = xe_gt_sriov_vf_ggtt_shift(gt); + if (shift) { + xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift); + /* FIXME: add the recovery steps */ + xe_guc_ct_fixup_messages_with_ggtt(>->uc.guc.ct, shift); + } + return 0; } static void vf_post_migration_recovery(struct xe_device *xe) { - int err; + unsigned long fixed_gts = 0; + int id, err; drm_dbg(&xe->drm, "migration recovery in progress\n"); xe_pm_runtime_get(xe); - err = vf_post_migration_requery_guc(xe); - if (vf_post_migration_imminent(xe)) - goto defer; - if (unlikely(err)) + + if (!vf_migration_supported(xe)) { + xe_sriov_err(xe, "migration not supported by this module version\n"); + err = -ENOTRECOVERABLE; + goto fail; + } + + while (id = vf_get_next_migrated_gt_id(xe), id >= 0) { + struct xe_gt *gt = xe_device_get_gt(xe, id); + + err = gt_vf_post_migration_fixups(gt); + if (err) + goto fail; + + set_bit(id, &fixed_gts); + } + + err = vf_post_migration_notify_resfix_done(xe, fixed_gts); + if (err) goto fail; - /* FIXME: add the recovery steps */ - vf_post_migration_notify_resfix_done(xe); xe_pm_runtime_put(xe); drm_notice(&xe->drm, "migration recovery ended\n"); return; -defer: - xe_pm_runtime_put(xe); - drm_dbg(&xe->drm, "migration recovery deferred\n"); - return; fail: xe_pm_runtime_put(xe); drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err)); @@ -224,18 +268,23 @@ static void migration_worker_func(struct work_struct *w) vf_post_migration_recovery(xe); } -static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe) +/* + * Check if post-restore recovery is coming on any of GTs. + * @xe: the &xe_device struct instance + * + * Return: True if migration recovery worker will soon be running. Any worker currently + * executing does not affect the result. 
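/*
 * Editor's note: illustrative sketch, not part of the patch. It mimics the
 * post-migration recovery flow above with plain C: consume the per-GT
 * "migrated" flag bits one by one, run the fixups for each GT, remember
 * which GTs were fixed, and only then send the RESFIX_DONE notifications.
 * The fixup/notify stubs and MAX_GT value are hypothetical.
 */
#include <stdio.h>

#define MAX_GT 2

static int fixup_gt(int id)  { printf("fixups on GT%d\n", id); return 0; }
static int notify_gt(int id) { printf("RESFIX done, GT%d\n", id); return 0; }

static int next_migrated_gt(unsigned long *flags)
{
	for (int id = 0; id < MAX_GT; id++) {
		if (*flags & (1UL << id)) {
			*flags &= ~(1UL << id);	/* stands in for test_and_clear_bit() */
			return id;
		}
	}
	return -1;
}

int main(void)
{
	unsigned long gt_flags = 0x3;	/* both GTs flagged for recovery */
	unsigned long fixed = 0;
	int id;

	while ((id = next_migrated_gt(&gt_flags)) >= 0) {
		if (fixup_gt(id))
			return 1;
		fixed |= 1UL << id;
	}
	for (id = 0; id < MAX_GT; id++)
		if (fixed & (1UL << id))
			notify_gt(id);
	return 0;
}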
+ */ +static bool vf_ready_to_recovery_on_any_gts(struct xe_device *xe) { struct xe_gt *gt; unsigned int id; for_each_gt(gt, xe, id) { - if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) { - xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n"); - return false; - } + if (test_bit(id, &xe->sriov.vf.migration.gt_flags)) + return true; } - return true; + return false; } /** @@ -250,13 +299,9 @@ void xe_sriov_vf_start_migration_recovery(struct xe_device *xe) xe_assert(xe, IS_SRIOV_VF(xe)); - if (!vf_ready_to_recovery_on_all_gts(xe)) + if (!vf_ready_to_recovery_on_any_gts(xe)) return; - WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0); - /* Ensure other threads see that no flags are set now. */ - smp_mb(); - started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker); drm_info(&xe->drm, "VF migration recovery %s\n", started ? "scheduled" : "already in progress"); diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h new file mode 100644 index 000000000000..8300416a6226 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_TYPES_H_ +#define _XE_SRIOV_VF_TYPES_H_ + +#include <linux/types.h> +#include <linux/workqueue_types.h> + +/** + * struct xe_sriov_vf_relay_version - PF ABI version details. + */ +struct xe_sriov_vf_relay_version { + /** @major: major version. */ + u16 major; + /** @minor: minor version. */ + u16 minor; +}; + +/** + * struct xe_device_vf - Xe Virtual Function related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_VF mode. + */ +struct xe_device_vf { + /** @pf_version: negotiated VF/PF ABI version. */ + struct xe_sriov_vf_relay_version pf_version; + + /** @migration: VF Migration state data */ + struct { + /** @migration.worker: VF migration recovery worker */ + struct work_struct worker; + /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ + unsigned long gt_flags; + } migration; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index c77b5c317fa0..10e88f2c9615 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -5,6 +5,7 @@ #include "xe_step.h" +#include <kunit/visibility.h> #include <linux/bitfield.h> #include "xe_device.h" @@ -255,3 +256,4 @@ const char *xe_step_name(enum xe_step step) return "**"; } } +EXPORT_SYMBOL_IF_KUNIT(xe_step_name); diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index cb813b337fd3..41705f5d52e3 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -10,9 +10,11 @@ #include <linux/pci.h> #include <linux/sysfs.h> +#include "xe_configfs.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_heci_gsc.h" +#include "xe_i2c.h" #include "xe_mmio.h" #include "xe_pcode_api.h" #include "xe_vsec.h" @@ -28,20 +30,32 @@ * This is implemented by loading the driver with bare minimum (no drm card) to allow the firmware * to be flashed through mei and collect telemetry. The driver's probe flow is modified * such that it enters survivability mode when pcode initialization is incomplete and boot status - * denotes a failure. The driver then populates the survivability_mode PCI sysfs indicating - * survivability mode and provides additional information required for debug + * denotes a failure. 
 * - * KMD exposes below admin-only readable sysfs in survivability mode + * Survivability mode can also be entered manually using the survivability mode attribute available + * through configfs, which is beneficial in several use cases. It can be used to address scenarios + * where pcode does not detect failure or for validation purposes. It can also be used in + * In-Field-Repair (IFR) to repair a single card without impacting the other cards in a node. * - * device/survivability_mode: The presence of this file indicates that the card is in survivability - * mode. Also, provides additional information on why the driver entered - * survivability mode. + * Use the command below to enable survivability mode manually:: * - * Capability Information - Provides boot status - * Postcode Information - Provides information about the failure - * Overflow Information - Provides history of previous failures - * Auxiliary Information - Certain failures may have information in - * addition to postcode information + * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode + * + * Refer to :ref:`xe_configfs` for more details on how to use configfs + * + * Survivability mode is indicated by the admin-only readable sysfs below, which provides additional + * debug information:: + * + * /sys/bus/pci/devices/<device>/survivability_mode + * + * Capability Information: + * Provides boot status + * Postcode Information: + * Provides information about the failure + * Overflow Information: + * Provides history of previous failures + * Auxiliary Information: + * Certain failures may have information in addition to postcode information */ static u32 aux_history_offset(u32 reg_value) @@ -133,6 +147,7 @@ static void xe_survivability_mode_fini(void *arg) struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct device *dev = &pdev->dev; + xe_configfs_clear_survivability_mode(pdev); sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr); } @@ -159,20 +174,22 @@ static int enable_survivability_mode(struct pci_dev *pdev) survivability->mode = true; ret = xe_heci_gsc_init(xe); - if (ret) { - /* - * But if it fails, device can't enter survivability - * so move it back for correct error handling - */ - survivability->mode = false; - return ret; - } + if (ret) + goto err; xe_vsec_init(xe); + ret = xe_i2c_probe(xe); + if (ret) + goto err; + dev_err(dev, "In Survivability Mode\n"); return 0; + +err: + survivability->mode = false; + return ret; } /** @@ -186,23 +203,40 @@ bool xe_survivability_mode_is_enabled(struct xe_device *xe) return xe->survivability.mode; } -/* - * survivability_mode_requested - check if it's possible to enable - * survivability mode and that was requested by firmware +/** + * xe_survivability_mode_is_requested - check if it's possible to enable survivability + * mode that was requested by firmware or userspace + * @xe: xe device instance * - * This function reads the boot status from Pcode. + * This function reads configfs and boot status from Pcode. * * Return: true if platform support is available and boot status indicates - failure, false otherwise. 
*/ -static bool survivability_mode_requested(struct xe_device *xe) +bool xe_survivability_mode_is_requested(struct xe_device *xe) { struct xe_survivability *survivability = &xe->survivability; struct xe_mmio *mmio = xe_root_tile_mmio(xe); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); u32 data; + bool survivability_mode; + + if (!IS_DGFX(xe) || IS_SRIOV_VF(xe)) + return false; + + survivability_mode = xe_configfs_get_survivability_mode(pdev); - if (!IS_DGFX(xe) || xe->info.platform < XE_BATTLEMAGE || IS_SRIOV_VF(xe)) + if (xe->info.platform < XE_BATTLEMAGE) { + if (survivability_mode) { + dev_err(&pdev->dev, "Survivability Mode is not supported on this card\n"); + xe_configfs_clear_survivability_mode(pdev); + } return false; + } + + /* Enable survivability mode if set via configfs */ + if (survivability_mode) + return true; data = xe_mmio_read32(mmio, PCODE_SCRATCH(0)); survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data); @@ -226,7 +260,7 @@ int xe_survivability_mode_enable(struct xe_device *xe) struct xe_survivability_info *info; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - if (!survivability_mode_requested(xe)) + if (!xe_survivability_mode_is_requested(xe)) return 0; survivability->size = MAX_SCRATCH_MMIO; diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h index d7e64885570d..02231c2bf008 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.h +++ b/drivers/gpu/drm/xe/xe_survivability_mode.h @@ -12,5 +12,6 @@ struct xe_device; int xe_survivability_mode_enable(struct xe_device *xe); bool xe_survivability_mode_is_enabled(struct xe_device *xe); +bool xe_survivability_mode_is_requested(struct xe_device *xe); #endif /* _XE_SURVIVABILITY_MODE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 975094c1a582..a7ff5975873f 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -3,12 +3,17 @@ * Copyright © 2024 Intel Corporation */ +#include <drm/drm_drv.h> + #include "xe_bo.h" +#include "xe_gt_stats.h" #include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_module.h" +#include "xe_pm.h" #include "xe_pt.h" #include "xe_svm.h" +#include "xe_tile.h" #include "xe_ttm_vram_mgr.h" #include "xe_vm.h" #include "xe_vm_types.h" @@ -44,21 +49,6 @@ static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r) return gpusvm_to_vm(r->gpusvm); } -static unsigned long xe_svm_range_start(struct xe_svm_range *range) -{ - return drm_gpusvm_range_start(&range->base); -} - -static unsigned long xe_svm_range_end(struct xe_svm_range *range) -{ - return drm_gpusvm_range_end(&range->base); -} - -static unsigned long xe_svm_range_size(struct xe_svm_range *range) -{ - return drm_gpusvm_range_size(&range->base); -} - #define range_debug(r__, operaton__) \ vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \ "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \ @@ -102,11 +92,6 @@ static void xe_svm_range_free(struct drm_gpusvm_range *range) kfree(range); } -static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r) -{ - return container_of(r, struct xe_svm_range, base); -} - static void xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range, const struct mmu_notifier_range *mmu_range) @@ -160,7 +145,12 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r, for_each_tile(tile, xe, id) if (xe_pt_zap_ptes_range(tile, vm, range)) { tile_mask |= BIT(id); - range->tile_invalidated |= BIT(id); + /* + * WRITE_ONCE pairs with READ_ONCE in + * 
xe_vm_has_valid_gpu_mapping() + */ + WRITE_ONCE(range->tile_invalidated, + range->tile_invalidated | BIT(id)); } return tile_mask; @@ -186,14 +176,9 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, { struct xe_vm *vm = gpusvm_to_vm(gpusvm); struct xe_device *xe = vm->xe; - struct xe_tile *tile; struct drm_gpusvm_range *r, *first; - struct xe_gt_tlb_invalidation_fence - fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; u64 adj_start = mmu_range->start, adj_end = mmu_range->end; u8 tile_mask = 0; - u8 id; - u32 fence_id = 0; long err; xe_svm_assert_in_notifier(vm); @@ -239,42 +224,8 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, xe_device_wmb(xe); - for_each_tile(tile, xe, id) { - if (tile_mask & BIT(id)) { - int err; - - xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[fence_id], true); - - err = xe_gt_tlb_invalidation_range(tile->primary_gt, - &fence[fence_id], - adj_start, - adj_end, - vm->usm.asid); - if (WARN_ON_ONCE(err < 0)) - goto wait; - ++fence_id; - - if (!tile->media_gt) - continue; - - xe_gt_tlb_invalidation_fence_init(tile->media_gt, - &fence[fence_id], true); - - err = xe_gt_tlb_invalidation_range(tile->media_gt, - &fence[fence_id], - adj_start, - adj_end, - vm->usm.asid); - if (WARN_ON_ONCE(err < 0)) - goto wait; - ++fence_id; - } - } - -wait: - for (id = 0; id < fence_id; ++id) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); + err = xe_vm_range_tilemask_tlb_invalidation(vm, adj_start, adj_end, tile_mask); + WARN_ON_ONCE(err); range_notifier_event_end: r = first; @@ -348,6 +299,8 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w) up_write(&vm->lock); } +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) + static struct xe_vram_region *page_to_vr(struct page *page) { return container_of(page_pgmap(page), struct xe_vram_region, pagemap); @@ -534,16 +487,18 @@ static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr, return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM); } -static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation) +static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation) { return container_of(devmem_allocation, struct xe_bo, devmem_allocation); } -static void xe_svm_devmem_release(struct drm_gpusvm_devmem *devmem_allocation) +static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation) { struct xe_bo *bo = to_xe_bo(devmem_allocation); + struct xe_device *xe = xe_bo_device(bo); xe_bo_put_async(bo); + xe_pm_runtime_put(xe); } static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset) @@ -556,7 +511,7 @@ static struct drm_buddy *tile_to_buddy(struct xe_tile *tile) return &tile->mem.vram.ttm.mm; } -static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation, +static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation, unsigned long npages, unsigned long *pfn) { struct xe_bo *bo = to_xe_bo(devmem_allocation); @@ -579,13 +534,15 @@ static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocatio return 0; } -static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = { +static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = { .devmem_release = xe_svm_devmem_release, .populate_devmem_pfn = xe_svm_populate_devmem_pfn, .copy_to_devmem = xe_svm_copy_to_devmem, .copy_to_ram = xe_svm_copy_to_ram, }; +#endif + static const struct drm_gpusvm_ops gpusvm_ops = { .range_alloc = xe_svm_range_alloc, .range_free = xe_svm_range_free, @@ -657,76 +614,141 @@ static bool 
xe_svm_range_is_valid(struct xe_svm_range *range, struct xe_tile *tile, bool devmem_only) { - /* - * Advisory only check whether the range currently has a valid mapping, - * READ_ONCE pairs with WRITE_ONCE in xe_pt.c - */ - return ((READ_ONCE(range->tile_present) & - ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) && - (!devmem_only || xe_svm_range_in_vram(range)); + return (xe_vm_has_valid_gpu_mapping(tile, range->tile_present, + range->tile_invalidated) && + (!devmem_only || xe_svm_range_in_vram(range))); +} + +/** + * xe_svm_range_migrate_to_smem() - Move range pages from VRAM to SMEM + * @vm: xe_vm pointer + * @range: Pointer to the SVM range structure + * + * xe_svm_range_migrate_to_smem() checks whether the range has pages in VRAM + * and, if so, migrates them to SMEM. + */ +void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range) +{ + if (xe_svm_range_in_vram(range)) + drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); +} + +/** + * xe_svm_range_validate() - Check if the SVM range is valid + * @vm: xe_vm pointer + * @range: Pointer to the SVM range structure + * @tile_mask: Mask representing the tiles to be checked + * @devmem_preferred: if true, the range needs to be in devmem + * + * The xe_svm_range_validate() function checks if a range is + * valid and located in the desired memory region. + * + * Return: true if the range is valid, false otherwise + */ +bool xe_svm_range_validate(struct xe_vm *vm, + struct xe_svm_range *range, + u8 tile_mask, bool devmem_preferred) +{ + bool ret; + + xe_svm_notifier_lock(vm); + + ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask && + (devmem_preferred == range->base.flags.has_devmem_pages); + + xe_svm_notifier_unlock(vm); + + return ret; +} + +/** + * xe_svm_find_vma_start() - Find start of CPU VMA + * @vm: xe_vm pointer + * @start: start address + * @end: end address + * @vma: Pointer to struct xe_vma + * + * This function searches for a CPU VMA within the specified + * range [start, end] in the given VM. It adjusts the range based on the + * xe_vma start and end addresses. If no CPU VMA is found, it returns ULONG_MAX. 
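/*
 * Editor's note: not part of the patch. A minimal plain-C sketch of the
 * bitmask test used by xe_svm_range_validate() above: a range is usable for
 * a set of tiles only if every requested tile has a present mapping that
 * has not been invalidated.
 */
#include <stdio.h>

static int range_valid(unsigned char tile_present,
		       unsigned char tile_invalidated,
		       unsigned char tile_mask)
{
	return (tile_present & ~tile_invalidated & tile_mask) == tile_mask;
}

int main(void)
{
	/* tiles 0 and 1 mapped, tile 1 later invalidated */
	unsigned char present = 0x3, invalidated = 0x2;

	printf("tile0 only: %d\n", range_valid(present, invalidated, 0x1)); /* 1 */
	printf("tile0+1:    %d\n", range_valid(present, invalidated, 0x3)); /* 0 */
	return 0;
}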
+ * + * Return: The starting address of the VMA within the range, + * or ULONG_MAX if no VMA is found + */ +u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 start, u64 end, struct xe_vma *vma) +{ + return drm_gpusvm_find_vma_start(&vm->svm.gpusvm, + max(start, xe_vma_start(vma)), + min(end, xe_vma_end(vma))); } +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) { return &tile->mem.vram; } -static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, - struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx) +static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, + unsigned long start, unsigned long end, + struct mm_struct *mm, + unsigned long timeslice_ms) { - struct mm_struct *mm = vm->svm.gpusvm.mm; + struct xe_tile *tile = container_of(dpagemap, typeof(*tile), mem.vram.dpagemap); + struct xe_device *xe = tile_to_xe(tile); + struct device *dev = xe->drm.dev; struct xe_vram_region *vr = tile_to_vr(tile); struct drm_buddy_block *block; struct list_head *blocks; struct xe_bo *bo; - ktime_t end = 0; - int err; + ktime_t time_end = 0; + int err, idx; - range_debug(range, "ALLOCATE VRAM"); + if (!drm_dev_enter(&xe->drm, &idx)) + return -ENODEV; - if (!mmget_not_zero(mm)) - return -EFAULT; - mmap_read_lock(mm); + xe_pm_runtime_get(xe); -retry: - bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL, - xe_svm_range_size(range), + retry: + bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL, end - start, ttm_bo_type_device, XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_CPU_ADDR_MIRROR); if (IS_ERR(bo)) { err = PTR_ERR(bo); - if (xe_vm_validate_should_retry(NULL, err, &end)) + if (xe_vm_validate_should_retry(NULL, err, &time_end)) goto retry; - goto unlock; + goto out_pm_put; } - drm_gpusvm_devmem_init(&bo->devmem_allocation, - vm->xe->drm.dev, mm, - &gpusvm_devmem_ops, - &tile->mem.vram.dpagemap, - xe_svm_range_size(range)); + drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, + &dpagemap_devmem_ops, + &tile->mem.vram.dpagemap, + end - start); blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; list_for_each_entry(block, blocks, link) block->private = vr; xe_bo_get(bo); - err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base, - &bo->devmem_allocation, ctx); + + /* Ensure the device has a pm ref while there are device pages active. 
*/ + xe_pm_runtime_get_noresume(xe); + err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm, + start, end, timeslice_ms, + xe_svm_devm_owner(xe)); if (err) xe_svm_devmem_release(&bo->devmem_allocation); xe_bo_unlock(bo); xe_bo_put(bo); -unlock: - mmap_read_unlock(mm); - mmput(mm); +out_pm_put: + xe_pm_runtime_put(xe); + drm_dev_exit(idx); return err; } +#endif static bool supports_4K_migration(struct xe_device *xe) { @@ -736,21 +758,31 @@ static bool supports_4K_migration(struct xe_device *xe) return true; } -static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, - struct xe_vma *vma) +/** + * xe_svm_range_needs_migrate_to_vram() - SVM range needs migrate to VRAM or not + * @range: SVM range for which migration needs to be decided + * @vma: vma which has range + * @preferred_region_is_vram: preferred region for range is vram + * + * Return: True for range needing migration and migration is supported else false + */ +bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, + bool preferred_region_is_vram) { struct xe_vm *vm = range_to_vm(&range->base); u64 range_size = xe_svm_range_size(range); - if (!range->base.flags.migrate_devmem) + if (!range->base.flags.migrate_devmem || !preferred_region_is_vram) return false; - if (xe_svm_range_in_vram(range)) { - drm_dbg(&vm->xe->drm, "Range is already in VRAM\n"); + xe_assert(vm->xe, IS_DGFX(vm->xe)); + + if (preferred_region_is_vram && xe_svm_range_in_vram(range)) { + drm_info(&vm->xe->drm, "Range is already in VRAM\n"); return false; } - if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) { + if (preferred_region_is_vram && range_size < SZ_64K && !supports_4K_migration(vm->xe)) { drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); return false; } @@ -762,7 +794,7 @@ static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, * xe_svm_handle_pagefault() - SVM handle page fault * @vm: The VM. * @vma: The CPU address mirror VMA. - * @tile: The tile upon the fault occurred. + * @gt: The gt upon the fault occurred. * @fault_addr: The GPU fault address. * @atomic: The fault atomic access bit. * @@ -772,24 +804,24 @@ static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, * Return: 0 on success, negative error code on error. */ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, - struct xe_tile *tile, u64 fault_addr, + struct xe_gt *gt, u64 fault_addr, bool atomic) { struct drm_gpusvm_ctx ctx = { .read_only = xe_vma_read_only(vma), .devmem_possible = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), .check_pages_threshold = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0, + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? SZ_64K : 0, .devmem_only = atomic && IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), .timeslice_ms = atomic && IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0, + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? + vm->xe->atomic_svm_timeslice_ms : 0, }; struct xe_svm_range *range; - struct drm_gpusvm_range *r; - struct drm_exec exec; struct dma_fence *fence; + struct xe_tile *tile = gt_to_tile(gt); int migrate_try_count = ctx.devmem_only ? 
3 : 1; ktime_t end = 0; int err; @@ -797,30 +829,30 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, lockdep_assert_held_write(&vm->lock); xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma)); + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1); + retry: /* Always process UNMAPs first so view SVM ranges is current */ err = xe_svm_garbage_collector(vm); if (err) return err; - r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr, - xe_vma_start(vma), xe_vma_end(vma), - &ctx); - if (IS_ERR(r)) - return PTR_ERR(r); + range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx); + + if (IS_ERR(range)) + return PTR_ERR(range); - if (ctx.devmem_only && !r->flags.migrate_devmem) + if (ctx.devmem_only && !range->base.flags.migrate_devmem) return -EACCES; - range = to_xe_range(r); if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) return 0; range_debug(range, "PAGE FAULT"); if (--migrate_try_count >= 0 && - xe_svm_range_needs_migrate_to_vram(range, vma)) { - err = xe_svm_alloc_vram(vm, tile, range, &ctx); + xe_svm_range_needs_migrate_to_vram(range, vma, IS_DGFX(vm->xe))) { + err = xe_svm_alloc_vram(tile, range, &ctx); ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (err) { if (migrate_try_count || !ctx.devmem_only) { @@ -838,16 +870,11 @@ retry: } range_debug(range, "GET PAGES"); - err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx); + err = xe_svm_range_get_pages(vm, range, &ctx); /* Corner where CPU mappings have changed */ if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) { ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (migrate_try_count > 0 || !ctx.devmem_only) { - if (err == -EOPNOTSUPP) { - range_debug(range, "PAGE FAULT - EVICT PAGES"); - drm_gpusvm_range_evict(&vm->svm.gpusvm, - &range->base); - } drm_dbg(&vm->xe->drm, "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); @@ -867,30 +894,21 @@ retry: range_debug(range, "PAGE FAULT - BIND"); retry_bind: - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - err = drm_exec_lock_obj(&exec, vm->gpuvm.r_obj); - drm_exec_retry_on_contention(&exec); - if (err) { - drm_exec_fini(&exec); - goto err_out; - } - - fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id)); - if (IS_ERR(fence)) { - drm_exec_fini(&exec); - err = PTR_ERR(fence); - if (err == -EAGAIN) { - ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ - range_debug(range, "PAGE FAULT - RETRY BIND"); - goto retry; - } - if (xe_vm_validate_should_retry(&exec, err, &end)) - goto retry_bind; - goto err_out; + xe_vm_lock(vm, false); + fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id)); + if (IS_ERR(fence)) { + xe_vm_unlock(vm); + err = PTR_ERR(fence); + if (err == -EAGAIN) { + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ + range_debug(range, "PAGE FAULT - RETRY BIND"); + goto retry; } + if (xe_vm_validate_should_retry(NULL, err, &end)) + goto retry_bind; + goto err_out; } - drm_exec_fini(&exec); + xe_vm_unlock(vm); dma_fence_wait(fence, false); dma_fence_put(fence); @@ -926,10 +944,85 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end) */ int xe_svm_bo_evict(struct xe_bo *bo) { - return drm_gpusvm_evict_to_ram(&bo->devmem_allocation); + return drm_pagemap_evict_to_ram(&bo->devmem_allocation); +} + +/** + * xe_svm_range_find_or_insert- Find or insert GPU SVM range + * @vm: xe_vm pointer + * @addr: address for which range needs to be found/inserted 
+ * @vma: Pointer to struct xe_vma which mirrors CPU + * @ctx: GPU SVM context + * + * This function finds or inserts a newly allocated a SVM range based on the + * address. + * + * Return: Pointer to the SVM range on success, ERR_PTR() on failure. + */ +struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, + struct xe_vma *vma, struct drm_gpusvm_ctx *ctx) +{ + struct drm_gpusvm_range *r; + + r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, max(addr, xe_vma_start(vma)), + xe_vma_start(vma), xe_vma_end(vma), ctx); + if (IS_ERR(r)) + return ERR_PTR(PTR_ERR(r)); + + return to_xe_range(r); +} + +/** + * xe_svm_range_get_pages() - Get pages for a SVM range + * @vm: Pointer to the struct xe_vm + * @range: Pointer to the xe SVM range structure + * @ctx: GPU SVM context + * + * This function gets pages for a SVM range and ensures they are mapped for + * DMA access. In case of failure with -EOPNOTSUPP, it evicts the range. + * + * Return: 0 on success, negative error code on failure. + */ +int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, + struct drm_gpusvm_ctx *ctx) +{ + int err = 0; + + err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, &range->base, ctx); + if (err == -EOPNOTSUPP) { + range_debug(range, "PAGE FAULT - EVICT PAGES"); + drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); + } + + return err; +} + +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) + +/** + * xe_svm_alloc_vram()- Allocate device memory pages for range, + * migrating existing data. + * @tile: tile to allocate vram from + * @range: SVM range + * @ctx: DRM GPU SVM context + * + * Return: 0 on success, error code on failure. + */ +int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) +{ + struct drm_pagemap *dpagemap; + + xe_assert(tile_to_xe(tile), range->base.flags.migrate_devmem); + range_debug(range, "ALLOCATE VRAM"); + + dpagemap = xe_tile_local_pagemap(tile); + return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range), + xe_svm_range_end(range), + range->base.gpusvm->mm, + ctx->timeslice_ms); } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) static struct drm_pagemap_device_addr xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, struct device *dev, @@ -954,6 +1047,7 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, static const struct drm_pagemap_ops xe_drm_pagemap_ops = { .device_map = xe_drm_pagemap_device_map, + .populate_mm = xe_drm_pagemap_populate_mm, }; /** @@ -985,7 +1079,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) vr->pagemap.range.start = res->start; vr->pagemap.range.end = res->end; vr->pagemap.nr_range = 1; - vr->pagemap.ops = drm_gpusvm_pagemap_ops_get(); + vr->pagemap.ops = drm_pagemap_pagemap_ops_get(); vr->pagemap.owner = xe_svm_devm_owner(xe); addr = devm_memremap_pages(dev, &vr->pagemap); @@ -1006,6 +1100,13 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) return 0; } #else +int xe_svm_alloc_vram(struct xe_tile *tile, + struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) +{ + return -EOPNOTSUPP; +} + int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) { return 0; diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index fe58ac2f4baa..da9a69ea0bb1 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -6,16 +6,19 @@ #ifndef _XE_SVM_H_ #define _XE_SVM_H_ +#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) + #include <drm/drm_pagemap.h> #include <drm/drm_gpusvm.h> #define XE_INTERCONNECT_VRAM 
DRM_INTERCONNECT_DRIVER struct xe_bo; -struct xe_vram_region; +struct xe_gt; struct xe_tile; struct xe_vm; struct xe_vma; +struct xe_vram_region; /** struct xe_svm_range - SVM range */ struct xe_svm_range { @@ -38,7 +41,6 @@ struct xe_svm_range { u8 tile_invalidated; }; -#if IS_ENABLED(CONFIG_DRM_GPUSVM) /** * xe_svm_range_pages_valid() - SVM range pages valid * @range: SVM range @@ -59,7 +61,7 @@ void xe_svm_fini(struct xe_vm *vm); void xe_svm_close(struct xe_vm *vm); int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, - struct xe_tile *tile, u64 fault_addr, + struct xe_gt *gt, u64 fault_addr, bool atomic); bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end); @@ -68,9 +70,120 @@ int xe_svm_bo_evict(struct xe_bo *bo); void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); +int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx); + +struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, + struct xe_vma *vma, struct drm_gpusvm_ctx *ctx); + +int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, + struct drm_gpusvm_ctx *ctx); + +bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, + bool preferred_region_is_vram); + +void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range); + +bool xe_svm_range_validate(struct xe_vm *vm, + struct xe_svm_range *range, + u8 tile_mask, bool devmem_preferred); + +u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma); + +/** + * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping + * @range: SVM range + * + * Return: True if SVM range has a DMA mapping, False otherwise + */ +static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) +{ + lockdep_assert_held(&range->base.gpusvm->notifier_lock); + return range->base.flags.has_dma_mapping; +} + +/** + * to_xe_range - Convert a drm_gpusvm_range pointer to a xe_svm_range + * @r: Pointer to the drm_gpusvm_range structure + * + * This function takes a pointer to a drm_gpusvm_range structure and + * converts it to a pointer to the containing xe_svm_range structure. + * + * Return: Pointer to the xe_svm_range structure + */ +static inline struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r) +{ + return container_of(r, struct xe_svm_range, base); +} + +/** + * xe_svm_range_start() - SVM range start address + * @range: SVM range + * + * Return: start address of range. + */ +static inline unsigned long xe_svm_range_start(struct xe_svm_range *range) +{ + return drm_gpusvm_range_start(&range->base); +} + +/** + * xe_svm_range_end() - SVM range end address + * @range: SVM range + * + * Return: end address of range. + */ +static inline unsigned long xe_svm_range_end(struct xe_svm_range *range) +{ + return drm_gpusvm_range_end(&range->base); +} + +/** + * xe_svm_range_size() - SVM range size + * @range: SVM range + * + * Return: Size of range. 
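/*
 * Editor's note: not part of the patch. A tiny, self-contained example of
 * the compile-out pattern used by xe_svm.h in this hunk: when the feature is
 * disabled, the same header provides inline no-op stubs (shown further below
 * in the diff) with matching signatures, so callers build unchanged.
 * FEATURE_SVM is a stand-in for CONFIG_DRM_XE_GPUSVM.
 */
#include <stdio.h>

#define FEATURE_SVM 0	/* flip to 1 to get the "real" implementation */

#if FEATURE_SVM
static unsigned long range_size(unsigned long start, unsigned long end)
{
	return end - start;
}
#else
static unsigned long range_size(unsigned long start, unsigned long end)
{
	(void)start;
	(void)end;
	return 0;	/* feature disabled: benign default */
}
#endif

int main(void)
{
	printf("size: %lu\n", range_size(0x1000, 0x3000));
	return 0;
}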
+ */ +static inline unsigned long xe_svm_range_size(struct xe_svm_range *range) +{ + return drm_gpusvm_range_size(&range->base); +} + +#define xe_svm_assert_in_notifier(vm__) \ + lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) + +#define xe_svm_notifier_lock(vm__) \ + drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) + +#define xe_svm_notifier_unlock(vm__) \ + drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) + void xe_svm_flush(struct xe_vm *vm); #else +#include <linux/interval_tree.h> + +struct drm_pagemap_device_addr; +struct drm_gpusvm_ctx; +struct drm_gpusvm_range; +struct xe_bo; +struct xe_gt; +struct xe_vm; +struct xe_vma; +struct xe_tile; +struct xe_vram_region; + +#define XE_INTERCONNECT_VRAM 1 + +struct xe_svm_range { + struct { + struct interval_tree_node itree; + const struct drm_pagemap_device_addr *dma_addr; + } base; + u32 tile_present; + u32 tile_invalidated; +}; + static inline bool xe_svm_range_pages_valid(struct xe_svm_range *range) { return false; @@ -100,7 +213,7 @@ void xe_svm_close(struct xe_vm *vm) static inline int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, - struct xe_tile *tile, u64 fault_addr, + struct xe_gt *gt, u64 fault_addr, bool atomic) { return 0; @@ -123,31 +236,86 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) { } -static inline void xe_svm_flush(struct xe_vm *vm) +static inline int +xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) { + return -EOPNOTSUPP; } -#endif +static inline +struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, + struct xe_vma *vma, struct drm_gpusvm_ctx *ctx) +{ + return ERR_PTR(-EINVAL); +} -/** - * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping - * @range: SVM range - * - * Return: True if SVM range has a DMA mapping, False otherwise - */ -static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) +static inline +int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, + struct drm_gpusvm_ctx *ctx) { - lockdep_assert_held(&range->base.gpusvm->notifier_lock); - return range->base.flags.has_dma_mapping; + return -EINVAL; } -#define xe_svm_assert_in_notifier(vm__) \ - lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) +static inline struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r) +{ + return NULL; +} -#define xe_svm_notifier_lock(vm__) \ - drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) +static inline unsigned long xe_svm_range_start(struct xe_svm_range *range) +{ + return 0; +} -#define xe_svm_notifier_unlock(vm__) \ - drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) +static inline unsigned long xe_svm_range_end(struct xe_svm_range *range) +{ + return 0; +} +static inline unsigned long xe_svm_range_size(struct xe_svm_range *range) +{ + return 0; +} + +static inline +bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, + u32 region) +{ + return false; +} + +static inline +void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range) +{ +} + +static inline +bool xe_svm_range_validate(struct xe_vm *vm, + struct xe_svm_range *range, + u8 tile_mask, bool devmem_preferred) +{ + return false; +} + +static inline +u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma) +{ + return ULONG_MAX; +} + +#define xe_svm_assert_in_notifier(...) do {} while (0) +#define xe_svm_range_has_dma_mapping(...) 
false + +static inline void xe_svm_notifier_lock(struct xe_vm *vm) +{ +} + +static inline void xe_svm_notifier_unlock(struct xe_vm *vm) +{ +} + +static inline void xe_svm_flush(struct xe_vm *vm) +{ +} +#endif #endif diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 0771acbbf367..86e9811e60ba 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -10,6 +10,7 @@ #include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_memirq.h" #include "xe_migrate.h" #include "xe_pcode.h" #include "xe_sa.h" @@ -87,13 +88,9 @@ */ static int xe_tile_alloc(struct xe_tile *tile) { - struct drm_device *drm = &tile_to_xe(tile)->drm; - - tile->mem.ggtt = drmm_kzalloc(drm, sizeof(*tile->mem.ggtt), - GFP_KERNEL); + tile->mem.ggtt = xe_ggtt_alloc(tile); if (!tile->mem.ggtt) return -ENOMEM; - tile->mem.ggtt->tile = tile; return 0; } @@ -178,6 +175,12 @@ int xe_tile_init_noalloc(struct xe_tile *tile) int xe_tile_init(struct xe_tile *tile) { + int err; + + err = xe_memirq_init(&tile->memirq); + if (err) + return err; + tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); if (IS_ERR(tile->mem.kernel_bb_pool)) return PTR_ERR(tile->mem.kernel_bb_pool); diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index eb939316d55b..cc33e8733983 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -16,4 +16,21 @@ int xe_tile_init(struct xe_tile *tile); void xe_tile_migrate_wait(struct xe_tile *tile); +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) +{ + return &tile->mem.vram.dpagemap; +} +#else +static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) +{ + return NULL; +} +#endif + +static inline bool xe_tile_is_root(struct xe_tile *tile) +{ + return tile->id == 0; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.c b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c new file mode 100644 index 000000000000..f221dbed16f0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "regs/xe_gtt_defs.h" + +#include "xe_assert.h" +#include "xe_ggtt.h" +#include "xe_gt_sriov_vf.h" +#include "xe_sriov.h" +#include "xe_sriov_printk.h" +#include "xe_tile_sriov_vf.h" +#include "xe_wopcm.h" + +static int vf_init_ggtt_balloons(struct xe_tile *tile) +{ + struct xe_ggtt *ggtt = tile->mem.ggtt; + + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + tile->sriov.vf.ggtt_balloon[0] = xe_ggtt_node_init(ggtt); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) + return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); + + tile->sriov.vf.ggtt_balloon[1] = xe_ggtt_node_init(ggtt); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { + xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]); + return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); + } + + return 0; +} + +/** + * xe_tile_sriov_vf_balloon_ggtt_locked - Insert balloon nodes to limit used GGTT address range. + * @tile: the &xe_tile struct instance + * + * Return: 0 on success or a negative error code on failure. 
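/*
 * Editor's note: not part of the patch. A plain-C sketch of the balloon
 * arithmetic implemented by xe_tile_sriov_vf_balloon_ggtt_locked() (body
 * follows below): the VF may only use [ggtt_base, ggtt_base + ggtt_size),
 * so one balloon covers [WOPCM, ggtt_base) and another covers everything up
 * to GUC_GGTT_TOP. The constants below are example values, not real
 * hardware numbers.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long wopcm = 0x200000;		/* example */
	unsigned long long ggtt_top = 0xFEE00000;	/* example */
	unsigned long long vf_base = 0x40000000;	/* as provisioned by the PF */
	unsigned long long vf_size = 0x10000000;	/* as provisioned by the PF */

	if (vf_base < wopcm || vf_base > ggtt_top ||
	    vf_size > ggtt_top - vf_base) {
		fprintf(stderr, "invalid VF GGTT configuration\n");
		return 1;
	}

	printf("balloon[0]: [%#llx, %#llx)\n", wopcm, vf_base);
	printf("VF range  : [%#llx, %#llx)\n", vf_base, vf_base + vf_size);
	printf("balloon[1]: [%#llx, %#llx)\n", vf_base + vf_size, ggtt_top);
	return 0;
}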
+ */ +int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile) +{ + u64 ggtt_base = xe_gt_sriov_vf_ggtt_base(tile->primary_gt); + u64 ggtt_size = xe_gt_sriov_vf_ggtt(tile->primary_gt); + struct xe_device *xe = tile_to_xe(tile); + u64 wopcm = xe_wopcm_size(xe); + u64 start, end; + int err; + + xe_tile_assert(tile, IS_SRIOV_VF(xe)); + xe_tile_assert(tile, ggtt_size); + lockdep_assert_held(&tile->mem.ggtt->lock); + + /* + * VF can only use part of the GGTT as allocated by the PF: + * + * WOPCM GUC_GGTT_TOP + * |<------------ Total GGTT size ------------------>| + * + * VF GGTT base -->|<- size ->| + * + * +--------------------+----------+-----------------+ + * |////////////////////| block |\\\\\\\\\\\\\\\\\| + * +--------------------+----------+-----------------+ + * + * |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->| + */ + + if (ggtt_base < wopcm || ggtt_base > GUC_GGTT_TOP || + ggtt_size > GUC_GGTT_TOP - ggtt_base) { + xe_sriov_err(xe, "tile%u: Invalid GGTT configuration: %#llx-%#llx\n", + tile->id, ggtt_base, ggtt_base + ggtt_size - 1); + return -ERANGE; + } + + start = wopcm; + end = ggtt_base; + if (end != start) { + err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[0], + start, end); + if (err) + return err; + } + + start = ggtt_base + ggtt_size; + end = GUC_GGTT_TOP; + if (end != start) { + err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[1], + start, end); + if (err) { + xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]); + return err; + } + } + + return 0; +} + +static int vf_balloon_ggtt(struct xe_tile *tile) +{ + struct xe_ggtt *ggtt = tile->mem.ggtt; + int err; + + mutex_lock(&ggtt->lock); + err = xe_tile_sriov_vf_balloon_ggtt_locked(tile); + mutex_unlock(&ggtt->lock); + + return err; +} + +/** + * xe_tile_sriov_vf_deballoon_ggtt_locked - Remove balloon nodes. + * @tile: the &xe_tile struct instance + */ +void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile) +{ + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[1]); + xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]); +} + +static void vf_deballoon_ggtt(struct xe_tile *tile) +{ + mutex_lock(&tile->mem.ggtt->lock); + xe_tile_sriov_vf_deballoon_ggtt_locked(tile); + mutex_unlock(&tile->mem.ggtt->lock); +} + +static void vf_fini_ggtt_balloons(struct xe_tile *tile) +{ + xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); + + xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[1]); + xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]); +} + +static void cleanup_ggtt(struct drm_device *drm, void *arg) +{ + struct xe_tile *tile = arg; + + vf_deballoon_ggtt(tile); + vf_fini_ggtt_balloons(tile); +} + +/** + * xe_tile_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration. + * @tile: the &xe_tile + * + * This function is for VF use only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + int err; + + err = vf_init_ggtt_balloons(tile); + if (err) + return err; + + err = vf_balloon_ggtt(tile); + if (err) { + vf_fini_ggtt_balloons(tile); + return err; + } + + return drmm_add_action_or_reset(&xe->drm, cleanup_ggtt, tile); +} + +/** + * DOC: GGTT nodes shifting during VF post-migration recovery + * + * The first fixup applied to the VF KMD structures as part of post-migration + * recovery is shifting nodes within &xe_ggtt instance. 
The nodes are moved + * from range previously assigned to this VF, into newly provisioned area. + * The changes include balloons, which are resized accordingly. + * + * The balloon nodes are there to eliminate unavailable ranges from use: one + * reserves the GGTT area below the range for current VF, and another one + * reserves area above. + * + * Below is a GGTT layout of example VF, with a certain address range assigned to + * said VF, and inaccessible areas above and below: + * + * 0 4GiB + * |<--------------------------- Total GGTT size ----------------------------->| + * WOPCM GUC_TOP + * |<-------------- Area mappable by xe_ggtt instance ---------------->| + * + * +---+---------------------------------+----------+----------------------+---+ + * |\\\|/////////////////////////////////| VF mem |//////////////////////|\\\| + * +---+---------------------------------+----------+----------------------+---+ + * + * Hardware enforced access rules before migration: + * + * |<------- inaccessible for VF ------->|<VF owned>|<-- inaccessible for VF ->| + * + * GGTT nodes used for tracking allocations: + * + * |<---------- balloon ------------>|<- nodes->|<----- balloon ------>| + * + * After the migration, GGTT area assigned to the VF might have shifted, either + * to lower or to higher address. But we expect the total size and extra areas to + * be identical, as migration can only happen between matching platforms. + * Below is an example of GGTT layout of the VF after migration. Content of the + * GGTT for VF has been moved to a new area, and we receive its address from GuC: + * + * +---+----------------------+----------+---------------------------------+---+ + * |\\\|//////////////////////| VF mem |/////////////////////////////////|\\\| + * +---+----------------------+----------+---------------------------------+---+ + * + * Hardware enforced access rules after migration: + * + * |<- inaccessible for VF -->|<VF owned>|<------- inaccessible for VF ------->| + * + * So the VF has a new slice of GGTT assigned, and during migration process, the + * memory content was copied to that new area. But the &xe_ggtt nodes are still + * tracking allocations using the old addresses. The nodes within VF owned area + * have to be shifted, and balloon nodes need to be resized to properly mask out + * areas not owned by the VF. + * + * Fixed &xe_ggtt nodes used for tracking allocations: + * + * |<------ balloon ------>|<- nodes->|<----------- balloon ----------->| + * + * Due to use of GPU profiles, we do not expect the old and new GGTT ares to + * overlap; but our node shifting will fix addresses properly regardless. + */ + +/** + * xe_tile_sriov_vf_fixup_ggtt_nodes - Shift GGTT allocations to match assigned range. + * @tile: the &xe_tile struct instance + * @shift: the shift value + * + * Since Global GTT is not virtualized, each VF has an assigned range + * within the global space. This range might have changed during migration, + * which requires all memory addresses pointing to GGTT to be shifted. 
+ */ +void xe_tile_sriov_vf_fixup_ggtt_nodes(struct xe_tile *tile, s64 shift) +{ + struct xe_ggtt *ggtt = tile->mem.ggtt; + + mutex_lock(&ggtt->lock); + + xe_tile_sriov_vf_deballoon_ggtt_locked(tile); + xe_ggtt_shift_nodes_locked(ggtt, shift); + xe_tile_sriov_vf_balloon_ggtt_locked(tile); + + mutex_unlock(&ggtt->lock); +} diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.h b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h new file mode 100644 index 000000000000..93eb043171e8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TILE_SRIOV_VF_H_ +#define _XE_TILE_SRIOV_VF_H_ + +#include <linux/types.h> + +struct xe_tile; + +int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile); +int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile); +void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile); +void xe_tile_sriov_vf_fixup_ggtt_nodes(struct xe_tile *tile, s64 shift); + +#endif diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index ccebd5f0878e..86323cf3be2c 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -33,7 +33,7 @@ DECLARE_EVENT_CLASS(xe_bo, TP_fast_assign( __assign_str(dev); - __entry->size = bo->size; + __entry->size = xe_bo_size(bo); __entry->flags = bo->flags; __entry->vm = bo->vm; ), @@ -73,7 +73,7 @@ TRACE_EVENT(xe_bo_move, TP_fast_assign( __entry->bo = bo; - __entry->size = bo->size; + __entry->size = xe_bo_size(bo); __assign_str(new_placement_name); __assign_str(old_placement_name); __assign_str(device_id); diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 49ddbda7cdef..828b45b24c23 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -98,6 +98,11 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) }, + { XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, XE_RTP_END_VERSION_UNDEFINED), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) + }, }; static const struct xe_rtp_entry_sr lrc_tunings[] = { diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 3a8751a8b92d..465bda355443 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -33,6 +33,22 @@ uc_to_xe(struct xe_uc *uc) } /* Should be called once at driver load only */ +int xe_uc_init_noalloc(struct xe_uc *uc) +{ + int ret; + + ret = xe_guc_init_noalloc(&uc->guc); + if (ret) + goto err; + + /* HuC and GSC have no early dependencies and will be initialized during xe_uc_init(). 
*/ + return 0; + +err: + xe_gt_err(uc_to_gt(uc), "Failed to early initialize uC (%pe)\n", ERR_PTR(ret)); + return ret; +} + int xe_uc_init(struct xe_uc *uc) { int ret; @@ -56,15 +72,17 @@ int xe_uc_init(struct xe_uc *uc) if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; - if (IS_SRIOV_VF(uc_to_xe(uc))) - return 0; + if (!IS_SRIOV_VF(uc_to_xe(uc))) { + ret = xe_wopcm_init(&uc->wopcm); + if (ret) + goto err; + } - ret = xe_wopcm_init(&uc->wopcm); + ret = xe_guc_min_load_for_hwconfig(&uc->guc); if (ret) goto err; return 0; - err: xe_gt_err(uc_to_gt(uc), "Failed to initialize uC (%pe)\n", ERR_PTR(ret)); return ret; @@ -126,28 +144,7 @@ int xe_uc_sanitize_reset(struct xe_uc *uc) return uc_reset(uc); } -/** - * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig - * @uc: The UC object - * - * Return: 0 on success, negative error code on error. - */ -int xe_uc_init_hwconfig(struct xe_uc *uc) -{ - int ret; - - /* GuC submission not enabled, nothing to do */ - if (!xe_device_uc_enabled(uc_to_xe(uc))) - return 0; - - ret = xe_guc_min_load_for_hwconfig(&uc->guc); - if (ret) - return ret; - - return 0; -} - -static int vf_uc_init_hw(struct xe_uc *uc) +static int vf_uc_load_hw(struct xe_uc *uc) { int err; @@ -161,22 +158,30 @@ static int vf_uc_init_hw(struct xe_uc *uc) err = xe_gt_sriov_vf_connect(uc_to_gt(uc)); if (err) - return err; + goto err_out; uc->guc.submission_state.enabled = true; + err = xe_guc_opt_in_features_enable(&uc->guc); + if (err) + goto err_out; + err = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (err) - return err; + goto err_out; return 0; + +err_out: + xe_guc_sanitize(&uc->guc); + return err; } /* * Should be called during driver load, after every GT reset, and after every * suspend to reload / auth the firmwares. */ -int xe_uc_init_hw(struct xe_uc *uc) +int xe_uc_load_hw(struct xe_uc *uc) { int ret; @@ -185,7 +190,7 @@ int xe_uc_init_hw(struct xe_uc *uc) return 0; if (IS_SRIOV_VF(uc_to_xe(uc))) - return vf_uc_init_hw(uc); + return vf_uc_load_hw(uc); ret = xe_huc_upload(&uc->huc); if (ret) @@ -201,15 +206,15 @@ int xe_uc_init_hw(struct xe_uc *uc) ret = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (ret) - return ret; + goto err_out; ret = xe_guc_post_load_init(&uc->guc); if (ret) - return ret; + goto err_out; ret = xe_guc_pc_start(&uc->guc.pc); if (ret) - return ret; + goto err_out; xe_guc_engine_activity_enable_stats(&uc->guc); @@ -221,11 +226,10 @@ int xe_uc_init_hw(struct xe_uc *uc) xe_gsc_load_start(&uc->gsc); return 0; -} -int xe_uc_fini_hw(struct xe_uc *uc) -{ - return xe_uc_sanitize_reset(uc); +err_out: + xe_guc_sanitize(&uc->guc); + return ret; } int xe_uc_reset_prepare(struct xe_uc *uc) diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index c23e6f5e2514..21c9306098cf 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -8,11 +8,10 @@ struct xe_uc; +int xe_uc_init_noalloc(struct xe_uc *uc); int xe_uc_init(struct xe_uc *uc); -int xe_uc_init_hwconfig(struct xe_uc *uc); int xe_uc_init_post_hwconfig(struct xe_uc *uc); -int xe_uc_init_hw(struct xe_uc *uc); -int xe_uc_fini_hw(struct xe_uc *uc); +int xe_uc_load_hw(struct xe_uc *uc); void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index fb0eda3d5682..9bbdde604923 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -16,6 +16,7 @@ #include "xe_gsc.h" #include "xe_gt.h" #include "xe_gt_printk.h" 
+#include "xe_gt_sriov_vf.h" #include "xe_guc.h" #include "xe_map.h" #include "xe_mmio.h" @@ -92,6 +93,8 @@ struct uc_fw_entry { enum xe_platform platform; + enum xe_gt_type gt_type; + struct { const char *path; u16 major; @@ -106,32 +109,39 @@ struct fw_blobs_by_type { u32 count; }; -#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(BATTLEMAGE, major_ver(xe, guc, bmg, 70, 29, 2)) \ - fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 29, 2)) \ - fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 29, 2)) \ - fw_def(DG2, major_ver(i915, guc, dg2, 70, 29, 2)) \ - fw_def(DG1, major_ver(i915, guc, dg1, 70, 29, 2)) \ - fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 29, 2)) \ - fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 29, 2)) \ - fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 29, 2)) \ - fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) \ - fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) +/* + * Add an "ANY" define just to convey the meaning it's given here. + */ +#define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED + +#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ + fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 47, 0)) \ + fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \ + fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \ + fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ + fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \ + fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \ + fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ + fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \ + fw_def(ALDERLAKE_S, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ + fw_def(ROCKETLAKE, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ + fw_def(TIGERLAKE, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ - fw_def(BATTLEMAGE, no_ver(xe, huc, bmg)) \ - fw_def(LUNARLAKE, no_ver(xe, huc, lnl)) \ - fw_def(METEORLAKE, no_ver(i915, huc_gsc, mtl)) \ - fw_def(DG1, no_ver(i915, huc, dg1)) \ - fw_def(ALDERLAKE_P, no_ver(i915, huc, tgl)) \ - fw_def(ALDERLAKE_S, no_ver(i915, huc, tgl)) \ - fw_def(ROCKETLAKE, no_ver(i915, huc, tgl)) \ - fw_def(TIGERLAKE, no_ver(i915, huc, tgl)) + fw_def(PANTHERLAKE, GT_TYPE_ANY, no_ver(xe, huc, ptl)) \ + fw_def(BATTLEMAGE, GT_TYPE_ANY, no_ver(xe, huc, bmg)) \ + fw_def(LUNARLAKE, GT_TYPE_ANY, no_ver(xe, huc, lnl)) \ + fw_def(METEORLAKE, GT_TYPE_ANY, no_ver(i915, huc_gsc, mtl)) \ + fw_def(DG1, GT_TYPE_ANY, no_ver(i915, huc, dg1)) \ + fw_def(ALDERLAKE_P, GT_TYPE_ANY, no_ver(i915, huc, tgl)) \ + fw_def(ALDERLAKE_S, GT_TYPE_ANY, no_ver(i915, huc, tgl)) \ + fw_def(ROCKETLAKE, GT_TYPE_ANY, no_ver(i915, huc, tgl)) \ + fw_def(TIGERLAKE, GT_TYPE_ANY, no_ver(i915, huc, tgl)) /* for the GSC FW we match the compatibility version and not the release one */ #define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver) \ - fw_def(LUNARLAKE, major_ver(xe, gsc, lnl, 104, 1, 0)) \ - fw_def(METEORLAKE, major_ver(i915, gsc, mtl, 102, 1, 0)) + fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, gsc, lnl, 104, 1, 0)) \ + fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, gsc, mtl, 102, 1, 0)) #define MAKE_FW_PATH(dir__, uc__, shortname__, version__) \ __stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin" @@ -159,12 +169,13 @@ struct fw_blobs_by_type { a, b, c } /* All blobs need to be declared via MODULE_FIRMWARE() */ -#define 
XE_UC_MODULE_FIRMWARE(platform__, fw_filename) \ +#define XE_UC_MODULE_FIRMWARE(platform__, gt_type__, fw_filename) \ MODULE_FIRMWARE(fw_filename); -#define XE_UC_FW_ENTRY(platform__, entry__) \ +#define XE_UC_FW_ENTRY(platform__, gt_type__, entry__) \ { \ .platform = XE_ ## platform__, \ + .gt_type = XE_ ## gt_type__, \ entry__, \ }, @@ -222,30 +233,38 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) [XE_UC_FW_TYPE_HUC] = { entries_huc, ARRAY_SIZE(entries_huc) }, [XE_UC_FW_TYPE_GSC] = { entries_gsc, ARRAY_SIZE(entries_gsc) }, }; - static const struct uc_fw_entry *entries; + struct xe_gt *gt = uc_fw_to_gt(uc_fw); enum xe_platform p = xe->info.platform; + const struct uc_fw_entry *entries; u32 count; int i; - xe_assert(xe, uc_fw->type < ARRAY_SIZE(blobs_all)); + xe_gt_assert(gt, uc_fw->type < ARRAY_SIZE(blobs_all)); + xe_gt_assert(gt, gt->info.type != XE_GT_TYPE_UNINITIALIZED); + entries = blobs_all[uc_fw->type].entries; count = blobs_all[uc_fw->type].count; for (i = 0; i < count && p <= entries[i].platform; i++) { - if (p == entries[i].platform) { - uc_fw->path = entries[i].path; - uc_fw->versions.wanted.major = entries[i].major; - uc_fw->versions.wanted.minor = entries[i].minor; - uc_fw->versions.wanted.patch = entries[i].patch; - uc_fw->full_ver_required = entries[i].full_ver_required; - - if (uc_fw->type == XE_UC_FW_TYPE_GSC) - uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY; - else - uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE; - - break; - } + if (p != entries[i].platform) + continue; + + if (entries[i].gt_type != XE_GT_TYPE_ANY && + entries[i].gt_type != gt->info.type) + continue; + + uc_fw->path = entries[i].path; + uc_fw->versions.wanted.major = entries[i].major; + uc_fw->versions.wanted.minor = entries[i].minor; + uc_fw->versions.wanted.patch = entries[i].patch; + uc_fw->full_ver_required = entries[i].full_ver_required; + + if (uc_fw->type == XE_UC_FW_TYPE_GSC) + uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY; + else + uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE; + + break; } } @@ -646,11 +665,39 @@ do { \ ver_->major, ver_->minor, ver_->patch); \ } while (0) +static void uc_fw_vf_override(struct xe_uc_fw *uc_fw) +{ + struct xe_uc_fw_version *compat = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY]; + struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted; + + /* Only GuC/HuC are supported */ + if (uc_fw->type != XE_UC_FW_TYPE_GUC && uc_fw->type != XE_UC_FW_TYPE_HUC) + uc_fw->path = NULL; + + /* VF will support only firmwares that driver can autoselect */ + xe_uc_fw_change_status(uc_fw, uc_fw->path ? 
+ XE_UC_FIRMWARE_PRELOADED : + XE_UC_FIRMWARE_NOT_SUPPORTED); + + if (!xe_uc_fw_is_supported(uc_fw)) + return; + + /* PF is doing the loading, so we don't need a path on the VF */ + uc_fw->path = "Loaded by PF"; + + /* The GuC versions are set up during the VF bootstrap */ + if (uc_fw->type == XE_UC_FW_TYPE_GUC) { + uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY; + xe_gt_sriov_vf_guc_versions(uc_fw_to_gt(uc_fw), wanted, compat); + } +} + static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmware_p) { struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct drm_printer p = xe_gt_info_printer(gt); struct device *dev = xe->drm.dev; - struct drm_printer p = drm_info_printer(dev); const struct firmware *fw = NULL; int err; @@ -659,20 +706,13 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar * before we're looked at the HW caps to see if we have uc support */ BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED); - xe_assert(xe, !uc_fw->status); - xe_assert(xe, !uc_fw->path); + xe_gt_assert(gt, !uc_fw->status); + xe_gt_assert(gt, !uc_fw->path); uc_fw_auto_select(xe, uc_fw); if (IS_SRIOV_VF(xe)) { - /* Only GuC/HuC are supported */ - if (uc_fw->type != XE_UC_FW_TYPE_GUC && - uc_fw->type != XE_UC_FW_TYPE_HUC) - uc_fw->path = NULL; - /* VF will support only firmwares that driver can autoselect */ - xe_uc_fw_change_status(uc_fw, uc_fw->path ? - XE_UC_FIRMWARE_PRELOADED : - XE_UC_FIRMWARE_NOT_SUPPORTED); + uc_fw_vf_override(uc_fw); return 0; } @@ -684,7 +724,7 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar if (!xe_uc_fw_is_supported(uc_fw)) { if (uc_fw->type == XE_UC_FW_TYPE_GUC) { - drm_err(&xe->drm, "No GuC firmware defined for platform\n"); + xe_gt_err(gt, "No GuC firmware defined for platform\n"); return -ENOENT; } return 0; @@ -693,7 +733,7 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar /* an empty path means the firmware is disabled */ if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) { xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED); - drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type)); + xe_gt_dbg(gt, "%s disabled\n", xe_uc_fw_type_repr(uc_fw->type)); return 0; } @@ -726,10 +766,10 @@ fail: XE_UC_FIRMWARE_MISSING : XE_UC_FIRMWARE_ERROR); - drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n", - xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); - drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n", - xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL); + xe_gt_notice(gt, "%s firmware %s: fetch failed with error %pe\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, ERR_PTR(err)); + xe_gt_info(gt, "%s firmware(s) can be downloaded from %s\n", + xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL); release_firmware(fw); /* OK even if fw is NULL */ diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index ad3b35a0e6eb..914026015019 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -65,6 +65,8 @@ enum xe_uc_fw_type { * struct xe_uc_fw_version - Version for XE micro controller firmware */ struct xe_uc_fw_version { + /** @branch: branch version of the FW (not always available) */ + u16 branch; /** @major: major version of the FW */ u16 major; /** @minor: minor version of the FW */ diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 367c84b90e9e..2035604121e6 100644 --- 
a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -732,7 +732,9 @@ int xe_vm_userptr_pin(struct xe_vm *vm) DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); + down_read(&vm->userptr.notifier_lock); err = xe_vm_invalidate_vma(&uvma->vma); + up_read(&vm->userptr.notifier_lock); xe_vm_unlock(vm); if (err) break; @@ -798,21 +800,47 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) } ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); +static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) +{ + struct xe_vma *vma; + + vma = gpuva_to_vma(op->base.prefetch.va); + + if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) + xa_destroy(&op->prefetch_range.range); +} + +static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) +{ + struct xe_vma_op *op; + + if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) + return; + + list_for_each_entry(op, &vops->list, link) + xe_vma_svm_prefetch_op_fini(op); +} + static void xe_vma_ops_fini(struct xe_vma_ops *vops) { int i; + xe_vma_svm_prefetch_ops_fini(vops); + for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) kfree(vops->pt_update_ops[i].ops); } -static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask) +static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) { int i; + if (!inc_val) + return; + for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) if (BIT(i) & tile_mask) - ++vops->pt_update_ops[i].num_ops; + vops->pt_update_ops[i].num_ops += inc_val; } static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, @@ -842,7 +870,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, xe_vm_populate_rebind(op, vma, tile_mask); list_add_tail(&op->link, &vops->list); - xe_vma_ops_incr_pt_update_ops(vops, tile_mask); + xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); return 0; } @@ -977,7 +1005,7 @@ xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, xe_vm_populate_range_rebind(op, vma, range, tile_mask); list_add_tail(&op->link, &vops->list); - xe_vma_ops_incr_pt_update_ops(vops, tile_mask); + xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1); return 0; } @@ -1062,7 +1090,7 @@ xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops, xe_vm_populate_range_unbind(op, range); list_add_tail(&op->link, &vops->list); - xe_vma_ops_incr_pt_update_ops(vops, range->tile_present); + xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1); return 0; } @@ -1678,13 +1706,21 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) * scheduler drops all the references of it, hence protecting the VM * for this case is necessary. 
*/ - if (flags & XE_VM_FLAG_LR_MODE) + if (flags & XE_VM_FLAG_LR_MODE) { + INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); xe_pm_runtime_get_noresume(xe); + } + + if (flags & XE_VM_FLAG_FAULT_MODE) { + err = xe_svm_init(vm); + if (err) + goto err_no_resv; + } vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); if (!vm_resv_obj) { err = -ENOMEM; - goto err_no_resv; + goto err_svm_fini; } drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, @@ -1724,10 +1760,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->batch_invalidate_tlb = true; } - if (vm->flags & XE_VM_FLAG_LR_MODE) { - INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); + if (vm->flags & XE_VM_FLAG_LR_MODE) vm->batch_invalidate_tlb = false; - } /* Fill pt_root after allocating scratch tables */ for_each_tile(tile, xe, id) { @@ -1757,12 +1791,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) } } - if (flags & XE_VM_FLAG_FAULT_MODE) { - err = xe_svm_init(vm); - if (err) - goto err_close; - } - if (number_tiles > 1) vm->composite_fence_ctx = dma_fence_context_alloc(1); @@ -1776,6 +1804,11 @@ err_close: xe_vm_close_and_put(vm); return ERR_PTR(err); +err_svm_fini: + if (flags & XE_VM_FLAG_FAULT_MODE) { + vm->size = 0; /* close the vm */ + xe_svm_fini(vm); + } err_no_resv: mutex_destroy(&vm->snap_mutex); for_each_tile(tile, xe, id) @@ -2049,7 +2082,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && - args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) + args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && + !xe->info.needs_scratch)) return -EINVAL; if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && @@ -2135,6 +2169,35 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, return err; } +static bool vma_matches(struct xe_vma *vma, u64 page_addr) +{ + if (page_addr > xe_vma_end(vma) - 1 || + page_addr + SZ_4K - 1 < xe_vma_start(vma)) + return false; + + return true; +} + +/** + * xe_vm_find_vma_by_addr() - Find a VMA by its address + * + * @vm: the xe_vm the vma belongs to + * @page_addr: address to look up + */ +struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) +{ + struct xe_vma *vma = NULL; + + if (vm->usm.last_fault_vma) { /* Fast lookup */ + if (vma_matches(vm->usm.last_fault_vma, page_addr)) + vma = vm->usm.last_fault_vma; + } + if (!vma) + vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); + + return vma; +} + static const u32 region_to_mem_type[] = { XE_PL_TT, XE_PL_VRAM0, @@ -2201,13 +2264,39 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) } #endif +static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) +{ + if (!xe_vm_in_fault_mode(vm)) + return false; + + if (!xe_vm_has_scratch(vm)) + return false; + + if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) + return false; + + return true; +} + +static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) +{ + struct drm_gpuva_op *__op; + + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + + xe_vma_svm_prefetch_op_fini(op); + } +} + /* * Create operations list from IOCTL arguments, setup operations fields so parse * and commit steps are decoupled from IOCTL arguments. This step can fail. 
*/ static struct drm_gpuva_ops * -vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, - u64 bo_offset_or_userptr, u64 addr, u64 range, +vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, + struct xe_bo *bo, u64 bo_offset_or_userptr, + u64 addr, u64 range, u32 operation, u32 flags, u32 prefetch_region, u16 pat_index) { @@ -2215,6 +2304,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct drm_gpuva_ops *ops; struct drm_gpuva_op *__op; struct drm_gpuvm_bo *vm_bo; + u64 range_end = addr + range; int err; lockdep_assert_held_write(&vm->lock); @@ -2273,15 +2363,83 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; op->map.pat_index = pat_index; + op->map.invalidate_on_bind = + __xe_vm_needs_clear_scratch_pages(vm, flags); } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { - op->prefetch.region = prefetch_region; - } + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); + struct xe_svm_range *svm_range; + struct drm_gpusvm_ctx ctx = {}; + struct xe_tile *tile; + u8 id, tile_mask = 0; + u32 i; + + if (!xe_vma_is_cpu_addr_mirror(vma)) { + op->prefetch.region = prefetch_region; + break; + } + + ctx.read_only = xe_vma_read_only(vma); + ctx.devmem_possible = IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); + for_each_tile(tile, vm->xe, id) + tile_mask |= 0x1 << id; + + xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); + op->prefetch_range.region = prefetch_region; + op->prefetch_range.ranges_count = 0; +alloc_next_range: + svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); + + if (PTR_ERR(svm_range) == -ENOENT) { + u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma); + + addr = ret == ULONG_MAX ? 
0 : ret; + if (addr) + goto alloc_next_range; + else + goto print_op_label; + } + + if (IS_ERR(svm_range)) { + err = PTR_ERR(svm_range); + goto unwind_prefetch_ops; + } + + if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region)) { + xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); + goto check_next_range; + } + + err = xa_alloc(&op->prefetch_range.range, + &i, svm_range, xa_limit_32b, + GFP_KERNEL); + + if (err) + goto unwind_prefetch_ops; + + op->prefetch_range.ranges_count++; + vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; + xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED"); +check_next_range: + if (range_end > xe_svm_range_end(svm_range) && + xe_svm_range_end(svm_range) < xe_vma_end(vma)) { + addr = xe_svm_range_end(svm_range); + goto alloc_next_range; + } + } +print_op_label: print_op(vm->xe, __op); } return ops; + +unwind_prefetch_ops: + xe_svm_prefetch_gpuva_ops_fini(ops); + drm_gpuva_ops_free(&vm->gpuvm, ops); + return ERR_PTR(err); } + ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, @@ -2472,10 +2630,11 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, return PTR_ERR(vma); op->map.vma = vma; - if ((op->map.immediate || !xe_vm_in_fault_mode(vm)) && - !op->map.is_cpu_addr_mirror) + if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && + !op->map.is_cpu_addr_mirror) || + op->map.invalidate_on_bind) xe_vma_ops_incr_pt_update_ops(vops, - op->tile_mask); + op->tile_mask, 1); break; } case DRM_GPUVA_OP_REMAP: @@ -2484,6 +2643,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, gpuva_to_vma(op->base.remap.unmap->va); bool skip = xe_vma_is_cpu_addr_mirror(old); u64 start = xe_vma_start(old), end = xe_vma_end(old); + int num_remap_ops = 0; if (op->base.remap.prev) start = op->base.remap.prev->va.addr + @@ -2536,7 +2696,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, (ULL)op->remap.start, (ULL)op->remap.range); } else { - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + num_remap_ops++; } } @@ -2565,11 +2725,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, (ULL)op->remap.start, (ULL)op->remap.range); } else { - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + num_remap_ops++; } } if (!skip) - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + num_remap_ops++; + + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops); break; } case DRM_GPUVA_OP_UNMAP: @@ -2581,7 +2743,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, return -EBUSY; if (!xe_vma_is_cpu_addr_mirror(vma)) - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); break; case DRM_GPUVA_OP_PREFETCH: vma = gpuva_to_vma(op->base.prefetch.va); @@ -2592,8 +2754,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, return err; } - if (!xe_vma_is_cpu_addr_mirror(vma)) - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + if (xe_vma_is_cpu_addr_mirror(vma)) + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, + op->prefetch_range.ranges_count); + else + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); + break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); @@ -2719,6 +2885,57 @@ static int check_ufence(struct xe_vma *vma) return 0; } +static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) +{ + bool devmem_possible = 
IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); + int err = 0; + + struct xe_svm_range *svm_range; + struct drm_gpusvm_ctx ctx = {}; + struct xe_tile *tile; + unsigned long i; + u32 region; + + if (!xe_vma_is_cpu_addr_mirror(vma)) + return 0; + + region = op->prefetch_range.region; + + ctx.read_only = xe_vma_read_only(vma); + ctx.devmem_possible = devmem_possible; + ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; + + /* TODO: Threading the migration */ + xa_for_each(&op->prefetch_range.range, i, svm_range) { + if (!region) + xe_svm_range_migrate_to_smem(vm, svm_range); + + if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) { + tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0]; + err = xe_svm_alloc_vram(tile, svm_range, &ctx); + if (err) { + drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); + return -ENODATA; + } + xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM"); + } + + err = xe_svm_range_get_pages(vm, svm_range, &ctx); + if (err) { + drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); + if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) + err = -ENODATA; + return err; + } + xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE"); + } + + return err; +} + static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, struct xe_vma_op *op) { @@ -2726,9 +2943,10 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, switch (op->base.op) { case DRM_GPUVA_OP_MAP: - err = vma_lock_and_validate(exec, op->map.vma, - !xe_vm_in_fault_mode(vm) || - op->map.immediate); + if (!op->map.invalidate_on_bind) + err = vma_lock_and_validate(exec, op->map.vma, + !xe_vm_in_fault_mode(vm) || + op->map.immediate); break; case DRM_GPUVA_OP_REMAP: err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); @@ -2755,7 +2973,12 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_PREFETCH: { struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); - u32 region = op->prefetch.region; + u32 region; + + if (xe_vma_is_cpu_addr_mirror(vma)) + region = op->prefetch_range.region; + else + region = op->prefetch.region; xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); @@ -2774,6 +2997,25 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, return err; } +static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) +{ + struct xe_vma_op *op; + int err; + + if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) + return 0; + + list_for_each_entry(op, &vops->list, link) { + if (op->base.op == DRM_GPUVA_OP_PREFETCH) { + err = prefetch_ranges(vm, op); + if (err) + return err; + } + } + + return 0; +} + static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, struct xe_vma_ops *vops) @@ -3082,9 +3324,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, if (!*bind_ops) return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; - err = __copy_from_user(*bind_ops, bind_user, - sizeof(struct drm_xe_vm_bind_op) * - args->num_binds); + err = copy_from_user(*bind_ops, bind_user, + sizeof(struct drm_xe_vm_bind_op) * + args->num_binds); if (XE_IOCTL_DBG(xe, err)) { err = -EFAULT; goto free_bind_ops; @@ -3109,7 +3351,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && (!xe_vm_in_fault_mode(vm) || - !IS_ENABLED(CONFIG_DRM_GPUSVM)))) { + !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { err = -EINVAL; goto free_bind_ops; } @@ -3215,6 +3457,7 @@ static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, vops->q = q; vops->syncs = syncs; vops->num_syncs = num_syncs; + vops->flags = 0; } static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, @@ -3223,9 +3466,9 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, { u16 coh_mode; - if (XE_IOCTL_DBG(xe, range > bo->size) || + if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || XE_IOCTL_DBG(xe, obj_offset > - bo->size - range)) { + xe_bo_size(bo) - range)) { return -EINVAL; } @@ -3243,7 +3486,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, XE_64K_PAGE_MASK) || XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { - return -EINVAL; + return -EINVAL; } } @@ -3251,7 +3494,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, if (bo->cpu_caching) { if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { - return -EINVAL; + return -EINVAL; } } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { /* @@ -3260,7 +3503,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, * how it was mapped on the CPU. Just assume is it * potentially cached on CPU side. */ - return -EINVAL; + return -EINVAL; } /* If a BO is protected it can only be mapped if the key is still valid */ @@ -3422,7 +3665,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; u16 pat_index = bind_ops[i].pat_index; - ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, + ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset, addr, range, op, flags, prefetch_region, pat_index); if (IS_ERR(ops[i])) { @@ -3455,6 +3698,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (err) goto unwind_ops; + err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops); + if (err) + goto unwind_ops; + fence = vm_bind_ioctl_ops_execute(vm, &vops); if (IS_ERR(fence)) err = PTR_ERR(fence); @@ -3524,7 +3771,7 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, xe_vma_ops_init(&vops, vm, q, NULL, 0); - ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size, + ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), DRM_XE_VM_BIND_OP_MAP, 0, 0, vm->xe->pat.idx[cache_lvl]); if (IS_ERR(ops)) { @@ -3596,6 +3843,68 @@ void xe_vm_unlock(struct xe_vm *vm) } /** + * xe_vm_range_tilemask_tlb_invalidation - Issue a TLB invalidation on this tilemask for an + * address range + * @vm: The VM + * @start: start address + * @end: end address + * @tile_mask: mask for which gt's issue tlb invalidation + * + * Issue a range based TLB invalidation for gt's in tilemask + * + * Returns 0 for success, negative error code otherwise. 
+ */ +int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start, + u64 end, u8 tile_mask) +{ + struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; + struct xe_tile *tile; + u32 fence_id = 0; + u8 id; + int err; + + if (!tile_mask) + return 0; + + for_each_tile(tile, vm->xe, id) { + if (tile_mask & BIT(id)) { + xe_gt_tlb_invalidation_fence_init(tile->primary_gt, + &fence[fence_id], true); + + err = xe_gt_tlb_invalidation_range(tile->primary_gt, + &fence[fence_id], + start, + end, + vm->usm.asid); + if (err) + goto wait; + ++fence_id; + + if (!tile->media_gt) + continue; + + xe_gt_tlb_invalidation_fence_init(tile->media_gt, + &fence[fence_id], true); + + err = xe_gt_tlb_invalidation_range(tile->media_gt, + &fence[fence_id], + start, + end, + vm->usm.asid); + if (err) + goto wait; + ++fence_id; + } + } + +wait: + for (id = 0; id < fence_id; ++id) + xe_gt_tlb_invalidation_fence_wait(&fence[id]); + + return err; +} + +/** * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock * @vma: VMA to invalidate * @@ -3608,28 +3917,34 @@ void xe_vm_unlock(struct xe_vm *vm) int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; + struct xe_vm *vm = xe_vma_vm(vma); struct xe_tile *tile; - struct xe_gt_tlb_invalidation_fence - fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; - u8 id; - u32 fence_id = 0; + u8 tile_mask = 0; int ret = 0; + u8 id; xe_assert(xe, !xe_vma_is_null(vma)); xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma)); trace_xe_vma_invalidate(vma); - vm_dbg(&xe_vma_vm(vma)->xe->drm, + vm_dbg(&vm->xe->drm, "INVALIDATE: addr=0x%016llx, range=0x%016llx", xe_vma_start(vma), xe_vma_size(vma)); - /* Check that we don't race with page-table updates */ + /* + * Check that we don't race with page-table updates, tile_invalidated + * update is safe + */ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { if (xe_vma_is_userptr(vma)) { + lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) || + (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) && + lockdep_is_held(&xe_vm_resv(vm)->lock.base))); + WARN_ON_ONCE(!mmu_interval_check_retry (&to_userptr_vma(vma)->userptr.notifier, to_userptr_vma(vma)->userptr.notifier_seq)); - WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)), + WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP)); } else { @@ -3637,39 +3952,17 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) } } - for_each_tile(tile, xe, id) { - if (xe_pt_zap_ptes(tile, vma)) { - xe_device_wmb(xe); - xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[fence_id], - true); - - ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, - &fence[fence_id], vma); - if (ret) - goto wait; - ++fence_id; - - if (!tile->media_gt) - continue; + for_each_tile(tile, xe, id) + if (xe_pt_zap_ptes(tile, vma)) + tile_mask |= BIT(id); - xe_gt_tlb_invalidation_fence_init(tile->media_gt, - &fence[fence_id], - true); + xe_device_wmb(xe); - ret = xe_gt_tlb_invalidation_vma(tile->media_gt, - &fence[fence_id], vma); - if (ret) - goto wait; - ++fence_id; - } - } - -wait: - for (id = 0; id < fence_id; ++id) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); + ret = xe_vm_range_tilemask_tlb_invalidation(xe_vma_vm(vma), xe_vma_start(vma), + xe_vma_end(vma), tile_mask); - vma->tile_invalidated = vma->tile_mask; + /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ + WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); return ret; } @@ -3846,6 +4139,9 @@ void 
xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) } drm_puts(p, "\n"); + + if (drm_coredump_printer_is_full(p)) + return; } } diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 0ef811fc2bde..3475a118f666 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -169,6 +169,8 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma) !xe_vma_is_cpu_addr_mirror(vma); } +struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr); + /** * to_userptr_vma() - Return a pointer to an embedding userptr vma * @vma: Pointer to the embedded struct xe_vma @@ -226,6 +228,9 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, struct xe_svm_range *range); +int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start, + u64 end, u8 tile_mask); + int xe_vm_invalidate_vma(struct xe_vma *vma); int xe_vm_validate_protected(struct xe_vm *vm); @@ -301,6 +306,94 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); +/** + * xe_vm_set_validating() - Register this task as currently making bos resident + * @allow_res_evict: Allow eviction of buffer objects bound to @vm when + * validating. + * @vm: Pointer to the vm or NULL. + * + * Register this task as currently making bos resident for the vm. Intended + * to avoid eviction by the same task of shared bos bound to the vm. + * Call with the vm's resv lock held. + * + * Return: A pin cookie that should be used for xe_vm_clear_validating(). + */ +static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm, + bool allow_res_evict) +{ + struct pin_cookie cookie = {}; + + if (vm && !allow_res_evict) { + xe_vm_assert_held(vm); + cookie = lockdep_pin_lock(&xe_vm_resv(vm)->lock.base); + /* Pairs with READ_ONCE in xe_vm_is_validating() */ + WRITE_ONCE(vm->validating, current); + } + + return cookie; +} + +/** + * xe_vm_clear_validating() - Unregister this task as currently making bos resident + * @vm: Pointer to the vm or NULL + * @allow_res_evict: Eviction from @vm was allowed. Must be set to the same + * value as for xe_vm_set_validation(). + * @cookie: Cookie obtained from xe_vm_set_validating(). + * + * Register this task as currently making bos resident for the vm. Intended + * to avoid eviction by the same task of shared bos bound to the vm. + * Call with the vm's resv lock held. + */ +static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict, + struct pin_cookie cookie) +{ + if (vm && !allow_res_evict) { + lockdep_unpin_lock(&xe_vm_resv(vm)->lock.base, cookie); + /* Pairs with READ_ONCE in xe_vm_is_validating() */ + WRITE_ONCE(vm->validating, NULL); + } +} + +/** + * xe_vm_is_validating() - Whether bos bound to the vm are currently being made resident + * by the current task. + * @vm: Pointer to the vm. + * + * If this function returns %true, we should be in a vm resv locked region, since + * the current process is the same task that called xe_vm_set_validating(). + * The function asserts that that's indeed the case. + * + * Return: %true if the task is currently making bos resident, %false otherwise. 
+ */ +static inline bool xe_vm_is_validating(struct xe_vm *vm) +{ + /* Pairs with WRITE_ONCE in xe_vm_is_validating() */ + if (READ_ONCE(vm->validating) == current) { + xe_vm_assert_held(vm); + return true; + } + return false; +} + +/** + * xe_vm_has_valid_gpu_mapping() - Advisory helper to check if VMA or SVM range has + * a valid GPU mapping + * @tile: The tile which the GPU mapping belongs to + * @tile_present: Tile present mask + * @tile_invalidated: Tile invalidated mask + * + * The READ_ONCEs pair with WRITE_ONCEs in either the TLB invalidation paths + * (xe_vm.c, xe_svm.c) or the binding paths (xe_pt.c). These are not reliable + * without the notifier lock in userptr or SVM cases, and not reliable without + * the BO dma-resv lock in the BO case. As such, they should only be used in + * opportunistic cases (e.g., skipping a page fault fix or not skipping a TLB + * invalidation) where it is harmless. + * + * Return: True is there are valid GPU pages, False otherwise + */ +#define xe_vm_has_valid_gpu_mapping(tile, tile_present, tile_invalidated) \ + ((READ_ONCE(tile_present) & ~READ_ONCE(tile_invalidated)) & BIT((tile)->id)) + #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); #else diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 84fa41b9fa20..bed6088e1bb3 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -100,14 +100,21 @@ struct xe_vma { struct work_struct destroy_work; }; - /** @tile_invalidated: VMA has been invalidated */ + /** + * @tile_invalidated: Tile mask of binding are invalidated for this VMA. + * protected by BO's resv and for userptrs, vm->userptr.notifier_lock in + * write mode for writing or vm->userptr.notifier_lock in read mode and + * the vm->resv. For stable reading, BO's resv or userptr + * vm->userptr.notifier_lock in read mode is required. Can be + * opportunistically read with READ_ONCE outside of locks. + */ u8 tile_invalidated; /** @tile_mask: Tile mask of where to create binding for this VMA */ u8 tile_mask; /** - * @tile_present: GT mask of binding are present for this VMA. + * @tile_present: Tile mask of binding are present for this VMA. * protected by vm->lock, vm->resv and for userptrs, * vm->userptr.notifier_lock for writing. Needs either for reading, * but if reading is done under the vm->lock only, it needs to be held @@ -310,6 +317,14 @@ struct xe_vm { * protected by the vm resv. */ u64 tlb_flush_seqno; + /** + * @validating: The task that is currently making bos resident for this vm. + * Protected by the VM's resv for writing. Opportunistic reading can be done + * using READ_ONCE. Note: This is a workaround for the + * TTM eviction_valuable() callback not being passed a struct + * ttm_operation_context(). Future work might want to address this. + */ + struct task_struct *validating; /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ bool batch_invalidate_tlb; /** @xef: XE file handle for tracking this VM's drm client */ @@ -330,6 +345,8 @@ struct xe_vma_op_map { bool is_cpu_addr_mirror; /** @dumpable: whether BO is dumped on GPU hang */ bool dumpable; + /** @invalidate: invalidate the VMA before bind */ + bool invalidate_on_bind; /** @pat_index: The pat index to use for this operation. 
*/ u16 pat_index; }; @@ -372,6 +389,16 @@ struct xe_vma_op_unmap_range { struct xe_svm_range *range; }; +/** struct xe_vma_op_prefetch_range - VMA prefetch range operation */ +struct xe_vma_op_prefetch_range { + /** @range: xarray for SVM ranges data */ + struct xarray range; + /** @ranges_count: number of svm ranges to map */ + u32 ranges_count; + /** @region: memory region to prefetch to */ + u32 region; +}; + /** enum xe_vma_op_flags - flags for VMA operation */ enum xe_vma_op_flags { /** @XE_VMA_OP_COMMITTED: VMA operation committed */ @@ -414,6 +441,8 @@ struct xe_vma_op { struct xe_vma_op_map_range map_range; /** @unmap_range: VMA unmap range operation specific data */ struct xe_vma_op_unmap_range unmap_range; + /** @prefetch_range: VMA prefetch range operation specific data */ + struct xe_vma_op_prefetch_range prefetch_range; }; }; @@ -431,6 +460,9 @@ struct xe_vma_ops { u32 num_syncs; /** @pt_update_ops: page table update operations */ struct xe_vm_pgtable_update_ops pt_update_ops[XE_MAX_TILES_PER_DEVICE]; + /** @flag: signify the properties within xe_vma_ops*/ +#define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0) + u32 flags; #ifdef TEST_VM_OPS_ERROR /** @inject_error: inject error to test error handling */ bool inject_error; diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index b1f81dca610d..e421a74fb87c 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -49,7 +49,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) */ static void resize_vram_bar(struct xe_device *xe) { - u64 force_vram_bar_size = xe_modparam.force_vram_bar_size; + int force_vram_bar_size = xe_modparam.force_vram_bar_size; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct pci_bus *root = pdev->bus; resource_size_t current_size; @@ -66,6 +66,9 @@ static void resize_vram_bar(struct xe_device *xe) if (!bar_size_mask) return; + if (force_vram_bar_size < 0) + return; + /* set to a specific size? 
*/ if (force_vram_bar_size) { u32 bar_size_bit; diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c index b378848d3b7b..8f23a27871b6 100644 --- a/drivers/gpu/drm/xe/xe_vsec.c +++ b/drivers/gpu/drm/xe/xe_vsec.c @@ -24,6 +24,7 @@ #define BMG_DEVICE_ID 0xE2F8 static struct intel_vsec_header bmg_telemetry = { + .rev = 1, .length = 0x10, .id = VSEC_ID_TELEMETRY, .num_entries = 2, @@ -32,28 +33,19 @@ static struct intel_vsec_header bmg_telemetry = { .offset = BMG_DISCOVERY_OFFSET, }; -static struct intel_vsec_header bmg_punit_crashlog = { +static struct intel_vsec_header bmg_crashlog = { + .rev = 1, .length = 0x10, .id = VSEC_ID_CRASHLOG, - .num_entries = 1, - .entry_size = 4, + .num_entries = 2, + .entry_size = 6, .tbir = 0, .offset = BMG_DISCOVERY_OFFSET + 0x60, }; -static struct intel_vsec_header bmg_oobmsm_crashlog = { - .length = 0x10, - .id = VSEC_ID_CRASHLOG, - .num_entries = 1, - .entry_size = 4, - .tbir = 0, - .offset = BMG_DISCOVERY_OFFSET + 0x78, -}; - static struct intel_vsec_header *bmg_capabilities[] = { &bmg_telemetry, - &bmg_punit_crashlog, - &bmg_oobmsm_crashlog, + &bmg_crashlog, NULL }; @@ -149,8 +141,8 @@ static int xe_guid_decode(u32 guid, int *index, u32 *offset) return 0; } -static int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, - u32 count) +int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, + u32 count) { struct xe_device *xe = pdev_to_xe_device(pdev); void __iomem *telem_addr = xe->mmio.regs + BMG_TELEMETRY_OFFSET; diff --git a/drivers/gpu/drm/xe/xe_vsec.h b/drivers/gpu/drm/xe/xe_vsec.h index 5777c53faec2..dabfb4e02d70 100644 --- a/drivers/gpu/drm/xe/xe_vsec.h +++ b/drivers/gpu/drm/xe/xe_vsec.h @@ -4,8 +4,12 @@ #ifndef _XE_VSEC_H_ #define _XE_VSEC_H_ +#include <linux/types.h> + +struct pci_dev; struct xe_device; void xe_vsec_init(struct xe_device *xe); +int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, u32 count); #endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 2f833f0d575f..22a98600fd8f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -10,6 +10,7 @@ #include <linux/compiler_types.h> #include <linux/fault-inject.h> +#include <generated/xe_device_wa_oob.h> #include <generated/xe_wa_oob.h> #include "regs/xe_engine_regs.h" @@ -230,6 +231,18 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + /* Xe2_HPG */ + + { XE_RTP_NAME("16025250150"), + XE_RTP_RULES(GRAPHICS_VERSION(2001)), + XE_RTP_ACTIONS(SET(LSN_VC_REG2, + LSN_LNI_WGT(1) | + LSN_LNE_WGT(1) | + LSN_DIM_X_WGT(1) | + LSN_DIM_Y_WGT(1) | + LSN_DIM_Z_WGT(1))) + }, + /* Xe2_HPM */ { XE_RTP_NAME("16021867713"), @@ -273,6 +286,18 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + { XE_RTP_NAME("16021865536"), + XE_RTP_RULES(MEDIA_VERSION(3002), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("16021867713"), + XE_RTP_RULES(MEDIA_VERSION(3002), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, { XE_RTP_NAME("14021486841"), XE_RTP_RULES(MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), ENGINE_CLASS(VIDEO_DECODE)), @@ -491,10 +516,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), 
FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) }, - { XE_RTP_NAME("16018737384"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) - }, /* * These two workarounds are the same, just applying to different * engines. Although Wa_18032095049 (for the RCS) isn't required on @@ -521,31 +542,38 @@ static const struct xe_rtp_entry_sr engine_was[] = { /* Xe2_HPG */ { XE_RTP_NAME("16018712365"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) }, { XE_RTP_NAME("16018737384"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) }, { XE_RTP_NAME("14019988906"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) }, { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) }, { XE_RTP_NAME("14020338487"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) }, { XE_RTP_NAME("18032247524"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) }, { XE_RTP_NAME("14018471104"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) }, /* @@ -554,7 +582,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { * apply this to all engines for simplicity. 
*/ { XE_RTP_NAME("16021639441"), - XE_RTP_RULES(GRAPHICS_VERSION(2001)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002)), XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), GHWSP_CSB_REPORT_DIS | PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, @@ -566,11 +594,12 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS)) }, { XE_RTP_NAME("14021402888"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, - { XE_RTP_NAME("14021821874"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + { XE_RTP_NAME("14021821874, 14022954250"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) }, @@ -628,6 +657,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION(3003), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, }; static const struct xe_rtp_entry_sr lrc_was[] = { @@ -762,7 +795,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT)) }, { XE_RTP_NAME("18033852989"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) }, { XE_RTP_NAME("14021567978"), @@ -795,7 +828,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN)) }, { XE_RTP_NAME("14019386621"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) }, { XE_RTP_NAME("14020756599"), @@ -812,13 +845,17 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_AUTOSTRIP)) }, { XE_RTP_NAME("15016589081"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, { XE_RTP_NAME("22021007897"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) }, + { XE_RTP_NAME("18033852989"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) + }, /* Xe3_LPG */ { XE_RTP_NAME("14021490052"), @@ -840,9 +877,34 @@ static __maybe_unused const struct xe_rtp_entry oob_was[] = { static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT); +static __maybe_unused const struct xe_rtp_entry device_oob_was[] = { +#include <generated/xe_device_wa_oob.c> + {} +}; + +static_assert(ARRAY_SIZE(device_oob_was) - 1 == _XE_DEVICE_WA_OOB_COUNT); + __diag_pop(); /** + * xe_wa_process_device_oob - process OOB workaround table + * @xe: device instance to process workarounds for + * + * process OOB workaround table for this device, marking in @xe the + * workarounds that are active. 
+ */
+
+void xe_wa_process_device_oob(struct xe_device *xe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(xe);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was));
+
+	xe->wa_active.oob_initialized = true;
+	xe_rtp_process(&ctx, device_oob_was);
+}
+
+/**
  * xe_wa_process_oob - process OOB workaround table
  * @gt: GT instance to process workarounds for
  *
@@ -911,6 +973,28 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe)
 }
 
 /**
+ * xe_wa_device_init - initialize device with workaround oob bookkeeping
+ * @xe: Xe device instance to initialize
+ *
+ * Returns 0 for success, negative with error code otherwise
+ */
+int xe_wa_device_init(struct xe_device *xe)
+{
+	unsigned long *p;
+
+	p = drmm_kzalloc(&xe->drm,
+			 sizeof(*p) * BITS_TO_LONGS(ARRAY_SIZE(device_oob_was)),
+			 GFP_KERNEL);
+
+	if (!p)
+		return -ENOMEM;
+
+	xe->wa_active.oob = p;
+
+	return 0;
+}
+
+/**
  * xe_wa_init - initialize gt with workaround bookkeeping
  * @gt: GT instance to initialize
  *
@@ -944,6 +1028,16 @@ int xe_wa_init(struct xe_gt *gt)
 }
 ALLOW_ERROR_INJECTION(xe_wa_init, ERRNO); /* See xe_pci_probe() */
 
+void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p)
+{
+	size_t idx;
+
+	drm_printf(p, "Device OOB Workarounds\n");
+	for_each_set_bit(idx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was))
+		if (device_oob_was[idx].name)
+			drm_printf_indent(p, 1, "%s\n", device_oob_was[idx].name);
+}
+
 void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
 {
 	size_t idx;
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
index 52337405b5bc..f3880c65cb8d 100644
--- a/drivers/gpu/drm/xe/xe_wa.h
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -13,17 +13,19 @@
 struct xe_gt;
 struct xe_hw_engine;
 struct xe_tile;
 
+int xe_wa_device_init(struct xe_device *xe);
 int xe_wa_init(struct xe_gt *gt);
+void xe_wa_process_device_oob(struct xe_device *xe);
 void xe_wa_process_oob(struct xe_gt *gt);
 void xe_wa_process_gt(struct xe_gt *gt);
 void xe_wa_process_engine(struct xe_hw_engine *hwe);
 void xe_wa_process_lrc(struct xe_hw_engine *hwe);
 void xe_wa_apply_tile_workarounds(struct xe_tile *tile);
+void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p);
 void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
 
 /**
- * XE_WA - Out-of-band workarounds, that don't fit the lifecycle any
- * other more specific type
+ * XE_WA - Out-of-band workarounds, to be queried and called as needed.
  * @gt__: gt instance
  * @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h
  */
@@ -32,4 +34,20 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
 	test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob); \
 })
 
+/**
+ * XE_DEVICE_WA - Out-of-band Device workarounds, to be queried and called
+ * as needed.
+ * @xe__: xe_device
+ * @id__: XE_DEVICE_WA_OOB_<id__>, as generated by build system in generated/xe_device_wa_oob.h
+ */
+#define XE_DEVICE_WA(xe__, id__) ({ \
+	xe_assert(xe__, (xe__)->wa_active.oob_initialized); \
+	test_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \
+})
+
+#define XE_DEVICE_WA_DISABLE(xe__, id__) ({ \
+	xe_assert(xe__, (xe__)->wa_active.oob_initialized); \
+	clear_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \
+})
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 9b9e176992a8..e990f20eccfe 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -21,7 +21,8 @@
 		GRAPHICS_VERSION_RANGE(1270, 1274)
 		MEDIA_VERSION(1300)
 		PLATFORM(DG2)
-14018094691	GRAPHICS_VERSION(2004)
+14018094691	GRAPHICS_VERSION_RANGE(2001, 2002)
+		GRAPHICS_VERSION(2004)
 14019882105	GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
 18024947630	GRAPHICS_VERSION(2001)
 		GRAPHICS_VERSION(2004)
@@ -30,17 +31,19 @@
 		GRAPHICS_VERSION(2004)
 13011645652	GRAPHICS_VERSION(2004)
 		GRAPHICS_VERSION(3001)
-14022293748	GRAPHICS_VERSION(2001)
+14022293748	GRAPHICS_VERSION_RANGE(2001, 2002)
 		GRAPHICS_VERSION(2004)
 		GRAPHICS_VERSION_RANGE(3000, 3001)
-22019794406	GRAPHICS_VERSION(2001)
+		GRAPHICS_VERSION(3003)
+22019794406	GRAPHICS_VERSION_RANGE(2001, 2002)
 		GRAPHICS_VERSION(2004)
 		GRAPHICS_VERSION_RANGE(3000, 3001)
+		GRAPHICS_VERSION(3003)
 22019338487	MEDIA_VERSION(2000)
-		GRAPHICS_VERSION(2001)
+		GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
 		MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf)
 22019338487_display	PLATFORM(LUNARLAKE)
-16023588340	GRAPHICS_VERSION(2001)
+16023588340	GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
 14019789679	GRAPHICS_VERSION(1255)
 		GRAPHICS_VERSION_RANGE(1270, 2004)
 no_media_l3	MEDIA_VERSION(3000)
@@ -57,3 +60,15 @@ no_media_l3	MEDIA_VERSION(3000)
 		GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0)
 16023105232	GRAPHICS_VERSION_RANGE(2001, 3001)
 		MEDIA_VERSION_RANGE(1301, 3000)
+		MEDIA_VERSION(3002)
+		GRAPHICS_VERSION(3003)
+16026508708	GRAPHICS_VERSION_RANGE(1200, 3001)
+		MEDIA_VERSION_RANGE(1300, 3000)
+		MEDIA_VERSION(3002)
+		GRAPHICS_VERSION(3003)
+
+# SoC workaround - currently applies to all platforms with the following
+# primary GT GMDID
+14022085890	GRAPHICS_VERSION(2001)
+
+15015404425_disable	PLATFORM(PANTHERLAKE), MEDIA_STEP(B0, FOREVER)
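
The hunks above add device-scoped OOB workaround bookkeeping alongside the existing per-GT XE_WA() machinery: xe_wa_device_init() allocates the tracking bitmap, xe_wa_process_device_oob() matches the generated rules against the device, and XE_DEVICE_WA()/XE_DEVICE_WA_DISABLE() query or clear individual entries. The sketch below is illustrative only and not part of the patch; the helper name and the workaround ID 12345678901 are hypothetical placeholders, since real IDs come from generated/xe_device_wa_oob.h, built from xe_device_wa_oob.rules.

/* Illustrative sketch only -- not part of the patch above. */
#include <drm/drm_print.h>

#include <generated/xe_device_wa_oob.h>

#include "xe_device.h"
#include "xe_wa.h"

/* Hypothetical probe-time helper; WA ID 12345678901 is a placeholder. */
static int example_setup_device_workarounds(struct xe_device *xe)
{
	int err;

	/* Allocate the per-device bitmap that tracks active OOB workarounds. */
	err = xe_wa_device_init(xe);
	if (err)
		return err;

	/* Evaluate xe_device_wa_oob.rules against this device and mark hits. */
	xe_wa_process_device_oob(xe);

	/* Any later code path can then query a device-scope workaround... */
	if (XE_DEVICE_WA(xe, 12345678901))
		drm_dbg(&xe->drm, "device workaround 12345678901 is active\n");

	/* ...or drop one that turns out not to apply in this configuration. */
	XE_DEVICE_WA_DISABLE(xe, 12345678901);

	return 0;
}

Note that both macros assert wa_active.oob_initialized, so queries are only valid after xe_wa_process_device_oob() has run.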
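xe_wa_device_dump() mirrors the existing xe_wa_dump() and takes a struct drm_printer, so it can be attached to any printer backend. A minimal sketch, assuming a seq_file-backed debugfs-style show callback with the xe device stashed in m->private (that wiring is an assumption, not part of the patch):

/* Illustrative sketch only -- assumes xe was stored in m->private. */
#include <linux/seq_file.h>

#include <drm/drm_print.h>

#include "xe_device.h"
#include "xe_wa.h"

static int device_workarounds_show(struct seq_file *m, void *data)
{
	struct xe_device *xe = m->private;
	struct drm_printer p = drm_seq_file_printer(m);

	/* Prints "Device OOB Workarounds" followed by one active entry per line. */
	xe_wa_device_dump(xe, &p);

	return 0;
}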