Diffstat (limited to 'include')
32 files changed, 1361 insertions, 201 deletions
diff --git a/include/drm/intel/xe_sriov_vfio.h b/include/drm/intel/xe_sriov_vfio.h new file mode 100644 index 000000000000..e9814e8149fd --- /dev/null +++ b/include/drm/intel/xe_sriov_vfio.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_VFIO_H_ +#define _XE_SRIOV_VFIO_H_ + +#include <linux/types.h> + +struct pci_dev; +struct xe_device; + +/** + * xe_sriov_vfio_get_pf() - Get PF &xe_device. + * @pdev: the VF &pci_dev device + * + * Return: pointer to PF &xe_device, NULL otherwise. + */ +struct xe_device *xe_sriov_vfio_get_pf(struct pci_dev *pdev); + +/** + * xe_sriov_vfio_migration_supported() - Check if migration is supported. + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf() + * + * Return: true if migration is supported, false otherwise. + */ +bool xe_sriov_vfio_migration_supported(struct xe_device *xe); + +/** + * xe_sriov_vfio_wait_flr_done() - Wait for VF FLR completion. + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf() + * @vfid: the VF identifier (can't be 0) + * + * This function will wait until VF FLR is processed by PF on all tiles (or + * until timeout occurs). + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_vfio_wait_flr_done(struct xe_device *xe, unsigned int vfid); + +/** + * xe_sriov_vfio_suspend_device() - Suspend VF. + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf() + * @vfid: the VF identifier (can't be 0) + * + * This function will pause VF on all tiles/GTs. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_vfio_suspend_device(struct xe_device *xe, unsigned int vfid); + +/** + * xe_sriov_vfio_resume_device() - Resume VF. + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf() + * @vfid: the VF identifier (can't be 0) + * + * This function will resume VF on all tiles. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_vfio_resume_device(struct xe_device *xe, unsigned int vfid); + +/** + * xe_sriov_vfio_stop_copy_enter() - Initiate a VF device migration data save. + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf() + * @vfid: the VF identifier (can't be 0) + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_vfio_stop_copy_enter(struct xe_device *xe, unsigned int vfid); + +/** + * xe_sriov_vfio_stop_copy_exit() - Finish a VF device migration data save. + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf() + * @vfid: the VF identifier (can't be 0) + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_vfio_stop_copy_exit(struct xe_device *xe, unsigned int vfid); + +/** + * xe_sriov_vfio_resume_data_enter() - Initiate a VF device migration data restore. + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf() + * @vfid: the VF identifier (can't be 0) + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_vfio_resume_data_enter(struct xe_device *xe, unsigned int vfid); + +/** + * xe_sriov_vfio_resume_data_exit() - Finish a VF device migration data restore. + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf() + * @vfid: the VF identifier (can't be 0) + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_vfio_resume_data_exit(struct xe_device *xe, unsigned int vfid); + +/** + * xe_sriov_vfio_error() - Move VF device to error state. 
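/*
 * A sketch (not part of this header) of how a VFIO variant driver might
 * drive the save side of this API when entering STOP_COPY. The helper
 * name and the error-handling policy are hypothetical; only the
 * xe_sriov_vfio_*() calls are declared in this file.
 */
static int example_enter_stop_copy(struct xe_device *xe, unsigned int vfid)
{
	int ret;

	if (!xe_sriov_vfio_migration_supported(xe))
		return -EOPNOTSUPP;

	/* quiesce the VF on all tiles/GTs before snapshotting it */
	ret = xe_sriov_vfio_suspend_device(xe, vfid);
	if (ret)
		return ret;

	/* start producing migration data, consumed via data_read() */
	ret = xe_sriov_vfio_stop_copy_enter(xe, vfid);
	if (ret)
		xe_sriov_vfio_error(xe, vfid); /* recover only via reset */
	return ret;
}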
+ * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf() + * @vfid: the VF identifier (can't be 0) + * + * Reset is needed to move it out of error state. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_vfio_error(struct xe_device *xe, unsigned int vfid); + +/** + * xe_sriov_vfio_data_read() - Read migration data from the VF device. + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf() + * @vfid: the VF identifier (can't be 0) + * @buf: start address of userspace buffer + * @len: requested read size from userspace + * + * Return: number of bytes that have been successfully read, + * 0 if no more migration data is available, -errno on failure. + */ +ssize_t xe_sriov_vfio_data_read(struct xe_device *xe, unsigned int vfid, + char __user *buf, size_t len); +/** + * xe_sriov_vfio_data_write() - Write migration data to the VF device. + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf() + * @vfid: the VF identifier (can't be 0) + * @buf: start address of userspace buffer + * @len: requested write size from userspace + * + * Return: number of bytes that have been successfully written, -errno on failure. + */ +ssize_t xe_sriov_vfio_data_write(struct xe_device *xe, unsigned int vfid, + const char __user *buf, size_t len); +/** + * xe_sriov_vfio_stop_copy_size() - Get a size estimate of VF device migration data. + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf() + * @vfid: the VF identifier (can't be 0) + * + * Return: migration data size in bytes or a negative error code on failure. + */ +ssize_t xe_sriov_vfio_stop_copy_size(struct xe_device *xe, unsigned int vfid); + +#endif diff --git a/include/dt-bindings/memory/mediatek,mt8189-memory-port.h b/include/dt-bindings/memory/mediatek,mt8189-memory-port.h new file mode 100644 index 000000000000..849fead3d0f7 --- /dev/null +++ b/include/dt-bindings/memory/mediatek,mt8189-memory-port.h @@ -0,0 +1,283 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2025 MediaTek Inc. + * Author: Zhengnan chen <zhengnan.chen@mediatek.com> + */ +#ifndef _DT_BINDINGS_MEMORY_MEDIATEK_MT8189_MEMORY_PORT_H_ +#define _DT_BINDINGS_MEMORY_MEDIATEK_MT8189_MEMORY_PORT_H_ + +#include <dt-bindings/memory/mtk-memory-port.h> + +#define SMI_L0_ID (0) +#define SMI_L1_ID (1) +#define SMI_L2_ID (2) +#define SMI_L4_ID (3) +#define SMI_L7_ID (4) +#define SMI_L9_ID (5) +#define SMI_L11_ID (6) +#define SMI_L13_ID (7) +#define SMI_L14_ID (8) +#define SMI_L16_ID (9) +#define SMI_L17_ID (10) +#define SMI_L19_ID (11) +#define SMI_L20_ID (12) + +/* + * The MM IOMMU supports a 16GB DMA address space. We separate it into four ranges: + * 0 ~ 4G; 4G ~ 8G; 8G ~ 12G; 12G ~ 16G; the masters may be placed in + * any one of these regions. BUT: + * a) Make sure all the ports inside a larb are in one range. + * b) The iova of any master can NOT cross the 4G/8G/12G boundary. + * + * This is the suggested mapping in this SoC: + * + * modules dma-address-region larbs-ports + * disp/mdp 0 ~ 4G larb0/1/2 + * vcodec 4G ~ 8G larb4/7 + * imgsys/cam/ipesys 8G ~ 12G the other larbs. 
+ * N/A 12G ~ 16G + */ + +/* Larb0 -- disp */ +#define M4U_L0_P0_DISP_OVL0_4L_HDR MTK_M4U_ID(SMI_L0_ID, 0) +#define M4U_L0_P1_DISP_OVL0_4L_RDMA0 MTK_M4U_ID(SMI_L0_ID, 1) +#define M4U_L0_P2_DISP_OVL1_4L_RDMA1 MTK_M4U_ID(SMI_L0_ID, 2) +#define M4U_L0_P3_DISP_OVL0_4L_RDMA2 MTK_M4U_ID(SMI_L0_ID, 3) +#define M4U_L0_P4_DISP_OVL1_4L_RDMA3 MTK_M4U_ID(SMI_L0_ID, 4) +#define M4U_L0_P5_DISP_RDMA0 MTK_M4U_ID(SMI_L0_ID, 5) +#define M4U_L0_P6_DISP_WDMA0 MTK_M4U_ID(SMI_L0_ID, 6) +#define M4U_L0_P7_DISP_FAKE_ENG0 MTK_M4U_ID(SMI_L0_ID, 7) + +/* Larb1 -- disp */ +#define M4U_L1_P0_DISP_OVL1_4L_HDR MTK_M4U_ID(SMI_L1_ID, 0) +#define M4U_L1_P1_DISP_OVL1_4L_RDMA0 MTK_M4U_ID(SMI_L1_ID, 1) +#define M4U_L1_P2_DISP_OVL0_4L_RDMA1 MTK_M4U_ID(SMI_L1_ID, 2) +#define M4U_L1_P3_DISP_OVL1_4L_RDMA2 MTK_M4U_ID(SMI_L1_ID, 3) +#define M4U_L1_P4_DISP_OVL0_4L_RDMA3 MTK_M4U_ID(SMI_L1_ID, 4) +#define M4U_L1_P5_DISP_RDMA1 MTK_M4U_ID(SMI_L1_ID, 5) +#define M4U_L1_P6_DISP_WDMA1 MTK_M4U_ID(SMI_L1_ID, 6) +#define M4U_L1_P7_DISP_FAKE_ENG1 MTK_M4U_ID(SMI_L1_ID, 7) + +/* Larb2 -- mmlsys(mdp) */ +#define M4U_L2_P0_MDP_RDMA0 MTK_M4U_ID(SMI_L2_ID, 0) +#define M4U_L2_P1_MDP_RDMA1 MTK_M4U_ID(SMI_L2_ID, 1) +#define M4U_L2_P2_MDP_WROT0 MTK_M4U_ID(SMI_L2_ID, 2) +#define M4U_L2_P3_MDP_WROT1 MTK_M4U_ID(SMI_L2_ID, 3) +#define M4U_L2_P4_MDP_DUMMY0 MTK_M4U_ID(SMI_L2_ID, 4) +#define M4U_L2_P5_MDP_DUMMY1 MTK_M4U_ID(SMI_L2_ID, 5) +#define M4U_L2_P6_MDP_RDMA2 MTK_M4U_ID(SMI_L2_ID, 6) +#define M4U_L2_P7_MDP_RDMA3 MTK_M4U_ID(SMI_L2_ID, 7) +#define M4U_L2_P8_MDP_WROT2 MTK_M4U_ID(SMI_L2_ID, 8) +#define M4U_L2_P9_MDP_WROT3 MTK_M4U_ID(SMI_L2_ID, 9) +#define M4U_L2_P10_DISP_FAKE0 MTK_M4U_ID(SMI_L2_ID, 10) + +/* Larb3: null */ + +/* Larb4 -- vdec */ +#define M4U_L4_P0_HW_VDEC_MC_EXT MTK_M4U_ID(SMI_L4_ID, 0) +#define M4U_L4_P1_HW_VDEC_UFO_EXT MTK_M4U_ID(SMI_L4_ID, 1) +#define M4U_L4_P2_HW_VDEC_PP_EXT MTK_M4U_ID(SMI_L4_ID, 2) +#define M4U_L4_P3_HW_VDEC_PRED_RD_EXT MTK_M4U_ID(SMI_L4_ID, 3) +#define M4U_L4_P4_HW_VDEC_PRED_WR_EXT MTK_M4U_ID(SMI_L4_ID, 4) +#define M4U_L4_P5_HW_VDEC_PPWRAP_EXT MTK_M4U_ID(SMI_L4_ID, 5) +#define M4U_L4_P6_HW_VDEC_TILE_EXT MTK_M4U_ID(SMI_L4_ID, 6) +#define M4U_L4_P7_HW_VDEC_VLD_EXT MTK_M4U_ID(SMI_L4_ID, 7) +#define M4U_L4_P8_HW_VDEC_VLD2_EXT MTK_M4U_ID(SMI_L4_ID, 8) +#define M4U_L4_P9_HW_VDEC_AVC_MV_EXT MTK_M4U_ID(SMI_L4_ID, 9) +#define M4U_L4_P10_HW_VDEC_RG_CTRL_DMA_EXT MTK_M4U_ID(SMI_L4_ID, 10) +#define M4U_L4_P11_HW_VDEC_UFO_ENC_EXT MTK_M4U_ID(SMI_L4_ID, 11) + +/* Larb5: null */ + +/* Larb6: null */ + +/* Larb7 -- venc */ +#define M4U_L7_P0_VENC_RCPU MTK_M4U_ID(SMI_L7_ID, 0) +#define M4U_L7_P1_VENC_REC MTK_M4U_ID(SMI_L7_ID, 1) +#define M4U_L7_P2_VENC_BSDMA MTK_M4U_ID(SMI_L7_ID, 2) +#define M4U_L7_P3_VENC_SV_COMV MTK_M4U_ID(SMI_L7_ID, 3) +#define M4U_L7_P4_VENC_RD_COMV MTK_M4U_ID(SMI_L7_ID, 4) +#define M4U_L7_P5_JPGENC_Y_RDMA MTK_M4U_ID(SMI_L7_ID, 5) +#define M4U_L7_P6_JPGENC_C_RDMA MTK_M4U_ID(SMI_L7_ID, 6) +#define M4U_L7_P7_JPGENC_Q_RDMA MTK_M4U_ID(SMI_L7_ID, 7) +#define M4U_L7_P8_VENC_SUB_W_LUMA MTK_M4U_ID(SMI_L7_ID, 8) +#define M4U_L7_P9_JPGENC_BSDMA MTK_M4U_ID(SMI_L7_ID, 9) +#define M4U_L7_P10_VENC_CUR_LUMA MTK_M4U_ID(SMI_L7_ID, 10) +#define M4U_L7_P11_VENC_CUR_CHROMA MTK_M4U_ID(SMI_L7_ID, 11) +#define M4U_L7_P12_VENC_REF_LUMA MTK_M4U_ID(SMI_L7_ID, 12) +#define M4U_L7_P13_VENC_REF_CHROMA MTK_M4U_ID(SMI_L7_ID, 13) +#define M4U_L7_P14_VENC_SUB_R_LUMA MTK_M4U_ID(SMI_L7_ID, 14) +#define M4U_L7_P15_JPGDEC_WDMA MTK_M4U_ID(SMI_L7_ID, 15) +#define M4U_L7_P16_JPGDEC_BSDMA MTK_M4U_ID(SMI_L7_ID, 16) +#define M4U_L7_P17_JPGDEC_HUFF_OFFSET 
MTK_M4U_ID(SMI_L7_ID, 17) + +/* Larb8: null */ + +/* Larb9 --imgsys */ +#define M4U_L9_P0_IMGI_D1 MTK_M4U_ID(SMI_L9_ID, 0) +#define M4U_L9_P1_IMGBI_D1 MTK_M4U_ID(SMI_L9_ID, 1) +#define M4U_L9_P2_DMGI_D1 MTK_M4U_ID(SMI_L9_ID, 2) +#define M4U_L9_P3_DEPI_D1 MTK_M4U_ID(SMI_L9_ID, 3) +#define M4U_L9_P4_LCE_D1 MTK_M4U_ID(SMI_L9_ID, 4) +#define M4U_L9_P5_SMTI_D1 MTK_M4U_ID(SMI_L9_ID, 5) +#define M4U_L9_P6_SMTO_D2 MTK_M4U_ID(SMI_L9_ID, 6) +#define M4U_L9_P7_SMTO_D1 MTK_M4U_ID(SMI_L9_ID, 7) +#define M4U_L9_P8_CRZO_D1 MTK_M4U_ID(SMI_L9_ID, 8) +#define M4U_L9_P9_IMG3O_D1 MTK_M4U_ID(SMI_L9_ID, 9) +#define M4U_L9_P10_VIPI_D1 MTK_M4U_ID(SMI_L9_ID, 10) +#define M4U_L9_P11_SMTI_D5 MTK_M4U_ID(SMI_L9_ID, 11) +#define M4U_L9_P12_TIMGO_D1 MTK_M4U_ID(SMI_L9_ID, 12) +#define M4U_L9_P13_UFBC_W0 MTK_M4U_ID(SMI_L9_ID, 13) +#define M4U_L9_P14_UFBC_R0 MTK_M4U_ID(SMI_L9_ID, 14) +#define M4U_L9_P15_WPE_RDMA1 MTK_M4U_ID(SMI_L9_ID, 15) +#define M4U_L9_P16_WPE_RDMA0 MTK_M4U_ID(SMI_L9_ID, 16) +#define M4U_L9_P17_WPE_WDMA MTK_M4U_ID(SMI_L9_ID, 17) +#define M4U_L9_P18_MFB_RDMA0 MTK_M4U_ID(SMI_L9_ID, 18) +#define M4U_L9_P19_MFB_RDMA1 MTK_M4U_ID(SMI_L9_ID, 19) +#define M4U_L9_P20_MFB_RDMA2 MTK_M4U_ID(SMI_L9_ID, 20) +#define M4U_L9_P21_MFB_RDMA3 MTK_M4U_ID(SMI_L9_ID, 21) +#define M4U_L9_P22_MFB_RDMA4 MTK_M4U_ID(SMI_L9_ID, 22) +#define M4U_L9_P23_MFB_RDMA5 MTK_M4U_ID(SMI_L9_ID, 23) +#define M4U_L9_P24_MFB_WDMA0 MTK_M4U_ID(SMI_L9_ID, 24) +#define M4U_L9_P25_MFB_WDMA1 MTK_M4U_ID(SMI_L9_ID, 25) +#define M4U_L9_P26_RESERVE6 MTK_M4U_ID(SMI_L9_ID, 26) +#define M4U_L9_P27_RESERVE7 MTK_M4U_ID(SMI_L9_ID, 27) +#define M4U_L9_P28_RESERVE8 MTK_M4U_ID(SMI_L9_ID, 28) + +/* Larb10: null */ + +/* Larb11 -- imgsys */ +#define M4U_L11_P0_IMGI_D1 MTK_M4U_ID(SMI_L11_ID, 0) +#define M4U_L11_P1_IMGBI_D1 MTK_M4U_ID(SMI_L11_ID, 1) +#define M4U_L11_P2_DMGI_D1 MTK_M4U_ID(SMI_L11_ID, 2) +#define M4U_L11_P3_DEPI_D1 MTK_M4U_ID(SMI_L11_ID, 3) +#define M4U_L11_P4_LCE_D1 MTK_M4U_ID(SMI_L11_ID, 4) +#define M4U_L11_P5_SMTI_D1 MTK_M4U_ID(SMI_L11_ID, 5) +#define M4U_L11_P6_SMTO_D2 MTK_M4U_ID(SMI_L11_ID, 6) +#define M4U_L11_P7_SMTO_D1 MTK_M4U_ID(SMI_L11_ID, 7) +#define M4U_L11_P8_CRZO_D1 MTK_M4U_ID(SMI_L11_ID, 8) +#define M4U_L11_P9_IMG3O_D1 MTK_M4U_ID(SMI_L11_ID, 9) +#define M4U_L11_P10_VIPI_D1 MTK_M4U_ID(SMI_L11_ID, 10) +#define M4U_L11_P11_SMTI_D5 MTK_M4U_ID(SMI_L11_ID, 11) +#define M4U_L11_P12_TIMGO_D1 MTK_M4U_ID(SMI_L11_ID, 12) +#define M4U_L11_P13_UFBC_W0 MTK_M4U_ID(SMI_L11_ID, 13) +#define M4U_L11_P14_UFBC_R0 MTK_M4U_ID(SMI_L11_ID, 14) +#define M4U_L11_P15_WPE_RDMA1 MTK_M4U_ID(SMI_L11_ID, 15) +#define M4U_L11_P16_WPE_RDMA0 MTK_M4U_ID(SMI_L11_ID, 16) +#define M4U_L11_P17_WPE_WDMA MTK_M4U_ID(SMI_L11_ID, 17) +#define M4U_L11_P18_MFB_RDMA0 MTK_M4U_ID(SMI_L11_ID, 18) +#define M4U_L11_P19_MFB_RDMA1 MTK_M4U_ID(SMI_L11_ID, 19) +#define M4U_L11_P20_MFB_RDMA2 MTK_M4U_ID(SMI_L11_ID, 20) +#define M4U_L11_P21_MFB_RDMA3 MTK_M4U_ID(SMI_L11_ID, 21) +#define M4U_L11_P22_MFB_RDMA4 MTK_M4U_ID(SMI_L11_ID, 22) +#define M4U_L11_P23_MFB_RDMA5 MTK_M4U_ID(SMI_L11_ID, 23) +#define M4U_L11_P24_MFB_WDMA0 MTK_M4U_ID(SMI_L11_ID, 24) +#define M4U_L11_P25_MFB_WDMA1 MTK_M4U_ID(SMI_L11_ID, 25) +#define M4U_L11_P26_RESERVE6 MTK_M4U_ID(SMI_L11_ID, 26) +#define M4U_L11_P27_RESERVE7 MTK_M4U_ID(SMI_L11_ID, 27) +#define M4U_L11_P28_RESERVE8 MTK_M4U_ID(SMI_L11_ID, 28) + +/* Larb12: null */ + +/* Larb13 -- cam */ +#define M4U_L13_P0_MRAWI MTK_M4U_ID(SMI_L13_ID, 0) +#define M4U_L13_P1_MRAWO_0 MTK_M4U_ID(SMI_L13_ID, 1) +#define M4U_L13_P2_MRAWO_1 MTK_M4U_ID(SMI_L13_ID, 2) +#define M4U_L13_P3_CAMSV_1 
MTK_M4U_ID(SMI_L13_ID, 3) +#define M4U_L13_P4_CAMSV_2 MTK_M4U_ID(SMI_L13_ID, 4) +#define M4U_L13_P5_CAMSV_3 MTK_M4U_ID(SMI_L13_ID, 5) +#define M4U_L13_P6_CAMSV_4 MTK_M4U_ID(SMI_L13_ID, 6) +#define M4U_L13_P7_CAMSV_5 MTK_M4U_ID(SMI_L13_ID, 7) +#define M4U_L13_P8_CAMSV_6 MTK_M4U_ID(SMI_L13_ID, 8) +#define M4U_L13_P9_CCUI MTK_M4U_ID(SMI_L13_ID, 9) +#define M4U_L13_P10_CCUO MTK_M4U_ID(SMI_L13_ID, 10) +#define M4U_L13_P11_FAKE MTK_M4U_ID(SMI_L13_ID, 11) +#define M4U_L13_P12_PDAI_0 MTK_M4U_ID(SMI_L13_ID, 12) +#define M4U_L13_P13_PDAI_1 MTK_M4U_ID(SMI_L13_ID, 13) +#define M4U_L13_P14_PDAO MTK_M4U_ID(SMI_L13_ID, 14) + +/* Larb14 -- cam */ +#define M4U_L14_P0_RESERVE MTK_M4U_ID(SMI_L14_ID, 0) +#define M4U_L14_P1_RESERVE MTK_M4U_ID(SMI_L14_ID, 1) +#define M4U_L14_P2_RESERVE MTK_M4U_ID(SMI_L14_ID, 2) +#define M4U_L14_P3_CAMSV_0 MTK_M4U_ID(SMI_L14_ID, 3) +#define M4U_L14_P4_CCUI MTK_M4U_ID(SMI_L14_ID, 4) +#define M4U_L14_P5_CCUO MTK_M4U_ID(SMI_L14_ID, 5) +#define M4U_L14_P6_CAMSV_7 MTK_M4U_ID(SMI_L14_ID, 6) +#define M4U_L14_P7_CAMSV_8 MTK_M4U_ID(SMI_L14_ID, 7) +#define M4U_L14_P8_CAMSV_9 MTK_M4U_ID(SMI_L14_ID, 8) +#define M4U_L14_P9_CAMSV_10 MTK_M4U_ID(SMI_L14_ID, 9) + +/* Larb15: null */ + +/* Larb16 -- cam */ +#define M4U_L16_P0_IMGO_R1_A MTK_M4U_ID(SMI_L16_ID, 0) +#define M4U_L16_P1_RRZO_R1_A MTK_M4U_ID(SMI_L16_ID, 1) +#define M4U_L16_P2_CQI_R1_A MTK_M4U_ID(SMI_L16_ID, 2) +#define M4U_L16_P3_BPCI_R1_A MTK_M4U_ID(SMI_L16_ID, 3) +#define M4U_L16_P4_YUVO_R1_A MTK_M4U_ID(SMI_L16_ID, 4) +#define M4U_L16_P5_UFDI_R2_A MTK_M4U_ID(SMI_L16_ID, 5) +#define M4U_L16_P6_RAWI_R2_A MTK_M4U_ID(SMI_L16_ID, 6) +#define M4U_L16_P7_RAWI_R3_A MTK_M4U_ID(SMI_L16_ID, 7) +#define M4U_L16_P8_AAO_R1_A MTK_M4U_ID(SMI_L16_ID, 8) +#define M4U_L16_P9_AFO_R1_A MTK_M4U_ID(SMI_L16_ID, 9) +#define M4U_L16_P10_FLKO_R1_A MTK_M4U_ID(SMI_L16_ID, 10) +#define M4U_L16_P11_LCESO_R1_A MTK_M4U_ID(SMI_L16_ID, 11) +#define M4U_L16_P12_CRZO_R1_A MTK_M4U_ID(SMI_L16_ID, 12) +#define M4U_L16_P13_LTMSO_R1_A MTK_M4U_ID(SMI_L16_ID, 13) +#define M4U_L16_P14_RSSO_R1_A MTK_M4U_ID(SMI_L16_ID, 14) +#define M4U_L16_P15_AAHO_R1_A MTK_M4U_ID(SMI_L16_ID, 15) +#define M4U_L16_P16_LSCI_R1_A MTK_M4U_ID(SMI_L16_ID, 16) + +/* Larb17 -- cam */ +#define M4U_L17_P0_IMGO_R1_B MTK_M4U_ID(SMI_L17_ID, 0) +#define M4U_L17_P1_RRZO_R1_B MTK_M4U_ID(SMI_L17_ID, 1) +#define M4U_L17_P2_CQI_R1_B MTK_M4U_ID(SMI_L17_ID, 2) +#define M4U_L17_P3_BPCI_R1_B MTK_M4U_ID(SMI_L17_ID, 3) +#define M4U_L17_P4_YUVO_R1_B MTK_M4U_ID(SMI_L17_ID, 4) +#define M4U_L17_P5_UFDI_R2_B MTK_M4U_ID(SMI_L17_ID, 5) +#define M4U_L17_P6_RAWI_R2_B MTK_M4U_ID(SMI_L17_ID, 6) +#define M4U_L17_P7_RAWI_R3_B MTK_M4U_ID(SMI_L17_ID, 7) +#define M4U_L17_P8_AAO_R1_B MTK_M4U_ID(SMI_L17_ID, 8) +#define M4U_L17_P9_AFO_R1_B MTK_M4U_ID(SMI_L17_ID, 9) +#define M4U_L17_P10_FLKO_R1_B MTK_M4U_ID(SMI_L17_ID, 10) +#define M4U_L17_P11_LCESO_R1_B MTK_M4U_ID(SMI_L17_ID, 11) +#define M4U_L17_P12_CRZO_R1_B MTK_M4U_ID(SMI_L17_ID, 12) +#define M4U_L17_P13_LTMSO_R1_B MTK_M4U_ID(SMI_L17_ID, 13) +#define M4U_L17_P14_RSSO_R1_B MTK_M4U_ID(SMI_L17_ID, 14) +#define M4U_L17_P15_AAHO_R1_B MTK_M4U_ID(SMI_L17_ID, 15) +#define M4U_L17_P16_LSCI_R1_B MTK_M4U_ID(SMI_L17_ID, 16) + +/* Larb19 -- ipesys */ +#define M4U_L19_P0_DVS_RDMA MTK_M4U_ID(SMI_L19_ID, 0) +#define M4U_L19_P1_DVS_WDMA MTK_M4U_ID(SMI_L19_ID, 1) +#define M4U_L19_P2_DVP_RDMA MTK_M4U_ID(SMI_L19_ID, 2) +#define M4U_L19_P3_DVP_WDMA MTK_M4U_ID(SMI_L19_ID, 3) + +/* Larb20 -- ipesys */ +#define M4U_L20_P0_FDVT_RDA_0 MTK_M4U_ID(SMI_L20_ID, 0) +#define M4U_L20_P1_FDVT_RDB_0 MTK_M4U_ID(SMI_L20_ID, 
1) +#define M4U_L20_P2_FDVT_WRA_0 MTK_M4U_ID(SMI_L20_ID, 2) +#define M4U_L20_P3_FDVT_WRB_0 MTK_M4U_ID(SMI_L20_ID, 3) +#define M4U_L20_P4_RSC_RDMA MTK_M4U_ID(SMI_L20_ID, 4) +#define M4U_L20_P5_RSC_WDMA MTK_M4U_ID(SMI_L20_ID, 5) + +/* fake larb21 for gce */ +#define M4U_L21_GCE_DM MTK_M4U_ID(21, 0) +#define M4U_L21_GCE_MM MTK_M4U_ID(21, 1) + +/* fake larb & port for svp and dual svp and wfd */ +#define M4U_PORT_SVP_HEAP MTK_M4U_ID(22, 0) +#define M4U_PORT_DUAL_SVP_HEAP MTK_M4U_ID(22, 1) +#define M4U_PORT_WFD_HEAP MTK_M4U_ID(22, 2) + +/* fake larb0 for apu */ +#define M4U_L0_APU_DATA MTK_M4U_ID(0, 0) +#define M4U_L0_APU_CODE MTK_M4U_ID(0, 1) +#define M4U_L0_APU_SECURE MTK_M4U_ID(0, 2) +#define M4U_L0_APU_VLM MTK_M4U_ID(0, 3) + +/* infra/peri */ +#define IFR_IOMMU_PORT_PCIE_0 MTK_IFAIOMMU_PERI_ID(0, 26) + +#endif diff --git a/include/linux/cper.h b/include/linux/cper.h index 0ed60a91eca9..5b1236d8c65b 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -297,11 +297,11 @@ enum { #define CPER_ARM_INFO_FLAGS_PROPAGATED BIT(2) #define CPER_ARM_INFO_FLAGS_OVERFLOW BIT(3) -#define CPER_ARM_CACHE_ERROR 0 -#define CPER_ARM_TLB_ERROR 1 -#define CPER_ARM_BUS_ERROR 2 -#define CPER_ARM_VENDOR_ERROR 3 -#define CPER_ARM_MAX_TYPE CPER_ARM_VENDOR_ERROR +#define CPER_ARM_ERR_TYPE_MASK GENMASK(4,1) +#define CPER_ARM_CACHE_ERROR BIT(1) +#define CPER_ARM_TLB_ERROR BIT(2) +#define CPER_ARM_BUS_ERROR BIT(3) +#define CPER_ARM_VENDOR_ERROR BIT(4) #define CPER_ARM_ERR_VALID_TRANSACTION_TYPE BIT(0) #define CPER_ARM_ERR_VALID_OPERATION_TYPE BIT(1) @@ -588,6 +588,8 @@ const char *cper_mem_err_type_str(unsigned int); const char *cper_mem_err_status_str(u64 status); void cper_print_bits(const char *prefix, unsigned int bits, const char * const strs[], unsigned int strs_size); +int cper_bits_to_str(char *buf, int buf_size, unsigned long bits, + const char * const strs[], unsigned int strs_size); void cper_mem_err_pack(const struct cper_sec_mem_err *, struct cper_mem_err_compact *); const char *cper_mem_err_unpack(struct trace_seq *, diff --git a/include/linux/dma-buf-mapping.h b/include/linux/dma-buf-mapping.h new file mode 100644 index 000000000000..a3c0ce2d3a42 --- /dev/null +++ b/include/linux/dma-buf-mapping.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * DMA BUF Mapping Helpers + * + */ +#ifndef __DMA_BUF_MAPPING_H__ +#define __DMA_BUF_MAPPING_H__ +#include <linux/dma-buf.h> + +struct sg_table *dma_buf_phys_vec_to_sgt(struct dma_buf_attachment *attach, + struct p2pdma_provider *provider, + struct dma_buf_phys_vec *phys_vec, + size_t nr_ranges, size_t size, + enum dma_data_direction dir); +void dma_buf_free_sgt(struct dma_buf_attachment *attach, struct sg_table *sgt, + enum dma_data_direction dir); +#endif diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index d58e329ac0e7..0bc492090237 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -22,6 +22,7 @@ #include <linux/fs.h> #include <linux/dma-fence.h> #include <linux/wait.h> +#include <linux/pci-p2pdma.h> struct device; struct dma_buf; @@ -531,6 +532,16 @@ struct dma_buf_export_info { }; /** + * struct dma_buf_phys_vec - describe continuous chunk of memory + * @paddr: physical address of that chunk + * @len: Length of this chunk + */ +struct dma_buf_phys_vec { + phys_addr_t paddr; + size_t len; +}; + +/** * DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters * @name: export-info name * diff --git a/include/linux/efi.h b/include/linux/efi.h index b23ff8b83219..2a43094e23f7 100644 --- 
a/include/linux/efi.h +++ b/include/linux/efi.h @@ -290,7 +290,7 @@ typedef efi_status_t efi_get_variable_t (efi_char16_t *name, efi_guid_t *vendor, unsigned long *data_size, void *data); typedef efi_status_t efi_get_next_variable_t (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor); -typedef efi_status_t efi_set_variable_t (efi_char16_t *name, efi_guid_t *vendor, +typedef efi_status_t efi_set_variable_t (efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data); typedef efi_status_t efi_get_next_high_mono_count_t (u32 *count); @@ -373,6 +373,8 @@ void efi_native_runtime_setup(void); #define EFI_DEVICE_PATH_TO_TEXT_PROTOCOL_GUID EFI_GUID(0x8b843e20, 0x8132, 0x4852, 0x90, 0xcc, 0x55, 0x1a, 0x4e, 0x4a, 0x7f, 0x1c) #define EFI_DEVICE_PATH_FROM_TEXT_PROTOCOL_GUID EFI_GUID(0x05c99a21, 0xc70f, 0x4ad2, 0x8a, 0x5f, 0x35, 0xdf, 0x33, 0x43, 0xf5, 0x1e) #define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID EFI_GUID(0x9042a9de, 0x23dc, 0x4a38, 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a) +#define EFI_EDID_DISCOVERED_PROTOCOL_GUID EFI_GUID(0x1c0c34f6, 0xd380, 0x41fa, 0xa0, 0x49, 0x8a, 0xd0, 0x6c, 0x1a, 0x66, 0xaa) +#define EFI_EDID_ACTIVE_PROTOCOL_GUID EFI_GUID(0xbd8c1056, 0x9f36, 0x44ec, 0x92, 0xa8, 0xa6, 0x33, 0x7f, 0x81, 0x79, 0x86) #define EFI_PCI_IO_PROTOCOL_GUID EFI_GUID(0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x02, 0x9a) #define EFI_FILE_INFO_ID EFI_GUID(0x09576e92, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define EFI_SYSTEM_RESOURCE_TABLE_GUID EFI_GUID(0xb122a263, 0x3661, 0x4f68, 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80) @@ -772,7 +774,7 @@ extern unsigned long efi_mem_attr_table; */ typedef int (*efi_memattr_perm_setter)(struct mm_struct *, efi_memory_desc_t *, bool); -extern int efi_memattr_init(void); +extern void efi_memattr_init(void); extern int efi_memattr_apply_permissions(struct mm_struct *mm, efi_memattr_perm_setter fn); diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h new file mode 100644 index 000000000000..6a9a1acb5aad --- /dev/null +++ b/include/linux/generic_pt/common.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES + */ +#ifndef __GENERIC_PT_COMMON_H +#define __GENERIC_PT_COMMON_H + +#include <linux/types.h> +#include <linux/build_bug.h> +#include <linux/bits.h> + +/** + * DOC: Generic Radix Page Table + * + * Generic Radix Page Table is a set of functions and helpers to efficiently + * parse radix style page tables typically seen in HW implementations. The + * interface is built to deliver similar code generation as the mm's pte/pmd/etc + * system by fully inlining the exact code required to handle each table level. + * + * Like the mm subsystem each format contributes its parsing implementation + * under common names and the common code implements the required algorithms. + * + * The system is divided into three logical levels: + * + * - The page table format and its manipulation functions + * - Generic helpers to give a consistent API regardless of underlying format + * - An algorithm implementation (e.g. IOMMU/DRM/KVM/MM) + * + * Multiple implementations are supported. The intention is to have the generic + * format code be re-usable for whatever specialized implementation is required. + * The generic code is solely about the format of the radix tree; it does not + * include memory allocation or higher level decisions that are left for the + * implementation. 
+ * + * The generic framework supports a superset of functions across many HW + * implementations: + * + * - Entries comprised of contiguous blocks of IO PTEs for larger page sizes + * - Multi-level tables, up to 6 levels. Runtime selected top level + * - Runtime variable table level size (ARM's concatenated tables) + * - Expandable top level allowing dynamic sizing of table levels + * - Optional leaf entries at any level + * - 32-bit/64-bit virtual and output addresses, using every address bit + * - Dirty tracking + * - Sign extended addressing + */ + +/** + * struct pt_common - struct for all page table implementations + */ +struct pt_common { + /** + * @top_of_table: Encodes the table top pointer and the top level in a + * single value. Must use READ_ONCE/WRITE_ONCE to access it. The lower + * bits of the aligned table pointer are used for the level. + */ + uintptr_t top_of_table; + /** + * @max_oasz_lg2: Maximum number of bits the OA can contain. Upper bits + * must be zero. This may be less than what the page table format + * supports, but must not be more. + */ + u8 max_oasz_lg2; + /** + * @max_vasz_lg2: Maximum number of bits the VA can contain. Upper bits + * are 0 or 1 depending on pt_full_va_prefix(). This may be less than + * what the page table format supports, but must not be more. When + * PT_FEAT_DYNAMIC_TOP is set this reflects the maximum VA capability. + */ + u8 max_vasz_lg2; + /** + * @features: Bitmap of `enum pt_features` + */ + unsigned int features; +}; + +/* Encoding parameters for top_of_table */ +enum { + PT_TOP_LEVEL_BITS = 3, + PT_TOP_LEVEL_MASK = GENMASK(PT_TOP_LEVEL_BITS - 1, 0), +}; + +/** + * enum pt_features - Features turned on in the table. Each symbol is a bit + * position. + */ +enum pt_features { + /** + * @PT_FEAT_DMA_INCOHERENT: Cache flush page table memory before + * assuming the HW can read it. Otherwise a SMP release is sufficient + * for HW to read it. + */ + PT_FEAT_DMA_INCOHERENT, + /** + * @PT_FEAT_FULL_VA: The table can span the full VA range from 0 to + * PT_VADDR_MAX. + */ + PT_FEAT_FULL_VA, + /** + * @PT_FEAT_DYNAMIC_TOP: The table's top level can be increased + * dynamically during map. This requires HW support for atomically + * setting both the table top pointer and the starting table level. + */ + PT_FEAT_DYNAMIC_TOP, + /** + * @PT_FEAT_SIGN_EXTEND: The top most bit of the valid VA range sign + * extends up to the full pt_vaddr_t. This divides the page table into + * three VA ranges:: + * + * 0 -> 2^N - 1 Lower + * 2^N -> (MAX - 2^N - 1) Non-Canonical + * MAX - 2^N -> MAX Upper + * + * In this mode pt_common::max_vasz_lg2 includes the sign bit and the + * upper bits that don't fall within the translation are just validated. + * + * If not set there is no sign extension and valid VA goes from 0 to 2^N + * - 1. + */ + PT_FEAT_SIGN_EXTEND, + /** + * @PT_FEAT_FLUSH_RANGE: IOTLB maintenance is done by flushing IOVA + * ranges which will clean out any walk cache or any IOPTE fully + * contained by the range. The optimization objective is to minimize the + * number of flushes even if ranges include IOVA gaps that do not need + * to be flushed. + */ + PT_FEAT_FLUSH_RANGE, + /** + * @PT_FEAT_FLUSH_RANGE_NO_GAPS: Like PT_FEAT_FLUSH_RANGE except that + * the optimization objective is to only flush IOVA that has been + * changed. This mode is suitable for cases like hypervisor shadowing + * where flushing unchanged ranges may cause the hypervisor to reparse + * significant amount of page table. 
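/*
 * An illustrative check of the PT_FEAT_SIGN_EXTEND rule described above:
 * a VA is valid when sign-extending its low max_vasz_lg2 bits reproduces
 * the original value. The helper name is hypothetical; sign_extend64()
 * is from <linux/bitops.h>.
 */
static bool example_va_is_canonical(u64 va, unsigned int max_vasz_lg2)
{
	return sign_extend64(va, max_vasz_lg2 - 1) == (s64)va;
}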
+ */ + PT_FEAT_FLUSH_RANGE_NO_GAPS, + /* private: */ + PT_FEAT_FMT_START, +}; + +struct pt_amdv1 { + struct pt_common common; +}; + +enum { + /* + * The memory backing the tables is encrypted. Use __sme_set() to adjust + * the page table pointers in the tree. This only works with + * CONFIG_AMD_MEM_ENCRYPT. + */ + PT_FEAT_AMDV1_ENCRYPT_TABLES = PT_FEAT_FMT_START, + /* + * The PTEs are set to prevent cache incoherent traffic, such as PCI no + * snoop. This is set either at creation time or before the first map + * operation. + */ + PT_FEAT_AMDV1_FORCE_COHERENCE, +}; + +struct pt_vtdss { + struct pt_common common; +}; + +enum { + /* + * The PTEs are set to prevent cache incoherent traffic, such as PCI no + * snoop. This is set either at creation time or before the first map + * operation. + */ + PT_FEAT_VTDSS_FORCE_COHERENCE = PT_FEAT_FMT_START, + /* + * Prevent creating read-only PTEs. Used to work around HW errata + * ERRATA_772415_SPR17. + */ + PT_FEAT_VTDSS_FORCE_WRITEABLE, +}; + +struct pt_x86_64 { + struct pt_common common; +}; + +enum { + /* + * The memory backing the tables is encrypted. Use __sme_set() to adjust + * the page table pointers in the tree. This only works with + * CONFIG_AMD_MEM_ENCRYPT. + */ + PT_FEAT_X86_64_AMD_ENCRYPT_TABLES = PT_FEAT_FMT_START, +}; + +#endif diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h new file mode 100644 index 000000000000..9eefbb74efd0 --- /dev/null +++ b/include/linux/generic_pt/iommu.h @@ -0,0 +1,293 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES + */ +#ifndef __GENERIC_PT_IOMMU_H +#define __GENERIC_PT_IOMMU_H + +#include <linux/generic_pt/common.h> +#include <linux/iommu.h> +#include <linux/mm_types.h> + +struct iommu_iotlb_gather; +struct pt_iommu_ops; +struct pt_iommu_driver_ops; +struct iommu_dirty_bitmap; + +/** + * DOC: IOMMU Radix Page Table + * + * The IOMMU implementation of the Generic Page Table provides an ops struct + * that is useful to go with an iommu_domain to serve the DMA API, IOMMUFD and + * the generic map/unmap interface. + * + * This interface uses a caller provided locking approach. The caller must have + * a VA range lock concept that prevents concurrent threads from calling ops on + * the same VA. Generally the range lock must be at least as large as a single + * map call. + */ + +/** + * struct pt_iommu - Base structure for IOMMU page tables + * + * The format-specific struct will include this as the first member. + */ +struct pt_iommu { + /** + * @domain: The core IOMMU domain. The driver should use a union to + * overlay this memory with its previously existing domain struct to + * create an alias. + */ + struct iommu_domain domain; + + /** + * @ops: Function pointers to access the API + */ + const struct pt_iommu_ops *ops; + + /** + * @driver_ops: Function pointers provided by the HW driver to help + * manage HW details like caches. + */ + const struct pt_iommu_driver_ops *driver_ops; + + /** + * @nid: Node ID to use for table memory allocations. The IOMMU driver + * may want to set the NID to the device's NID, if there are multiple + * table walkers. + */ + int nid; + + /** + * @iommu_device: Device pointer used for any DMA cache flushing when + * PT_FEAT_DMA_INCOHERENT. This is the iommu device that created the + * page table which must have dma ops that perform cache flushing. 
+ */ + struct device *iommu_device; +}; + +/** + * struct pt_iommu_info - Details about the IOMMU page table + * + * Returned from pt_iommu_ops->get_info() + */ +struct pt_iommu_info { + /** + * @pgsize_bitmap: A bitmask where each set bit indicates + * a page size that can be natively stored in the page table. + */ + u64 pgsize_bitmap; +}; + +struct pt_iommu_ops { + /** + * @set_dirty: Make the iova write dirty + * @iommu_table: Table to manipulate + * @iova: IO virtual address to start + * + * This is only used by iommufd testing. It makes the iova dirty so that + * read_and_clear_dirty() will see it as dirty. Unlike all the other ops + * this one is safe to call without holding any locking. It may return + * -EAGAIN if there is a race. + */ + int (*set_dirty)(struct pt_iommu *iommu_table, dma_addr_t iova); + + /** + * @get_info: Return the pt_iommu_info structure + * @iommu_table: Table to query + * + * Return some basic static information about the page table. + */ + void (*get_info)(struct pt_iommu *iommu_table, + struct pt_iommu_info *info); + + /** + * @deinit: Undo a format specific init operation + * @iommu_table: Table to destroy + * + * Release all of the memory. The caller must have already removed the + * table from all HW access and all caches. + */ + void (*deinit)(struct pt_iommu *iommu_table); +}; + +/** + * struct pt_iommu_driver_ops - HW IOTLB cache flushing operations + * + * The IOMMU driver should implement these using container_of(iommu_table) to + * get to its iommu_domain derived structure. All ops can be called in atomic + * contexts as they are buried under DMA API calls. + */ +struct pt_iommu_driver_ops { + /** + * @change_top: Update the top of table pointer + * @iommu_table: Table to operate on + * @top_paddr: New CPU physical address of the top pointer + * @top_level: IOMMU PT level of the new top + * + * Called under the get_top_lock() spinlock. The driver must update all + * HW references to this domain with a new top address and + * configuration. On return mappings placed in the new top must be + * reachable by the HW. + * + * top_level encodes the level in IOMMU PT format, level 0 is the + * smallest page size increasing from there. This has to be translated + * to any HW specific format. During this call the new top will not be + * visible to any other API. + * + * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if + * enabled. + */ + void (*change_top)(struct pt_iommu *iommu_table, phys_addr_t top_paddr, + unsigned int top_level); + + /** + * @get_top_lock: lock to hold when changing the table top + * @iommu_table: Table to operate on + * + * Return the lock to hold while changing which top page table is + * stored in HW. The lock will be held prior to calling + * change_top() and released once the top is fully visible. + * + * Typically this would be a lock that protects the iommu_domain's + * attachment list. + * + * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if + * enabled. + */ + spinlock_t *(*get_top_lock)(struct pt_iommu *iommu_table); +}; + +static inline void pt_iommu_deinit(struct pt_iommu *iommu_table) +{ + /* + * It is safe to call pt_iommu_deinit() before an init, or if init + * fails. The ops pointer will only become non-NULL if deinit needs to be + * run. + */ + if (iommu_table->ops) + iommu_table->ops->deinit(iommu_table); +} + +/** + * struct pt_iommu_cfg - Common configuration values for all formats + */ +struct pt_iommu_cfg { + /** + * @features: Features required. 
Only these features will be turned on. + * The feature list should reflect what the IOMMU HW is capable of. + */ + unsigned int features; + /** + * @hw_max_vasz_lg2: Maximum VA the IOMMU HW can support. This will + * imply the top level of the table. + */ + u8 hw_max_vasz_lg2; + /** + * @hw_max_oasz_lg2: Maximum OA the IOMMU HW can support. The format + * might select a lower maximum OA. + */ + u8 hw_max_oasz_lg2; +}; + +/* Generate the exported function signatures from iommu_pt.h */ +#define IOMMU_PROTOTYPES(fmt) \ + phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \ + dma_addr_t iova); \ + int pt_iommu_##fmt##_map_pages(struct iommu_domain *domain, \ + unsigned long iova, phys_addr_t paddr, \ + size_t pgsize, size_t pgcount, \ + int prot, gfp_t gfp, size_t *mapped); \ + size_t pt_iommu_##fmt##_unmap_pages( \ + struct iommu_domain *domain, unsigned long iova, \ + size_t pgsize, size_t pgcount, \ + struct iommu_iotlb_gather *iotlb_gather); \ + int pt_iommu_##fmt##_read_and_clear_dirty( \ + struct iommu_domain *domain, unsigned long iova, size_t size, \ + unsigned long flags, struct iommu_dirty_bitmap *dirty); \ + int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table, \ + const struct pt_iommu_##fmt##_cfg *cfg, \ + gfp_t gfp); \ + void pt_iommu_##fmt##_hw_info(struct pt_iommu_##fmt *table, \ + struct pt_iommu_##fmt##_hw_info *info) +#define IOMMU_FORMAT(fmt, member) \ + struct pt_iommu_##fmt { \ + struct pt_iommu iommu; \ + struct pt_##fmt member; \ + }; \ + IOMMU_PROTOTYPES(fmt) + +/* + * A driver uses IOMMU_PT_DOMAIN_OPS to populate the iommu_domain_ops for the + * iommu_pt + */ +#define IOMMU_PT_DOMAIN_OPS(fmt) \ + .iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \ + .map_pages = &pt_iommu_##fmt##_map_pages, \ + .unmap_pages = &pt_iommu_##fmt##_unmap_pages +#define IOMMU_PT_DIRTY_OPS(fmt) \ + .read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty + +/* + * The driver should setup its domain struct like + * union { + * struct iommu_domain domain; + * struct pt_iommu_xxx xx; + * }; + * PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, xx.iommu, domain); + * + * Which creates an alias between driver_domain.domain and + * driver_domain.xx.iommu.domain. This is to avoid a mass rename of existing + * driver_domain.domain users. 
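/*
 * Filling in the template above with the amdv1 format generated further
 * below by IOMMU_FORMAT(); the mock_* names are illustrative only.
 */
struct mock_iommu_domain {
	union {
		struct iommu_domain domain;
		struct pt_iommu_amdv1 xx;
	};
};
PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, xx.iommu, domain);

static const struct iommu_domain_ops mock_domain_ops = {
	IOMMU_PT_DOMAIN_OPS(amdv1),
	/* .attach_dev, .free, etc. remain driver specific */
};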
+ */ +#define PT_IOMMU_CHECK_DOMAIN(s, pt_iommu_memb, domain_memb) \ + static_assert(offsetof(s, pt_iommu_memb.domain) == \ + offsetof(s, domain_memb)) + +struct pt_iommu_amdv1_cfg { + struct pt_iommu_cfg common; + unsigned int starting_level; +}; + +struct pt_iommu_amdv1_hw_info { + u64 host_pt_root; + u8 mode; +}; + +IOMMU_FORMAT(amdv1, amdpt); + +/* amdv1_mock is used by the iommufd selftest */ +#define pt_iommu_amdv1_mock pt_iommu_amdv1 +#define pt_iommu_amdv1_mock_cfg pt_iommu_amdv1_cfg +struct pt_iommu_amdv1_mock_hw_info; +IOMMU_PROTOTYPES(amdv1_mock); + +struct pt_iommu_vtdss_cfg { + struct pt_iommu_cfg common; + /* 4 is a 57 bit 5 level table */ + unsigned int top_level; +}; + +struct pt_iommu_vtdss_hw_info { + u64 ssptptr; + u8 aw; +}; + +IOMMU_FORMAT(vtdss, vtdss_pt); + +struct pt_iommu_x86_64_cfg { + struct pt_iommu_cfg common; + /* 4 is a 57 bit 5 level table */ + unsigned int top_level; +}; + +struct pt_iommu_x86_64_hw_info { + u64 gcr3_pt; + u8 levels; +}; + +IOMMU_FORMAT(x86_64, x86_64_pt); + +#undef IOMMU_PROTOTYPES +#undef IOMMU_FORMAT +#endif diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index c4690e365ade..ca1ec437a3ca 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -99,6 +99,9 @@ #define QM_DEV_ALG_MAX_LEN 256 +#define QM_MIG_REGION_SEL 0x100198 +#define QM_MIG_REGION_EN BIT(0) + /* uacce mode of the driver */ #define UACCE_MODE_NOUACCE 0 /* don't use uacce */ #define UACCE_MODE_SVA 1 /* use uacce sva mode */ diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 8a823c6f2b4a..7a1516011ccf 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -15,8 +15,6 @@ enum io_pgtable_fmt { ARM_64_LPAE_S2, ARM_V7S, ARM_MALI_LPAE, - AMD_IOMMU_V1, - AMD_IOMMU_V2, APPLE_DART, APPLE_DART2, IO_PGTABLE_NUM_FMTS, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c30d12e16473..801b2bd9e8d4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -751,7 +751,8 @@ struct iommu_ops { * @free: Release the domain after use. 
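/*
 * The iommu_domain_ops hunk below adds the previously attached domain as
 * a third attach_dev() argument, mirroring set_dev_pasid(). A sketch of
 * a hypothetical driver conversion (the mock_* helpers are illustrative,
 * not a real API):
 */
static int mock_attach_dev(struct iommu_domain *domain, struct device *dev,
			   struct iommu_domain *old)
{
	if (old) /* tear down state tied to the previous domain */
		mock_detach(old, dev);
	return mock_install_tables(domain, dev);
}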
*/ struct iommu_domain_ops { - int (*attach_dev)(struct iommu_domain *domain, struct device *dev); + int (*attach_dev)(struct iommu_domain *domain, struct device *dev, + struct iommu_domain *old); int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, ioasid_t pasid, struct iommu_domain *old); diff --git a/include/linux/irqchip/riscv-imsic.h b/include/linux/irqchip/riscv-imsic.h index 7494952c5518..7f3ff5c5ea53 100644 --- a/include/linux/irqchip/riscv-imsic.h +++ b/include/linux/irqchip/riscv-imsic.h @@ -10,7 +10,6 @@ #include <linux/bitops.h> #include <linux/device.h> #include <linux/fwnode.h> -#include <asm/csr.h> #define IMSIC_MMIO_PAGE_SHIFT 12 #define IMSIC_MMIO_PAGE_SZ BIT(IMSIC_MMIO_PAGE_SHIFT) @@ -86,7 +85,7 @@ static inline const struct imsic_global_config *imsic_get_global_config(void) #endif -#ifdef CONFIG_ACPI +#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_RISCV_IMSIC) int imsic_platform_acpi_probe(struct fwnode_handle *fwnode); struct fwnode_handle *imsic_acpi_get_fwnode(struct device *dev); #else diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 2e85504ba2ba..48f68c4dcfa5 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -115,8 +115,8 @@ struct pci_epf_driver { * @phys_addr: physical address that should be mapped to the BAR * @addr: virtual address corresponding to the @phys_addr * @size: the size of the address space present in BAR - * @aligned_size: the size actually allocated to accommodate the iATU alignment - * requirement + * @mem_size: the size actually allocated to accommodate the iATU alignment + * requirement * @barno: BAR number * @flags: flags that are set for the BAR */ @@ -124,7 +124,7 @@ struct pci_epf_bar { dma_addr_t phys_addr; void *addr; size_t size; - size_t aligned_size; + size_t mem_size; enum pci_barno barno; int flags; }; @@ -242,6 +242,12 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar, enum pci_epc_interface_type type); +int pci_epf_assign_bar_space(struct pci_epf *epf, size_t size, + enum pci_barno bar, + const struct pci_epc_features *epc_features, + enum pci_epc_interface_type type, + dma_addr_t bar_addr); + int pci_epf_align_inbound_addr(struct pci_epf *epf, enum pci_barno bar, u64 addr, dma_addr_t *base, size_t *off); int pci_epf_bind(struct pci_epf *epf); diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h index 951f81a38f3a..517e121d2598 100644 --- a/include/linux/pci-p2pdma.h +++ b/include/linux/pci-p2pdma.h @@ -16,7 +16,58 @@ struct block_device; struct scatterlist; +/** + * struct p2pdma_provider + * + * A p2pdma provider is a range of MMIO address space available to the CPU. + */ +struct p2pdma_provider { + struct device *owner; + u64 bus_offset; +}; + +enum pci_p2pdma_map_type { + /* + * PCI_P2PDMA_MAP_UNKNOWN: Used internally as an initial state before + * the mapping type has been calculated. Exported routines for the API + * will never return this value. + */ + PCI_P2PDMA_MAP_UNKNOWN = 0, + + /* + * Not a PCI P2PDMA transfer. + */ + PCI_P2PDMA_MAP_NONE, + + /* + * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will + * traverse the host bridge and the host bridge is not in the + * allowlist. DMA Mapping routines should return an error when + * this is returned. 
+ */ + PCI_P2PDMA_MAP_NOT_SUPPORTED, + + /* + * PCI_P2PDMA_MAP_BUS_ADDR: Indicates that two devices can talk to + * each other directly through a PCI switch and the transaction will + * not traverse the host bridge. Such a mapping should program + * the DMA engine with PCI bus addresses. + */ + PCI_P2PDMA_MAP_BUS_ADDR, + + /* + * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk + * to each other, but the transaction traverses a host bridge on the + * allowlist. In this case, a normal mapping either with CPU physical + * addresses (in the case of dma-direct) or IOVA addresses (in the + * case of IOMMUs) should be used to program the DMA engine. + */ + PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, +}; + #ifdef CONFIG_PCI_P2PDMA +int pcim_p2pdma_init(struct pci_dev *pdev); +struct p2pdma_provider *pcim_p2pdma_provider(struct pci_dev *pdev, int bar); int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, u64 offset); int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, @@ -33,7 +84,18 @@ int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev, bool *use_p2pdma); ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev, bool use_p2pdma); +enum pci_p2pdma_map_type pci_p2pdma_map_type(struct p2pdma_provider *provider, + struct device *dev); #else /* CONFIG_PCI_P2PDMA */ +static inline int pcim_p2pdma_init(struct pci_dev *pdev) +{ + return -EOPNOTSUPP; +} +static inline struct p2pdma_provider *pcim_p2pdma_provider(struct pci_dev *pdev, + int bar) +{ + return NULL; +} static inline int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, u64 offset) { @@ -85,6 +147,11 @@ static inline ssize_t pci_p2pdma_enable_show(char *page, { return sprintf(page, "none\n"); } +static inline enum pci_p2pdma_map_type +pci_p2pdma_map_type(struct p2pdma_provider *provider, struct device *dev) +{ + return PCI_P2PDMA_MAP_NOT_SUPPORTED; +} #endif /* CONFIG_PCI_P2PDMA */ @@ -99,51 +166,12 @@ static inline struct pci_dev *pci_p2pmem_find(struct device *client) return pci_p2pmem_find_many(&client, 1); } -enum pci_p2pdma_map_type { - /* - * PCI_P2PDMA_MAP_UNKNOWN: Used internally as an initial state before - * the mapping type has been calculated. Exported routines for the API - * will never return this value. - */ - PCI_P2PDMA_MAP_UNKNOWN = 0, - - /* - * Not a PCI P2PDMA transfer. - */ - PCI_P2PDMA_MAP_NONE, - - /* - * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will - * traverse the host bridge and the host bridge is not in the - * allowlist. DMA Mapping routines should return an error when - * this is returned. - */ - PCI_P2PDMA_MAP_NOT_SUPPORTED, - - /* - * PCI_P2PDMA_MAP_BUS_ADDR: Indicates that two devices can talk to - * each other directly through a PCI switch and the transaction will - * not traverse the host bridge. Such a mapping should program - * the DMA engine with PCI bus addresses. - */ - PCI_P2PDMA_MAP_BUS_ADDR, - - /* - * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk - * to each other, but the transaction traverses a host bridge on the - * allowlist. In this case, a normal mapping either with CPU physical - * addresses (in the case of dma-direct) or IOVA addresses (in the - * case of IOMMUs) should be used to program the DMA engine. 
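/*
 * A sketch of how a DMA mapping loop consumes the reworked state API in
 * this file; the helper name, fixed size/direction and error policy are
 * illustrative only.
 */
static dma_addr_t example_map_one(struct device *dev, struct page *page,
				  struct pci_p2pdma_map_state *state)
{
	switch (pci_p2pdma_state(state, dev, page)) {
	case PCI_P2PDMA_MAP_BUS_ADDR:
		/* program the engine with a PCI bus address */
		return pci_p2pdma_bus_addr_map(state->mem, page_to_phys(page));
	case PCI_P2PDMA_MAP_NONE:		/* regular memory */
	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:	/* normal mapping works */
		return dma_map_page(dev, page, 0, PAGE_SIZE,
				    DMA_BIDIRECTIONAL);
	default:
		return DMA_MAPPING_ERROR;	/* MAP_NOT_SUPPORTED */
	}
}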
- */ - PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, -}; - struct pci_p2pdma_map_state { - struct dev_pagemap *pgmap; + struct p2pdma_provider *mem; enum pci_p2pdma_map_type map; - u64 bus_off; }; + /* helper for pci_p2pdma_state(), do not use directly */ void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state, struct device *dev, struct page *page); @@ -162,8 +190,7 @@ pci_p2pdma_state(struct pci_p2pdma_map_state *state, struct device *dev, struct page *page) { if (IS_ENABLED(CONFIG_PCI_P2PDMA) && is_pci_p2pdma_page(page)) { - if (state->pgmap != page_pgmap(page)) - __pci_p2pdma_update_state(state, dev, page); + __pci_p2pdma_update_state(state, dev, page); return state->map; } return PCI_P2PDMA_MAP_NONE; @@ -172,16 +199,15 @@ pci_p2pdma_state(struct pci_p2pdma_map_state *state, struct device *dev, /** * pci_p2pdma_bus_addr_map - Translate a physical address to a bus address * for a PCI_P2PDMA_MAP_BUS_ADDR transfer. - * @state: P2P state structure + * @provider: P2P provider structure * @paddr: physical address to map * * Map a physically contiguous PCI_P2PDMA_MAP_BUS_ADDR transfer. */ static inline dma_addr_t -pci_p2pdma_bus_addr_map(struct pci_p2pdma_map_state *state, phys_addr_t paddr) +pci_p2pdma_bus_addr_map(struct p2pdma_provider *provider, phys_addr_t paddr) { - WARN_ON_ONCE(state->map != PCI_P2PDMA_MAP_BUS_ADDR); - return paddr + state->bus_off; + return paddr + provider->bus_offset; } #endif /* _LINUX_PCI_P2P_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index bf97d49c23cf..b16127c6a7b4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -502,6 +502,8 @@ struct pci_dev { #ifdef CONFIG_PCIE_PTM u16 ptm_cap; /* PTM Capability */ unsigned int ptm_root:1; + unsigned int ptm_responder:1; + unsigned int ptm_requester:1; unsigned int ptm_enabled:1; u8 ptm_granularity; #endif @@ -648,6 +650,7 @@ struct pci_host_bridge *pci_alloc_host_bridge(size_t priv); struct pci_host_bridge *devm_pci_alloc_host_bridge(struct device *dev, size_t priv); void pci_free_host_bridge(struct pci_host_bridge *bridge); +struct device *pci_get_host_bridge_device(struct pci_dev *dev); struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus); void pci_set_host_bridge_release(struct pci_host_bridge *bridge, @@ -831,6 +834,7 @@ struct pci_ops { void __iomem *(*map_bus)(struct pci_bus *bus, unsigned int devfn, int where); int (*read)(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val); int (*write)(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val); + int (*assert_perst)(struct pci_bus *bus, bool assert); }; /* @@ -1421,16 +1425,16 @@ void pcibios_reset_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); int pci_release_resource(struct pci_dev *dev, int resno); -static inline int pci_rebar_bytes_to_size(u64 bytes) -{ - bytes = roundup_pow_of_two(bytes); - /* Return BAR size as defined in the resizable BAR specification */ - return max(ilog2(bytes), 20) - 20; -} +/* Resizable BAR related routines */ +int pci_rebar_bytes_to_size(u64 bytes); +resource_size_t pci_rebar_size_to_bytes(int size); +u64 pci_rebar_get_possible_sizes(struct pci_dev *pdev, int bar); +bool pci_rebar_size_supported(struct pci_dev *pdev, int bar, int size); +int pci_rebar_get_max_size(struct pci_dev *pdev, int bar); +int __must_check pci_resize_resource(struct pci_dev *dev, int i, int size, + int exclude_bars); -u32 pci_rebar_get_possible_sizes(struct pci_dev *pdev, int 
bar); -int __must_check pci_resize_resource(struct pci_dev *dev, int i, int size); int pci_select_bars(struct pci_dev *dev, unsigned long flags); bool pci_device_is_present(struct pci_dev *pdev); void pci_ignore_hotplug(struct pci_dev *dev); @@ -1958,10 +1962,17 @@ DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T)) */ #ifdef CONFIG_PCI_DOMAINS extern int pci_domains_supported; +int pci_bus_find_emul_domain_nr(u32 hint, u32 min, u32 max); +void pci_bus_release_emul_domain_nr(int domain_nr); #else enum { pci_domains_supported = 0 }; static inline int pci_domain_nr(struct pci_bus *bus) { return 0; } static inline int pci_proc_domain(struct pci_bus *bus) { return 0; } +static inline int pci_bus_find_emul_domain_nr(u32 hint, u32 min, u32 max) +{ + return 0; +} +static inline void pci_bus_release_emul_domain_nr(int domain_nr) { } #endif /* CONFIG_PCI_DOMAINS */ /* diff --git a/include/linux/power/max77705_charger.h b/include/linux/power/max77705_charger.h index 6653abfdf747..b3950ce0625e 100644 --- a/include/linux/power/max77705_charger.h +++ b/include/linux/power/max77705_charger.h @@ -123,6 +123,8 @@ #define MAX77705_DISABLE_SKIP 1 #define MAX77705_AUTO_SKIP 0 +#define AICL_WORK_DELAY_MS 100 + /* uA */ #define MAX77705_CURRENT_CHGIN_STEP 25000 #define MAX77705_CURRENT_CHG_STEP 50000 diff --git a/include/linux/ras.h b/include/linux/ras.h index a64182bc72ad..468941bfe855 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -24,8 +24,7 @@ int __init parse_cec_param(char *str); void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, const char *fru_text, const u8 sev, const u8 *err, const u32 len); -void log_arm_hw_error(struct cper_sec_proc_arm *err); - +void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev); #else static inline void log_non_standard_event(const guid_t *sec_type, @@ -33,7 +32,7 @@ log_non_standard_event(const guid_t *sec_type, const u8 sev, const u8 *err, const u32 len) { return; } static inline void -log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } +log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev) { return; } #endif struct atl_err { @@ -53,4 +52,15 @@ static inline unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; } #endif /* CONFIG_AMD_ATL */ +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) +#include <asm/smp_plat.h> +/* + * Include ARM-specific SMP header which provides a function mapping mpidr to + * CPU logical index. 
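/*
 * A sketch of the intended use of the macro defined just below: resolve
 * the logical CPU for a CPER ARM record before reporting it. The
 * function name is illustrative.
 */
static void example_report(struct cper_sec_proc_arm *err)
{
	int cpu = GET_LOGICAL_INDEX(err->mpidr); /* negative if unknown */

	if (cpu >= 0)
		pr_err("ARM hardware error on CPU %d\n", cpu);
}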
+ */
+#define GET_LOGICAL_INDEX(mpidr) get_logical_index(mpidr & MPIDR_HWID_BITMASK)
+#else
+#define GET_LOGICAL_INDEX(mpidr) -EINVAL
+#endif /* CONFIG_ARM || CONFIG_ARM64 */
+
 #endif /* __RAS_H__ */
diff --git a/include/linux/sizes.h b/include/linux/sizes.h
index 49039494076f..f1f1a055b047 100644
--- a/include/linux/sizes.h
+++ b/include/linux/sizes.h
@@ -67,5 +67,6 @@
 #define SZ_16T _AC(0x100000000000, ULL)
 #define SZ_32T _AC(0x200000000000, ULL)
 #define SZ_64T _AC(0x400000000000, ULL)
+#define SZ_128T _AC(0x800000000000, ULL)
 
 #endif /* __LINUX_SIZES_H__ */
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index b15360ff78d7..3d8f7d1ce2b8 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -26,7 +26,9 @@
 #include <crypto/aes.h>
 
 #define TPM_DIGEST_SIZE 20	/* Max TPM v1.2 PCR size */
-#define TPM_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE
+
+#define TPM2_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE
+#define TPM2_MAX_PCR_BANKS 8
 
 struct tpm_chip;
 struct trusted_key_payload;
@@ -68,7 +70,7 @@ enum tpm2_curves {
 
 struct tpm_digest {
 	u16 alg_id;
-	u8 digest[TPM_MAX_DIGEST_SIZE];
+	u8 digest[TPM2_MAX_DIGEST_SIZE];
 } __packed;
 
 struct tpm_bank_info {
@@ -189,7 +191,7 @@ struct tpm_chip {
 	unsigned int groups_cnt;
 
 	u32 nr_allocated_banks;
-	struct tpm_bank_info *allocated_banks;
+	struct tpm_bank_info allocated_banks[TPM2_MAX_PCR_BANKS];
 #ifdef CONFIG_ACPI
 	acpi_handle acpi_dev_handle;
 	char ppi_version[TPM_PPI_VERSION_LEN + 1];
@@ -454,8 +456,10 @@ static inline ssize_t tpm_ret_to_err(ssize_t ret)
 		return 0;
 	case TPM2_RC_SESSION_MEMORY:
 		return -ENOMEM;
+	case TPM2_RC_HASH:
+		return -EINVAL;
 	default:
-		return -EFAULT;
+		return -EPERM;
 	}
 }
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index eb563f538dee..e90859956514 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -21,6 +21,7 @@
 struct kvm;
 struct iommufd_ctx;
 struct iommufd_device;
 struct iommufd_access;
+struct vfio_info_cap;
 
 /*
  * VFIO devices can be placed in a set, this allows all devices to share this
@@ -132,6 +133,9 @@ struct vfio_device_ops {
 			size_t count, loff_t *size);
 	long (*ioctl)(struct vfio_device *vdev, unsigned int cmd,
 		      unsigned long arg);
+	int (*get_region_info_caps)(struct vfio_device *vdev,
+				    struct vfio_region_info *info,
+				    struct vfio_info_cap *caps);
 	int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
 	void (*request)(struct vfio_device *vdev, unsigned int count);
 	int (*match)(struct vfio_device *vdev, char *buf);
@@ -297,6 +301,8 @@ static inline void vfio_put_device(struct vfio_device *device)
 int vfio_register_group_dev(struct vfio_device *device);
 int vfio_register_emulated_iommu_dev(struct vfio_device *device);
 void vfio_unregister_group_dev(struct vfio_device *device);
+bool vfio_device_try_get_registration(struct vfio_device *device);
+void vfio_device_put_registration(struct vfio_device *device);
 
 int vfio_assign_device_set(struct vfio_device *device, void *set_id);
 unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set);
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index f541044e42a2..706877f998ff 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -12,6 +12,7 @@
 #include <linux/pci.h>
 #include <linux/vfio.h>
 #include <linux/irqbypass.h>
+#include <linux/rcupdate.h>
 #include <linux/types.h>
 #include <linux/uuid.h>
 #include <linux/notifier.h>
@@ -26,6 +27,14 @@
 struct vfio_pci_core_device;
 struct vfio_pci_region;
 
+struct p2pdma_provider;
+struct dma_buf_phys_vec;
+struct dma_buf_attachment;
+
+struct vfio_pci_eventfd {
+	struct eventfd_ctx *ctx;
+	struct rcu_head rcu;
+};
 
 struct vfio_pci_regops {
 	ssize_t (*rw)(struct vfio_pci_core_device *vdev, char __user *buf,
@@ -49,9 +58,48 @@ struct vfio_pci_region {
 	u32 flags;
 };
 
+struct vfio_pci_device_ops {
+	int (*get_dmabuf_phys)(struct vfio_pci_core_device *vdev,
+			       struct p2pdma_provider **provider,
+			       unsigned int region_index,
+			       struct dma_buf_phys_vec *phys_vec,
+			       struct vfio_region_dma_range *dma_ranges,
+			       size_t nr_ranges);
+};
+
+#if IS_ENABLED(CONFIG_VFIO_PCI_DMABUF)
+int vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
+				struct vfio_region_dma_range *dma_ranges,
+				size_t nr_ranges, phys_addr_t start,
+				phys_addr_t len);
+int vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev,
+				  struct p2pdma_provider **provider,
+				  unsigned int region_index,
+				  struct dma_buf_phys_vec *phys_vec,
+				  struct vfio_region_dma_range *dma_ranges,
+				  size_t nr_ranges);
+#else
+static inline int
+vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
+			    struct vfio_region_dma_range *dma_ranges,
+			    size_t nr_ranges, phys_addr_t start,
+			    phys_addr_t len)
+{
+	return -EINVAL;
+}
+static inline int vfio_pci_core_get_dmabuf_phys(
+	struct vfio_pci_core_device *vdev, struct p2pdma_provider **provider,
+	unsigned int region_index, struct dma_buf_phys_vec *phys_vec,
+	struct vfio_region_dma_range *dma_ranges, size_t nr_ranges)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
 struct vfio_pci_core_device {
 	struct vfio_device vdev;
 	struct pci_dev *pdev;
+	const struct vfio_pci_device_ops *pci_ops;
 	void __iomem *barmap[PCI_STD_NUM_BARS];
 	bool bar_mmap_supported[PCI_STD_NUM_BARS];
 	u8 *pci_config_map;
@@ -83,8 +131,8 @@ struct vfio_pci_core_device {
 	struct pci_saved_state *pci_saved_state;
 	struct pci_saved_state *pm_save;
 	int ioeventfds_nr;
-	struct eventfd_ctx *err_trigger;
-	struct eventfd_ctx *req_trigger;
+	struct vfio_pci_eventfd __rcu *err_trigger;
+	struct vfio_pci_eventfd __rcu *req_trigger;
 	struct eventfd_ctx *pm_wake_eventfd_ctx;
 	struct list_head dummy_resources_list;
 	struct mutex ioeventfds_lock;
@@ -94,6 +142,7 @@ struct vfio_pci_core_device {
 	struct vfio_pci_core_device *sriov_pf_core_dev;
 	struct notifier_block nb;
 	struct rw_semaphore memory_lock;
+	struct list_head dmabufs;
 };
 
 /* Will be exported for vfio pci drivers usage */
@@ -115,10 +164,16 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
 			 unsigned long arg);
 int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
 				void __user *arg, size_t argsz);
+int vfio_pci_ioctl_get_region_info(struct vfio_device *core_vdev,
+				   struct vfio_region_info *info,
+				   struct vfio_info_cap *caps);
 ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
 			   size_t count, loff_t *ppos);
 ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
 			    size_t count, loff_t *ppos);
+vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
+				   struct vm_fault *vmf, unsigned long pfn,
+				   unsigned int order);
 int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma);
 void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count);
 int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf);
@@ -134,6 +189,7 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
 			       void __iomem *io, char __user *buf,
 			       loff_t off, size_t count, size_t x_start,
 			       size_t x_end, bool iswrite);
+bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev);
 bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt,
 					 loff_t reg_start, size_t reg_cnt,
 					 loff_t *buf_offset,
@@ -161,4 +217,17 @@ VFIO_IOREAD_DECLARATION(32)
 VFIO_IOREAD_DECLARATION(64)
 #endif
 
+static inline bool is_aligned_for_order(struct vm_area_struct *vma,
+					unsigned long addr,
+					unsigned long pfn,
+					unsigned int order)
+{
+	return !(order && (addr < vma->vm_start ||
+			   addr + (PAGE_SIZE << order) > vma->vm_end ||
+			   !IS_ALIGNED(pfn, 1 << order)));
+}
+
+int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
+				 struct dma_buf_phys_vec *phys);
+
 #endif /* VFIO_PCI_CORE_H */
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 96c66126c074..132a474e5914 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -177,7 +177,7 @@ struct virtio_device {
 	union virtio_map vmap;
 #ifdef CONFIG_VIRTIO_DEBUG
 	struct dentry *debugfs_dir;
-	u64 debugfs_filter_features[VIRTIO_FEATURES_DWORDS];
+	u64 debugfs_filter_features[VIRTIO_FEATURES_U64S];
 #endif
 };
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 16001e9f9b39..69f84ea85d71 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -24,7 +24,7 @@ typedef void vq_callback_t(struct virtqueue *);
 *            a virtqueue unused by the driver.
 * @callback: A callback to invoke on a used buffer notification.
 *            NULL for a virtqueue that does not need a callback.
- * @ctx: A flag to indicate to maintain an extra context per virtqueue.
+ * @ctx: whether to maintain an extra context per virtqueue.
 */
 struct virtqueue_info {
 	const char *name;
@@ -80,13 +80,13 @@ struct virtqueue_info {
 *	Returns the first 64 feature bits.
 * @get_extended_features:
 *	vdev: the virtio_device
-*	Returns the first VIRTIO_FEATURES_MAX feature bits (all we currently
+*	Returns the first VIRTIO_FEATURES_BITS feature bits (all we currently
 *	need).
 * @finalize_features: confirm what device features we'll be using.
 *	vdev: the virtio_device
 *	This sends the driver feature bits to the device: it can change
 *	the dev->feature bits if it wants.
-*	Note that despite the name this can be called any number of
+*	Note that despite the name this can be called any number of
 *	times.
 *	Returns 0 on success or error status
 * @bus_name: return the bus name associated with the device (optional)
@@ -141,8 +141,8 @@ struct virtio_config_ops {
 
 /**
 * struct virtio_map_ops - operations for mapping buffer for a virtio device
- * Note: For transport that has its own mapping logic it must
- * implements all of the operations
+ * Note: For a transport that has its own mapping logic it must
+ * implement all of the operations
 * @map_page: map a buffer to the device
 *	map: metadata for performing mapping
 *	page: the page that will be mapped by the device
@@ -150,7 +150,7 @@ struct virtio_config_ops {
 *	size: the buffer size
 *	dir: mapping direction
 *	attrs: mapping attributes
-*	Returns: the mapped address
+*	Returns the mapped address
 * @unmap_page: unmap a buffer from the device
 *	map: device specific mapping map
 *	map_handle: the mapped address
@@ -172,23 +172,23 @@ struct virtio_config_ops {
 *	size: the size of the buffer
 *	map_handle: the mapping address to sync
 *	gfp: allocation flag (GFP_XXX)
-*	Returns: virtual address of the allocated buffer
+*	Returns virtual address of the allocated buffer
 * @free: free a coherent buffer mapping
 *	map: metadata for performing mapping
 *	size: the size of the buffer
 *	vaddr: virtual address of the buffer
-*	map_handle: the mapping address to sync
+*	map_handle: the mapping address that needs to be freed
 *	attrs: unmapping attributes
 * @need_sync: if the buffer needs synchronization
 *	map: metadata for performing mapping
 *	map_handle: the mapped address
-*	Returns: whether the buffer needs synchronization
+*	Returns whether the buffer needs synchronization
 * @mapping_error: if the mapping address is error
 *	map: metadata for performing mapping
 *	map_handle: the mapped address
 * @max_mapping_size: get the maximum buffer size that can be mapped
 *	map: metadata for performing mapping
-*	Returns: the maximum buffer size that can be mapped
+*	Returns the maximum buffer size that can be mapped
 */
 struct virtio_map_ops {
 	dma_addr_t (*map_page)(union virtio_map map, struct page *page,
@@ -362,7 +362,7 @@ void virtio_device_ready(struct virtio_device *dev)
 	 * specific set_status() method.
 	 *
 	 * A well behaved device will only notify a virtqueue after
-	 * DRIVER_OK, this means the device should "see" the coherenct
+	 * DRIVER_OK, this means the device should "see" the coherent
 	 * memory write that set vq->broken as false which is done by
 	 * the driver when it sees DRIVER_OK, then the following
 	 * driver's vring_interrupt() will see vq->broken as false so
@@ -384,7 +384,7 @@ const char *virtio_bus_name(struct virtio_device *vdev)
 * @vq: the virtqueue
 * @cpu_mask: the cpu mask
 *
-* Pay attention the function are best-effort: the affinity hint may not be set
+* Note that this function is best-effort: the affinity hint may not be set
 * due to config support, irq type and sharing.
 *
 */
diff --git a/include/linux/virtio_features.h b/include/linux/virtio_features.h
index f748f2f87de8..ea2ad8717882 100644
--- a/include/linux/virtio_features.h
+++ b/include/linux/virtio_features.h
@@ -4,15 +4,16 @@
 
 #include <linux/bits.h>
 
-#define VIRTIO_FEATURES_DWORDS	2
-#define VIRTIO_FEATURES_MAX	(VIRTIO_FEATURES_DWORDS * 64)
-#define VIRTIO_FEATURES_WORDS	(VIRTIO_FEATURES_DWORDS * 2)
+#define VIRTIO_FEATURES_U64S	2
+#define VIRTIO_FEATURES_BITS	(VIRTIO_FEATURES_U64S * 64)
+
 #define VIRTIO_BIT(b)		BIT_ULL((b) & 0x3f)
-#define VIRTIO_DWORD(b)		((b) >> 6)
+#define VIRTIO_U64(b)		((b) >> 6)
+
 #define VIRTIO_DECLARE_FEATURES(name)			\
 	union {						\
 		u64 name;				\
-		u64 name##_array[VIRTIO_FEATURES_DWORDS];\
+		u64 name##_array[VIRTIO_FEATURES_U64S];\
 	}
 
 static inline bool virtio_features_chk_bit(unsigned int bit)
@@ -22,9 +23,9 @@ static inline bool virtio_features_chk_bit(unsigned int bit)
 		 * Don't care returning the correct value: the build
 		 * will fail before any bad features access
 		 */
-		BUILD_BUG_ON(bit >= VIRTIO_FEATURES_MAX);
+		BUILD_BUG_ON(bit >= VIRTIO_FEATURES_BITS);
 	} else {
-		if (WARN_ON_ONCE(bit >= VIRTIO_FEATURES_MAX))
+		if (WARN_ON_ONCE(bit >= VIRTIO_FEATURES_BITS))
 			return false;
 	}
 	return true;
@@ -34,26 +35,26 @@ static inline bool virtio_features_test_bit(const u64 *features,
 					    unsigned int bit)
 {
 	return virtio_features_chk_bit(bit) &&
-	       !!(features[VIRTIO_DWORD(bit)] & VIRTIO_BIT(bit));
+	       !!(features[VIRTIO_U64(bit)] & VIRTIO_BIT(bit));
 }
 
 static inline void virtio_features_set_bit(u64 *features,
 					   unsigned int bit)
 {
 	if (virtio_features_chk_bit(bit))
-		features[VIRTIO_DWORD(bit)] |= VIRTIO_BIT(bit);
+		features[VIRTIO_U64(bit)] |= VIRTIO_BIT(bit);
 }
 
 static inline void virtio_features_clear_bit(u64 *features,
 					     unsigned int bit)
 {
 	if (virtio_features_chk_bit(bit))
-		features[VIRTIO_DWORD(bit)] &= ~VIRTIO_BIT(bit);
+		features[VIRTIO_U64(bit)] &= ~VIRTIO_BIT(bit);
 }
 
 static inline void virtio_features_zero(u64 *features)
 {
-	memset(features, 0, sizeof(features[0]) * VIRTIO_FEATURES_DWORDS);
+	memset(features, 0, sizeof(features[0]) * VIRTIO_FEATURES_U64S);
 }
 
 static inline void virtio_features_from_u64(u64 *features, u64 from)
@@ -66,7 +67,7 @@ static inline bool virtio_features_equal(const u64 *f1, const u64 *f2)
 {
 	int i;
 
-	for (i = 0; i < VIRTIO_FEATURES_DWORDS; ++i)
+	for (i = 0; i < VIRTIO_FEATURES_U64S; ++i)
 		if (f1[i] != f2[i])
 			return false;
 	return true;
@@ -74,14 +75,14 @@ static inline bool virtio_features_equal(const u64 *f1, const u64 *f2)
 
 static inline void virtio_features_copy(u64 *to, const u64 *from)
 {
-	memcpy(to, from, sizeof(to[0]) * VIRTIO_FEATURES_DWORDS);
+	memcpy(to, from, sizeof(to[0]) * VIRTIO_FEATURES_U64S);
 }
 
 static inline void virtio_features_andnot(u64 *to, const u64 *f1, const u64 *f2)
 {
 	int i;
 
-	for (i = 0; i < VIRTIO_FEATURES_DWORDS; i++)
+	for (i = 0; i < VIRTIO_FEATURES_U64S; i++)
 		to[i] = f1[i] & ~f2[i];
 }
diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h
index 48bc12d1045b..9a3f2fc53bd6 100644
--- a/include/linux/virtio_pci_modern.h
+++ b/include/linux/virtio_pci_modern.h
@@ -107,7 +107,7 @@ void vp_modern_set_extended_features(struct virtio_pci_modern_device *mdev,
 static inline u64
 vp_modern_get_features(struct virtio_pci_modern_device *mdev)
 {
-	u64 features_array[VIRTIO_FEATURES_DWORDS];
+	u64 features_array[VIRTIO_FEATURES_U64S];
 
 	vp_modern_get_extended_features(mdev, features_array);
 	return features_array[0];
@@ -116,11 +116,11 @@ static inline u64
 vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev)
 {
-	u64 features_array[VIRTIO_FEATURES_DWORDS];
+	u64 features_array[VIRTIO_FEATURES_U64S];
 	int i;
 
 	vp_modern_get_driver_extended_features(mdev, features_array);
-	for (i = 1; i < VIRTIO_FEATURES_DWORDS; ++i)
+	for (i = 1; i < VIRTIO_FEATURES_U64S; ++i)
 		WARN_ON_ONCE(features_array[i]);
 	return features_array[0];
 }
@@ -128,7 +128,7 @@ vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev)
 static inline void
 vp_modern_set_features(struct virtio_pci_modern_device *mdev, u64 features)
 {
-	u64 features_array[VIRTIO_FEATURES_DWORDS];
+	u64 features_array[VIRTIO_FEATURES_U64S];
 
 	virtio_features_from_u64(features_array, features);
 	vp_modern_set_extended_features(mdev, features_array);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index c8cd0f00c845..c9f0b1018bcc 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -168,11 +168,25 @@ TRACE_EVENT(mc_event,
 * This event is generated when hardware detects an ARM processor error
 * has occurred. UEFI 2.6 spec section N.2.4.4.
 */
+#define APEIL "ARM Processor Err Info data len"
+#define APEID "ARM Processor Err Info raw data"
+#define APECIL "ARM Processor Err Context Info data len"
+#define APECID "ARM Processor Err Context Info raw data"
+#define VSEIL "Vendor Specific Err Info data len"
+#define VSEID "Vendor Specific Err Info raw data"
+
 TRACE_EVENT(arm_event,
 
-	TP_PROTO(const struct cper_sec_proc_arm *proc),
+	TP_PROTO(const struct cper_sec_proc_arm *proc,
+		 const u8 *pei_err,
+		 const u32 pei_len,
+		 const u8 *ctx_err,
+		 const u32 ctx_len,
+		 const u8 *oem,
+		 const u32 oem_len,
+		 u8 sev,
+		 int cpu),
 
-	TP_ARGS(proc),
+	TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len, sev, cpu),
 
 	TP_STRUCT__entry(
 		__field(u64, mpidr)
@@ -180,6 +194,14 @@ TRACE_EVENT(arm_event,
 		__field(u32, running_state)
 		__field(u32, psci_state)
 		__field(u8, affinity)
+		__field(u32, pei_len)
+		__dynamic_array(u8, pei_buf, pei_len)
+		__field(u32, ctx_len)
+		__dynamic_array(u8, ctx_buf, ctx_len)
+		__field(u32, oem_len)
+		__dynamic_array(u8, oem_buf, oem_len)
+		__field(u8, sev)
+		__field(int, cpu)
 	),
 
 	TP_fast_assign(
@@ -199,12 +221,29 @@ TRACE_EVENT(arm_event,
 			__entry->running_state = ~0;
 			__entry->psci_state = ~0;
 		}
+		__entry->pei_len = pei_len;
+		memcpy(__get_dynamic_array(pei_buf), pei_err, pei_len);
+		__entry->ctx_len = ctx_len;
+		memcpy(__get_dynamic_array(ctx_buf), ctx_err, ctx_len);
+		__entry->oem_len = oem_len;
+		memcpy(__get_dynamic_array(oem_buf), oem, oem_len);
+		__entry->sev = sev;
+		__entry->cpu = cpu;
 	),
 
-	TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
-		  "running state: %d; PSCI state: %d",
+	TP_printk("cpu: %d; error: %d; affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
+		  "running state: %d; PSCI state: %d; "
+		  "%s: %d; %s: %s; %s: %d; %s: %s; %s: %d; %s: %s",
+		  __entry->cpu,
+		  __entry->sev,
 		  __entry->affinity, __entry->mpidr, __entry->midr,
-		  __entry->running_state, __entry->psci_state)
+		  __entry->running_state, __entry->psci_state,
+		  APEIL, __entry->pei_len, APEID,
+		  __print_hex(__get_dynamic_array(pei_buf), __entry->pei_len),
+		  APECIL, __entry->ctx_len, APECID,
+		  __print_hex(__get_dynamic_array(ctx_buf), __entry->ctx_len),
+		  VSEIL, __entry->oem_len, VSEID,
+		  __print_hex(__get_dynamic_array(oem_buf), __entry->oem_len))
 );
 
 /*
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 1fa3786f82f4..4808a355de41 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -271,7 +271,7 @@ struct ib_cm_event {
 #define CM_APR_ATTR_ID cpu_to_be16(0x001A)
 
 /**
- * ib_cm_handler - User-defined callback to process communication events.
+ * typedef ib_cm_handler - User-defined callback to process communication events.
 * @cm_id: Communication identifier associated with the reported event.
 * @event: Information about the communication event.
 *
@@ -482,7 +482,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
 
 /**
 * ib_prepare_cm_mra - Prepares to send a message receipt acknowledgment to a
-   connection message in case duplicates are received.
+ * connection message in case duplicates are received.
 * @cm_id: Connection identifier associated with the connection message.
 */
 int ib_prepare_cm_mra(struct ib_cm_id *cm_id);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6139223e92e4..6aad66bc5dd7 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -586,10 +586,10 @@ enum ib_stat_flag {
 };
 
 /**
- * struct rdma_stat_desc
- * @name - The name of the counter
- * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL
- * @priv - Driver private information; Core code should not use
+ * struct rdma_stat_desc - description of one rdma stat/counter
+ * @name: The name of the counter
+ * @flags: Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL
+ * @priv: Driver private information; Core code should not use
 */
 struct rdma_stat_desc {
 	const char *name;
@@ -598,24 +598,24 @@ struct rdma_stat_desc {
 };
 
 /**
- * struct rdma_hw_stats
- * @lock - Mutex to protect parallel write access to lifespan and values
+ * struct rdma_hw_stats - collection of hardware stats and their management
+ * @lock: Mutex to protect parallel write access to lifespan and values
 *	of counters, which are 64bits and not guaranteed to be written
 *	atomicaly on 32bits systems.
- * @timestamp - Used by the core code to track when the last update was
- * @lifespan - Used by the core code to determine how old the counters
+ * @timestamp: Used by the core code to track when the last update was
+ * @lifespan: Used by the core code to determine how old the counters
 *	should be before being updated again.  Stored in jiffies, defaults
 *	to 10 milliseconds, drivers can override the default be specifying
 *	their own value during their allocation routine.
- * @descs - Array of pointers to static descriptors used for the counters
+ * @descs: Array of pointers to static descriptors used for the counters
 *	in directory.
- * @is_disabled - A bitmap to indicate each counter is currently disabled
+ * @is_disabled: A bitmap to indicate each counter is currently disabled
 *	or not.
- * @num_counters - How many hardware counters there are.  If name is
+ * @num_counters: How many hardware counters there are.  If name is
 *	shorter than this number, a kernel oops will result.  Driver authors
 *	are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters)
 *	in their code to prevent this.
- * @value - Array of u64 counters that are accessed by the sysfs code and
+ * @value: Array of u64 counters that are accessed by the sysfs code and
 *	filled in by the drivers get_stats routine
 */
 struct rdma_hw_stats {
@@ -859,6 +859,7 @@ enum ib_rate {
 	IB_RATE_400_GBPS = 21,
 	IB_RATE_600_GBPS = 22,
 	IB_RATE_800_GBPS = 23,
+	IB_RATE_1600_GBPS = 25,
 };
 
 /**
@@ -2405,7 +2406,7 @@ struct ib_device_ops {
 	int (*modify_port)(struct ib_device *device, u32 port_num,
 			   int port_modify_mask,
 			   struct ib_port_modify *port_modify);
-	/**
+	/*
 	 * The following mandatory functions are used only at device
 	 * registration.  Keep functions such as these at the end of this
 	 * structure to avoid cache line misses when accessing struct ib_device
@@ -2415,7 +2416,7 @@ struct ib_device_ops {
 			     struct ib_port_immutable *immutable);
 	enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
 					       u32 port_num);
-	/**
+	/*
 	 * When calling get_netdev, the HW vendor's driver should return the
 	 * net device of device @device at port @port_num or NULL if such
 	 * a net device doesn't exist. The vendor driver should call dev_hold
@@ -2425,7 +2426,7 @@ struct ib_device_ops {
 	 */
 	struct net_device *(*get_netdev)(struct ib_device *device,
 					 u32 port_num);
-	/**
+	/*
 	 * rdma netdev operation
 	 *
 	 * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
@@ -2439,14 +2440,14 @@ struct ib_device_ops {
 	int (*rdma_netdev_get_params)(struct ib_device *device, u32 port_num,
 				      enum rdma_netdev_t type,
 				      struct rdma_netdev_alloc_params *params);
-	/**
+	/*
 	 * query_gid should be return GID value for @device, when @port_num
 	 * link layer is either IB or iWarp. It is no-op if @port_num port
 	 * is RoCE link layer.
 	 */
 	int (*query_gid)(struct ib_device *device, u32 port_num, int index,
 			 union ib_gid *gid);
-	/**
+	/*
 	 * When calling add_gid, the HW vendor's driver should add the gid
 	 * of device of port at gid index available at @attr. Meta-info of
 	 * that gid (for example, the network device related to this gid) is
@@ -2460,7 +2461,7 @@ struct ib_device_ops {
 	 * roce_gid_table is used.
 	 */
 	int (*add_gid)(const struct ib_gid_attr *attr, void **context);
-	/**
+	/*
 	 * When calling del_gid, the HW vendor's driver should delete the
 	 * gid of device @device at gid index gid_index of port port_num
 	 * available in @attr.
@@ -2475,7 +2476,7 @@ struct ib_device_ops {
 			       struct ib_udata *udata);
 	void (*dealloc_ucontext)(struct ib_ucontext *context);
 	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
-	/**
+	/*
 	 * This will be called once refcount of an entry in mmap_xa reaches
 	 * zero. The type of the memory that was mapped may differ between
 	 * entries and is opaque to the rdma_user_mmap interface.
@@ -2516,12 +2517,12 @@ struct ib_device_ops {
 	int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
 	int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
 	int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
-	/**
+	/*
 	 * pre_destroy_cq - Prevent a cq from generating any new work
 	 * completions, but not free any kernel resources
 	 */
 	int (*pre_destroy_cq)(struct ib_cq *cq);
-	/**
+	/*
 	 * post_destroy_cq - Free all kernel resources
 	 */
 	void (*post_destroy_cq)(struct ib_cq *cq);
@@ -2615,7 +2616,7 @@ struct ib_device_ops {
 			       struct scatterlist *meta_sg, int meta_sg_nents,
 			       unsigned int *meta_sg_offset);
-	/**
+	/*
 	 * alloc_hw_[device,port]_stats - Allocate a struct rdma_hw_stats and
 	 * fill in the driver initialized data. The struct is kfree()'ed by
 	 * the sysfs core when the device is removed. A lifespan of -1 in the
@@ -2624,7 +2625,7 @@ struct ib_device_ops {
 	struct rdma_hw_stats *(*alloc_hw_device_stats)(struct ib_device *device);
 	struct rdma_hw_stats *(*alloc_hw_port_stats)(struct ib_device *device,
 						     u32 port_num);
-	/**
+	/*
 	 * get_hw_stats - Fill in the counter value(s) in the stats struct.
 	 * @index - The index in the value array we wish to have updated, or
 	 *   num_counters if we want all stats updated
@@ -2639,14 +2640,14 @@ struct ib_device_ops {
 	int (*get_hw_stats)(struct ib_device *device,
 			    struct rdma_hw_stats *stats, u32 port, int index);
-	/**
+	/*
 	 * modify_hw_stat - Modify the counter configuration
 	 * @enable: true/false when enable/disable a counter
 	 * Return codes - 0 on success or error code otherwise.
 	 */
 	int (*modify_hw_stat)(struct ib_device *device, u32 port,
 			      unsigned int counter_index, bool enable);
-	/**
+	/*
 	 * Allows rdma drivers to add their own restrack attributes.
 	 */
 	int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
@@ -2682,39 +2683,39 @@ struct ib_device_ops {
 			    u8 pdata_len);
 	int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog);
 	int (*iw_destroy_listen)(struct iw_cm_id *cm_id);
-	/**
+	/*
 	 * counter_bind_qp - Bind a QP to a counter.
 	 * @counter - The counter to be bound. If counter->id is zero then
 	 *   the driver needs to allocate a new counter and set counter->id
 	 */
 	int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp,
 			       u32 port);
-	/**
+	/*
 	 * counter_unbind_qp - Unbind the qp from the dynamically-allocated
 	 * counter and bind it onto the default one
 	 */
 	int (*counter_unbind_qp)(struct ib_qp *qp, u32 port);
-	/**
+	/*
 	 * counter_dealloc -De-allocate the hw counter
 	 */
 	int (*counter_dealloc)(struct rdma_counter *counter);
-	/**
+	/*
 	 * counter_alloc_stats - Allocate a struct rdma_hw_stats and fill in
 	 * the driver initialized data.
 	 */
 	struct rdma_hw_stats *(*counter_alloc_stats)(
 		struct rdma_counter *counter);
-	/**
+	/*
 	 * counter_update_stats - Query the stats value of this counter
 	 */
 	int (*counter_update_stats)(struct rdma_counter *counter);
-	/**
+	/*
 	 * counter_init - Initialize the driver specific rdma counter struct.
 	 */
 	void (*counter_init)(struct rdma_counter *counter);
-	/**
+	/*
 	 * Allows rdma drivers to add their own restrack attributes
 	 * dumped via 'rdma stat' iproute2 command.
 	 */
@@ -2730,25 +2731,25 @@ struct ib_device_ops {
 	 */
 	int (*get_numa_node)(struct ib_device *dev);
 
-	/**
+	/*
 	 * add_sub_dev - Add a sub IB device
 	 */
 	struct ib_device *(*add_sub_dev)(struct ib_device *parent,
 					 enum rdma_nl_dev_type type,
 					 const char *name);
 
-	/**
+	/*
 	 * del_sub_dev - Delete a sub IB device
 	 */
 	void (*del_sub_dev)(struct ib_device *sub_dev);
 
-	/**
+	/*
 	 * ufile_cleanup - Attempt to cleanup ubojects HW resources inside
 	 * the ufile.
 	 */
 	void (*ufile_hw_cleanup)(struct ib_uverbs_file *ufile);
 
-	/**
+	/*
 	 * report_port_event - Drivers need to implement this if they have
 	 * some private stuff to handle when link status changes.
 	 */
@@ -3157,8 +3158,8 @@ static inline u32 rdma_start_port(const struct ib_device *device)
 
 /**
 * rdma_for_each_port - Iterate over all valid port numbers of the IB device
- * @device - The struct ib_device * to iterate over
- * @iter - The unsigned int to store the port number
+ * @device: The struct ib_device * to iterate over
+ * @iter: The unsigned int to store the port number
 */
 #define rdma_for_each_port(device, iter) \
	for (iter = rdma_start_port(device + \
@@ -3524,7 +3525,7 @@ static inline bool rdma_core_cap_opa_port(struct ib_device *device,
 /**
 * rdma_mtu_enum_to_int - Return the mtu of the port as an integer value.
 * @device: Device
- * @port_num: Port number
+ * @port: Port number
 * @mtu: enum value of MTU
 *
 * Return the MTU size supported by the port as an integer value. Will return
@@ -3542,7 +3543,7 @@ static inline int rdma_mtu_enum_to_int(struct ib_device *device, u32 port,
 /**
 * rdma_mtu_from_attr - Return the mtu of the port from the port attribute.
 * @device: Device
- * @port_num: Port number
+ * @port: Port number
 * @attr: port attribute
 *
 * Return the MTU size supported by the port as an integer value.
@@ -3919,7 +3920,7 @@ static inline int ib_destroy_qp(struct ib_qp *qp)
 /**
 * ib_open_qp - Obtain a reference to an existing sharable QP.
- * @xrcd - XRC domain
+ * @xrcd: XRC domain
 * @qp_open_attr: Attributes identifying the QP to open.
 *
 * Returns a reference to a sharable QP.
@@ -4273,9 +4274,9 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
 /**
 * ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses
 * @dev: The device for which the DMA addresses are to be created
- * @sg: The sg_table object describing the buffer
+ * @sgt: The sg_table object describing the buffer
 * @direction: The direction of the DMA
- * @attrs: Optional DMA attributes for the map operation
+ * @dma_attrs: Optional DMA attributes for the map operation
 */
 static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev,
					    struct sg_table *sgt,
@@ -4419,8 +4420,8 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
 /**
 * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
 *   R_Key and L_Key.
- * @mr - struct ib_mr pointer to be updated.
- * @newkey - new key to be used.
+ * @mr: struct ib_mr pointer to be updated.
+ * @newkey: new key to be used.
 */
 static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
 {
@@ -4431,7 +4432,7 @@ static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
 /**
 * ib_inc_rkey - increments the key portion of the given rkey. Can be used
 * for calculating a new rkey for type 2 memory windows.
- * @rkey - the rkey to increment.
+ * @rkey: the rkey to increment.
 */
 static inline u32 ib_inc_rkey(u32 rkey)
 {
@@ -4525,7 +4526,7 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
 /**
 * ib_device_try_get: Hold a registration lock
- * device: The device to lock
+ * @dev: The device to lock
 *
 * A device under an active registration lock cannot become unregistered. It
 * is only possible to obtain a registration lock on a device that is fully
@@ -4832,7 +4833,7 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
 * rdma_roce_rescan_device - Rescan all of the network devices in the system
 * and add their gids, as needed, to the relevant RoCE devices.
 *
- * @device: the rdma device
+ * @ibdev: the rdma device
 */
 void rdma_roce_rescan_device(struct ib_device *ibdev);
 void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port);
@@ -4885,7 +4886,7 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
 /**
 * ibdev_to_node - return the NUMA node for a given ib_device
- * @dev: device to get the NUMA node for.
+ * @ibdev: device to get the NUMA node for.
 */
 static inline int ibdev_to_node(struct ib_device *ibdev)
 {
@@ -4923,6 +4924,7 @@ static inline struct net *rdma_dev_net(struct ib_device *device)
 /**
 * rdma_flow_label_to_udp_sport - generate a RoCE v2 UDP src port value based
 * on the flow_label
+ * @fl: flow_label value
 *
 * This function will convert the 20 bit flow_label input to a valid RoCE v2
 * UDP src port 14 bit value. All RoCE V2 drivers should use this same
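The ib_cm.h and ib_verbs.h hunks above are kernel-doc cleanups: parameter tags move to the colon form ("@name: description" rather than "@name - description"), stale names are made to match the actual parameters, and comments that are not real kernel-doc are demoted from the "/**" opener to a plain "/*" so scripts/kernel-doc stops parsing them. A minimal sketch of the convention these fixes target follows; the helper is hypothetical, written as plain C, and only mirrors the shape of ib_inc_rkey() above:

#include <stdint.h>

/**
 * example_inc_key - increment the key portion of a hypothetical rkey
 * @rkey: the rkey to increment
 *
 * Comments opened with the double-star form are parsed by
 * scripts/kernel-doc, so each parameter needs a colon-terminated
 * "@name:" tag; implementation notes that should not be parsed
 * open with a single star instead.
 *
 * Return: the updated rkey.
 */
static inline uint32_t example_inc_key(uint32_t rkey)
{
	const uint32_t mask = 0x000000ff;

	return (rkey & ~mask) | ((rkey + 1) & mask);
}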
All RoCE V2 drivers should use this same diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index d67892944193..71140ea0aeb2 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -144,7 +144,7 @@ #define RVT_SEND_COMPLETION_ONLY (IB_SEND_RESERVED_START << 1) /** - * rvt_ud_wr - IB UD work plus AH cache + * struct rvt_ud_wr - IB UD work plus AH cache * @wr: valid IB work request * @attr: pointer to an allocated AH attribute * @@ -184,10 +184,10 @@ struct rvt_swqe { * struct rvt_krwq - kernel struct receive work request * @p_lock: lock to protect producer of the kernel buffer * @head: index of next entry to fill - * @c_lock:lock to protect consumer of the kernel buffer + * @c_lock: lock to protect consumer of the kernel buffer * @tail: index of next entry to pull - * @count: count is aproximate of total receive enteries posted - * @rvt_rwqe: struct of receive work request queue entry + * @count: count is approximate of total receive entries posted + * @curr_wq: struct of receive work request queue entry * * This structure is used to contain the head pointer, * tail pointer and receive work queue entries for kernel @@ -309,10 +309,10 @@ struct rvt_ack_entry { #define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1) /** - * rvt_operation_params - op table entry - * @length - the length to copy into the swqe entry - * @qpt_support - a bit mask indicating QP type support - * @flags - RVT_OPERATION flags (see above) + * struct rvt_operation_params - op table entry + * @length: the length to copy into the swqe entry + * @qpt_support: a bit mask indicating QP type support + * @flags: RVT_OPERATION flags (see above) * * This supports table driven post send so that * the driver can have differing an potentially @@ -552,7 +552,7 @@ static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) /** * rvt_is_user_qp - return if this is user mode QP - * @qp - the target QP + * @qp: the target QP */ static inline bool rvt_is_user_qp(struct rvt_qp *qp) { @@ -561,7 +561,7 @@ static inline bool rvt_is_user_qp(struct rvt_qp *qp) /** * rvt_get_qp - get a QP reference - * @qp - the QP to hold + * @qp: the QP to hold */ static inline void rvt_get_qp(struct rvt_qp *qp) { @@ -570,7 +570,7 @@ static inline void rvt_get_qp(struct rvt_qp *qp) /** * rvt_put_qp - release a QP reference - * @qp - the QP to release + * @qp: the QP to release */ static inline void rvt_put_qp(struct rvt_qp *qp) { @@ -580,7 +580,7 @@ static inline void rvt_put_qp(struct rvt_qp *qp) /** * rvt_put_swqe - drop mr refs held by swqe - * @wqe - the send wqe + * @wqe: the send wqe * * This drops any mr references held by the swqe */ @@ -597,8 +597,8 @@ static inline void rvt_put_swqe(struct rvt_swqe *wqe) /** * rvt_qp_wqe_reserve - reserve operation - * @qp - the rvt qp - * @wqe - the send wqe + * @qp: the rvt qp + * @wqe: the send wqe * * This routine used in post send to record * a wqe relative reserved operation use. @@ -612,8 +612,8 @@ static inline void rvt_qp_wqe_reserve( /** * rvt_qp_wqe_unreserve - clean reserved operation - * @qp - the rvt qp - * @flags - send wqe flags + * @qp: the rvt qp + * @flags: send wqe flags * * This decrements the reserve use count. 
* @@ -653,8 +653,8 @@ u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len); /** * rvt_div_round_up_mtu - round up divide - * @qp - the qp pair - * @len - the length + * @qp: the qp pair + * @len: the length * * Perform a shift based mtu round up divide */ @@ -664,8 +664,9 @@ static inline u32 rvt_div_round_up_mtu(struct rvt_qp *qp, u32 len) } /** - * @qp - the qp pair - * @len - the length + * rvt_div_mtu - shift-based divide + * @qp: the qp pair + * @len: the length * * Perform a shift based mtu divide */ @@ -676,7 +677,7 @@ static inline u32 rvt_div_mtu(struct rvt_qp *qp, u32 len) /** * rvt_timeout_to_jiffies - Convert a ULP timeout input into jiffies - * @timeout - timeout input(0 - 31). + * @timeout: timeout input(0 - 31). * * Return a timeout value in jiffies. */ @@ -690,7 +691,8 @@ static inline unsigned long rvt_timeout_to_jiffies(u8 timeout) /** * rvt_lookup_qpn - return the QP with the given QPN - * @ibp: the ibport + * @rdi: rvt device info structure + * @rvp: the ibport * @qpn: the QP number to look up * * The caller must hold the rcu_read_lock(), and keep the lock until @@ -716,9 +718,9 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, } /** - * rvt_mod_retry_timer - mod a retry timer - * @qp - the QP - * @shift - timeout shift to wait for multiple packets + * rvt_mod_retry_timer_ext - mod a retry timer + * @qp: the QP + * @shift: timeout shift to wait for multiple packets * Modify a potentially already running retry timer */ static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift) @@ -753,7 +755,7 @@ static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe) } /** - * rvt_qp_sqwe_incr - increment ring index + * rvt_qp_swqe_incr - increment ring index * @qp: the qp * @val: the starting value * @@ -811,10 +813,10 @@ static inline void rvt_send_cq(struct rvt_qp *qp, struct ib_wc *wc, /** * rvt_qp_complete_swqe - insert send completion - * @qp - the qp - * @wqe - the send wqe - * @opcode - wc operation (driver dependent) - * @status - completion status + * @qp: the qp + * @wqe: the send wqe + * @opcode: wc operation (driver dependent) + * @status: completion status * * Update the s_last information, and then insert a send * completion into the completion @@ -891,7 +893,7 @@ void rvt_ruc_loopback(struct rvt_qp *qp); /** * struct rvt_qp_iter - the iterator for QPs - * @qp - the current QP + * @qp: the current QP * * This structure defines the current iterator * state for sequenced access to all QPs relative @@ -913,7 +915,7 @@ struct rvt_qp_iter { /** * ib_cq_tail - Return tail index of cq buffer - * @send_cq - The cq for send + * @send_cq: The cq for send * * This is called in qp_iter_print to get tail * of cq buffer. @@ -929,7 +931,7 @@ static inline u32 ib_cq_tail(struct ib_cq *send_cq) /** * ib_cq_head - Return head index of cq buffer - * @send_cq - The cq for send + * @send_cq: The cq for send * * This is called in qp_iter_print to get head * of cq buffer. @@ -945,7 +947,7 @@ static inline u32 ib_cq_head(struct ib_cq *send_cq) /** * rvt_free_rq - free memory allocated for rvt_rq struct - * @rvt_rq: request queue data structure + * @rq: request queue data structure * * This function should only be called if the rvt_mmap_info() * has not succeeded. 
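The rvt_div_round_up_mtu()/rvt_div_mtu() comments fixed up above describe a shift-based divide: rdmavt caches log2 of the path MTU so the per-packet division becomes a right shift. A standalone sketch of that arithmetic under the power-of-two assumption follows; the names are illustrative, not the rdmavt API:

#include <assert.h>

/* len / mtu, where mtu == 1u << log_pmtu */
static unsigned int div_mtu(unsigned int len, unsigned int log_pmtu)
{
	return len >> log_pmtu;
}

/* DIV_ROUND_UP(len, mtu) without an actual division */
static unsigned int div_round_up_mtu(unsigned int len, unsigned int log_pmtu)
{
	return (len + (1u << log_pmtu) - 1) >> log_pmtu;
}

int main(void)
{
	/* 10000 bytes over a 4096-byte MTU (log_pmtu == 12) */
	assert(div_mtu(10000, 12) == 2);		/* 2 full packets */
	assert(div_round_up_mtu(10000, 12) == 3);	/* 3 packets to carry it all */
	return 0;
}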
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index c218c89e0e2e..2c41920b641d 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -450,6 +450,16 @@ struct iommu_hwpt_vtd_s1 {
 * nested domain will translate the same as the nesting parent. The S1 will
 * install a Context Descriptor Table pointing at userspace memory translated
 * by the nesting parent.
+ *
+ * It's suggested to allocate a vDEVICE object carrying vSID and then re-attach
+ * the nested domain, as soon as the vSID is available in the VMM level:
+ *
+ * - when Cfg=translate, a vDEVICE must be allocated prior to attaching to the
+ *   allocated nested domain, as CD/ATS invalidations and vevents need a vSID.
+ * - when Cfg=bypass/abort, a vDEVICE is not enforced during the nested domain
+ *   attachment, to support a GBPA case where VM sets CR0.SMMUEN=0. However, if
+ *   VM sets CR0.SMMUEN=1 while missing a vDEVICE object, kernel would fail to
+ *   report events to the VM. E.g. F_TRANSLATION when guest STE.Cfg=abort.
 */
 struct iommu_hwpt_arm_smmuv3 {
 	__aligned_le64 ste[2];
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 75100bf009ba..ac2329f24141 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -14,6 +14,7 @@
 
 #include <linux/types.h>
 #include <linux/ioctl.h>
+#include <linux/stddef.h>
 
 #define VFIO_API_VERSION 0
 
@@ -1478,6 +1479,33 @@ struct vfio_device_feature_bus_master {
 };
 #define VFIO_DEVICE_FEATURE_BUS_MASTER 10
 
+/**
+ * Upon VFIO_DEVICE_FEATURE_GET create a dma_buf fd for the
+ * regions selected.
+ *
+ * open_flags are the typical flags passed to open(2), e.g. O_RDWR, O_CLOEXEC,
+ * etc. offset/length specify a slice of the region to create the dmabuf from.
+ * nr_ranges is the total number of (P2P DMA) ranges that comprise the dmabuf.
+ *
+ * flags should be 0.
+ *
+ * Return: The fd number on success, -1 and errno is set on failure.
+ */
+#define VFIO_DEVICE_FEATURE_DMA_BUF 11
+
+struct vfio_region_dma_range {
+	__u64 offset;
+	__u64 length;
+};
+
+struct vfio_device_feature_dma_buf {
+	__u32 region_index;
+	__u32 open_flags;
+	__u32 flags;
+	__u32 nr_ranges;
+	struct vfio_region_dma_range dma_ranges[] __counted_by(nr_ranges);
+};
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
index c691ac210ce2..e732e3456e27 100644
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h
@@ -40,7 +40,7 @@
 #define _LINUX_VIRTIO_PCI_H
 
 #include <linux/types.h>
-#include <linux/kernel.h>
+#include <linux/const.h>
 
 #ifndef VIRTIO_PCI_NO_LEGACY
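To make the new VFIO_DEVICE_FEATURE_DMA_BUF uapi concrete, here is a hedged userspace sketch that asks for a dmabuf fd over one slice of region 0, following the usual VFIO_DEVICE_FEATURE calling convention (a struct vfio_device_feature header followed by feature-specific data). The helper name and error handling are illustrative only, and the details may still change before the series lands. Note that the linux/stddef.h include added by the hunk above is what provides userspace with a (no-op) definition of __counted_by:

#include <fcntl.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Illustrative helper: request a dmabuf fd covering one offset/length
 * slice of region 0 (typically BAR0) of an already-open vfio device fd.
 * Returns the dmabuf fd, or -1 with errno set. */
static int vfio_get_region_dmabuf(int device_fd, __u64 offset, __u64 length)
{
	size_t sz = sizeof(struct vfio_device_feature) +
		    sizeof(struct vfio_device_feature_dma_buf) +
		    sizeof(struct vfio_region_dma_range);
	struct vfio_device_feature *feature = calloc(1, sz);
	struct vfio_device_feature_dma_buf *dma_buf;
	int fd;

	if (!feature)
		return -1;

	feature->argsz = sz;
	feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_BUF;
	dma_buf = (struct vfio_device_feature_dma_buf *)feature->data;
	dma_buf->region_index = 0;
	dma_buf->open_flags = O_RDWR | O_CLOEXEC;
	dma_buf->flags = 0;		/* must be 0 per the comment above */
	dma_buf->nr_ranges = 1;
	dma_buf->dma_ranges[0].offset = offset;
	dma_buf->dma_ranges[0].length = length;

	fd = ioctl(device_fd, VFIO_DEVICE_FEATURE, feature);
	free(feature);
	return fd;
}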
