diff options
Diffstat (limited to 'include')
112 files changed, 3694 insertions, 1152 deletions
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index dcb8727f2b82..e1a2e1b7c8e7 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -97,14 +97,6 @@ static inline int huge_pte_none(pte_t pte) } #endif -/* Please refer to comments above pte_none_mostly() for the usage */ -#ifndef __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY -static inline int huge_pte_none_mostly(pte_t pte) -{ - return huge_pte_none(pte) || is_pte_marker(pte); -} -#endif - #ifndef __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 02aeca21479a..6628670bcb90 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -2,6 +2,8 @@ #ifndef _ASM_GENERIC_PERCPU_H_ #define _ASM_GENERIC_PERCPU_H_ +#ifndef __ASSEMBLER__ + #include <linux/compiler.h> #include <linux/threads.h> #include <linux/percpu-defs.h> @@ -557,4 +559,5 @@ do { \ this_cpu_generic_cmpxchg(pcp, oval, nval) #endif +#endif /* __ASSEMBLER__ */ #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 3c8ec3bfea44..57137d3ac159 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -18,8 +18,7 @@ */ static inline pte_t *__pte_alloc_one_kernel_noprof(struct mm_struct *mm) { - struct ptdesc *ptdesc = pagetable_alloc_noprof(GFP_PGTABLE_KERNEL & - ~__GFP_HIGHMEM, 0); + struct ptdesc *ptdesc = pagetable_alloc_noprof(GFP_PGTABLE_KERNEL, 0); if (!ptdesc) return NULL; @@ -28,6 +27,8 @@ static inline pte_t *__pte_alloc_one_kernel_noprof(struct mm_struct *mm) return NULL; } + ptdesc_set_kernel(ptdesc); + return ptdesc_address(ptdesc); } #define __pte_alloc_one_kernel(...) 
alloc_hooks(__pte_alloc_one_kernel_noprof(__VA_ARGS__)) @@ -146,6 +147,10 @@ static inline pmd_t *pmd_alloc_one_noprof(struct mm_struct *mm, unsigned long ad pagetable_free(ptdesc); return NULL; } + + if (mm == &init_mm) + ptdesc_set_kernel(ptdesc); + return ptdesc_address(ptdesc); } #define pmd_alloc_one(...) alloc_hooks(pmd_alloc_one_noprof(__VA_ARGS__)) @@ -172,13 +177,16 @@ static inline pud_t *__pud_alloc_one_noprof(struct mm_struct *mm, unsigned long if (mm == &init_mm) gfp = GFP_PGTABLE_KERNEL; - gfp &= ~__GFP_HIGHMEM; ptdesc = pagetable_alloc_noprof(gfp, 0); if (!ptdesc) return NULL; pagetable_pud_ctor(ptdesc); + + if (mm == &init_mm) + ptdesc_set_kernel(ptdesc); + return ptdesc_address(ptdesc); } #define __pud_alloc_one(...) alloc_hooks(__pud_alloc_one_noprof(__VA_ARGS__)) @@ -226,13 +234,16 @@ static inline p4d_t *__p4d_alloc_one_noprof(struct mm_struct *mm, unsigned long if (mm == &init_mm) gfp = GFP_PGTABLE_KERNEL; - gfp &= ~__GFP_HIGHMEM; ptdesc = pagetable_alloc_noprof(gfp, 0); if (!ptdesc) return NULL; pagetable_p4d_ctor(ptdesc); + + if (mm == &init_mm) + ptdesc_set_kernel(ptdesc); + return ptdesc_address(ptdesc); } #define __p4d_alloc_one(...) alloc_hooks(__p4d_alloc_one_noprof(__VA_ARGS__)) @@ -270,13 +281,16 @@ static inline pgd_t *__pgd_alloc_noprof(struct mm_struct *mm, unsigned int order if (mm == &init_mm) gfp = GFP_PGTABLE_KERNEL; - gfp &= ~__GFP_HIGHMEM; ptdesc = pagetable_alloc_noprof(gfp, order); if (!ptdesc) return NULL; pagetable_pgd_ctor(ptdesc); + + if (mm == &init_mm) + ptdesc_set_kernel(ptdesc); + return ptdesc_address(ptdesc); } #define __pgd_alloc(...) 
alloc_hooks(__pgd_alloc_noprof(__VA_ARGS__)) diff --git a/include/asm-generic/pgtable_uffd.h b/include/asm-generic/pgtable_uffd.h index 828966d4c281..0d85791efdf7 100644 --- a/include/asm-generic/pgtable_uffd.h +++ b/include/asm-generic/pgtable_uffd.h @@ -1,6 +1,23 @@ #ifndef _ASM_GENERIC_PGTABLE_UFFD_H #define _ASM_GENERIC_PGTABLE_UFFD_H +/* + * Some platforms can customize the uffd-wp bit, making it unavailable + * even if the architecture provides the resource. + * Adding this API allows architectures to add their own checks for the + * devices on which the kernel is running. + * Note: When overriding it, please make sure the + * CONFIG_HAVE_ARCH_USERFAULTFD_WP is part of this macro. + */ +#ifndef pgtable_supports_uffd_wp +#define pgtable_supports_uffd_wp() IS_ENABLED(CONFIG_HAVE_ARCH_USERFAULTFD_WP) +#endif + +static inline bool uffd_supports_wp_marker(void) +{ + return pgtable_supports_uffd_wp() && IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP); +} + #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP static __always_inline int pte_uffd_wp(pte_t pte) { diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a464ff6c1a61..8ca130af301f 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -1065,6 +1065,7 @@ *(.no_trim_symbol) \ /* ld.bfd warns about .gnu.version* even when not emitted */ \ *(.gnu.version*) \ + *(__tracepoint_check) \ #define DISCARDS \ /DISCARD/ : { \ diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index cb8ce53146f0..8776844e0eeb 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -240,6 +240,7 @@ #define QCOM_ID_SC7280 487 #define QCOM_ID_SC7180P 495 #define QCOM_ID_QCM6490 497 +#define QCOM_ID_QCS6490 498 #define QCOM_ID_SM7325P 499 #define QCOM_ID_IPQ5000 503 #define QCOM_ID_IPQ0509 504 @@ -286,6 +287,7 @@ #define QCOM_ID_IPQ5424 651 #define QCOM_ID_QCM6690 657 #define QCOM_ID_QCS6690 658 +#define QCOM_ID_SM8850 660 
#define QCOM_ID_IPQ5404 671 #define QCOM_ID_QCS9100 667 #define QCOM_ID_QCS8300 674 diff --git a/include/dt-bindings/clock/google,gs101-acpm.h b/include/dt-bindings/clock/google,gs101-acpm.h new file mode 100644 index 000000000000..e2ba89e09fa6 --- /dev/null +++ b/include/dt-bindings/clock/google,gs101-acpm.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright 2025 Linaro Ltd. + * + * Device Tree binding constants for Google gs101 ACPM clock controller. + */ + +#ifndef _DT_BINDINGS_CLOCK_GOOGLE_GS101_ACPM_H +#define _DT_BINDINGS_CLOCK_GOOGLE_GS101_ACPM_H + +#define GS101_CLK_ACPM_DVFS_MIF 0 +#define GS101_CLK_ACPM_DVFS_INT 1 +#define GS101_CLK_ACPM_DVFS_CPUCL0 2 +#define GS101_CLK_ACPM_DVFS_CPUCL1 3 +#define GS101_CLK_ACPM_DVFS_CPUCL2 4 +#define GS101_CLK_ACPM_DVFS_G3D 5 +#define GS101_CLK_ACPM_DVFS_G3DL2 6 +#define GS101_CLK_ACPM_DVFS_TPU 7 +#define GS101_CLK_ACPM_DVFS_INTCAM 8 +#define GS101_CLK_ACPM_DVFS_TNR 9 +#define GS101_CLK_ACPM_DVFS_CAM 10 +#define GS101_CLK_ACPM_DVFS_MFC 11 +#define GS101_CLK_ACPM_DVFS_DISP 12 +#define GS101_CLK_ACPM_DVFS_BO 13 + +#endif /* _DT_BINDINGS_CLOCK_GOOGLE_GS101_ACPM_H */ diff --git a/include/dt-bindings/clock/qcom,dispcc-sm6350.h b/include/dt-bindings/clock/qcom,dispcc-sm6350.h index cb54aae2723e..61426a80e620 100644 --- a/include/dt-bindings/clock/qcom,dispcc-sm6350.h +++ b/include/dt-bindings/clock/qcom,dispcc-sm6350.h @@ -42,6 +42,10 @@ #define DISP_CC_SLEEP_CLK 31 #define DISP_CC_XO_CLK 32 +/* Resets */ +#define DISP_CC_MDSS_CORE_BCR 0 +#define DISP_CC_MDSS_RSCC_BCR 1 + /* GDSCs */ #define MDSS_GDSC 0 diff --git a/include/dt-bindings/clock/qcom,ipq5424-gcc.h b/include/dt-bindings/clock/qcom,ipq5424-gcc.h index c15ad16923bd..3ae33a0fa002 100644 --- a/include/dt-bindings/clock/qcom,ipq5424-gcc.h +++ b/include/dt-bindings/clock/qcom,ipq5424-gcc.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* * Copyright (c) 2018,2020 The Linux Foundation. 
All rights reserved. - * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #ifndef _DT_BINDINGS_CLOCK_IPQ_GCC_IPQ5424_H @@ -152,5 +152,6 @@ #define GCC_PCIE3_RCHNG_CLK 143 #define GCC_IM_SLEEP_CLK 144 #define GCC_XO_CLK 145 +#define GPLL0_OUT_AUX 146 #endif diff --git a/include/dt-bindings/clock/qcom,ipq5424-nsscc.h b/include/dt-bindings/clock/qcom,ipq5424-nsscc.h new file mode 100644 index 000000000000..eeae0dc38042 --- /dev/null +++ b/include/dt-bindings/clock/qcom,ipq5424-nsscc.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +#ifndef _DT_BINDINGS_CLOCK_QCOM_IPQ5424_NSSCC_H +#define _DT_BINDINGS_CLOCK_QCOM_IPQ5424_NSSCC_H + +/* NSS_CC clocks */ +#define NSS_CC_CE_APB_CLK 0 +#define NSS_CC_CE_AXI_CLK 1 +#define NSS_CC_CE_CLK_SRC 2 +#define NSS_CC_CFG_CLK_SRC 3 +#define NSS_CC_DEBUG_CLK 4 +#define NSS_CC_EIP_BFDCD_CLK_SRC 5 +#define NSS_CC_EIP_CLK 6 +#define NSS_CC_NSS_CSR_CLK 7 +#define NSS_CC_NSSNOC_CE_APB_CLK 8 +#define NSS_CC_NSSNOC_CE_AXI_CLK 9 +#define NSS_CC_NSSNOC_EIP_CLK 10 +#define NSS_CC_NSSNOC_NSS_CSR_CLK 11 +#define NSS_CC_NSSNOC_PPE_CFG_CLK 12 +#define NSS_CC_NSSNOC_PPE_CLK 13 +#define NSS_CC_PORT1_MAC_CLK 14 +#define NSS_CC_PORT1_RX_CLK 15 +#define NSS_CC_PORT1_RX_CLK_SRC 16 +#define NSS_CC_PORT1_RX_DIV_CLK_SRC 17 +#define NSS_CC_PORT1_TX_CLK 18 +#define NSS_CC_PORT1_TX_CLK_SRC 19 +#define NSS_CC_PORT1_TX_DIV_CLK_SRC 20 +#define NSS_CC_PORT2_MAC_CLK 21 +#define NSS_CC_PORT2_RX_CLK 22 +#define NSS_CC_PORT2_RX_CLK_SRC 23 +#define NSS_CC_PORT2_RX_DIV_CLK_SRC 24 +#define NSS_CC_PORT2_TX_CLK 25 +#define NSS_CC_PORT2_TX_CLK_SRC 26 +#define NSS_CC_PORT2_TX_DIV_CLK_SRC 27 +#define NSS_CC_PORT3_MAC_CLK 28 +#define NSS_CC_PORT3_RX_CLK 29 +#define NSS_CC_PORT3_RX_CLK_SRC 30 +#define NSS_CC_PORT3_RX_DIV_CLK_SRC 31 +#define NSS_CC_PORT3_TX_CLK 32 
+#define NSS_CC_PORT3_TX_CLK_SRC 33 +#define NSS_CC_PORT3_TX_DIV_CLK_SRC 34 +#define NSS_CC_PPE_CLK_SRC 35 +#define NSS_CC_PPE_EDMA_CFG_CLK 36 +#define NSS_CC_PPE_EDMA_CLK 37 +#define NSS_CC_PPE_SWITCH_BTQ_CLK 38 +#define NSS_CC_PPE_SWITCH_CFG_CLK 39 +#define NSS_CC_PPE_SWITCH_CLK 40 +#define NSS_CC_PPE_SWITCH_IPE_CLK 41 +#define NSS_CC_UNIPHY_PORT1_RX_CLK 42 +#define NSS_CC_UNIPHY_PORT1_TX_CLK 43 +#define NSS_CC_UNIPHY_PORT2_RX_CLK 44 +#define NSS_CC_UNIPHY_PORT2_TX_CLK 45 +#define NSS_CC_UNIPHY_PORT3_RX_CLK 46 +#define NSS_CC_UNIPHY_PORT3_TX_CLK 47 +#define NSS_CC_XGMAC0_PTP_REF_CLK 48 +#define NSS_CC_XGMAC0_PTP_REF_DIV_CLK_SRC 49 +#define NSS_CC_XGMAC1_PTP_REF_CLK 50 +#define NSS_CC_XGMAC1_PTP_REF_DIV_CLK_SRC 51 +#define NSS_CC_XGMAC2_PTP_REF_CLK 52 +#define NSS_CC_XGMAC2_PTP_REF_DIV_CLK_SRC 53 + +#endif diff --git a/include/dt-bindings/clock/qcom,x1e80100-gcc.h b/include/dt-bindings/clock/qcom,x1e80100-gcc.h index 710c340f24a5..62aa12425592 100644 --- a/include/dt-bindings/clock/qcom,x1e80100-gcc.h +++ b/include/dt-bindings/clock/qcom,x1e80100-gcc.h @@ -363,6 +363,30 @@ #define GCC_USB3_PRIM_PHY_PIPE_CLK_SRC 353 #define GCC_USB3_SEC_PHY_PIPE_CLK_SRC 354 #define GCC_USB3_TERT_PHY_PIPE_CLK_SRC 355 +#define GCC_USB34_PRIM_PHY_PIPE_CLK_SRC 356 +#define GCC_USB34_SEC_PHY_PIPE_CLK_SRC 357 +#define GCC_USB34_TERT_PHY_PIPE_CLK_SRC 358 +#define GCC_USB4_0_PHY_DP0_CLK_SRC 359 +#define GCC_USB4_0_PHY_DP1_CLK_SRC 360 +#define GCC_USB4_0_PHY_P2RR2P_PIPE_CLK_SRC 361 +#define GCC_USB4_0_PHY_PCIE_PIPE_MUX_CLK_SRC 362 +#define GCC_USB4_0_PHY_RX0_CLK_SRC 363 +#define GCC_USB4_0_PHY_RX1_CLK_SRC 364 +#define GCC_USB4_0_PHY_SYS_CLK_SRC 365 +#define GCC_USB4_1_PHY_DP0_CLK_SRC 366 +#define GCC_USB4_1_PHY_DP1_CLK_SRC 367 +#define GCC_USB4_1_PHY_P2RR2P_PIPE_CLK_SRC 368 +#define GCC_USB4_1_PHY_PCIE_PIPE_MUX_CLK_SRC 369 +#define GCC_USB4_1_PHY_RX0_CLK_SRC 370 +#define GCC_USB4_1_PHY_RX1_CLK_SRC 371 +#define GCC_USB4_1_PHY_SYS_CLK_SRC 372 +#define GCC_USB4_2_PHY_DP0_CLK_SRC 373 +#define 
GCC_USB4_2_PHY_DP1_CLK_SRC 374 +#define GCC_USB4_2_PHY_P2RR2P_PIPE_CLK_SRC 375 +#define GCC_USB4_2_PHY_PCIE_PIPE_MUX_CLK_SRC 376 +#define GCC_USB4_2_PHY_RX0_CLK_SRC 377 +#define GCC_USB4_2_PHY_RX1_CLK_SRC 378 +#define GCC_USB4_2_PHY_SYS_CLK_SRC 379 /* GCC power domains */ #define GCC_PCIE_0_TUNNEL_GDSC 0 @@ -484,4 +508,41 @@ #define GCC_VIDEO_BCR 87 #define GCC_VIDEO_AXI0_CLK_ARES 88 #define GCC_VIDEO_AXI1_CLK_ARES 89 +#define GCC_USB4_0_MISC_USB4_SYS_BCR 90 +#define GCC_USB4_0_MISC_RX_CLK_0_BCR 91 +#define GCC_USB4_0_MISC_RX_CLK_1_BCR 92 +#define GCC_USB4_0_MISC_USB_PIPE_BCR 93 +#define GCC_USB4_0_MISC_PCIE_PIPE_BCR 94 +#define GCC_USB4_0_MISC_TMU_BCR 95 +#define GCC_USB4_0_MISC_SB_IF_BCR 96 +#define GCC_USB4_0_MISC_HIA_MSTR_BCR 97 +#define GCC_USB4_0_MISC_AHB_BCR 98 +#define GCC_USB4_0_MISC_DP0_MAX_PCLK_BCR 99 +#define GCC_USB4_0_MISC_DP1_MAX_PCLK_BCR 100 +#define GCC_USB4_1_MISC_USB4_SYS_BCR 101 +#define GCC_USB4_1_MISC_RX_CLK_0_BCR 102 +#define GCC_USB4_1_MISC_RX_CLK_1_BCR 103 +#define GCC_USB4_1_MISC_USB_PIPE_BCR 104 +#define GCC_USB4_1_MISC_PCIE_PIPE_BCR 105 +#define GCC_USB4_1_MISC_TMU_BCR 106 +#define GCC_USB4_1_MISC_SB_IF_BCR 107 +#define GCC_USB4_1_MISC_HIA_MSTR_BCR 108 +#define GCC_USB4_1_MISC_AHB_BCR 109 +#define GCC_USB4_1_MISC_DP0_MAX_PCLK_BCR 110 +#define GCC_USB4_1_MISC_DP1_MAX_PCLK_BCR 111 +#define GCC_USB4_2_MISC_USB4_SYS_BCR 112 +#define GCC_USB4_2_MISC_RX_CLK_0_BCR 113 +#define GCC_USB4_2_MISC_RX_CLK_1_BCR 114 +#define GCC_USB4_2_MISC_USB_PIPE_BCR 115 +#define GCC_USB4_2_MISC_PCIE_PIPE_BCR 116 +#define GCC_USB4_2_MISC_TMU_BCR 117 +#define GCC_USB4_2_MISC_SB_IF_BCR 118 +#define GCC_USB4_2_MISC_HIA_MSTR_BCR 119 +#define GCC_USB4_2_MISC_AHB_BCR 120 +#define GCC_USB4_2_MISC_DP0_MAX_PCLK_BCR 121 +#define GCC_USB4_2_MISC_DP1_MAX_PCLK_BCR 122 +#define GCC_USB4PHY_PHY_PRIM_BCR 123 +#define GCC_USB4PHY_PHY_SEC_BCR 124 +#define GCC_USB4PHY_PHY_TERT_BCR 125 + #endif diff --git a/include/dt-bindings/clock/r8a779a0-cpg-mssr.h 
b/include/dt-bindings/clock/r8a779a0-cpg-mssr.h index f1d737ca7ca1..124a6b8856df 100644 --- a/include/dt-bindings/clock/r8a779a0-cpg-mssr.h +++ b/include/dt-bindings/clock/r8a779a0-cpg-mssr.h @@ -51,5 +51,6 @@ #define R8A779A0_CLK_CBFUSA 40 #define R8A779A0_CLK_R 41 #define R8A779A0_CLK_OSC 42 +#define R8A779A0_CLK_ZG 43 #endif /* __DT_BINDINGS_CLOCK_R8A779A0_CPG_MSSR_H__ */ diff --git a/include/dt-bindings/clock/rk3568-cru.h b/include/dt-bindings/clock/rk3568-cru.h index 5263085c5b23..18bb8d41d741 100644 --- a/include/dt-bindings/clock/rk3568-cru.h +++ b/include/dt-bindings/clock/rk3568-cru.h @@ -485,6 +485,12 @@ #define CLK_NR_CLKS (PCLK_CORE_PVTM + 1) +/* scmi-clocks indices */ + +#define SCMI_CLK_CPU 0 +#define SCMI_CLK_GPU 1 +#define SCMI_CLK_NPU 2 + /* pmu soft-reset indices */ /* pmucru_softrst_con0 */ #define SRST_P_PDPMU_NIU 0 diff --git a/include/dt-bindings/interconnect/qcom,ipq5424.h b/include/dt-bindings/interconnect/qcom,ipq5424.h index afd7e0683a24..07b786bee7d6 100644 --- a/include/dt-bindings/interconnect/qcom,ipq5424.h +++ b/include/dt-bindings/interconnect/qcom,ipq5424.h @@ -20,8 +20,41 @@ #define SLAVE_CNOC_PCIE3 15 #define MASTER_CNOC_USB 16 #define SLAVE_CNOC_USB 17 +#define MASTER_NSSNOC_NSSCC 18 +#define SLAVE_NSSNOC_NSSCC 19 +#define MASTER_NSSNOC_SNOC_0 20 +#define SLAVE_NSSNOC_SNOC_0 21 +#define MASTER_NSSNOC_SNOC_1 22 +#define SLAVE_NSSNOC_SNOC_1 23 +#define MASTER_NSSNOC_PCNOC_1 24 +#define SLAVE_NSSNOC_PCNOC_1 25 +#define MASTER_NSSNOC_QOSGEN_REF 26 +#define SLAVE_NSSNOC_QOSGEN_REF 27 +#define MASTER_NSSNOC_TIMEOUT_REF 28 +#define SLAVE_NSSNOC_TIMEOUT_REF 29 +#define MASTER_NSSNOC_XO_DCD 30 +#define SLAVE_NSSNOC_XO_DCD 31 +#define MASTER_NSSNOC_ATB 32 +#define SLAVE_NSSNOC_ATB 33 +#define MASTER_CNOC_LPASS_CFG 34 +#define SLAVE_CNOC_LPASS_CFG 35 +#define MASTER_SNOC_LPASS 36 +#define SLAVE_SNOC_LPASS 37 #define MASTER_CPU 0 #define SLAVE_L3 1 +#define MASTER_NSSNOC_PPE 0 +#define SLAVE_NSSNOC_PPE 1 +#define MASTER_NSSNOC_PPE_CFG 2 
+#define SLAVE_NSSNOC_PPE_CFG 3 +#define MASTER_NSSNOC_NSS_CSR 4 +#define SLAVE_NSSNOC_NSS_CSR 5 +#define MASTER_NSSNOC_CE_AXI 6 +#define SLAVE_NSSNOC_CE_AXI 7 +#define MASTER_NSSNOC_CE_APB 8 +#define SLAVE_NSSNOC_CE_APB 9 +#define MASTER_NSSNOC_EIP 10 +#define SLAVE_NSSNOC_EIP 11 + #endif /* INTERCONNECT_QCOM_IPQ5424_H */ diff --git a/include/dt-bindings/power/nvidia,tegra264-bpmp.h b/include/dt-bindings/power/nvidia,tegra264-bpmp.h new file mode 100644 index 000000000000..2eef4a2a02b0 --- /dev/null +++ b/include/dt-bindings/power/nvidia,tegra264-bpmp.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. */ + +#ifndef DT_BINDINGS_POWER_NVIDIA_TEGRA264_BPMP_H +#define DT_BINDINGS_POWER_NVIDIA_TEGRA264_BPMP_H + +#define TEGRA264_POWER_DOMAIN_DISP 1 +#define TEGRA264_POWER_DOMAIN_AUD 2 +/* reserved 3:9 */ +#define TEGRA264_POWER_DOMAIN_XUSB_SS 10 +#define TEGRA264_POWER_DOMAIN_XUSB_DEV 11 +#define TEGRA264_POWER_DOMAIN_XUSB_HOST 12 +#define TEGRA264_POWER_DOMAIN_MGBE0 13 +#define TEGRA264_POWER_DOMAIN_MGBE1 14 +#define TEGRA264_POWER_DOMAIN_MGBE2 15 +#define TEGRA264_POWER_DOMAIN_MGBE3 16 +#define TEGRA264_POWER_DOMAIN_VI 17 +#define TEGRA264_POWER_DOMAIN_VIC 18 +#define TEGRA264_POWER_DOMAIN_ISP0 19 +#define TEGRA264_POWER_DOMAIN_ISP1 20 +#define TEGRA264_POWER_DOMAIN_PVA0 21 +#define TEGRA264_POWER_DOMAIN_GPU 22 + +#endif /* DT_BINDINGS_POWER_NVIDIA_TEGRA264_BPMP_H */ diff --git a/include/dt-bindings/reset/eswin,eic7700-reset.h b/include/dt-bindings/reset/eswin,eic7700-reset.h new file mode 100644 index 000000000000..a370c9f74307 --- /dev/null +++ b/include/dt-bindings/reset/eswin,eic7700-reset.h @@ -0,0 +1,298 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright 2025, Beijing ESWIN Computing Technology Co., Ltd.. + * All rights reserved. + * + * Device Tree binding constants for EIC7700 reset controller. 
+ * + * Authors: + * Yifeng Huang <huangyifeng@eswincomputing.com> + * Xuyang Dong <dongxuyang@eswincomputing.com> + */ + +#ifndef __DT_ESWIN_EIC7700_RESET_H__ +#define __DT_ESWIN_EIC7700_RESET_H__ + +#define EIC7700_RESET_NOC_NSP 0 +#define EIC7700_RESET_NOC_CFG 1 +#define EIC7700_RESET_RNOC_NSP 2 +#define EIC7700_RESET_SNOC_TCU 3 +#define EIC7700_RESET_SNOC_U84 4 +#define EIC7700_RESET_SNOC_PCIE_XSR 5 +#define EIC7700_RESET_SNOC_PCIE_XMR 6 +#define EIC7700_RESET_SNOC_PCIE_PR 7 +#define EIC7700_RESET_SNOC_NPU 8 +#define EIC7700_RESET_SNOC_JTAG 9 +#define EIC7700_RESET_SNOC_DSP 10 +#define EIC7700_RESET_SNOC_DDRC1_P2 11 +#define EIC7700_RESET_SNOC_DDRC1_P1 12 +#define EIC7700_RESET_SNOC_DDRC0_P2 13 +#define EIC7700_RESET_SNOC_DDRC0_P1 14 +#define EIC7700_RESET_SNOC_D2D 15 +#define EIC7700_RESET_SNOC_AON 16 +#define EIC7700_RESET_GPU_AXI 17 +#define EIC7700_RESET_GPU_CFG 18 +#define EIC7700_RESET_GPU_GRAY 19 +#define EIC7700_RESET_GPU_JONES 20 +#define EIC7700_RESET_GPU_SPU 21 +#define EIC7700_RESET_DSP_AXI 22 +#define EIC7700_RESET_DSP_CFG 23 +#define EIC7700_RESET_DSP_DIV4 24 +#define EIC7700_RESET_DSP_DIV0 25 +#define EIC7700_RESET_DSP_DIV1 26 +#define EIC7700_RESET_DSP_DIV2 27 +#define EIC7700_RESET_DSP_DIV3 28 +#define EIC7700_RESET_D2D_AXI 29 +#define EIC7700_RESET_D2D_CFG 30 +#define EIC7700_RESET_D2D_PRST 31 +#define EIC7700_RESET_D2D_RAW_PCS 32 +#define EIC7700_RESET_D2D_RX 33 +#define EIC7700_RESET_D2D_TX 34 +#define EIC7700_RESET_D2D_CORE 35 +#define EIC7700_RESET_DDR1_ARST 36 +#define EIC7700_RESET_DDR1_TRACE 37 +#define EIC7700_RESET_DDR0_ARST 38 +#define EIC7700_RESET_DDR_CFG 39 +#define EIC7700_RESET_DDR0_TRACE 40 +#define EIC7700_RESET_DDR_CORE 41 +#define EIC7700_RESET_DDR_PRST 42 +#define EIC7700_RESET_TCU_AXI 43 +#define EIC7700_RESET_TCU_CFG 44 +#define EIC7700_RESET_TCU_TBU0 45 +#define EIC7700_RESET_TCU_TBU1 46 +#define EIC7700_RESET_TCU_TBU2 47 +#define EIC7700_RESET_TCU_TBU3 48 +#define EIC7700_RESET_TCU_TBU4 49 +#define 
EIC7700_RESET_TCU_TBU5 50 +#define EIC7700_RESET_TCU_TBU6 51 +#define EIC7700_RESET_TCU_TBU7 52 +#define EIC7700_RESET_TCU_TBU8 53 +#define EIC7700_RESET_TCU_TBU9 54 +#define EIC7700_RESET_TCU_TBU10 55 +#define EIC7700_RESET_TCU_TBU11 56 +#define EIC7700_RESET_TCU_TBU12 57 +#define EIC7700_RESET_TCU_TBU13 58 +#define EIC7700_RESET_TCU_TBU14 59 +#define EIC7700_RESET_TCU_TBU15 60 +#define EIC7700_RESET_TCU_TBU16 61 +#define EIC7700_RESET_NPU_AXI 62 +#define EIC7700_RESET_NPU_CFG 63 +#define EIC7700_RESET_NPU_CORE 64 +#define EIC7700_RESET_NPU_E31CORE 65 +#define EIC7700_RESET_NPU_E31BUS 66 +#define EIC7700_RESET_NPU_E31DBG 67 +#define EIC7700_RESET_NPU_LLC 68 +#define EIC7700_RESET_HSP_AXI 69 +#define EIC7700_RESET_HSP_CFG 70 +#define EIC7700_RESET_HSP_POR 71 +#define EIC7700_RESET_MSHC0_PHY 72 +#define EIC7700_RESET_MSHC1_PHY 73 +#define EIC7700_RESET_MSHC2_PHY 74 +#define EIC7700_RESET_MSHC0_TXRX 75 +#define EIC7700_RESET_MSHC1_TXRX 76 +#define EIC7700_RESET_MSHC2_TXRX 77 +#define EIC7700_RESET_SATA_ASIC0 78 +#define EIC7700_RESET_SATA_OOB 79 +#define EIC7700_RESET_SATA_PMALIVE 80 +#define EIC7700_RESET_SATA_RBC 81 +#define EIC7700_RESET_DMA0 82 +#define EIC7700_RESET_HSP_DMA 83 +#define EIC7700_RESET_USB0_VAUX 84 +#define EIC7700_RESET_USB1_VAUX 85 +#define EIC7700_RESET_HSP_SD1_PRST 86 +#define EIC7700_RESET_HSP_SD0_PRST 87 +#define EIC7700_RESET_HSP_EMMC_PRST 88 +#define EIC7700_RESET_HSP_DMA_PRST 89 +#define EIC7700_RESET_HSP_SD1_ARST 90 +#define EIC7700_RESET_HSP_SD0_ARST 91 +#define EIC7700_RESET_HSP_EMMC_ARST 92 +#define EIC7700_RESET_HSP_DMA_ARST 93 +#define EIC7700_RESET_HSP_ETH1_ARST 94 +#define EIC7700_RESET_HSP_ETH0_ARST 95 +#define EIC7700_RESET_SATA_ARST 96 +#define EIC7700_RESET_PCIE_CFG 97 +#define EIC7700_RESET_PCIE_POWEUP 98 +#define EIC7700_RESET_PCIE_PERST 99 +#define EIC7700_RESET_I2C0 100 +#define EIC7700_RESET_I2C1 101 +#define EIC7700_RESET_I2C2 102 +#define EIC7700_RESET_I2C3 103 +#define EIC7700_RESET_I2C4 104 +#define EIC7700_RESET_I2C5 
105 +#define EIC7700_RESET_I2C6 106 +#define EIC7700_RESET_I2C7 107 +#define EIC7700_RESET_I2C8 108 +#define EIC7700_RESET_I2C9 109 +#define EIC7700_RESET_FAN 110 +#define EIC7700_RESET_PVT0 111 +#define EIC7700_RESET_PVT1 112 +#define EIC7700_RESET_MBOX0 113 +#define EIC7700_RESET_MBOX1 114 +#define EIC7700_RESET_MBOX2 115 +#define EIC7700_RESET_MBOX3 116 +#define EIC7700_RESET_MBOX4 117 +#define EIC7700_RESET_MBOX5 118 +#define EIC7700_RESET_MBOX6 119 +#define EIC7700_RESET_MBOX7 120 +#define EIC7700_RESET_MBOX8 121 +#define EIC7700_RESET_MBOX9 122 +#define EIC7700_RESET_MBOX10 123 +#define EIC7700_RESET_MBOX11 124 +#define EIC7700_RESET_MBOX12 125 +#define EIC7700_RESET_MBOX13 126 +#define EIC7700_RESET_MBOX14 127 +#define EIC7700_RESET_MBOX15 128 +#define EIC7700_RESET_UART0 129 +#define EIC7700_RESET_UART1 130 +#define EIC7700_RESET_UART2 131 +#define EIC7700_RESET_UART3 132 +#define EIC7700_RESET_UART4 133 +#define EIC7700_RESET_GPIO0 134 +#define EIC7700_RESET_GPIO1 135 +#define EIC7700_RESET_TIMER 136 +#define EIC7700_RESET_SSI0 137 +#define EIC7700_RESET_SSI1 138 +#define EIC7700_RESET_WDT0 139 +#define EIC7700_RESET_WDT1 140 +#define EIC7700_RESET_WDT2 141 +#define EIC7700_RESET_WDT3 142 +#define EIC7700_RESET_LSP_CFG 143 +#define EIC7700_RESET_U84_CORE0 144 +#define EIC7700_RESET_U84_CORE1 145 +#define EIC7700_RESET_U84_CORE2 146 +#define EIC7700_RESET_U84_CORE3 147 +#define EIC7700_RESET_U84_BUS 148 +#define EIC7700_RESET_U84_DBG 149 +#define EIC7700_RESET_U84_TRACECOM 150 +#define EIC7700_RESET_U84_TRACE0 151 +#define EIC7700_RESET_U84_TRACE1 152 +#define EIC7700_RESET_U84_TRACE2 153 +#define EIC7700_RESET_U84_TRACE3 154 +#define EIC7700_RESET_SCPU_CORE 155 +#define EIC7700_RESET_SCPU_BUS 156 +#define EIC7700_RESET_SCPU_DBG 157 +#define EIC7700_RESET_LPCPU_CORE 158 +#define EIC7700_RESET_LPCPU_BUS 159 +#define EIC7700_RESET_LPCPU_DBG 160 +#define EIC7700_RESET_VC_CFG 161 +#define EIC7700_RESET_VC_AXI 162 +#define EIC7700_RESET_VC_MONCFG 163 +#define 
EIC7700_RESET_JD_CFG 164 +#define EIC7700_RESET_JD_AXI 165 +#define EIC7700_RESET_JE_CFG 166 +#define EIC7700_RESET_JE_AXI 167 +#define EIC7700_RESET_VD_CFG 168 +#define EIC7700_RESET_VD_AXI 169 +#define EIC7700_RESET_VE_AXI 170 +#define EIC7700_RESET_VE_CFG 171 +#define EIC7700_RESET_G2D_CORE 172 +#define EIC7700_RESET_G2D_CFG 173 +#define EIC7700_RESET_G2D_AXI 174 +#define EIC7700_RESET_VI_AXI 175 +#define EIC7700_RESET_VI_CFG 176 +#define EIC7700_RESET_VI_DWE 177 +#define EIC7700_RESET_DVP 178 +#define EIC7700_RESET_ISP0 179 +#define EIC7700_RESET_ISP1 180 +#define EIC7700_RESET_SHUTTR0 181 +#define EIC7700_RESET_SHUTTR1 182 +#define EIC7700_RESET_SHUTTR2 183 +#define EIC7700_RESET_SHUTTR3 184 +#define EIC7700_RESET_SHUTTR4 185 +#define EIC7700_RESET_SHUTTR5 186 +#define EIC7700_RESET_VO_MIPI 187 +#define EIC7700_RESET_VO_PRST 188 +#define EIC7700_RESET_VO_HDMI_PRST 189 +#define EIC7700_RESET_VO_HDMI_PHY 190 +#define EIC7700_RESET_VO_HDMI 191 +#define EIC7700_RESET_VO_I2S 192 +#define EIC7700_RESET_VO_I2S_PRST 193 +#define EIC7700_RESET_VO_AXI 194 +#define EIC7700_RESET_VO_CFG 195 +#define EIC7700_RESET_VO_DC 196 +#define EIC7700_RESET_VO_DC_PRST 197 +#define EIC7700_RESET_BOOTSPI_HRST 198 +#define EIC7700_RESET_BOOTSPI 199 +#define EIC7700_RESET_ANO1 200 +#define EIC7700_RESET_ANO0 201 +#define EIC7700_RESET_DMA1_ARST 202 +#define EIC7700_RESET_DMA1_HRST 203 +#define EIC7700_RESET_FPRT 204 +#define EIC7700_RESET_HBLOCK 205 +#define EIC7700_RESET_SECSR 206 +#define EIC7700_RESET_OTP 207 +#define EIC7700_RESET_PKA 208 +#define EIC7700_RESET_SPACC 209 +#define EIC7700_RESET_TRNG 210 +#define EIC7700_RESET_TIMER0_0 211 +#define EIC7700_RESET_TIMER0_1 212 +#define EIC7700_RESET_TIMER0_2 213 +#define EIC7700_RESET_TIMER0_3 214 +#define EIC7700_RESET_TIMER0_4 215 +#define EIC7700_RESET_TIMER0_5 216 +#define EIC7700_RESET_TIMER0_6 217 +#define EIC7700_RESET_TIMER0_7 218 +#define EIC7700_RESET_TIMER0_N 219 +#define EIC7700_RESET_TIMER1_0 220 +#define 
EIC7700_RESET_TIMER1_1 221 +#define EIC7700_RESET_TIMER1_2 222 +#define EIC7700_RESET_TIMER1_3 223 +#define EIC7700_RESET_TIMER1_4 224 +#define EIC7700_RESET_TIMER1_5 225 +#define EIC7700_RESET_TIMER1_6 226 +#define EIC7700_RESET_TIMER1_7 227 +#define EIC7700_RESET_TIMER1_N 228 +#define EIC7700_RESET_TIMER2_0 229 +#define EIC7700_RESET_TIMER2_1 230 +#define EIC7700_RESET_TIMER2_2 231 +#define EIC7700_RESET_TIMER2_3 232 +#define EIC7700_RESET_TIMER2_4 233 +#define EIC7700_RESET_TIMER2_5 234 +#define EIC7700_RESET_TIMER2_6 235 +#define EIC7700_RESET_TIMER2_7 236 +#define EIC7700_RESET_TIMER2_N 237 +#define EIC7700_RESET_TIMER3_0 238 +#define EIC7700_RESET_TIMER3_1 239 +#define EIC7700_RESET_TIMER3_2 240 +#define EIC7700_RESET_TIMER3_3 241 +#define EIC7700_RESET_TIMER3_4 242 +#define EIC7700_RESET_TIMER3_5 243 +#define EIC7700_RESET_TIMER3_6 244 +#define EIC7700_RESET_TIMER3_7 245 +#define EIC7700_RESET_TIMER3_N 246 +#define EIC7700_RESET_RTC 247 +#define EIC7700_RESET_MNOC_SNOC_NSP 248 +#define EIC7700_RESET_MNOC_VC 249 +#define EIC7700_RESET_MNOC_CFG 250 +#define EIC7700_RESET_MNOC_HSP 251 +#define EIC7700_RESET_MNOC_GPU 252 +#define EIC7700_RESET_MNOC_DDRC1_P3 253 +#define EIC7700_RESET_MNOC_DDRC0_P3 254 +#define EIC7700_RESET_RNOC_VO 255 +#define EIC7700_RESET_RNOC_VI 256 +#define EIC7700_RESET_RNOC_SNOC_NSP 257 +#define EIC7700_RESET_RNOC_CFG 258 +#define EIC7700_RESET_MNOC_DDRC1_P4 259 +#define EIC7700_RESET_MNOC_DDRC0_P4 260 +#define EIC7700_RESET_CNOC_VO_CFG 261 +#define EIC7700_RESET_CNOC_VI_CFG 262 +#define EIC7700_RESET_CNOC_VC_CFG 263 +#define EIC7700_RESET_CNOC_TCU_CFG 264 +#define EIC7700_RESET_CNOC_PCIE_CFG 265 +#define EIC7700_RESET_CNOC_NPU_CFG 266 +#define EIC7700_RESET_CNOC_LSP_CFG 267 +#define EIC7700_RESET_CNOC_HSP_CFG 268 +#define EIC7700_RESET_CNOC_GPU_CFG 269 +#define EIC7700_RESET_CNOC_DSPT_CFG 270 +#define EIC7700_RESET_CNOC_DDRT1_CFG 271 +#define EIC7700_RESET_CNOC_DDRT0_CFG 272 +#define EIC7700_RESET_CNOC_D2D_CFG 273 +#define 
EIC7700_RESET_CNOC_CFG 274 +#define EIC7700_RESET_CNOC_CLMM_CFG 275 +#define EIC7700_RESET_CNOC_AON_CFG 276 +#define EIC7700_RESET_LNOC_CFG 277 +#define EIC7700_RESET_LNOC_NPU_LLC 278 +#define EIC7700_RESET_LNOC_DDRC1_P0 279 +#define EIC7700_RESET_LNOC_DDRC0_P0 280 + +#endif /* __DT_ESWIN_EIC7700_RESET_H__ */ diff --git a/include/dt-bindings/reset/qcom,ipq5424-nsscc.h b/include/dt-bindings/reset/qcom,ipq5424-nsscc.h new file mode 100644 index 000000000000..9627e3b0ad30 --- /dev/null +++ b/include/dt-bindings/reset/qcom,ipq5424-nsscc.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +#ifndef _DT_BINDINGS_RESET_QCOM_IPQ5424_NSSCC_H +#define _DT_BINDINGS_RESET_QCOM_IPQ5424_NSSCC_H + +#define NSS_CC_CE_APB_CLK_ARES 0 +#define NSS_CC_CE_AXI_CLK_ARES 1 +#define NSS_CC_DEBUG_CLK_ARES 2 +#define NSS_CC_EIP_CLK_ARES 3 +#define NSS_CC_NSS_CSR_CLK_ARES 4 +#define NSS_CC_NSSNOC_CE_APB_CLK_ARES 5 +#define NSS_CC_NSSNOC_CE_AXI_CLK_ARES 6 +#define NSS_CC_NSSNOC_EIP_CLK_ARES 7 +#define NSS_CC_NSSNOC_NSS_CSR_CLK_ARES 8 +#define NSS_CC_NSSNOC_PPE_CLK_ARES 9 +#define NSS_CC_NSSNOC_PPE_CFG_CLK_ARES 10 +#define NSS_CC_PORT1_MAC_CLK_ARES 11 +#define NSS_CC_PORT1_RX_CLK_ARES 12 +#define NSS_CC_PORT1_TX_CLK_ARES 13 +#define NSS_CC_PORT2_MAC_CLK_ARES 14 +#define NSS_CC_PORT2_RX_CLK_ARES 15 +#define NSS_CC_PORT2_TX_CLK_ARES 16 +#define NSS_CC_PORT3_MAC_CLK_ARES 17 +#define NSS_CC_PORT3_RX_CLK_ARES 18 +#define NSS_CC_PORT3_TX_CLK_ARES 19 +#define NSS_CC_PPE_BCR 20 +#define NSS_CC_PPE_EDMA_CLK_ARES 21 +#define NSS_CC_PPE_EDMA_CFG_CLK_ARES 22 +#define NSS_CC_PPE_SWITCH_BTQ_CLK_ARES 23 +#define NSS_CC_PPE_SWITCH_CLK_ARES 24 +#define NSS_CC_PPE_SWITCH_CFG_CLK_ARES 25 +#define NSS_CC_PPE_SWITCH_IPE_CLK_ARES 26 +#define NSS_CC_UNIPHY_PORT1_RX_CLK_ARES 27 +#define NSS_CC_UNIPHY_PORT1_TX_CLK_ARES 28 +#define NSS_CC_UNIPHY_PORT2_RX_CLK_ARES 29 +#define NSS_CC_UNIPHY_PORT2_TX_CLK_ARES 30 
+#define NSS_CC_UNIPHY_PORT3_RX_CLK_ARES 31 +#define NSS_CC_UNIPHY_PORT3_TX_CLK_ARES 32 +#define NSS_CC_XGMAC0_PTP_REF_CLK_ARES 33 +#define NSS_CC_XGMAC1_PTP_REF_CLK_ARES 34 +#define NSS_CC_XGMAC2_PTP_REF_CLK_ARES 35 + +#endif diff --git a/include/dt-bindings/reset/thead,th1520-reset.h b/include/dt-bindings/reset/thead,th1520-reset.h index ee799286c175..ba6805b6b12a 100644 --- a/include/dt-bindings/reset/thead,th1520-reset.h +++ b/include/dt-bindings/reset/thead,th1520-reset.h @@ -7,11 +7,202 @@ #ifndef _DT_BINDINGS_TH1520_RESET_H #define _DT_BINDINGS_TH1520_RESET_H +/* AO Subsystem */ +#define TH1520_RESET_ID_SYSTEM 0 +#define TH1520_RESET_ID_RTC_APB 1 +#define TH1520_RESET_ID_RTC_REF 2 +#define TH1520_RESET_ID_AOGPIO_DB 3 +#define TH1520_RESET_ID_AOGPIO_APB 4 +#define TH1520_RESET_ID_AOI2C_APB 5 +#define TH1520_RESET_ID_PVT_APB 6 +#define TH1520_RESET_ID_E902_CORE 7 +#define TH1520_RESET_ID_E902_HAD 8 +#define TH1520_RESET_ID_AOTIMER_APB 9 +#define TH1520_RESET_ID_AOTIMER_CORE 10 +#define TH1520_RESET_ID_AOWDT_APB 11 +#define TH1520_RESET_ID_APSYS 12 +#define TH1520_RESET_ID_NPUSYS 13 +#define TH1520_RESET_ID_DDRSYS 14 +#define TH1520_RESET_ID_AXI_AP2CP 15 +#define TH1520_RESET_ID_AXI_CP2AP 16 +#define TH1520_RESET_ID_AXI_CP2SRAM 17 +#define TH1520_RESET_ID_AUDSYS_CORE 18 +#define TH1520_RESET_ID_AUDSYS_IOPMP 19 +#define TH1520_RESET_ID_AUDSYS 20 +#define TH1520_RESET_ID_DSP0 21 +#define TH1520_RESET_ID_DSP1 22 +#define TH1520_RESET_ID_GPU_MODULE 23 +#define TH1520_RESET_ID_VDEC 24 +#define TH1520_RESET_ID_VENC 25 +#define TH1520_RESET_ID_ADC_APB 26 +#define TH1520_RESET_ID_AUDGPIO_DB 27 +#define TH1520_RESET_ID_AUDGPIO_APB 28 +#define TH1520_RESET_ID_AOUART_IF 29 +#define TH1520_RESET_ID_AOUART_APB 30 +#define TH1520_RESET_ID_SRAM_AXI_P0 31 +#define TH1520_RESET_ID_SRAM_AXI_P1 32 +#define TH1520_RESET_ID_SRAM_AXI_P2 33 +#define TH1520_RESET_ID_SRAM_AXI_P3 34 +#define TH1520_RESET_ID_SRAM_AXI_P4 35 +#define TH1520_RESET_ID_SRAM_AXI_CORE 36 +#define 
TH1520_RESET_ID_SE 37 + +/* AP Subsystem */ +#define TH1520_RESET_ID_BROM 0 +#define TH1520_RESET_ID_C910_TOP 1 +#define TH1520_RESET_ID_NPU 2 +#define TH1520_RESET_ID_WDT0 3 +#define TH1520_RESET_ID_WDT1 4 +#define TH1520_RESET_ID_C910_C0 5 +#define TH1520_RESET_ID_C910_C1 6 +#define TH1520_RESET_ID_C910_C2 7 +#define TH1520_RESET_ID_C910_C3 8 +#define TH1520_RESET_ID_CHIP_DBG_CORE 9 +#define TH1520_RESET_ID_CHIP_DBG_AXI 10 +#define TH1520_RESET_ID_AXI4_CPUSYS2_AXI 11 +#define TH1520_RESET_ID_AXI4_CPUSYS2_APB 12 +#define TH1520_RESET_ID_X2H_CPUSYS 13 +#define TH1520_RESET_ID_AHB2_CPUSYS 14 +#define TH1520_RESET_ID_APB3_CPUSYS 15 +#define TH1520_RESET_ID_MBOX0_APB 16 +#define TH1520_RESET_ID_MBOX1_APB 17 +#define TH1520_RESET_ID_MBOX2_APB 18 +#define TH1520_RESET_ID_MBOX3_APB 19 +#define TH1520_RESET_ID_TIMER0_APB 20 +#define TH1520_RESET_ID_TIMER0_CORE 21 +#define TH1520_RESET_ID_TIMER1_APB 22 +#define TH1520_RESET_ID_TIMER1_CORE 23 +#define TH1520_RESET_ID_PERISYS_AHB 24 +#define TH1520_RESET_ID_PERISYS_APB1 25 +#define TH1520_RESET_ID_PERISYS_APB2 26 +#define TH1520_RESET_ID_GMAC0_APB 27 +#define TH1520_RESET_ID_GMAC0_AHB 28 +#define TH1520_RESET_ID_GMAC0_CLKGEN 29 +#define TH1520_RESET_ID_GMAC0_AXI 30 +#define TH1520_RESET_ID_UART0_APB 31 +#define TH1520_RESET_ID_UART0_IF 32 +#define TH1520_RESET_ID_UART1_APB 33 +#define TH1520_RESET_ID_UART1_IF 34 +#define TH1520_RESET_ID_UART2_APB 35 +#define TH1520_RESET_ID_UART2_IF 36 +#define TH1520_RESET_ID_UART3_APB 37 +#define TH1520_RESET_ID_UART3_IF 38 +#define TH1520_RESET_ID_UART4_APB 39 +#define TH1520_RESET_ID_UART4_IF 40 +#define TH1520_RESET_ID_UART5_APB 41 +#define TH1520_RESET_ID_UART5_IF 42 +#define TH1520_RESET_ID_QSPI0_IF 43 +#define TH1520_RESET_ID_QSPI0_APB 44 +#define TH1520_RESET_ID_QSPI1_IF 45 +#define TH1520_RESET_ID_QSPI1_APB 46 +#define TH1520_RESET_ID_SPI_IF 47 +#define TH1520_RESET_ID_SPI_APB 48 +#define TH1520_RESET_ID_I2C0_APB 49 +#define TH1520_RESET_ID_I2C0_CORE 50 +#define 
TH1520_RESET_ID_I2C1_APB 51 +#define TH1520_RESET_ID_I2C1_CORE 52 +#define TH1520_RESET_ID_I2C2_APB 53 +#define TH1520_RESET_ID_I2C2_CORE 54 +#define TH1520_RESET_ID_I2C3_APB 55 +#define TH1520_RESET_ID_I2C3_CORE 56 +#define TH1520_RESET_ID_I2C4_APB 57 +#define TH1520_RESET_ID_I2C4_CORE 58 +#define TH1520_RESET_ID_I2C5_APB 59 +#define TH1520_RESET_ID_I2C5_CORE 60 +#define TH1520_RESET_ID_GPIO0_DB 61 +#define TH1520_RESET_ID_GPIO0_APB 62 +#define TH1520_RESET_ID_GPIO1_DB 63 +#define TH1520_RESET_ID_GPIO1_APB 64 +#define TH1520_RESET_ID_GPIO2_DB 65 +#define TH1520_RESET_ID_GPIO2_APB 66 +#define TH1520_RESET_ID_PWM_COUNTER 67 +#define TH1520_RESET_ID_PWM_APB 68 +#define TH1520_RESET_ID_PADCTRL0_APB 69 +#define TH1520_RESET_ID_CPU2PERI_X2H 70 +#define TH1520_RESET_ID_CPU2AON_X2H 71 +#define TH1520_RESET_ID_AON2CPU_A2X 72 +#define TH1520_RESET_ID_NPUSYS_AXI 73 +#define TH1520_RESET_ID_NPUSYS_AXI_APB 74 +#define TH1520_RESET_ID_CPU2VP_X2P 75 +#define TH1520_RESET_ID_CPU2VI_X2H 76 +#define TH1520_RESET_ID_BMU_AXI 77 +#define TH1520_RESET_ID_BMU_APB 78 +#define TH1520_RESET_ID_DMAC_CPUSYS_AXI 79 +#define TH1520_RESET_ID_DMAC_CPUSYS_AHB 80 +#define TH1520_RESET_ID_SPINLOCK 81 +#define TH1520_RESET_ID_CFG2TEE 82 +#define TH1520_RESET_ID_DSMART 83 +#define TH1520_RESET_ID_GPIO3_DB 84 +#define TH1520_RESET_ID_GPIO3_APB 85 +#define TH1520_RESET_ID_PERI_I2S 86 +#define TH1520_RESET_ID_PERI_APB3 87 +#define TH1520_RESET_ID_PERI2PERI1_APB 88 +#define TH1520_RESET_ID_VPSYS_APB 89 +#define TH1520_RESET_ID_PERISYS_APB4 90 +#define TH1520_RESET_ID_GMAC1_APB 91 +#define TH1520_RESET_ID_GMAC1_AHB 92 +#define TH1520_RESET_ID_GMAC1_CLKGEN 93 +#define TH1520_RESET_ID_GMAC1_AXI 94 +#define TH1520_RESET_ID_GMAC_AXI 95 +#define TH1520_RESET_ID_GMAC_AXI_APB 96 +#define TH1520_RESET_ID_PADCTRL1_APB 97 +#define TH1520_RESET_ID_VOSYS_AXI 98 +#define TH1520_RESET_ID_VOSYS_AXI_APB 99 +#define TH1520_RESET_ID_VOSYS_AXI_X2X 100 +#define TH1520_RESET_ID_MISC2VP_X2X 101 +#define TH1520_RESET_ID_DSPSYS 
102 +#define TH1520_RESET_ID_VISYS 103 +#define TH1520_RESET_ID_VOSYS 104 +#define TH1520_RESET_ID_VPSYS 105 + +/* DSP Subsystem */ +#define TH1520_RESET_ID_X2X_DSP1 0 +#define TH1520_RESET_ID_X2X_DSP0 1 +#define TH1520_RESET_ID_X2X_SLAVE_DSP1 2 +#define TH1520_RESET_ID_X2X_SLAVE_DSP0 3 +#define TH1520_RESET_ID_DSP0_CORE 4 +#define TH1520_RESET_ID_DSP0_DEBUG 5 +#define TH1520_RESET_ID_DSP0_APB 6 +#define TH1520_RESET_ID_DSP1_CORE 7 +#define TH1520_RESET_ID_DSP1_DEBUG 8 +#define TH1520_RESET_ID_DSP1_APB 9 +#define TH1520_RESET_ID_DSPSYS_APB 10 +#define TH1520_RESET_ID_AXI4_DSPSYS_SLV 11 +#define TH1520_RESET_ID_AXI4_DSPSYS 12 +#define TH1520_RESET_ID_AXI4_DSP_RS 13 + +/* MISC Subsystem */ +#define TH1520_RESET_ID_EMMC_SDIO_CLKGEN 0 +#define TH1520_RESET_ID_EMMC 1 +#define TH1520_RESET_ID_MISCSYS_AXI 2 +#define TH1520_RESET_ID_MISCSYS_AXI_APB 3 +#define TH1520_RESET_ID_SDIO0 4 +#define TH1520_RESET_ID_SDIO1 5 +#define TH1520_RESET_ID_USB3_APB 6 +#define TH1520_RESET_ID_USB3_PHY 7 +#define TH1520_RESET_ID_USB3_VCC 8 + +/* VI Subsystem */ +#define TH1520_RESET_ID_ISP0 0 +#define TH1520_RESET_ID_ISP1 1 +#define TH1520_RESET_ID_CSI0_APB 2 +#define TH1520_RESET_ID_CSI1_APB 3 +#define TH1520_RESET_ID_CSI2_APB 4 +#define TH1520_RESET_ID_MIPI_FIFO 5 +#define TH1520_RESET_ID_ISP_VENC_APB 6 +#define TH1520_RESET_ID_VIPRE_APB 7 +#define TH1520_RESET_ID_VIPRE_AXI 8 +#define TH1520_RESET_ID_DW200_APB 9 +#define TH1520_RESET_ID_VISYS3_AXI 10 +#define TH1520_RESET_ID_VISYS2_AXI 11 +#define TH1520_RESET_ID_VISYS1_AXI 12 +#define TH1520_RESET_ID_VISYS_AXI 13 +#define TH1520_RESET_ID_VISYS_APB 14 +#define TH1520_RESET_ID_ISP_VENC_AXI 15 + +/* VO Subsystem */ #define TH1520_RESET_ID_GPU 0 #define TH1520_RESET_ID_GPU_CLKGEN 1 -#define TH1520_RESET_ID_NPU 2 -#define TH1520_RESET_ID_WDT0 3 -#define TH1520_RESET_ID_WDT1 4 #define TH1520_RESET_ID_DPU_AHB 5 #define TH1520_RESET_ID_DPU_AXI 6 #define TH1520_RESET_ID_DPU_CORE 7 @@ -19,5 +210,27 @@ #define TH1520_RESET_ID_DSI1_APB 9 #define 
TH1520_RESET_ID_HDMI 10 #define TH1520_RESET_ID_HDMI_APB 11 +#define TH1520_RESET_ID_VOAXI 12 +#define TH1520_RESET_ID_VOAXI_APB 13 +#define TH1520_RESET_ID_X2H_DPU_AXI 14 +#define TH1520_RESET_ID_X2H_DPU_AHB 15 +#define TH1520_RESET_ID_X2H_DPU1_AXI 16 +#define TH1520_RESET_ID_X2H_DPU1_AHB 17 + +/* VP Subsystem */ +#define TH1520_RESET_ID_VPSYS_AXI_APB 0 +#define TH1520_RESET_ID_VPSYS_AXI 1 +#define TH1520_RESET_ID_FCE_APB 2 +#define TH1520_RESET_ID_FCE_CORE 3 +#define TH1520_RESET_ID_FCE_X2X_MASTER 4 +#define TH1520_RESET_ID_FCE_X2X_SLAVE 5 +#define TH1520_RESET_ID_G2D_APB 6 +#define TH1520_RESET_ID_G2D_ACLK 7 +#define TH1520_RESET_ID_G2D_CORE 8 +#define TH1520_RESET_ID_VDEC_APB 9 +#define TH1520_RESET_ID_VDEC_ACLK 10 +#define TH1520_RESET_ID_VDEC_CORE 11 +#define TH1520_RESET_ID_VENC_APB 12 +#define TH1520_RESET_ID_VENC_CORE 13 #endif /* _DT_BINDINGS_TH1520_RESET_H */ diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 7a0b972eb1b1..b261fb3968d0 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -59,6 +59,9 @@ struct vgic_global { /* virtual control interface mapping, HYP VA */ void __iomem *vctrl_hyp; + /* Physical CPU interface, kernel VA */ + void __iomem *gicc_base; + /* Number of implemented list registers */ int nr_lr; @@ -120,6 +123,7 @@ struct irq_ops { struct vgic_irq { raw_spinlock_t irq_lock; /* Protects the content of the struct */ + u32 intid; /* Guest visible INTID */ struct rcu_head rcu; struct list_head ap_list; @@ -134,17 +138,18 @@ struct vgic_irq { * affinity reg (v3). */ - u32 intid; /* Guest visible INTID */ - bool line_level; /* Level only */ - bool pending_latch; /* The pending latch state used to calculate - * the pending state for both level - * and edge triggered IRQs. 
*/ - bool active; - bool pending_release; /* Used for LPIs only, unreferenced IRQ + bool pending_release:1; /* Used for LPIs only, unreferenced IRQ * pending a release */ - bool enabled; - bool hw; /* Tied to HW IRQ */ + bool pending_latch:1; /* The pending latch state used to calculate + * the pending state for both level + * and edge triggered IRQs. */ + enum vgic_irq_config config:1; /* Level or edge */ + bool line_level:1; /* Level only */ + bool enabled:1; + bool active:1; + bool hw:1; /* Tied to HW IRQ */ + bool on_lr:1; /* Present in a CPU LR */ refcount_t refcount; /* Used for LPIs */ u32 hwintid; /* HW INTID number */ unsigned int host_irq; /* linux irq corresponding to hwintid */ @@ -156,7 +161,6 @@ struct vgic_irq { u8 active_source; /* GICv2 SGIs only */ u8 priority; u8 group; /* 0 == group 0, 1 == group 1 */ - enum vgic_irq_config config; /* Level or edge */ struct irq_ops *ops; @@ -259,6 +263,9 @@ struct vgic_dist { /* The GIC maintenance IRQ for nested hypervisors. */ u32 mi_intid; + /* Track the number of in-flight active SPIs */ + atomic_t active_spis; + /* base addresses in guest physical address space: */ gpa_t vgic_dist_base; /* distributor */ union { @@ -280,6 +287,7 @@ struct vgic_dist { struct vgic_irq *spis; struct vgic_io_device dist_iodev; + struct vgic_io_device cpuif_iodev; bool has_its; bool table_write_in_progress; @@ -417,6 +425,7 @@ bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid); +void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu); void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1); diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 0c2a8b846c20..ebd7f8935f96 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -80,6 +80,11 @@ extern struct cpu_topology 
cpu_topology[NR_CPUS]; #define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) #define topology_cluster_cpumask(cpu) (&cpu_topology[cpu].cluster_sibling) #define topology_llc_cpumask(cpu) (&cpu_topology[cpu].llc_sibling) + +#ifndef arch_cpu_is_threaded +#define arch_cpu_is_threaded() (0) +#endif + void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); diff --git a/include/linux/ata.h b/include/linux/ata.h index c9013e472aa3..54b416e26995 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -29,6 +29,7 @@ enum { ATA_MAX_SECTORS_128 = 128, ATA_MAX_SECTORS = 256, ATA_MAX_SECTORS_1024 = 1024, + ATA_MAX_SECTORS_8191 = 8191, ATA_MAX_SECTORS_LBA48 = 65535,/* avoid count to be 0000h */ ATA_MAX_SECTORS_TAPE = 65535, ATA_MAX_TRIM_RNUM = 64, /* 512-byte payload / (6-byte LBA + 2-byte range per entry) */ diff --git a/include/linux/cache_coherency.h b/include/linux/cache_coherency.h new file mode 100644 index 000000000000..cc81c5733e31 --- /dev/null +++ b/include/linux/cache_coherency.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Cache coherency maintenance operation device drivers + * + * Copyright Huawei 2025 + */ +#ifndef _LINUX_CACHE_COHERENCY_H_ +#define _LINUX_CACHE_COHERENCY_H_ + +#include <linux/list.h> +#include <linux/kref.h> +#include <linux/types.h> + +struct cc_inval_params { + phys_addr_t addr; + size_t size; +}; + +struct cache_coherency_ops_inst; + +struct cache_coherency_ops { + int (*wbinv)(struct cache_coherency_ops_inst *cci, + struct cc_inval_params *invp); + int (*done)(struct cache_coherency_ops_inst *cci); +}; + +struct cache_coherency_ops_inst { + struct kref kref; + struct list_head node; + const struct cache_coherency_ops *ops; +}; + +int cache_coherency_ops_instance_register(struct cache_coherency_ops_inst *cci); +void cache_coherency_ops_instance_unregister(struct cache_coherency_ops_inst *cci); + +struct cache_coherency_ops_inst * 
+_cache_coherency_ops_instance_alloc(const struct cache_coherency_ops *ops, + size_t size); +/** + * cache_coherency_ops_instance_alloc - Allocate cache coherency ops instance + * @ops: Cache maintenance operations + * @drv_struct: structure that contains the struct cache_coherency_ops_inst + * @member: Name of the struct cache_coherency_ops_inst member in @drv_struct. + * + * This allocates a driver specific structure and initializes the + * cache_coherency_ops_inst embedded in the drv_struct. Upon success the + * pointer must be freed via cache_coherency_ops_instance_put(). + * + * Returns a &drv_struct * on success, %NULL on error. + */ +#define cache_coherency_ops_instance_alloc(ops, drv_struct, member) \ + ({ \ + static_assert(__same_type(struct cache_coherency_ops_inst, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member) == 0); \ + (drv_struct *)_cache_coherency_ops_instance_alloc(ops, \ + sizeof(drv_struct)); \ + }) +void cache_coherency_ops_instance_put(struct cache_coherency_ops_inst *cci); + +#endif diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 698520b1bfdb..ef65c75beeaa 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -64,8 +64,8 @@ extern void config_item_put(struct config_item *); struct config_item_type { struct module *ct_owner; - struct configfs_item_operations *ct_item_ops; - struct configfs_group_operations *ct_group_ops; + const struct configfs_item_operations *ct_item_ops; + const struct configfs_group_operations *ct_group_ops; struct configfs_attribute **ct_attrs; struct configfs_bin_attribute **ct_bin_attrs; }; diff --git a/include/linux/damon.h b/include/linux/damon.h index cae8c613c5fc..3813373a9200 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -91,17 +91,23 @@ struct damon_region { * @nr_regions: Number of monitoring target regions of this target. * @regions_list: Head of the monitoring target regions of this target. 
* @list: List head for siblings. + * @obsolete: Whether the commit destination target is obsolete. * * Each monitoring context could have multiple targets. For example, a context * for virtual memory address spaces could have multiple target processes. The * @pid should be set for appropriate &struct damon_operations including the * virtual address spaces monitoring operations. + * + * @obsolete is used only for damon_commit_targets() source targets, to specify + * the matching destination targets are obsolete. Read damon_commit_targets() + * to see how it is handled. */ struct damon_target { struct pid *pid; unsigned int nr_regions; struct list_head regions_list; struct list_head list; + bool obsolete; }; /** @@ -147,6 +153,8 @@ enum damos_action { * @DAMOS_QUOTA_SOME_MEM_PSI_US: System level some memory PSI in us. * @DAMOS_QUOTA_NODE_MEM_USED_BP: MemUsed ratio of a node. * @DAMOS_QUOTA_NODE_MEM_FREE_BP: MemFree ratio of a node. + * @DAMOS_QUOTA_NODE_MEMCG_USED_BP: MemUsed ratio of a node for a cgroup. + * @DAMOS_QUOTA_NODE_MEMCG_FREE_BP: MemFree ratio of a node for a cgroup. * @NR_DAMOS_QUOTA_GOAL_METRICS: Number of DAMOS quota goal metrics. * * Metrics equal to larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported. @@ -156,6 +164,8 @@ enum damos_quota_goal_metric { DAMOS_QUOTA_SOME_MEM_PSI_US, DAMOS_QUOTA_NODE_MEM_USED_BP, DAMOS_QUOTA_NODE_MEM_FREE_BP, + DAMOS_QUOTA_NODE_MEMCG_USED_BP, + DAMOS_QUOTA_NODE_MEMCG_FREE_BP, NR_DAMOS_QUOTA_GOAL_METRICS, }; @@ -166,6 +176,7 @@ enum damos_quota_goal_metric { * @current_value: Current value of @metric. * @last_psi_total: Last measured total PSI * @nid: Node id. + * @memcg_id: Memcg id. * @list: List head for siblings. * * Data structure for getting the current score of the quota tuning goal. The @@ -176,6 +187,12 @@ enum damos_quota_goal_metric { * If @metric is DAMOS_QUOTA_USER_INPUT, @current_value should be manually * entered by the user, probably inside the kdamond callbacks. 
Otherwise, * DAMON sets @current_value with self-measured value of @metric. + * + * If @metric is DAMOS_QUOTA_NODE_MEM_{USED,FREE}_BP, @nid represents the node + * id of the target node to account the used/free memory. + * + * If @metric is DAMOS_QUOTA_NODE_MEMCG_{USED,FREE}_BP, @nid and @memcg_id + * represent the node id and the cgroup to account the used memory for. */ struct damos_quota_goal { enum damos_quota_goal_metric metric; @@ -184,7 +201,10 @@ struct damos_quota_goal { /* metric-dependent fields */ union { u64 last_psi_total; - int nid; + struct { + int nid; + unsigned short memcg_id; + }; }; struct list_head list; }; @@ -472,7 +492,7 @@ struct damos_migrate_dests { * @wmarks: Watermarks for automated (in)activation of this scheme. * @migrate_dests: Destination nodes if @action is "migrate_{hot,cold}". * @target_nid: Destination node if @action is "migrate_{hot,cold}". - * @filters: Additional set of &struct damos_filter for &action. + * @core_filters: Additional set of &struct damos_filter for &action. * @ops_filters: ops layer handling &struct damos_filter objects list. * @last_applied: Last @action applied ops-managing entity. * @stat: Statistics of this scheme. @@ -498,7 +518,7 @@ struct damos_migrate_dests { * * Before applying the &action to a memory region, &struct damon_operations * implementation could check pages of the region and skip &action to respect - * &filters + * &core_filters * * The minimum entity that @action can be applied depends on the underlying * &struct damon_operations.
Since it may not be aligned with the core layer @@ -542,7 +562,7 @@ struct damos { struct damos_migrate_dests migrate_dests; }; }; - struct list_head filters; + struct list_head core_filters; struct list_head ops_filters; void *last_applied; struct damos_stat stat; @@ -851,11 +871,11 @@ static inline unsigned long damon_sz_region(struct damon_region *r) #define damos_for_each_quota_goal_safe(goal, next, quota) \ list_for_each_entry_safe(goal, next, &(quota)->goals, list) -#define damos_for_each_filter(f, scheme) \ - list_for_each_entry(f, &(scheme)->filters, list) +#define damos_for_each_core_filter(f, scheme) \ + list_for_each_entry(f, &(scheme)->core_filters, list) -#define damos_for_each_filter_safe(f, next, scheme) \ - list_for_each_entry_safe(f, next, &(scheme)->filters, list) +#define damos_for_each_core_filter_safe(f, next, scheme) \ + list_for_each_entry_safe(f, next, &(scheme)->core_filters, list) #define damos_for_each_ops_filter(f, scheme) \ list_for_each_entry(f, &(scheme)->ops_filters, list) @@ -947,7 +967,8 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control); int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control); int damon_set_region_biggest_system_ram_default(struct damon_target *t, - unsigned long *start, unsigned long *end); + unsigned long *start, unsigned long *end, + unsigned long min_sz_region); #endif /* CONFIG_DAMON */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index c83e02b94389..898c60d21c92 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -198,7 +198,6 @@ enum dentry_flags { DCACHE_REFERENCED = BIT(6), /* Recently used, don't discard. 
*/ DCACHE_DONTCACHE = BIT(7), /* Purge from memory on final dput() */ DCACHE_CANT_MOUNT = BIT(8), - DCACHE_GENOCIDE = BIT(9), DCACHE_SHRINK_LIST = BIT(10), DCACHE_OP_WEAK_REVALIDATE = BIT(11), /* @@ -225,6 +224,7 @@ enum dentry_flags { DCACHE_PAR_LOOKUP = BIT(24), /* being looked up (with parent locked shared) */ DCACHE_DENTRY_CURSOR = BIT(25), DCACHE_NORCU = BIT(26), /* No RCU delay for freeing */ + DCACHE_PERSISTENT = BIT(27) }; #define DCACHE_MANAGED_DENTRY \ @@ -268,6 +268,8 @@ extern void d_tmpfile(struct file *, struct inode *); extern struct dentry *d_find_alias(struct inode *); extern void d_prune_aliases(struct inode *); +extern void d_dispose_if_unused(struct dentry *, struct list_head *); +extern void shrink_dentry_list(struct list_head *); extern struct dentry *d_find_alias_rcu(struct inode *); @@ -610,5 +612,7 @@ static inline struct dentry *d_next_sibling(const struct dentry *dentry) } void set_default_d_op(struct super_block *, const struct dentry_operations *); +struct dentry *d_make_persistent(struct dentry *, struct inode *); +void d_make_discardable(struct dentry *dentry); #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/device.h b/include/linux/device.h index b031ff71a5bd..0be95294b6e6 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -281,25 +281,6 @@ int __must_check device_create_bin_file(struct device *dev, void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr); -/** - * devm_alloc_percpu - Resource-managed alloc_percpu - * @dev: Device to allocate per-cpu memory for - * @type: Type to allocate per-cpu memory for - * - * Managed alloc_percpu. Per-cpu memory allocated with this function is - * automatically freed on driver detach. - * - * RETURNS: - * Pointer to allocated memory on success, NULL on failure. 
- */ -#define devm_alloc_percpu(dev, type) \ - ((typeof(type) __percpu *)__devm_alloc_percpu((dev), sizeof(type), \ - __alignof__(type))) - -void __percpu *__devm_alloc_percpu(struct device *dev, size_t size, - size_t align); -void devm_free_percpu(struct device *dev, void __percpu *pdata); - struct device_dma_parameters { /* * a low level driver may set these to teach IOMMU code about diff --git a/include/linux/device/devres.h b/include/linux/device/devres.h index 8c5f57e0d613..9c1e3d643d69 100644 --- a/include/linux/device/devres.h +++ b/include/linux/device/devres.h @@ -9,6 +9,7 @@ #include <linux/stdarg.h> #include <linux/types.h> #include <asm/bug.h> +#include <asm/percpu.h> struct device; struct device_node; @@ -96,6 +97,22 @@ devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, va_list ap); char * __printf(3, 4) __malloc devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...); +/** + * devm_alloc_percpu - Resource-managed alloc_percpu + * @dev: Device to allocate per-cpu memory for + * @type: Type to allocate per-cpu memory for + * + * Managed alloc_percpu. Per-cpu memory allocated with this function is + * automatically freed on driver detach. + * + * RETURNS: + * Pointer to allocated memory on success, NULL on failure. + */ +#define devm_alloc_percpu(dev, type) \ + ((typeof(type) __percpu *)__devm_alloc_percpu((dev), sizeof(type), __alignof__(type))) + +void __percpu *__devm_alloc_percpu(struct device *dev, size_t size, size_t align); + unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order); void devm_free_pages(struct device *dev, unsigned long addr); diff --git a/include/linux/err.h b/include/linux/err.h index 1d60aa86db53..8c37be0620ab 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -41,6 +41,14 @@ static inline void * __must_check ERR_PTR(long error) return (void *) error; } +/** + * INIT_ERR_PTR - Init a const error pointer. + * @error: A negative error code. 
+ * + * Like ERR_PTR(), but usable to initialize static variables. + */ +#define INIT_ERR_PTR(error) ((void *)(error)) + /* Return the pointer in the percpu address space. */ #define ERR_PTR_PCPU(error) ((void __percpu *)(unsigned long)ERR_PTR(error)) diff --git a/include/linux/firmware/qcom/qcom_tzmem.h b/include/linux/firmware/qcom/qcom_tzmem.h index 48ac0e5454c7..23173e0c3ddd 100644 --- a/include/linux/firmware/qcom/qcom_tzmem.h +++ b/include/linux/firmware/qcom/qcom_tzmem.h @@ -17,11 +17,20 @@ struct qcom_tzmem_pool; * enum qcom_tzmem_policy - Policy for pool growth. */ enum qcom_tzmem_policy { - /**< Static pool, never grow above initial size. */ + /** + * @QCOM_TZMEM_POLICY_STATIC: Static pool, + * never grow above initial size. + */ QCOM_TZMEM_POLICY_STATIC = 1, - /**< When out of memory, add increment * current size of memory. */ + /** + * @QCOM_TZMEM_POLICY_MULTIPLIER: When out of memory, + * add increment * current size of memory. + */ QCOM_TZMEM_POLICY_MULTIPLIER, - /**< When out of memory add as much as is needed until max_size. */ + /** + * @QCOM_TZMEM_POLICY_ON_DEMAND: When out of memory + * add as much as is needed until max_size. + */ QCOM_TZMEM_POLICY_ON_DEMAND, }; diff --git a/include/linux/firmware/xlnx-zynqmp-ufs.h b/include/linux/firmware/xlnx-zynqmp-ufs.h new file mode 100644 index 000000000000..d3538dd5822a --- /dev/null +++ b/include/linux/firmware/xlnx-zynqmp-ufs.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Firmware layer for UFS APIs. + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. 
+ */ + +#ifndef __FIRMWARE_XLNX_ZYNQMP_UFS_H__ +#define __FIRMWARE_XLNX_ZYNQMP_UFS_H__ + +#if IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE) +int zynqmp_pm_is_mphy_tx_rx_config_ready(bool *is_ready); +int zynqmp_pm_is_sram_init_done(bool *is_done); +int zynqmp_pm_set_sram_bypass(void); +int zynqmp_pm_get_ufs_calibration_values(u32 *val); +#else +static inline int zynqmp_pm_is_mphy_tx_rx_config_ready(bool *is_ready) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_is_sram_init_done(bool *is_done) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_set_sram_bypass(void) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_get_ufs_calibration_values(u32 *val) +{ + return -ENODEV; +} +#endif + +#endif /* __FIRMWARE_XLNX_ZYNQMP_UFS_H__ */ diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index ae48d619c4e0..15fdbd089bbf 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -3,7 +3,7 @@ * Xilinx Zynq MPSoC Firmware layer * * Copyright (C) 2014-2021 Xilinx - * Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. + * Copyright (C) 2022 - 2025 Advanced Micro Devices, Inc. 
* * Michal Simek <michal.simek@amd.com> * Davorin Mista <davorin.mista@aggios.com> @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/err.h> +#include <linux/firmware/xlnx-zynqmp-ufs.h> #define ZYNQMP_PM_VERSION_MAJOR 1 #define ZYNQMP_PM_VERSION_MINOR 0 @@ -51,16 +52,10 @@ #define PM_PINCTRL_PARAM_SET_VERSION 2 -#define ZYNQMP_FAMILY_CODE 0x23 -#define VERSAL_FAMILY_CODE 0x26 - -/* When all subfamily of platform need to support */ -#define ALL_SUB_FAMILY_CODE 0x00 -#define VERSAL_SUB_FAMILY_CODE 0x01 -#define VERSALNET_SUB_FAMILY_CODE 0x03 - -#define FAMILY_CODE_MASK GENMASK(27, 21) -#define SUB_FAMILY_CODE_MASK GENMASK(20, 19) +/* Family codes */ +#define PM_ZYNQMP_FAMILY_CODE 0x1 /* ZynqMP family code */ +#define PM_VERSAL_FAMILY_CODE 0x2 /* Versal family code */ +#define PM_VERSAL_NET_FAMILY_CODE 0x3 /* Versal NET family code */ #define API_ID_MASK GENMASK(7, 0) #define MODULE_ID_MASK GENMASK(11, 8) @@ -164,6 +159,7 @@ enum pm_api_cb_id { enum pm_api_id { PM_API_FEATURES = 0, PM_GET_API_VERSION = 1, + PM_GET_NODE_STATUS = 3, PM_REGISTER_NOTIFIER = 5, PM_FORCE_POWERDOWN = 8, PM_REQUEST_WAKEUP = 10, @@ -241,6 +237,7 @@ enum pm_ioctl_id { IOCTL_GET_FEATURE_CONFIG = 27, /* IOCTL for Secure Read/Write Interface */ IOCTL_READ_REG = 28, + IOCTL_MASK_WRITE_REG = 29, /* Dynamic SD/GEM configuration */ IOCTL_SET_SD_CONFIG = 30, IOCTL_SET_GEM_CONFIG = 31, @@ -564,7 +561,7 @@ int zynqmp_pm_invoke_fw_fn(u32 pm_api_id, u32 *ret_payload, u32 num_args, ...); #if IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE) int zynqmp_pm_get_api_version(u32 *version); int zynqmp_pm_get_chipid(u32 *idcode, u32 *version); -int zynqmp_pm_get_family_info(u32 *family, u32 *subfamily); +int zynqmp_pm_get_family_info(u32 *family); int zynqmp_pm_query_data(struct zynqmp_pm_query_data qdata, u32 *out); int zynqmp_pm_clock_enable(u32 clock_id); int zynqmp_pm_clock_disable(u32 clock_id); @@ -619,6 +616,9 @@ int zynqmp_pm_feature(const u32 api_id); int zynqmp_pm_is_function_supported(const u32 api_id, const 
u32 id); int zynqmp_pm_set_feature_config(enum pm_feature_config_id id, u32 value); int zynqmp_pm_get_feature_config(enum pm_feature_config_id id, u32 *payload); +int zynqmp_pm_sec_read_reg(u32 node_id, u32 offset, u32 *ret_value); +int zynqmp_pm_sec_mask_write_reg(const u32 node_id, const u32 offset, + u32 mask, u32 value); int zynqmp_pm_register_sgi(u32 sgi_num, u32 reset); int zynqmp_pm_force_pwrdwn(const u32 target, const enum zynqmp_pm_request_ack ack); @@ -629,6 +629,8 @@ int zynqmp_pm_request_wake(const u32 node, int zynqmp_pm_get_rpu_mode(u32 node_id, enum rpu_oper_mode *rpu_mode); int zynqmp_pm_set_rpu_mode(u32 node_id, enum rpu_oper_mode rpu_mode); int zynqmp_pm_set_tcm_config(u32 node_id, enum rpu_tcm_comb tcm_mode); +int zynqmp_pm_get_node_status(const u32 node, u32 *const status, + u32 *const requirements, u32 *const usage); int zynqmp_pm_set_sd_config(u32 node, enum pm_sd_config_type config, u32 value); int zynqmp_pm_set_gem_config(u32 node, enum pm_gem_config_type config, u32 value); @@ -643,7 +645,7 @@ static inline int zynqmp_pm_get_chipid(u32 *idcode, u32 *version) return -ENODEV; } -static inline int zynqmp_pm_get_family_info(u32 *family, u32 *subfamily) +static inline int zynqmp_pm_get_family_info(u32 *family) { return -ENODEV; } @@ -916,6 +918,17 @@ static inline int zynqmp_pm_request_wake(const u32 node, return -ENODEV; } +static inline int zynqmp_pm_sec_read_reg(u32 node_id, u32 offset, u32 *ret_value) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_sec_mask_write_reg(const u32 node_id, const u32 offset, + u32 mask, u32 value) +{ + return -ENODEV; +} + static inline int zynqmp_pm_get_rpu_mode(u32 node_id, enum rpu_oper_mode *rpu_mode) { return -ENODEV; @@ -931,6 +944,13 @@ static inline int zynqmp_pm_set_tcm_config(u32 node_id, enum rpu_tcm_comb tcm_mo return -ENODEV; } +static inline int zynqmp_pm_get_node_status(const u32 node, u32 *const status, + u32 *const requirements, + u32 *const usage) +{ + return -ENODEV; +} + static inline 
int zynqmp_pm_set_sd_config(u32 node, enum pm_sd_config_type config, u32 value) diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h index 7964db96e41a..0a3bcd1718f3 100644 --- a/include/linux/fprobe.h +++ b/include/linux/fprobe.h @@ -7,6 +7,7 @@ #include <linux/ftrace.h> #include <linux/rcupdate.h> #include <linux/refcount.h> +#include <linux/rhashtable.h> #include <linux/slab.h> struct fprobe; @@ -26,7 +27,7 @@ typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip, * @fp: The fprobe which owns this. */ struct fprobe_hlist_node { - struct hlist_node hlist; + struct rhlist_head hlist; unsigned long addr; struct fprobe *fp; }; diff --git a/include/linux/fs.h b/include/linux/fs.h index ce25feb06727..04ceeca12a0d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2041,14 +2041,14 @@ static inline bool can_mmap_file(struct file *file) return true; } -int __compat_vma_mmap_prepare(const struct file_operations *f_op, +int __compat_vma_mmap(const struct file_operations *f_op, struct file *file, struct vm_area_struct *vma); -int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); +int compat_vma_mmap(struct file *file, struct vm_area_struct *vma); static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) { if (file->f_op->mmap_prepare) - return compat_vma_mmap_prepare(file, vma); + return compat_vma_mmap(file, vma); return file->f_op->mmap(file, vma); } @@ -2310,7 +2310,6 @@ void retire_super(struct super_block *sb); void generic_shutdown_super(struct super_block *sb); void kill_block_super(struct super_block *sb); void kill_anon_super(struct super_block *sb); -void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); void deactivate_locked_super(struct super_block *sb); int set_anon_super(struct super_block *s, void *data); @@ -3191,6 +3190,8 @@ extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, 
struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); +extern void __simple_unlink(struct inode *, struct dentry *); +extern void __simple_rmdir(struct inode *, struct dentry *); void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, @@ -3200,6 +3201,8 @@ extern int simple_rename(struct mnt_idmap *, struct inode *, unsigned int); extern void simple_recursive_removal(struct dentry *, void (*callback)(struct dentry *)); +extern void simple_remove_by_name(struct dentry *, const char *, + void (*callback)(struct dentry *)); extern void locked_recursive_removal(struct dentry *, void (*callback)(struct dentry *)); extern int noop_fsync(struct file *, loff_t, loff_t, int); @@ -3229,6 +3232,7 @@ extern int simple_fill_super(struct super_block *, unsigned long, extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); extern void simple_release_fs(struct vfsmount **mount, int *count); struct dentry *simple_start_creating(struct dentry *, const char *); +void simple_done_creating(struct dentry *); extern ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 015dd1049bea..770f0dc993cc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1167,17 +1167,14 @@ static inline void ftrace_init(void) { } */ struct ftrace_graph_ent { unsigned long func; /* Current function */ - int depth; + unsigned long depth; } __packed; /* * Structure that defines an entry function trace with retaddr. - * It's already packed but the attribute "packed" is needed - * to remove extra padding at the end. 
*/ struct fgraph_retaddr_ent { - unsigned long func; /* Current function */ - int depth; + struct ftrace_graph_ent ent; unsigned long retaddr; /* Return address */ } __packed; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 623bee335383..b155929af5b1 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -387,7 +387,7 @@ extern void free_pages(unsigned long addr, unsigned int order); #define free_page(addr) free_pages((addr), 0) void page_alloc_init_cpuhp(void); -int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp); +bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp); void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); void drain_all_pages(struct zone *zone); void drain_local_pages(struct zone *zone); diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 11cab07f322a..ae7f21aad0ac 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -364,20 +364,35 @@ unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long add unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags); +enum split_type { + SPLIT_TYPE_UNIFORM, + SPLIT_TYPE_NON_UNIFORM, +}; + bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins); -int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, +int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order); +int folio_split_unmapped(struct folio *folio, unsigned int new_order); int min_order_for_split(struct folio *folio); int split_folio_to_list(struct folio *folio, struct list_head *list); -bool uniform_split_supported(struct folio *folio, unsigned int new_order, - bool warns); -bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, - bool warns); +bool folio_split_supported(struct folio *folio, unsigned int new_order, + enum split_type split_type, bool warns); int folio_split(struct folio *folio, unsigned 
int new_order, struct page *page, struct list_head *list); -/* - * try_folio_split_to_order - try to split a @folio at @page to @new_order using - * non uniform split. + +static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, + unsigned int new_order) +{ + return __split_huge_page_to_list_to_order(page, list, new_order); +} +static inline int split_huge_page_to_order(struct page *page, unsigned int new_order) +{ + return split_huge_page_to_list_to_order(page, NULL, new_order); +} + +/** + * try_folio_split_to_order() - try to split a @folio at @page to @new_order + * using non uniform split. * @folio: folio to be split * @page: split to @new_order at the given page * @new_order: the target split order @@ -387,14 +402,13 @@ int folio_split(struct folio *folio, unsigned int new_order, struct page *page, * folios are put back to LRU list. Use min_order_for_split() to get the lower * bound of @new_order. * - * Return: 0: split is successful, otherwise split failed. + * Return: 0 - split is successful, otherwise split failed. 
*/ static inline int try_folio_split_to_order(struct folio *folio, struct page *page, unsigned int new_order) { - if (!non_uniform_split_supported(folio, new_order, /* warns= */ false)) - return split_huge_page_to_list_to_order(&folio->page, NULL, - new_order); + if (!folio_split_supported(folio, new_order, SPLIT_TYPE_NON_UNIFORM, /* warns= */ false)) + return split_huge_page_to_order(&folio->page, new_order); return folio_split(folio, new_order, page, NULL); } static inline int split_huge_page(struct page *page) @@ -402,14 +416,43 @@ static inline int split_huge_page(struct page *page) return split_huge_page_to_list_to_order(page, NULL, 0); } void deferred_split_folio(struct folio *folio, bool partially_mapped); +#ifdef CONFIG_MEMCG +void reparent_deferred_split_queue(struct mem_cgroup *memcg); +#endif void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze); +/** + * pmd_is_huge() - Is this PMD either a huge PMD entry or a software leaf entry? + * @pmd: The PMD to check. + * + * A huge PMD entry is a non-empty entry which is present and marked huge or a + * software leaf entry. This check may be performed without the appropriate locks + * held, in which case the condition should be rechecked after they are + * acquired. + * + * Returns: true if this PMD is huge, false otherwise. + */ +static inline bool pmd_is_huge(pmd_t pmd) +{ + if (pmd_present(pmd)) { + return pmd_trans_huge(pmd); + } else if (!pmd_none(pmd)) { + /* + * Non-present PMDs must be valid huge non-present entries. We + * cannot assert that here due to header dependency issues.
+ */ + return true; + } + + return false; +} + #define split_huge_pmd(__vma, __pmd, __address) \ do { \ pmd_t *____pmd = (__pmd); \ - if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd)) \ + if (pmd_is_huge(*____pmd)) \ __split_huge_pmd(__vma, __pmd, __address, \ false); \ } while (0) @@ -447,19 +490,14 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma); -static inline int is_swap_pmd(pmd_t pmd) -{ - return !pmd_none(pmd) && !pmd_present(pmd); -} - /* mmap_lock must be held on entry */ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { - if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd)) + if (pmd_is_huge(*pmd)) return __pmd_trans_huge_lock(pmd, vma); - else - return NULL; + + return NULL; } static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) @@ -473,6 +511,8 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, /** * folio_test_pmd_mappable - Can we map this folio with a PMD? * @folio: The folio to test + * + * Return: true - @folio can be mapped, false - @folio cannot be mapped. 
*/ static inline bool folio_test_pmd_mappable(struct folio *folio) { @@ -481,6 +521,8 @@ static inline bool folio_test_pmd_mappable(struct folio *folio) vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf); +vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf); + extern struct folio *huge_zero_folio; extern unsigned long huge_zero_pfn; @@ -524,6 +566,8 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, bool freeze); bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp, struct folio *folio); +void map_anon_folio_pmd_nopf(struct folio *folio, pmd_t *pmd, + struct vm_area_struct *vma, unsigned long haddr); #else /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -576,6 +620,11 @@ split_huge_page_to_list_to_order(struct page *page, struct list_head *list, VM_WARN_ON_ONCE_PAGE(1, page); return -EINVAL; } +static inline int split_huge_page_to_order(struct page *page, unsigned int new_order) +{ + VM_WARN_ON_ONCE_PAGE(1, page); + return -EINVAL; +} static inline int split_huge_page(struct page *page) { VM_WARN_ON_ONCE_PAGE(1, page); @@ -602,6 +651,7 @@ static inline int try_folio_split_to_order(struct folio *folio, } static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} +static inline void reparent_deferred_split_queue(struct mem_cgroup *memcg) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) @@ -642,10 +692,6 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, struct vm_area_struct *next) { } -static inline int is_swap_pmd(pmd_t pmd) -{ - return 0; -} static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { @@ -662,6 +708,11 @@ static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) return 0; } +static inline vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf) +{ + return 0; +} + static inline bool is_huge_zero_folio(const struct folio *folio) { return false; @@ -682,12 +733,6 
@@ static inline void mm_put_huge_zero_folio(struct mm_struct *mm) return; } -static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma, - unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap) -{ - return NULL; -} - static inline bool thp_migration_supported(void) { return false; @@ -720,6 +765,11 @@ static inline struct folio *get_persistent_huge_zero_folio(void) { return NULL; } + +static inline bool pmd_is_huge(pmd_t pmd) +{ + return false; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline int split_folio_to_list_to_order(struct folio *folio, diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8e63e46b8e1f..019a1c5281e4 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -150,8 +150,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, struct folio **foliop); #endif /* CONFIG_USERFAULTFD */ long hugetlb_reserve_pages(struct inode *inode, long from, long to, - struct vm_area_struct *vma, - vm_flags_t vm_flags); + struct vm_area_desc *desc, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list); @@ -172,7 +171,7 @@ bool hugetlbfs_pagecache_present(struct hstate *h, struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio); -extern int sysctl_hugetlb_shm_group; +extern int sysctl_hugetlb_shm_group __read_mostly; extern struct list_head huge_boot_pages[MAX_NUMNODES]; void hugetlb_bootmem_alloc(void); @@ -275,11 +274,10 @@ void hugetlb_vma_lock_release(struct kref *kref); long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot, unsigned long cp_flags); -bool is_hugetlb_entry_migration(pte_t pte); -bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); void fixup_hugetlb_reservations(struct vm_area_struct *vma); void hugetlb_split(struct 
vm_area_struct *vma, unsigned long addr); +int hugetlb_vma_lock_alloc(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ @@ -466,6 +464,11 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {} +static inline int hugetlb_vma_lock_alloc(struct vm_area_struct *vma) +{ + return 0; +} + #endif /* !CONFIG_HUGETLB_PAGE */ #ifndef pgd_write diff --git a/include/linux/hugetlb_inline.h b/include/linux/hugetlb_inline.h index 0660a03d37d9..a27aa0162918 100644 --- a/include/linux/hugetlb_inline.h +++ b/include/linux/hugetlb_inline.h @@ -2,22 +2,27 @@ #ifndef _LINUX_HUGETLB_INLINE_H #define _LINUX_HUGETLB_INLINE_H -#ifdef CONFIG_HUGETLB_PAGE - #include <linux/mm.h> -static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) +#ifdef CONFIG_HUGETLB_PAGE + +static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags) { - return !!(vma->vm_flags & VM_HUGETLB); + return !!(vm_flags & VM_HUGETLB); } #else -static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) +static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags) { return false; } #endif +static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) +{ + return is_vm_hugetlb_flags(vma->vm_flags); +} + #endif diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 801b2bd9e8d4..8c66284a91a8 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1135,7 +1135,9 @@ struct iommu_sva { struct iommu_mm_data { u32 pasid; + struct mm_struct *mm; struct list_head sva_domains; + struct list_head mm_list_elm; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode); @@ -1616,6 +1618,7 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm); void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); +void iommu_sva_invalidate_kva_range(unsigned long start, unsigned long end); #else 
static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) @@ -1640,6 +1643,7 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) } static inline void mm_pasid_drop(struct mm_struct *mm) {} +static inline void iommu_sva_invalidate_kva_range(unsigned long start, unsigned long end) {} #endif /* CONFIG_IOMMU_SVA */ #ifdef CONFIG_IOMMU_IOPF diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 2223f95079ce..d45fa19f9e47 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -86,7 +86,13 @@ #define GICH_HCR_EN (1 << 0) #define GICH_HCR_UIE (1 << 1) +#define GICH_HCR_LRENPIE (1 << 2) #define GICH_HCR_NPIE (1 << 3) +#define GICH_HCR_VGrp0EIE (1 << 4) +#define GICH_HCR_VGrp0DIE (1 << 5) +#define GICH_HCR_VGrp1EIE (1 << 6) +#define GICH_HCR_VGrp1DIE (1 << 7) +#define GICH_HCR_EOICOUNT GENMASK(31, 27) #define GICH_LR_VIRTUALID (0x3ff << 0) #define GICH_LR_PHYSID_CPUID_SHIFT (10) diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h index a470a73a805a..67d9d960273b 100644 --- a/include/linux/irqchip/arm-vgic-info.h +++ b/include/linux/irqchip/arm-vgic-info.h @@ -24,6 +24,8 @@ struct gic_kvm_info { enum gic_type type; /* Virtual CPU interface */ struct resource vcpu; + /* GICv2 GICC VA */ + void __iomem *gicc_base; /* Interrupt number */ unsigned int maint_irq; /* No interrupt mask, no need to use the above field */ diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 0d1927da8055..fdef2c155c27 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -611,4 +611,16 @@ extern unsigned long nsecs_to_jiffies(u64 n); #define TIMESTAMP_SIZE 30 +struct ctl_table; +int proc_dointvec_jiffies(const struct ctl_table *table, int dir, void *buffer, + size_t *lenp, loff_t *ppos); +int proc_dointvec_ms_jiffies_minmax(const struct ctl_table *table, int dir, + void *buffer, size_t *lenp, loff_t *ppos); +int 
proc_dointvec_userhz_jiffies(const struct ctl_table *table, int dir, + void *buffer, size_t *lenp, loff_t *ppos); +int proc_dointvec_ms_jiffies(const struct ctl_table *table, int dir, void *buffer, + size_t *lenp, loff_t *ppos); +int proc_doulongvec_ms_jiffies_minmax(const struct ctl_table *table, int dir, + void *buffer, size_t *lenp, loff_t *ppos); + #endif diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d12e1a5f5a9a..f335c1d7b61d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -571,11 +571,27 @@ static inline void kasan_init_hw_tags(void) { } #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); -int kasan_populate_vmalloc(unsigned long addr, unsigned long size, gfp_t gfp_mask); -void kasan_release_vmalloc(unsigned long start, unsigned long end, +int __kasan_populate_vmalloc(unsigned long addr, unsigned long size, gfp_t gfp_mask); +static inline int kasan_populate_vmalloc(unsigned long addr, + unsigned long size, gfp_t gfp_mask) +{ + if (kasan_enabled()) + return __kasan_populate_vmalloc(addr, size, gfp_mask); + return 0; +} +void __kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end, unsigned long flags); +static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, + unsigned long free_region_start, + unsigned long free_region_end, + unsigned long flags) +{ + if (kasan_enabled()) + return __kasan_release_vmalloc(start, end, free_region_start, + free_region_end, flags); +} #else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h index f2fd221107bb..7da9fd506b39 100644 --- a/include/linux/kmsan.h +++ b/include/linux/kmsan.h @@ -133,6 +133,7 @@ void kmsan_kfree_large(const void *ptr); * @prot: page protection flags used for vmap. * @pages: array of pages. 
* @page_shift: page_shift passed to vmap_range_noflush(). + * @gfp_mask: gfp_mask to use internally. * * KMSAN maps shadow and origin pages of @pages into contiguous ranges in * vmalloc metadata address range. Returns 0 on success, callers must check @@ -142,7 +143,8 @@ int __must_check kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end, pgprot_t prot, struct page **pages, - unsigned int page_shift); + unsigned int page_shift, + gfp_t gfp_mask); /** * kmsan_vunmap_kernel_range_noflush() - Notify KMSAN about a vunmap. @@ -347,7 +349,7 @@ static inline void kmsan_kfree_large(const void *ptr) static inline int __must_check kmsan_vmap_pages_range_noflush( unsigned long start, unsigned long end, pgprot_t prot, - struct page **pages, unsigned int page_shift) + struct page **pages, unsigned int page_shift, gfp_t gfp_mask) { return 0; } diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 067538fc4d58..c982694c987b 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -17,7 +17,7 @@ #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, vm_flags_t *vm_flags); -vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file, +vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file, vm_flags_t vm_flags); int ksm_enable_merge_any(struct mm_struct *mm); int ksm_disable_merge_any(struct mm_struct *mm); @@ -103,7 +103,7 @@ bool ksm_process_mergeable(struct mm_struct *mm); #else /* !CONFIG_KSM */ -static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm, +static inline vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file, vm_flags_t vm_flags) { return vm_flags; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5bd76cf394fa..d93f75b05ae2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1557,6 +1557,8 @@ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); long 
kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); +long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg); vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); @@ -2437,18 +2439,6 @@ static inline bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_NO_POLL */ -#ifdef CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL -long kvm_arch_vcpu_async_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg); -#else -static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, - unsigned int ioctl, - unsigned long arg) -{ - return -ENOIOCTLCMD; -} -#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */ - void kvm_arch_guest_memory_reclaimed(struct kvm *kvm); #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE diff --git a/include/linux/leafops.h b/include/linux/leafops.h new file mode 100644 index 000000000000..cfafe7a5e7b1 --- /dev/null +++ b/include/linux/leafops.h @@ -0,0 +1,619 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Describes operations that can be performed on software-defined page table + * leaf entries. These are abstracted from the hardware page table entries + * themselves by the softleaf_t type, see mm_types.h. + */ +#ifndef _LINUX_LEAFOPS_H +#define _LINUX_LEAFOPS_H + +#include <linux/mm_types.h> +#include <linux/swapops.h> +#include <linux/swap.h> + +#ifdef CONFIG_MMU + +/* Temporary until swp_entry_t eliminated. */ +#define LEAF_TYPE_SHIFT SWP_TYPE_SHIFT + +enum softleaf_type { + /* Fundamental types. */ + SOFTLEAF_NONE, + SOFTLEAF_SWAP, + /* Migration types. */ + SOFTLEAF_MIGRATION_READ, + SOFTLEAF_MIGRATION_READ_EXCLUSIVE, + SOFTLEAF_MIGRATION_WRITE, + /* Device types. */ + SOFTLEAF_DEVICE_PRIVATE_READ, + SOFTLEAF_DEVICE_PRIVATE_WRITE, + SOFTLEAF_DEVICE_EXCLUSIVE, + /* H/W poison types. */ + SOFTLEAF_HWPOISON, + /* Marker types.
*/ + SOFTLEAF_MARKER, +}; + +/** + * softleaf_mk_none() - Create an empty ('none') leaf entry. + * Returns: empty leaf entry. + */ +static inline softleaf_t softleaf_mk_none(void) +{ + return ((softleaf_t) { 0 }); +} + +/** + * softleaf_from_pte() - Obtain a leaf entry from a PTE entry. + * @pte: PTE entry. + * + * If @pte is present (therefore not a leaf entry) the function returns an empty + * leaf entry. Otherwise, it returns a leaf entry. + * + * Returns: Leaf entry. + */ +static inline softleaf_t softleaf_from_pte(pte_t pte) +{ + softleaf_t arch_entry; + + if (pte_present(pte) || pte_none(pte)) + return softleaf_mk_none(); + + pte = pte_swp_clear_flags(pte); + arch_entry = __pte_to_swp_entry(pte); + + /* Temporary until swp_entry_t eliminated. */ + return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); +} + +/** + * softleaf_to_pte() - Obtain a PTE entry from a leaf entry. + * @entry: Leaf entry. + * + * This generates an architecture-specific PTE entry that can be utilised to + * encode the metadata the leaf entry encodes. + * + * Returns: Architecture-specific PTE entry encoding leaf entry. + */ +static inline pte_t softleaf_to_pte(softleaf_t entry) +{ + /* Temporary until swp_entry_t eliminated. */ + return swp_entry_to_pte(entry); +} + +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +/** + * softleaf_from_pmd() - Obtain a leaf entry from a PMD entry. + * @pmd: PMD entry. + * + * If @pmd is present (therefore not a leaf entry) the function returns an empty + * leaf entry. Otherwise, it returns a leaf entry. + * + * Returns: Leaf entry. + */ +static inline softleaf_t softleaf_from_pmd(pmd_t pmd) +{ + softleaf_t arch_entry; + + if (pmd_present(pmd) || pmd_none(pmd)) + return softleaf_mk_none(); + + if (pmd_swp_soft_dirty(pmd)) + pmd = pmd_swp_clear_soft_dirty(pmd); + if (pmd_swp_uffd_wp(pmd)) + pmd = pmd_swp_clear_uffd_wp(pmd); + arch_entry = __pmd_to_swp_entry(pmd); + + /* Temporary until swp_entry_t eliminated. 
*/ + return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); +} + +#else + +static inline softleaf_t softleaf_from_pmd(pmd_t pmd) +{ + return softleaf_mk_none(); +} + +#endif + +/** + * softleaf_is_none() - Is the leaf entry empty? + * @entry: Leaf entry. + * + * Empty entries are typically the result of a 'none' page table leaf entry + * being converted to a leaf entry. + * + * Returns: true if the entry is empty, false otherwise. + */ +static inline bool softleaf_is_none(softleaf_t entry) +{ + return entry.val == 0; +} + +/** + * softleaf_type() - Identify the type of leaf entry. + * @entry: Leaf entry. + * + * Returns: the leaf entry type associated with @entry. + */ +static inline enum softleaf_type softleaf_type(softleaf_t entry) +{ + unsigned int type_num; + + if (softleaf_is_none(entry)) + return SOFTLEAF_NONE; + + type_num = entry.val >> LEAF_TYPE_SHIFT; + + if (type_num < MAX_SWAPFILES) + return SOFTLEAF_SWAP; + + switch (type_num) { +#ifdef CONFIG_MIGRATION + case SWP_MIGRATION_READ: + return SOFTLEAF_MIGRATION_READ; + case SWP_MIGRATION_READ_EXCLUSIVE: + return SOFTLEAF_MIGRATION_READ_EXCLUSIVE; + case SWP_MIGRATION_WRITE: + return SOFTLEAF_MIGRATION_WRITE; +#endif +#ifdef CONFIG_DEVICE_PRIVATE + case SWP_DEVICE_WRITE: + return SOFTLEAF_DEVICE_PRIVATE_WRITE; + case SWP_DEVICE_READ: + return SOFTLEAF_DEVICE_PRIVATE_READ; + case SWP_DEVICE_EXCLUSIVE: + return SOFTLEAF_DEVICE_EXCLUSIVE; +#endif +#ifdef CONFIG_MEMORY_FAILURE + case SWP_HWPOISON: + return SOFTLEAF_HWPOISON; +#endif + case SWP_PTE_MARKER: + return SOFTLEAF_MARKER; + } + + /* Unknown entry type. */ + VM_WARN_ON_ONCE(1); + return SOFTLEAF_NONE; +} + +/** + * softleaf_is_swap() - Is this leaf entry a swap entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a swap entry, otherwise false.
+ */ +static inline bool softleaf_is_swap(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_SWAP; +} + +/** + * softleaf_is_migration_write() - Is this leaf entry a writable migration entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a writable migration entry, otherwise + * false. + */ +static inline bool softleaf_is_migration_write(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_MIGRATION_WRITE; +} + +/** + * softleaf_is_migration_read() - Is this leaf entry a readable migration entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a readable migration entry, otherwise + * false. + */ +static inline bool softleaf_is_migration_read(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ; +} + +/** + * softleaf_is_migration_read_exclusive() - Is this leaf entry an exclusive + * readable migration entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is an exclusive readable migration entry, + * otherwise false. + */ +static inline bool softleaf_is_migration_read_exclusive(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ_EXCLUSIVE; +} + +/** + * softleaf_is_migration() - Is this leaf entry a migration entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a migration entry, otherwise false. + */ +static inline bool softleaf_is_migration(softleaf_t entry) +{ + switch (softleaf_type(entry)) { + case SOFTLEAF_MIGRATION_READ: + case SOFTLEAF_MIGRATION_READ_EXCLUSIVE: + case SOFTLEAF_MIGRATION_WRITE: + return true; + default: + return false; + } +} + +/** + * softleaf_is_device_private_write() - Is this leaf entry a device private + * writable entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a device private writable entry, otherwise + * false. 
+ */ +static inline bool softleaf_is_device_private_write(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_DEVICE_PRIVATE_WRITE; +} + +/** + * softleaf_is_device_private() - Is this leaf entry a device private entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a device private entry, otherwise false. + */ +static inline bool softleaf_is_device_private(softleaf_t entry) +{ + switch (softleaf_type(entry)) { + case SOFTLEAF_DEVICE_PRIVATE_WRITE: + case SOFTLEAF_DEVICE_PRIVATE_READ: + return true; + default: + return false; + } +} + +/** + * softleaf_is_device_exclusive() - Is this leaf entry a device-exclusive entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a device-exclusive entry, otherwise false. + */ +static inline bool softleaf_is_device_exclusive(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_DEVICE_EXCLUSIVE; +} + +/** + * softleaf_is_hwpoison() - Is this leaf entry a hardware poison entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a hardware poison entry, otherwise false. + */ +static inline bool softleaf_is_hwpoison(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_HWPOISON; +} + +/** + * softleaf_is_marker() - Is this leaf entry a marker? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a marker entry, otherwise false. + */ +static inline bool softleaf_is_marker(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_MARKER; +} + +/** + * softleaf_to_marker() - Obtain marker associated with leaf entry. + * @entry: Leaf entry, softleaf_is_marker(@entry) must return true. + * + * Returns: Marker associated with the leaf entry. + */ +static inline pte_marker softleaf_to_marker(softleaf_t entry) +{ + VM_WARN_ON_ONCE(!softleaf_is_marker(entry)); + + return swp_offset(entry) & PTE_MARKER_MASK; +} + +/** + * softleaf_has_pfn() - Does this leaf entry encode a valid PFN number? + * @entry: Leaf entry. 
+ * + * A pfn swap entry is a special type of swap entry that always has a pfn stored + * in the swap offset. They can either be used to represent unaddressable device + * memory, to restrict access to a page undergoing migration or to represent a + * pfn which has been hwpoisoned and unmapped. + * + * Returns: true if the leaf entry encodes a PFN, otherwise false. + */ +static inline bool softleaf_has_pfn(softleaf_t entry) +{ + /* Make sure the swp offset can always store the needed fields. */ + BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS); + + if (softleaf_is_migration(entry)) + return true; + if (softleaf_is_device_private(entry)) + return true; + if (softleaf_is_device_exclusive(entry)) + return true; + if (softleaf_is_hwpoison(entry)) + return true; + + return false; +} + +/** + * softleaf_to_pfn() - Obtain PFN encoded within leaf entry. + * @entry: Leaf entry, softleaf_has_pfn(@entry) must return true. + * + * Returns: The PFN associated with the leaf entry. + */ +static inline unsigned long softleaf_to_pfn(softleaf_t entry) +{ + VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); + + /* Temporary until swp_entry_t eliminated. */ + return swp_offset(entry) & SWP_PFN_MASK; +} + +/** + * softleaf_to_page() - Obtains struct page for PFN encoded within leaf entry. + * @entry: Leaf entry, softleaf_has_pfn(@entry) must return true. + * + * Returns: Pointer to the struct page associated with the leaf entry's PFN. + */ +static inline struct page *softleaf_to_page(softleaf_t entry) +{ + struct page *page = pfn_to_page(softleaf_to_pfn(entry)); + + VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); + /* + * Any use of migration entries may only occur while the + * corresponding page is locked + */ + VM_WARN_ON_ONCE(softleaf_is_migration(entry) && !PageLocked(page)); + + return page; +} + +/** + * softleaf_to_folio() - Obtains struct folio for PFN encoded within leaf entry. + * @entry: Leaf entry, softleaf_has_pfn(@entry) must return true. 
+ * + * Returns: Pointer to the struct folio associated with the leaf entry's PFN. + */ +static inline struct folio *softleaf_to_folio(softleaf_t entry) +{ + struct folio *folio = pfn_folio(softleaf_to_pfn(entry)); + + VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); + /* + * Any use of migration entries may only occur while the + * corresponding folio is locked. + */ + VM_WARN_ON_ONCE(softleaf_is_migration(entry) && + !folio_test_locked(folio)); + + return folio; +} + +/** + * softleaf_is_poison_marker() - Is this leaf entry a poison marker? + * @entry: Leaf entry. + * + * The poison marker is set via UFFDIO_POISON. Userfaultfd-specific. + * + * Returns: true if the leaf entry is a poison marker, otherwise false. + */ +static inline bool softleaf_is_poison_marker(softleaf_t entry) +{ + if (!softleaf_is_marker(entry)) + return false; + + return softleaf_to_marker(entry) & PTE_MARKER_POISONED; +} + +/** + * softleaf_is_guard_marker() - Is this leaf entry a guard region marker? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a guard marker, otherwise false. + */ +static inline bool softleaf_is_guard_marker(softleaf_t entry) +{ + if (!softleaf_is_marker(entry)) + return false; + + return softleaf_to_marker(entry) & PTE_MARKER_GUARD; +} + +/** + * softleaf_is_uffd_wp_marker() - Is this leaf entry a userfaultfd write protect + * marker? + * @entry: Leaf entry. + * + * Userfaultfd-specific. + * + * Returns: true if the leaf entry is a UFFD WP marker, otherwise false. + */ +static inline bool softleaf_is_uffd_wp_marker(softleaf_t entry) +{ + if (!softleaf_is_marker(entry)) + return false; + + return softleaf_to_marker(entry) & PTE_MARKER_UFFD_WP; +} + +#ifdef CONFIG_MIGRATION + +/** + * softleaf_is_migration_young() - Does this migration entry contain an accessed + * bit? + * @entry: Leaf entry.
+ * + * If the architecture can support storing A/D bits in migration entries, this + * determines whether the accessed (or 'young') bit was set on the migrated page + * table entry. + * + * Returns: true if the entry contains an accessed bit, otherwise false. + */ +static inline bool softleaf_is_migration_young(softleaf_t entry) +{ + VM_WARN_ON_ONCE(!softleaf_is_migration(entry)); + + if (migration_entry_supports_ad()) + return swp_offset(entry) & SWP_MIG_YOUNG; + /* Keep the old behavior of aging page after migration */ + return false; +} + +/** + * softleaf_is_migration_dirty() - Does this migration entry contain a dirty bit? + * @entry: Leaf entry. + * + * If the architecture can support storing A/D bits in migration entries, this + * determines whether the dirty bit was set on the migrated page table entry. + * + * Returns: true if the entry contains a dirty bit, otherwise false. + */ +static inline bool softleaf_is_migration_dirty(softleaf_t entry) +{ + VM_WARN_ON_ONCE(!softleaf_is_migration(entry)); + + if (migration_entry_supports_ad()) + return swp_offset(entry) & SWP_MIG_DIRTY; + /* Keep the old behavior of clean page after migration */ + return false; +} + +#else /* CONFIG_MIGRATION */ + +static inline bool softleaf_is_migration_young(softleaf_t entry) +{ + return false; +} + +static inline bool softleaf_is_migration_dirty(softleaf_t entry) +{ + return false; +} +#endif /* CONFIG_MIGRATION */ + +/** + * pte_is_marker() - Does the PTE entry encode a marker leaf entry? + * @pte: PTE entry. + * + * Returns: true if this PTE is a marker leaf entry, otherwise false. + */ +static inline bool pte_is_marker(pte_t pte) +{ + return softleaf_is_marker(softleaf_from_pte(pte)); +} + +/** + * pte_is_uffd_wp_marker() - Does this PTE entry encode a userfaultfd write + * protect marker leaf entry? + * @pte: PTE entry. + * + * Returns: true if this PTE is a UFFD WP marker leaf entry, otherwise false. 
+ */ +static inline bool pte_is_uffd_wp_marker(pte_t pte) +{ + const softleaf_t entry = softleaf_from_pte(pte); + + return softleaf_is_uffd_wp_marker(entry); +} + +/** + * pte_is_uffd_marker() - Does this PTE entry encode a userfault-specific marker + * leaf entry? + * @pte: PTE entry. + * + * It's useful to be able to determine which leaf entries encode UFFD-specific + * markers so we can handle these correctly. + * + * Returns: true if this PTE entry is a UFFD-specific marker, otherwise false. + */ +static inline bool pte_is_uffd_marker(pte_t pte) +{ + const softleaf_t entry = softleaf_from_pte(pte); + + if (!softleaf_is_marker(entry)) + return false; + + /* UFFD WP, poisoned swap entries are UFFD-handled. */ + if (softleaf_is_uffd_wp_marker(entry)) + return true; + if (softleaf_is_poison_marker(entry)) + return true; + + return false; +} + +#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_ENABLE_THP_MIGRATION) + +/** + * pmd_is_device_private_entry() - Check if PMD contains a device private swap + * entry. + * @pmd: The PMD to check. + * + * Returns true if the PMD contains a swap entry that represents a device private + * page mapping. This is used for zone device private pages that have been + * swapped out but still need special handling during various memory management + * operations. + * + * Return: true if PMD contains device private entry, false otherwise + */ +static inline bool pmd_is_device_private_entry(pmd_t pmd) +{ + return softleaf_is_device_private(softleaf_from_pmd(pmd)); +} + +#else /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ + +static inline bool pmd_is_device_private_entry(pmd_t pmd) +{ + return false; +} + +#endif /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ + +/** + * pmd_is_migration_entry() - Does this PMD entry encode a migration entry? + * @pmd: PMD entry. + * + * Returns: true if the PMD encodes a migration entry, otherwise false.
+ */ +static inline bool pmd_is_migration_entry(pmd_t pmd) +{ + return softleaf_is_migration(softleaf_from_pmd(pmd)); +} + +/** + * pmd_is_valid_softleaf() - Is this PMD entry a valid leaf entry? + * @pmd: PMD entry. + * + * PMD leaf entries are valid only if they are device private or migration + * entries. This function asserts that a PMD leaf entry is valid in this + * respect. + * + * Returns: true if the PMD entry is a valid leaf entry, otherwise false. + */ +static inline bool pmd_is_valid_softleaf(pmd_t pmd) +{ + const softleaf_t entry = softleaf_from_pmd(pmd); + + /* Only device private, migration entries valid for PMD. */ + return softleaf_is_device_private(entry) || + softleaf_is_migration(entry); +} + +#endif /* CONFIG_MMU */ +#endif /* _LINUX_LEAFOPS_H */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 7a98de1cc995..39534fafa36a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -75,6 +75,7 @@ enum ata_quirks { __ATA_QUIRK_NO_DMA_LOG, /* Do not use DMA for log read */ __ATA_QUIRK_NOTRIM, /* Do not use TRIM */ __ATA_QUIRK_MAX_SEC_1024, /* Limit max sects to 1024 */ + __ATA_QUIRK_MAX_SEC_8191, /* Limit max sects to 8191 */ __ATA_QUIRK_MAX_TRIM_128M, /* Limit max trim size to 128M */ __ATA_QUIRK_NO_NCQ_ON_ATI, /* Disable NCQ on ATI chipset */ __ATA_QUIRK_NO_LPM_ON_ATI, /* Disable LPM on ATI chipset */ @@ -85,6 +86,45 @@ enum ata_quirks { __ATA_QUIRK_MAX, }; +/* + * Quirk flags: may be set by libata or controller drivers on drives. + * Some quirks may be drive/controller pair dependent. 
+ */ +enum { + ATA_QUIRK_DIAGNOSTIC = (1U << __ATA_QUIRK_DIAGNOSTIC), + ATA_QUIRK_NODMA = (1U << __ATA_QUIRK_NODMA), + ATA_QUIRK_NONCQ = (1U << __ATA_QUIRK_NONCQ), + ATA_QUIRK_MAX_SEC_128 = (1U << __ATA_QUIRK_MAX_SEC_128), + ATA_QUIRK_BROKEN_HPA = (1U << __ATA_QUIRK_BROKEN_HPA), + ATA_QUIRK_DISABLE = (1U << __ATA_QUIRK_DISABLE), + ATA_QUIRK_HPA_SIZE = (1U << __ATA_QUIRK_HPA_SIZE), + ATA_QUIRK_IVB = (1U << __ATA_QUIRK_IVB), + ATA_QUIRK_STUCK_ERR = (1U << __ATA_QUIRK_STUCK_ERR), + ATA_QUIRK_BRIDGE_OK = (1U << __ATA_QUIRK_BRIDGE_OK), + ATA_QUIRK_ATAPI_MOD16_DMA = (1U << __ATA_QUIRK_ATAPI_MOD16_DMA), + ATA_QUIRK_FIRMWARE_WARN = (1U << __ATA_QUIRK_FIRMWARE_WARN), + ATA_QUIRK_1_5_GBPS = (1U << __ATA_QUIRK_1_5_GBPS), + ATA_QUIRK_NOSETXFER = (1U << __ATA_QUIRK_NOSETXFER), + ATA_QUIRK_BROKEN_FPDMA_AA = (1U << __ATA_QUIRK_BROKEN_FPDMA_AA), + ATA_QUIRK_DUMP_ID = (1U << __ATA_QUIRK_DUMP_ID), + ATA_QUIRK_MAX_SEC_LBA48 = (1U << __ATA_QUIRK_MAX_SEC_LBA48), + ATA_QUIRK_ATAPI_DMADIR = (1U << __ATA_QUIRK_ATAPI_DMADIR), + ATA_QUIRK_NO_NCQ_TRIM = (1U << __ATA_QUIRK_NO_NCQ_TRIM), + ATA_QUIRK_NOLPM = (1U << __ATA_QUIRK_NOLPM), + ATA_QUIRK_WD_BROKEN_LPM = (1U << __ATA_QUIRK_WD_BROKEN_LPM), + ATA_QUIRK_ZERO_AFTER_TRIM = (1U << __ATA_QUIRK_ZERO_AFTER_TRIM), + ATA_QUIRK_NO_DMA_LOG = (1U << __ATA_QUIRK_NO_DMA_LOG), + ATA_QUIRK_NOTRIM = (1U << __ATA_QUIRK_NOTRIM), + ATA_QUIRK_MAX_SEC_1024 = (1U << __ATA_QUIRK_MAX_SEC_1024), + ATA_QUIRK_MAX_SEC_8191 = (1U << __ATA_QUIRK_MAX_SEC_8191), + ATA_QUIRK_MAX_TRIM_128M = (1U << __ATA_QUIRK_MAX_TRIM_128M), + ATA_QUIRK_NO_NCQ_ON_ATI = (1U << __ATA_QUIRK_NO_NCQ_ON_ATI), + ATA_QUIRK_NO_LPM_ON_ATI = (1U << __ATA_QUIRK_NO_LPM_ON_ATI), + ATA_QUIRK_NO_ID_DEV_LOG = (1U << __ATA_QUIRK_NO_ID_DEV_LOG), + ATA_QUIRK_NO_LOG_DIR = (1U << __ATA_QUIRK_NO_LOG_DIR), + ATA_QUIRK_NO_FUA = (1U << __ATA_QUIRK_NO_FUA), +}; + enum { /* various global constants */ LIBATA_MAX_PRD = ATA_MAX_PRD / 2, @@ -390,42 +430,6 @@ enum { */ ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 8, - /* - * Quirk 
flags: may be set by libata or controller drivers on drives. - * Some quirks may be drive/controller pair dependent. - */ - ATA_QUIRK_DIAGNOSTIC = (1U << __ATA_QUIRK_DIAGNOSTIC), - ATA_QUIRK_NODMA = (1U << __ATA_QUIRK_NODMA), - ATA_QUIRK_NONCQ = (1U << __ATA_QUIRK_NONCQ), - ATA_QUIRK_MAX_SEC_128 = (1U << __ATA_QUIRK_MAX_SEC_128), - ATA_QUIRK_BROKEN_HPA = (1U << __ATA_QUIRK_BROKEN_HPA), - ATA_QUIRK_DISABLE = (1U << __ATA_QUIRK_DISABLE), - ATA_QUIRK_HPA_SIZE = (1U << __ATA_QUIRK_HPA_SIZE), - ATA_QUIRK_IVB = (1U << __ATA_QUIRK_IVB), - ATA_QUIRK_STUCK_ERR = (1U << __ATA_QUIRK_STUCK_ERR), - ATA_QUIRK_BRIDGE_OK = (1U << __ATA_QUIRK_BRIDGE_OK), - ATA_QUIRK_ATAPI_MOD16_DMA = (1U << __ATA_QUIRK_ATAPI_MOD16_DMA), - ATA_QUIRK_FIRMWARE_WARN = (1U << __ATA_QUIRK_FIRMWARE_WARN), - ATA_QUIRK_1_5_GBPS = (1U << __ATA_QUIRK_1_5_GBPS), - ATA_QUIRK_NOSETXFER = (1U << __ATA_QUIRK_NOSETXFER), - ATA_QUIRK_BROKEN_FPDMA_AA = (1U << __ATA_QUIRK_BROKEN_FPDMA_AA), - ATA_QUIRK_DUMP_ID = (1U << __ATA_QUIRK_DUMP_ID), - ATA_QUIRK_MAX_SEC_LBA48 = (1U << __ATA_QUIRK_MAX_SEC_LBA48), - ATA_QUIRK_ATAPI_DMADIR = (1U << __ATA_QUIRK_ATAPI_DMADIR), - ATA_QUIRK_NO_NCQ_TRIM = (1U << __ATA_QUIRK_NO_NCQ_TRIM), - ATA_QUIRK_NOLPM = (1U << __ATA_QUIRK_NOLPM), - ATA_QUIRK_WD_BROKEN_LPM = (1U << __ATA_QUIRK_WD_BROKEN_LPM), - ATA_QUIRK_ZERO_AFTER_TRIM = (1U << __ATA_QUIRK_ZERO_AFTER_TRIM), - ATA_QUIRK_NO_DMA_LOG = (1U << __ATA_QUIRK_NO_DMA_LOG), - ATA_QUIRK_NOTRIM = (1U << __ATA_QUIRK_NOTRIM), - ATA_QUIRK_MAX_SEC_1024 = (1U << __ATA_QUIRK_MAX_SEC_1024), - ATA_QUIRK_MAX_TRIM_128M = (1U << __ATA_QUIRK_MAX_TRIM_128M), - ATA_QUIRK_NO_NCQ_ON_ATI = (1U << __ATA_QUIRK_NO_NCQ_ON_ATI), - ATA_QUIRK_NO_LPM_ON_ATI = (1U << __ATA_QUIRK_NO_LPM_ON_ATI), - ATA_QUIRK_NO_ID_DEV_LOG = (1U << __ATA_QUIRK_NO_ID_DEV_LOG), - ATA_QUIRK_NO_LOG_DIR = (1U << __ATA_QUIRK_NO_LOG_DIR), - ATA_QUIRK_NO_FUA = (1U << __ATA_QUIRK_NO_FUA), - /* User visible DMA mask for DMA control. DO NOT renumber. 
*/ ATA_DMA_MASK_ATA = (1 << 0), /* DMA on ATA Disk */ ATA_DMA_MASK_ATAPI = (1 << 1), /* DMA on ATAPI */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 873e510d6f8d..0651865a4564 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -52,6 +52,7 @@ enum memcg_memory_event { MEMCG_SWAP_HIGH, MEMCG_SWAP_MAX, MEMCG_SWAP_FAIL, + MEMCG_SOCK_THROTTLED, MEMCG_NR_MEMORY_EVENTS, }; @@ -956,17 +957,7 @@ unsigned long lruvec_page_state_local(struct lruvec *lruvec, void mem_cgroup_flush_stats(struct mem_cgroup *memcg); void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg); -void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val); - -static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, - int val) -{ - unsigned long flags; - - local_irq_save(flags); - __mod_lruvec_kmem_state(p, idx, val); - local_irq_restore(flags); -} +void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val); void count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count); @@ -1001,36 +992,8 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, count_memcg_events_mm(mm, idx, 1); } -static inline void __memcg_memory_event(struct mem_cgroup *memcg, - enum memcg_memory_event event, - bool allow_spinning) -{ - bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX || - event == MEMCG_SWAP_FAIL; - - /* For now only MEMCG_MAX can happen with !allow_spinning context. 
*/ - VM_WARN_ON_ONCE(!allow_spinning && event != MEMCG_MAX); - - atomic_long_inc(&memcg->memory_events_local[event]); - if (!swap_event && allow_spinning) - cgroup_file_notify(&memcg->events_local_file); - - do { - atomic_long_inc(&memcg->memory_events[event]); - if (allow_spinning) { - if (swap_event) - cgroup_file_notify(&memcg->swap_events_file); - else - cgroup_file_notify(&memcg->events_file); - } - - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) - break; - if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS) - break; - } while ((memcg = parent_mem_cgroup(memcg)) && - !mem_cgroup_is_root(memcg)); -} +void __memcg_memory_event(struct mem_cgroup *memcg, + enum memcg_memory_event event, bool allow_spinning); static inline void memcg_memory_event(struct mem_cgroup *memcg, enum memcg_memory_event event) @@ -1430,14 +1393,6 @@ static inline void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg) { } -static inline void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, - int val) -{ - struct page *page = virt_to_head_page(p); - - __mod_node_page_state(page_pgdat(page), idx, val); -} - static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { @@ -1497,16 +1452,6 @@ struct slabobj_ext { #endif } __aligned(8); -static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx) -{ - __mod_lruvec_kmem_state(p, idx, 1); -} - -static inline void __dec_lruvec_kmem_state(void *p, enum node_stat_item idx) -{ - __mod_lruvec_kmem_state(p, idx, -1); -} - static inline struct lruvec *parent_lruvec(struct lruvec *lruvec) { struct mem_cgroup *memcg; @@ -1674,6 +1619,11 @@ int alloc_shrinker_info(struct mem_cgroup *memcg); void free_shrinker_info(struct mem_cgroup *memcg); void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); void reparent_shrinker_deferred(struct mem_cgroup *memcg); + +static inline int shrinker_id(struct shrinker *shrinker) +{ + return shrinker->id; +} #else #define 
mem_cgroup_sockets_enabled 0 @@ -1705,6 +1655,11 @@ static inline void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) { } + +static inline int shrinker_id(struct shrinker *shrinker) +{ + return -1; +} #endif #ifdef CONFIG_MEMCG @@ -1791,6 +1746,13 @@ static inline void count_objcg_events(struct obj_cgroup *objcg, bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid); +void mem_cgroup_show_protected_memory(struct mem_cgroup *memcg); + +static inline bool memcg_is_dying(struct mem_cgroup *memcg) +{ + return memcg ? css_is_dying(&memcg->css) : false; +} + #else static inline bool mem_cgroup_kmem_disabled(void) { @@ -1857,6 +1819,15 @@ static inline bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid) { return true; } + +static inline void mem_cgroup_show_protected_memory(struct mem_cgroup *memcg) +{ +} + +static inline bool memcg_is_dying(struct mem_cgroup *memcg) +{ + return false; +} #endif /* CONFIG_MEMCG */ #if defined(CONFIG_MEMCG) && defined(CONFIG_ZSWAP) diff --git a/include/linux/memory-failure.h b/include/linux/memory-failure.h new file mode 100644 index 000000000000..bc326503d2d2 --- /dev/null +++ b/include/linux/memory-failure.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MEMORY_FAILURE_H +#define _LINUX_MEMORY_FAILURE_H + +#include <linux/interval_tree.h> + +struct pfn_address_space; + +struct pfn_address_space { + struct interval_tree_node node; + struct address_space *mapping; +}; + +int register_pfn_address_space(struct pfn_address_space *pfn_space); +void unregister_pfn_address_space(struct pfn_address_space *pfn_space); + +#endif /* _LINUX_MEMORY_FAILURE_H */ diff --git a/include/linux/memory.h b/include/linux/memory.h index ba1515160894..faeaa921e55b 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -64,9 +64,19 @@ struct memory_group { }; }; +enum memory_block_state { + /* These states are exposed to userspace as text strings in sysfs */ + MEM_ONLINE, /* 
exposed to userspace */ + MEM_GOING_OFFLINE, /* exposed to userspace */ + MEM_OFFLINE, /* exposed to userspace */ + MEM_GOING_ONLINE, + MEM_CANCEL_ONLINE, + MEM_CANCEL_OFFLINE, +}; + struct memory_block { unsigned long start_section_nr; - unsigned long state; /* serialized by the dev->lock */ + enum memory_block_state state; /* serialized by the dev->lock */ int online_type; /* for passing data to online routine */ int nid; /* NID for this memory block */ /* @@ -89,14 +99,6 @@ int arch_get_memory_phys_device(unsigned long start_pfn); unsigned long memory_block_size_bytes(void); int set_memory_block_size_order(unsigned int order); -/* These states are exposed to userspace as text strings in sysfs */ -#define MEM_ONLINE (1<<0) /* exposed to userspace */ -#define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */ -#define MEM_OFFLINE (1<<2) /* exposed to userspace */ -#define MEM_GOING_ONLINE (1<<3) -#define MEM_CANCEL_ONLINE (1<<4) -#define MEM_CANCEL_OFFLINE (1<<5) - struct memory_notify { unsigned long start_pfn; unsigned long nr_pages; @@ -130,7 +132,7 @@ static inline int register_memory_notifier(struct notifier_block *nb) static inline void unregister_memory_notifier(struct notifier_block *nb) { } -static inline int memory_notify(unsigned long val, void *v) +static inline int memory_notify(enum memory_block_state state, void *v) { return 0; } @@ -154,7 +156,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size, struct memory_group *group); void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); -extern int memory_notify(unsigned long val, void *v); +extern int memory_notify(enum memory_block_state state, void *v); extern struct memory_block *find_memory_block(unsigned long section_nr); typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *); extern int walk_memory_blocks(unsigned long start, unsigned long size, diff --git a/include/linux/memregion.h 
b/include/linux/memregion.h index c01321467789..a55f62cc5266 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -26,8 +26,10 @@ static inline void memregion_free(int id) /** * cpu_cache_invalidate_memregion - drop any CPU cached data for - * memregions described by @res_desc - * @res_desc: one of the IORES_DESC_* types + * memregion + * @start: start physical address of the target memory region. + * @len: length of the target memory region. -1 for all the regions of + * the target type. * * Perform cache maintenance after a memory event / operation that * changes the contents of physical memory in a cache-incoherent manner. @@ -46,7 +48,7 @@ static inline void memregion_free(int id) * the cache maintenance. */ #ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION -int cpu_cache_invalidate_memregion(int res_desc); +int cpu_cache_invalidate_memregion(phys_addr_t start, size_t len); bool cpu_cache_has_invalidate_memregion(void); #else static inline bool cpu_cache_has_invalidate_memregion(void) @@ -54,10 +56,16 @@ static inline bool cpu_cache_has_invalidate_memregion(void) return false; } -static inline int cpu_cache_invalidate_memregion(int res_desc) +static inline int cpu_cache_invalidate_memregion(phys_addr_t start, size_t len) { WARN_ON_ONCE("CPU cache invalidation required"); return -ENXIO; } #endif + +static inline int cpu_cache_invalidate_all(void) +{ + return cpu_cache_invalidate_memregion(0, -1); +} + #endif /* _MEMREGION_H_ */ diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 30c7aecbd245..713ec0435b48 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -76,11 +76,11 @@ enum memory_type { struct dev_pagemap_ops { /* - * Called once the page refcount reaches 0. The reference count will be + * Called once the folio refcount reaches 0. The reference count will be * reset to one by the core code after the method is called to prepare - * for handing out the page again. 
+ * for handing out the folio again. */ - void (*page_free)(struct page *page); + void (*folio_free)(struct folio *folio); /* * Used for private (un-addressable) device memory only. Must migrate @@ -99,6 +99,13 @@ struct dev_pagemap_ops { */ int (*memory_failure)(struct dev_pagemap *pgmap, unsigned long pfn, unsigned long nr_pages, int mf_flags); + + /* + * Used for private (un-addressable) device memory only. + * This callback is used when a folio is split into + * a smaller folio + */ + void (*folio_split)(struct folio *head, struct folio *tail); }; #define PGMAP_ALTMAP_VALID (1 << 0) @@ -176,6 +183,18 @@ static inline bool folio_is_pci_p2pdma(const struct folio *folio) folio->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; } +static inline void *folio_zone_device_data(const struct folio *folio) +{ + VM_WARN_ON_FOLIO(!folio_is_device_private(folio), folio); + return folio->page.zone_device_data; +} + +static inline void folio_set_zone_device_data(struct folio *folio, void *data) +{ + VM_WARN_ON_FOLIO(!folio_is_device_private(folio), folio); + folio->page.zone_device_data = data; +} + static inline bool is_pci_p2pdma_page(const struct page *page) { return IS_ENABLED(CONFIG_PCI_P2PDMA) && @@ -205,7 +224,7 @@ static inline bool is_fsdax_page(const struct page *page) } #ifdef CONFIG_ZONE_DEVICE -void zone_device_page_init(struct page *page); +void zone_device_page_init(struct page *page, unsigned int order); void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); @@ -214,6 +233,31 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn); bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn); unsigned long memremap_compat_align(void); + +static inline void zone_device_folio_init(struct folio *folio, unsigned int order) +{ + zone_device_page_init(&folio->page, order); + if (order) + folio_set_large_rmappable(folio); +} + +static inline void 
zone_device_private_split_cb(struct folio *original_folio, + struct folio *new_folio) +{ + if (folio_is_device_private(original_folio)) { + if (!original_folio->pgmap->ops->folio_split) { + if (new_folio) { + new_folio->pgmap = original_folio->pgmap; + new_folio->page.mapping = + original_folio->page.mapping; + } + } else { + original_folio->pgmap->ops->folio_split(original_folio, + new_folio); + } + } +} + #else static inline void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) @@ -247,6 +291,11 @@ static inline unsigned long memremap_compat_align(void) { return PAGE_SIZE; } + +static inline void zone_device_private_split_cb(struct folio *original_folio, + struct folio *new_folio) +{ +} #endif /* CONFIG_ZONE_DEVICE */ static inline void put_dev_pagemap(struct dev_pagemap *pgmap) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 1f0ac122c3bf..26ca00c325d9 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -65,7 +65,7 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); -void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) +void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) __releases(ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, @@ -125,6 +125,7 @@ static inline int migrate_misplaced_folio(struct folio *folio, int node) #define MIGRATE_PFN_VALID (1UL << 0) #define MIGRATE_PFN_MIGRATE (1UL << 1) #define MIGRATE_PFN_WRITE (1UL << 3) +#define MIGRATE_PFN_COMPOUND (1UL << 4) #define MIGRATE_PFN_SHIFT 6 static inline struct page *migrate_pfn_to_page(unsigned long mpfn) @@ -143,6 +144,7 @@ enum migrate_vma_direction { MIGRATE_VMA_SELECT_SYSTEM = 1 << 0, MIGRATE_VMA_SELECT_DEVICE_PRIVATE = 1 << 1, MIGRATE_VMA_SELECT_DEVICE_COHERENT = 1 << 2, + 
MIGRATE_VMA_SELECT_COMPOUND = 1 << 3, }; struct migrate_vma { diff --git a/include/linux/mm.h b/include/linux/mm.h index 8dc0a07570cc..7a1819c20643 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -105,6 +105,8 @@ extern int mmap_rnd_compat_bits __read_mostly; # endif #endif +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) + #include <asm/page.h> #include <asm/processor.h> @@ -273,178 +275,235 @@ extern unsigned int kobjsize(const void *objp); * vm_flags in vm_area_struct, see mm_types.h. * When changing, update also include/trace/events/mmflags.h */ -#define VM_NONE 0x00000000 -#define VM_READ 0x00000001 /* currently active flags */ -#define VM_WRITE 0x00000002 -#define VM_EXEC 0x00000004 -#define VM_SHARED 0x00000008 +#define VM_NONE 0x00000000 -/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */ -#define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */ -#define VM_MAYWRITE 0x00000020 -#define VM_MAYEXEC 0x00000040 -#define VM_MAYSHARE 0x00000080 +/** + * typedef vma_flag_t - specifies an individual VMA flag by bit number. + * + * This value is made type safe by sparse to avoid passing invalid flag values + * around. + */ +typedef int __bitwise vma_flag_t; -#define VM_GROWSDOWN 0x00000100 /* general info on the segment */ +#define DECLARE_VMA_BIT(name, bitnum) \ + VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum) +#define DECLARE_VMA_BIT_ALIAS(name, aliased) \ + VMA_ ## name ## _BIT = (VMA_ ## aliased ## _BIT) +enum { + DECLARE_VMA_BIT(READ, 0), + DECLARE_VMA_BIT(WRITE, 1), + DECLARE_VMA_BIT(EXEC, 2), + DECLARE_VMA_BIT(SHARED, 3), + /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */ + DECLARE_VMA_BIT(MAYREAD, 4), /* limits for mprotect() etc. 
*/ + DECLARE_VMA_BIT(MAYWRITE, 5), + DECLARE_VMA_BIT(MAYEXEC, 6), + DECLARE_VMA_BIT(MAYSHARE, 7), + DECLARE_VMA_BIT(GROWSDOWN, 8), /* general info on the segment */ #ifdef CONFIG_MMU -#define VM_UFFD_MISSING 0x00000200 /* missing pages tracking */ -#else /* CONFIG_MMU */ -#define VM_MAYOVERLAY 0x00000200 /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */ -#define VM_UFFD_MISSING 0 + DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */ +#else + /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */ + DECLARE_VMA_BIT(MAYOVERLAY, 9), #endif /* CONFIG_MMU */ -#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ -#define VM_UFFD_WP 0x00001000 /* wrprotect pages tracking */ - -#define VM_LOCKED 0x00002000 -#define VM_IO 0x00004000 /* Memory mapped I/O or similar */ - - /* Used by sys_madvise() */ -#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */ -#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */ - -#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ -#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ -#define VM_LOCKONFAULT 0x00080000 /* Lock the pages covered when they are faulted in */ -#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ -#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ -#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ -#define VM_SYNC 0x00800000 /* Synchronous page faults */ -#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ -#define VM_WIPEONFORK 0x02000000 /* Wipe VMA contents in child. 
*/ -#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ - + /* Page-ranges managed without "struct page", just pure PFN */ + DECLARE_VMA_BIT(PFNMAP, 10), + DECLARE_VMA_BIT(MAYBE_GUARD, 11), + DECLARE_VMA_BIT(UFFD_WP, 12), /* wrprotect pages tracking */ + DECLARE_VMA_BIT(LOCKED, 13), + DECLARE_VMA_BIT(IO, 14), /* Memory mapped I/O or similar */ + DECLARE_VMA_BIT(SEQ_READ, 15), /* App will access data sequentially */ + DECLARE_VMA_BIT(RAND_READ, 16), /* App will not benefit from clustered reads */ + DECLARE_VMA_BIT(DONTCOPY, 17), /* Do not copy this vma on fork */ + DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */ + DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */ + DECLARE_VMA_BIT(ACCOUNT, 20), /* Is a VM accounted object */ + DECLARE_VMA_BIT(NORESERVE, 21), /* should the VM suppress accounting */ + DECLARE_VMA_BIT(HUGETLB, 22), /* Huge TLB Page VM */ + DECLARE_VMA_BIT(SYNC, 23), /* Synchronous page faults */ + DECLARE_VMA_BIT(ARCH_1, 24), /* Architecture-specific flag */ + DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */ + DECLARE_VMA_BIT(DONTDUMP, 26), /* Do not include in the core dump */ + DECLARE_VMA_BIT(SOFTDIRTY, 27), /* NOT soft dirty clean area */ + DECLARE_VMA_BIT(MIXEDMAP, 28), /* Can contain struct page and pure PFN pages */ + DECLARE_VMA_BIT(HUGEPAGE, 29), /* MADV_HUGEPAGE marked this vma */ + DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */ + DECLARE_VMA_BIT(MERGEABLE, 31), /* KSM may merge identical pages */ + /* These bits are reused, we define specific uses below. */ + DECLARE_VMA_BIT(HIGH_ARCH_0, 32), + DECLARE_VMA_BIT(HIGH_ARCH_1, 33), + DECLARE_VMA_BIT(HIGH_ARCH_2, 34), + DECLARE_VMA_BIT(HIGH_ARCH_3, 35), + DECLARE_VMA_BIT(HIGH_ARCH_4, 36), + DECLARE_VMA_BIT(HIGH_ARCH_5, 37), + DECLARE_VMA_BIT(HIGH_ARCH_6, 38), + /* + * This flag is used to connect VFIO to arch specific KVM code. 
It + * indicates that the memory under this VMA is safe for use with any + * non-cachable memory type inside KVM. Some VFIO devices, on some + * platforms, are thought to be unsafe and can cause machine crashes + * if KVM does not lock down the memory type. + */ + DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39), +#ifdef CONFIG_PPC32 + DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1), +#else + DECLARE_VMA_BIT(DROPPABLE, 40), +#endif + DECLARE_VMA_BIT(UFFD_MINOR, 41), + DECLARE_VMA_BIT(SEALED, 42), + /* Flags that reuse flags above. */ + DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0), + DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1), + DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2), + DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3), + DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4), +#if defined(CONFIG_X86_USER_SHADOW_STACK) + /* + * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of + * support core mm. + * + * These VMAs will get a single end guard page. This helps userspace + * protect itself from attacks. A single page is enough for current + * shadow stack archs (x86). See the comments near alloc_shstk() in + * arch/x86/kernel/shstk.c for more details on the guard size. + */ + DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5), +#elif defined(CONFIG_ARM64_GCS) + /* + * arm64's Guarded Control Stack implements similar functionality and + * has similar constraints to shadow stacks. 
+ */ + DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6), +#endif + DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1), /* Strong Access Ordering (powerpc) */ + DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1), /* parisc */ + DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1), /* sparc64 */ + DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1), /* arm64 */ + DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1), /* sparc64, arm64 */ + DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1), /* !CONFIG_MMU */ + DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4), /* arm64 */ + DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */ +#ifdef CONFIG_STACK_GROWSUP + DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP), + DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN), +#else + DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN), +#endif +}; +#undef DECLARE_VMA_BIT +#undef DECLARE_VMA_BIT_ALIAS + +#define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT) +#define VM_READ INIT_VM_FLAG(READ) +#define VM_WRITE INIT_VM_FLAG(WRITE) +#define VM_EXEC INIT_VM_FLAG(EXEC) +#define VM_SHARED INIT_VM_FLAG(SHARED) +#define VM_MAYREAD INIT_VM_FLAG(MAYREAD) +#define VM_MAYWRITE INIT_VM_FLAG(MAYWRITE) +#define VM_MAYEXEC INIT_VM_FLAG(MAYEXEC) +#define VM_MAYSHARE INIT_VM_FLAG(MAYSHARE) +#define VM_GROWSDOWN INIT_VM_FLAG(GROWSDOWN) +#ifdef CONFIG_MMU +#define VM_UFFD_MISSING INIT_VM_FLAG(UFFD_MISSING) +#else +#define VM_UFFD_MISSING VM_NONE +#define VM_MAYOVERLAY INIT_VM_FLAG(MAYOVERLAY) +#endif +#define VM_PFNMAP INIT_VM_FLAG(PFNMAP) +#define VM_MAYBE_GUARD INIT_VM_FLAG(MAYBE_GUARD) +#define VM_UFFD_WP INIT_VM_FLAG(UFFD_WP) +#define VM_LOCKED INIT_VM_FLAG(LOCKED) +#define VM_IO INIT_VM_FLAG(IO) +#define VM_SEQ_READ INIT_VM_FLAG(SEQ_READ) +#define VM_RAND_READ INIT_VM_FLAG(RAND_READ) +#define VM_DONTCOPY INIT_VM_FLAG(DONTCOPY) +#define VM_DONTEXPAND INIT_VM_FLAG(DONTEXPAND) +#define VM_LOCKONFAULT INIT_VM_FLAG(LOCKONFAULT) +#define VM_ACCOUNT INIT_VM_FLAG(ACCOUNT) +#define VM_NORESERVE INIT_VM_FLAG(NORESERVE) +#define VM_HUGETLB INIT_VM_FLAG(HUGETLB) +#define VM_SYNC INIT_VM_FLAG(SYNC) 
+#define VM_ARCH_1 INIT_VM_FLAG(ARCH_1) +#define VM_WIPEONFORK INIT_VM_FLAG(WIPEONFORK) +#define VM_DONTDUMP INIT_VM_FLAG(DONTDUMP) #ifdef CONFIG_MEM_SOFT_DIRTY -# define VM_SOFTDIRTY 0x08000000 /* Not soft dirty clean area */ +#define VM_SOFTDIRTY INIT_VM_FLAG(SOFTDIRTY) #else -# define VM_SOFTDIRTY 0 +#define VM_SOFTDIRTY VM_NONE +#endif +#define VM_MIXEDMAP INIT_VM_FLAG(MIXEDMAP) +#define VM_HUGEPAGE INIT_VM_FLAG(HUGEPAGE) +#define VM_NOHUGEPAGE INIT_VM_FLAG(NOHUGEPAGE) +#define VM_MERGEABLE INIT_VM_FLAG(MERGEABLE) +#define VM_STACK INIT_VM_FLAG(STACK) +#ifdef CONFIG_STACK_GROWSUP +#define VM_STACK_EARLY INIT_VM_FLAG(STACK_EARLY) +#else +#define VM_STACK_EARLY VM_NONE #endif - -#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ -#define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ -#define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ -#define VM_MERGEABLE BIT(31) /* KSM may merge identical pages */ - -#ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS -#define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */ -#define VM_HIGH_ARCH_BIT_1 33 /* bit only usable on 64-bit architectures */ -#define VM_HIGH_ARCH_BIT_2 34 /* bit only usable on 64-bit architectures */ -#define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ -#define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */ -#define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */ -#define VM_HIGH_ARCH_BIT_6 38 /* bit only usable on 64-bit architectures */ -#define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) -#define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) -#define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) -#define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) -#define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) -#define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) -#define VM_HIGH_ARCH_6 BIT(VM_HIGH_ARCH_BIT_6) -#endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ - #ifdef CONFIG_ARCH_HAS_PKEYS -# define VM_PKEY_SHIFT
VM_HIGH_ARCH_BIT_0 -# define VM_PKEY_BIT0 VM_HIGH_ARCH_0 -# define VM_PKEY_BIT1 VM_HIGH_ARCH_1 -# define VM_PKEY_BIT2 VM_HIGH_ARCH_2 +#define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT) +/* Despite the naming, these are FLAGS not bits. */ +#define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0) +#define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1) +#define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2) #if CONFIG_ARCH_PKEY_BITS > 3 -# define VM_PKEY_BIT3 VM_HIGH_ARCH_3 +#define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3) #else -# define VM_PKEY_BIT3 0 -#endif +#define VM_PKEY_BIT3 VM_NONE +#endif /* CONFIG_ARCH_PKEY_BITS > 3 */ #if CONFIG_ARCH_PKEY_BITS > 4 -# define VM_PKEY_BIT4 VM_HIGH_ARCH_4 +#define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4) #else -# define VM_PKEY_BIT4 0 -#endif +#define VM_PKEY_BIT4 VM_NONE +#endif /* CONFIG_ARCH_PKEY_BITS > 4 */ #endif /* CONFIG_ARCH_HAS_PKEYS */ - -#ifdef CONFIG_X86_USER_SHADOW_STACK -/* - * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of - * support core mm. - * - * These VMAs will get a single end guard page. This helps userspace protect - * itself from attacks. A single page is enough for current shadow stack archs - * (x86). See the comments near alloc_shstk() in arch/x86/kernel/shstk.c - * for more details on the guard size. - */ -# define VM_SHADOW_STACK VM_HIGH_ARCH_5 -#endif - -#if defined(CONFIG_ARM64_GCS) -/* - * arm64's Guarded Control Stack implements similar functionality and - * has similar constraints to shadow stacks. 
- */ -# define VM_SHADOW_STACK VM_HIGH_ARCH_6 -#endif - -#ifndef VM_SHADOW_STACK -# define VM_SHADOW_STACK VM_NONE +#if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS) +#define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK) +#else +#define VM_SHADOW_STACK VM_NONE #endif - #if defined(CONFIG_PPC64) -# define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ +#define VM_SAO INIT_VM_FLAG(SAO) #elif defined(CONFIG_PARISC) -# define VM_GROWSUP VM_ARCH_1 +#define VM_GROWSUP INIT_VM_FLAG(GROWSUP) #elif defined(CONFIG_SPARC64) -# define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */ -# define VM_ARCH_CLEAR VM_SPARC_ADI +#define VM_SPARC_ADI INIT_VM_FLAG(SPARC_ADI) +#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR) #elif defined(CONFIG_ARM64) -# define VM_ARM64_BTI VM_ARCH_1 /* BTI guarded page, a.k.a. GP bit */ -# define VM_ARCH_CLEAR VM_ARM64_BTI +#define VM_ARM64_BTI INIT_VM_FLAG(ARM64_BTI) +#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR) #elif !defined(CONFIG_MMU) -# define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */ -#endif - -#if defined(CONFIG_ARM64_MTE) -# define VM_MTE VM_HIGH_ARCH_4 /* Use Tagged memory for access control */ -# define VM_MTE_ALLOWED VM_HIGH_ARCH_5 /* Tagged memory permitted */ -#else -# define VM_MTE VM_NONE -# define VM_MTE_ALLOWED VM_NONE +#define VM_MAPPED_COPY INIT_VM_FLAG(MAPPED_COPY) #endif - #ifndef VM_GROWSUP -# define VM_GROWSUP VM_NONE +#define VM_GROWSUP VM_NONE +#endif +#ifdef CONFIG_ARM64_MTE +#define VM_MTE INIT_VM_FLAG(MTE) +#define VM_MTE_ALLOWED INIT_VM_FLAG(MTE_ALLOWED) +#else +#define VM_MTE VM_NONE +#define VM_MTE_ALLOWED VM_NONE #endif - #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR -# define VM_UFFD_MINOR_BIT 41 -# define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ -#else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ -# define VM_UFFD_MINOR VM_NONE -#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ - -/* - * This flag is used to connect VFIO to arch specific KVM 
code. It - * indicates that the memory under this VMA is safe for use with any - * non-cachable memory type inside KVM. Some VFIO devices, on some - * platforms, are thought to be unsafe and can cause machine crashes - * if KVM does not lock down the memory type. - */ -#ifdef CONFIG_64BIT -#define VM_ALLOW_ANY_UNCACHED_BIT 39 -#define VM_ALLOW_ANY_UNCACHED BIT(VM_ALLOW_ANY_UNCACHED_BIT) +#define VM_UFFD_MINOR INIT_VM_FLAG(UFFD_MINOR) #else -#define VM_ALLOW_ANY_UNCACHED VM_NONE +#define VM_UFFD_MINOR VM_NONE #endif - #ifdef CONFIG_64BIT -#define VM_DROPPABLE_BIT 40 -#define VM_DROPPABLE BIT(VM_DROPPABLE_BIT) -#elif defined(CONFIG_PPC32) -#define VM_DROPPABLE VM_ARCH_1 +#define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED) +#define VM_SEALED INIT_VM_FLAG(SEALED) #else -#define VM_DROPPABLE VM_NONE +#define VM_ALLOW_ANY_UNCACHED VM_NONE +#define VM_SEALED VM_NONE #endif - -#ifdef CONFIG_64BIT -#define VM_SEALED_BIT 42 -#define VM_SEALED BIT(VM_SEALED_BIT) +#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32) +#define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE) #else -#define VM_SEALED VM_NONE +#define VM_DROPPABLE VM_NONE #endif /* Bits set in the VMA until the stack is in its final location */ @@ -470,12 +529,10 @@ extern unsigned int kobjsize(const void *objp); #define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK) -#ifdef CONFIG_STACK_GROWSUP -#define VM_STACK VM_GROWSUP -#define VM_STACK_EARLY VM_GROWSDOWN +#ifdef CONFIG_MSEAL_SYSTEM_MAPPINGS +#define VM_SEALED_SYSMAP VM_SEALED #else -#define VM_STACK VM_GROWSDOWN -#define VM_STACK_EARLY 0 +#define VM_SEALED_SYSMAP VM_NONE #endif #define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) @@ -483,12 +540,26 @@ extern unsigned int kobjsize(const void *objp); /* VMA basic access permission flags */ #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) - /* * Special vmas that are non-mergable, non-mlock()able. 
*/ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) +/* + * Physically remapped pages are special. Tell the + * rest of the world about it: + * VM_IO tells people not to look at these pages + * (accesses can have side effects). + * VM_PFNMAP tells the core MM that the base pages are just + * raw PFN mappings, and do not have a "struct page" associated + * with them. + * VM_DONTEXPAND + * Disable vma merging and expanding with mremap(). + * VM_DONTDUMP + * Omit vma from core dump, even when VM_IO turned off. + */ +#define VM_REMAP_FLAGS (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP) + /* This mask prevents VMA from being scanned with khugepaged */ #define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB) @@ -498,13 +569,69 @@ extern unsigned int kobjsize(const void *objp); /* This mask represents all the VMA flag bits used by mlock */ #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) +/* These flags can be updated atomically via VMA/mmap read lock. */ +#define VM_ATOMIC_SET_ALLOWED VM_MAYBE_GUARD + /* Arch-specific flags to clear when updating VM flags on protection change */ #ifndef VM_ARCH_CLEAR -# define VM_ARCH_CLEAR VM_NONE +#define VM_ARCH_CLEAR VM_NONE #endif #define VM_FLAGS_CLEAR (ARCH_VM_PKEY_FLAGS | VM_ARCH_CLEAR) /* + * Flags which should be 'sticky' on merge - that is, flags which, when one VMA + * possesses it but the other does not, the merged VMA should nonetheless have + * applied to it: + * + * VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is has not had its + * references cleared via /proc/$pid/clear_refs, any merged VMA + * should be considered soft-dirty also as it operates at a VMA + * granularity. + * + * VM_MAYBE_GUARD - If a VMA may have guard regions in place it implies that + * mapped page tables may contain metadata not described by the + * VMA and thus any merged VMA may also contain this metadata, + * and thus we must make this flag sticky. 
+ */ +#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD) + +/* + * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one + * of these flags and the other not does not preclude a merge. + * + * VM_STICKY - When merging VMAs, VMA flags must match, unless they are + * 'sticky'. If any sticky flags exist in either VMA, we simply + * set all of them on the merged VMA. + */ +#define VM_IGNORE_MERGE VM_STICKY + +/* + * Flags which should result in page tables being copied on fork. These are + * flags which indicate that the VMA maps page tables which cannot be + * reconstituted upon page fault, so necessitate page table copying upon fork. + * + * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be + * reasonably reconstructed on page fault. + * + * VM_UFFD_WP - Encodes metadata about an installed uffd + * write protect handler, which cannot be + * reconstructed on page fault. + * + * We always copy pgtables when dst_vma has uffd-wp + * enabled even if it's file-backed + * (e.g. shmem). Because when uffd-wp is enabled, + * pgtable contains uffd-wp protection information, + * that's something we can't retrieve from page cache, + * and skip copying will lose those info. + * + * VM_MAYBE_GUARD - Could contain page guard region markers which + * by design are a property of the page tables + * only and thus cannot be reconstructed on page + * fault. + */ +#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD) + +/* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. 
*/ @@ -783,7 +910,9 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) static inline void vm_flags_init(struct vm_area_struct *vma, vm_flags_t flags) { - ACCESS_PRIVATE(vma, __vm_flags) = flags; + VM_WARN_ON_ONCE(!pgtable_supports_soft_dirty() && (flags & VM_SOFTDIRTY)); + vma_flags_clear_all(&vma->flags); + vma_flags_overwrite_word(&vma->flags, flags); } /* @@ -794,6 +923,7 @@ static inline void vm_flags_init(struct vm_area_struct *vma, static inline void vm_flags_reset(struct vm_area_struct *vma, vm_flags_t flags) { + VM_WARN_ON_ONCE(!pgtable_supports_soft_dirty() && (flags & VM_SOFTDIRTY)); vma_assert_write_locked(vma); vm_flags_init(vma, flags); } @@ -802,21 +932,33 @@ static inline void vm_flags_reset_once(struct vm_area_struct *vma, vm_flags_t flags) { vma_assert_write_locked(vma); - WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags); + /* + * If VMA flags exist beyond the first system word, also clear these. It + * is assumed the write once behaviour is required only for the first + * system word. 
+ */ + if (NUM_VMA_FLAG_BITS > BITS_PER_LONG) { + unsigned long *bitmap = ACCESS_PRIVATE(&vma->flags, __vma_flags); + + bitmap_zero(&bitmap[1], NUM_VMA_FLAG_BITS - BITS_PER_LONG); + } + + vma_flags_overwrite_word_once(&vma->flags, flags); } static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags) { vma_start_write(vma); - ACCESS_PRIVATE(vma, __vm_flags) |= flags; + vma_flags_set_word(&vma->flags, flags); } static inline void vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags) { + VM_WARN_ON_ONCE(!pgtable_supports_soft_dirty() && (flags & VM_SOFTDIRTY)); vma_start_write(vma); - ACCESS_PRIVATE(vma, __vm_flags) &= ~flags; + vma_flags_clear_word(&vma->flags, flags); } /* @@ -840,6 +982,51 @@ static inline void vm_flags_mod(struct vm_area_struct *vma, __vm_flags_mod(vma, set, clear); } +static inline bool __vma_flag_atomic_valid(struct vm_area_struct *vma, + vma_flag_t bit) +{ + const vm_flags_t mask = BIT((__force int)bit); + + /* Only specific flags are permitted */ + if (WARN_ON_ONCE(!(mask & VM_ATOMIC_SET_ALLOWED))) + return false; + + return true; +} + +/* + * Set VMA flag atomically. Requires only VMA/mmap read lock. Only specific + * valid flags are allowed to do this. + */ +static inline void vma_flag_set_atomic(struct vm_area_struct *vma, + vma_flag_t bit) +{ + unsigned long *bitmap = ACCESS_PRIVATE(&vma->flags, __vma_flags); + + /* mmap read lock/VMA read lock must be held. */ + if (!rwsem_is_locked(&vma->vm_mm->mmap_lock)) + vma_assert_locked(vma); + + if (__vma_flag_atomic_valid(vma, bit)) + set_bit((__force int)bit, bitmap); +} + +/* + * Test for VMA flag atomically. Requires no locks. Only specific valid flags + * are allowed to do this. + * + * This is necessarily racy, so callers must ensure that serialisation is + * achieved through some other means, or that races are permissible. 
+ */ +static inline bool vma_flag_test_atomic(struct vm_area_struct *vma, + vma_flag_t bit) +{ + if (__vma_flag_atomic_valid(vma, bit)) + return test_bit((__force int)bit, &vma->vm_flags); + + return false; +} + static inline void vma_set_anonymous(struct vm_area_struct *vma) { vma->vm_ops = NULL; @@ -2438,7 +2625,7 @@ static inline void zap_vma_pages(struct vm_area_struct *vma) } void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, struct vm_area_struct *start_vma, unsigned long start, - unsigned long end, unsigned long tree_end, bool mm_wr_locked); + unsigned long end, unsigned long tree_end); struct mmu_notifier_range; @@ -2922,6 +3109,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a #endif /* CONFIG_MMU */ enum pt_flags { + PT_kernel = PG_referenced, PT_reserved = PG_reserved, /* High bits are used for zone/node/section */ }; @@ -2948,6 +3136,46 @@ static inline bool pagetable_is_reserved(struct ptdesc *pt) } /** + * ptdesc_set_kernel - Mark a ptdesc used to map the kernel + * @ptdesc: The ptdesc to be marked + * + * Kernel page tables often need special handling. Set a flag so that + * the handling code knows this ptdesc will not be used for userspace. + */ +static inline void ptdesc_set_kernel(struct ptdesc *ptdesc) +{ + set_bit(PT_kernel, &ptdesc->pt_flags.f); +} + +/** + * ptdesc_clear_kernel - Mark a ptdesc as no longer used to map the kernel + * @ptdesc: The ptdesc to be unmarked + * + * Use when the ptdesc is no longer used to map the kernel and no longer + * needs special handling. + */ +static inline void ptdesc_clear_kernel(struct ptdesc *ptdesc) +{ + /* + * Note: the 'PG_referenced' bit does not strictly need to be + * cleared before freeing the page. But this is nice for + * symmetry. 
+ */ + clear_bit(PT_kernel, &ptdesc->pt_flags.f); +} + +/** + * ptdesc_test_kernel - Check if a ptdesc is used to map the kernel + * @ptdesc: The ptdesc being tested + * + * Call to tell if the ptdesc is used to map the kernel. + */ +static inline bool ptdesc_test_kernel(const struct ptdesc *ptdesc) +{ + return test_bit(PT_kernel, &ptdesc->pt_flags.f); +} + +/** * pagetable_alloc - Allocate pagetables * @gfp: GFP flags * @order: desired pagetable order @@ -2965,6 +3193,21 @@ static inline struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int orde } #define pagetable_alloc(...) alloc_hooks(pagetable_alloc_noprof(__VA_ARGS__)) +static inline void __pagetable_free(struct ptdesc *pt) +{ + struct page *page = ptdesc_page(pt); + + __free_pages(page, compound_order(page)); +} + +#ifdef CONFIG_ASYNC_KERNEL_PGTABLE_FREE +void pagetable_free_kernel(struct ptdesc *pt); +#else +static inline void pagetable_free_kernel(struct ptdesc *pt) +{ + __pagetable_free(pt); +} +#endif /** * pagetable_free - Free pagetables * @pt: The page table descriptor @@ -2974,9 +3217,12 @@ static inline struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int orde */ static inline void pagetable_free(struct ptdesc *pt) { - struct page *page = ptdesc_page(pt); - - __free_pages(page, compound_order(page)); + if (ptdesc_test_kernel(pt)) { + ptdesc_clear_kernel(pt); + pagetable_free_kernel(pt); + } else { + __pagetable_free(pt); + } } #if defined(CONFIG_SPLIT_PTE_PTLOCKS) @@ -3560,6 +3806,90 @@ static inline unsigned long vma_pages(const struct vm_area_struct *vma) return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } +static inline unsigned long vma_desc_size(const struct vm_area_desc *desc) +{ + return desc->end - desc->start; +} + +static inline unsigned long vma_desc_pages(const struct vm_area_desc *desc) +{ + return vma_desc_size(desc) >> PAGE_SHIFT; +} + +/** + * mmap_action_remap - helper for mmap_prepare hook to specify that a pure PFN + * remap is required. 
+ * @desc: The VMA descriptor for the VMA requiring remap. + * @start: The virtual address to start the remap from, must be within the VMA. + * @start_pfn: The first PFN in the range to remap. + * @size: The size of the range to remap, in bytes, at most spanning to the end + * of the VMA. + */ +static inline void mmap_action_remap(struct vm_area_desc *desc, + unsigned long start, + unsigned long start_pfn, + unsigned long size) +{ + struct mmap_action *action = &desc->action; + + /* [start, start + size) must be within the VMA. */ + WARN_ON_ONCE(start < desc->start || start >= desc->end); + WARN_ON_ONCE(start + size > desc->end); + + action->type = MMAP_REMAP_PFN; + action->remap.start = start; + action->remap.start_pfn = start_pfn; + action->remap.size = size; + action->remap.pgprot = desc->page_prot; +} + +/** + * mmap_action_remap_full - helper for mmap_prepare hook to specify that the + * entirety of a VMA should be PFN remapped. + * @desc: The VMA descriptor for the VMA requiring remap. + * @start_pfn: The first PFN in the range to remap. + */ +static inline void mmap_action_remap_full(struct vm_area_desc *desc, + unsigned long start_pfn) +{ + mmap_action_remap(desc, desc->start, start_pfn, vma_desc_size(desc)); +} + +/** + * mmap_action_ioremap - helper for mmap_prepare hook to specify that a pure PFN + * I/O remap is required. + * @desc: The VMA descriptor for the VMA requiring remap. + * @start: The virtual address to start the remap from, must be within the VMA. + * @start_pfn: The first PFN in the range to remap. + * @size: The size of the range to remap, in bytes, at most spanning to the end + * of the VMA. 
+ */ +static inline void mmap_action_ioremap(struct vm_area_desc *desc, + unsigned long start, + unsigned long start_pfn, + unsigned long size) +{ + mmap_action_remap(desc, start, start_pfn, size); + desc->action.type = MMAP_IO_REMAP_PFN; +} + +/** + * mmap_action_ioremap_full - helper for mmap_prepare hook to specify that the + * entirety of a VMA should be PFN I/O remapped. + * @desc: The VMA descriptor for the VMA requiring remap. + * @start_pfn: The first PFN in the range to remap. + */ +static inline void mmap_action_ioremap_full(struct vm_area_desc *desc, + unsigned long start_pfn) +{ + mmap_action_ioremap(desc, desc->start, start_pfn, vma_desc_size(desc)); +} + +void mmap_action_prepare(struct mmap_action *action, + struct vm_area_desc *desc); +int mmap_action_complete(struct mmap_action *action, + struct vm_area_struct *vma); + /* Look up the first VMA which exactly match the interval vm_start ... vm_end */ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, unsigned long vm_start, unsigned long vm_end) @@ -3601,10 +3931,9 @@ unsigned long change_prot_numa(struct vm_area_struct *vma, struct vm_area_struct *find_extend_vma_locked(struct mm_struct *, unsigned long addr); -int remap_pfn_range(struct vm_area_struct *, unsigned long addr, - unsigned long pfn, unsigned long size, pgprot_t); -int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr, - unsigned long pfn, unsigned long size, pgprot_t prot); +int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t pgprot); + int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, struct page **pages, unsigned long *num); @@ -3637,15 +3966,24 @@ static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } -#ifndef io_remap_pfn_range -static inline int io_remap_pfn_range(struct vm_area_struct 
*vma, - unsigned long addr, unsigned long pfn, - unsigned long size, pgprot_t prot) +#ifndef io_remap_pfn_range_pfn +static inline unsigned long io_remap_pfn_range_pfn(unsigned long pfn, + unsigned long size) { - return remap_pfn_range(vma, addr, pfn, size, pgprot_decrypted(prot)); + return pfn; } #endif +static inline int io_remap_pfn_range(struct vm_area_struct *vma, + unsigned long addr, unsigned long orig_pfn, + unsigned long size, pgprot_t orig_prot) +{ + const unsigned long pfn = io_remap_pfn_range_pfn(orig_pfn, size); + const pgprot_t prot = pgprot_decrypted(orig_prot); + + return remap_pfn_range(vma, addr, pfn, size, prot); +} + static inline vm_fault_t vmf_error(int err) { if (err == -ENOMEM) @@ -4094,6 +4432,7 @@ enum mf_action_page_type { MF_MSG_DAX, MF_MSG_UNSPLIT_THP, MF_MSG_ALREADY_POISONED, + MF_MSG_PFN_MAP, MF_MSG_UNKNOWN, }; @@ -4222,16 +4561,6 @@ int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *st int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status); int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); - -/* - * mseal of userspace process's system mappings. - */ -#ifdef CONFIG_MSEAL_SYSTEM_MAPPINGS -#define VM_SEALED_SYSMAP VM_SEALED -#else -#define VM_SEALED_SYSMAP VM_NONE -#endif - /* * DMA mapping IDs for page_pool * diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index f6a2b2d20016..fa2d6ba811b5 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -8,7 +8,7 @@ #include <linux/swap.h> #include <linux/string.h> #include <linux/userfaultfd_k.h> -#include <linux/swapops.h> +#include <linux/leafops.h> /** * folio_is_file_lru - Should the folio be on a file LRU or anon LRU? 
@@ -44,7 +44,7 @@ static __always_inline void __update_lru_size(struct lruvec *lruvec, lockdep_assert_held(&lruvec->lru_lock); WARN_ON_ONCE(nr_pages != (int)nr_pages); - __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages); + mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages); __mod_zone_page_state(&pgdat->node_zones[zid], NR_ZONE_LRU_BASE + lru, nr_pages); } @@ -541,9 +541,9 @@ static inline bool mm_tlb_flush_nested(const struct mm_struct *mm) * The caller should insert a new pte created with make_pte_marker(). */ static inline pte_marker copy_pte_marker( - swp_entry_t entry, struct vm_area_struct *dst_vma) + softleaf_t entry, struct vm_area_struct *dst_vma) { - pte_marker srcm = pte_marker_get(entry); + const pte_marker srcm = softleaf_to_marker(entry); /* Always copy error entries. */ pte_marker dstm = srcm & (PTE_MARKER_POISONED | PTE_MARKER_GUARD); @@ -553,7 +553,6 @@ static inline pte_marker copy_pte_marker( return dstm; } -#endif /* * If this pte is wr-protected by uffd-wp in any form, arm the special pte to @@ -571,9 +570,11 @@ static inline bool pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, pte_t *pte, pte_t pteval) { -#ifdef CONFIG_PTE_MARKER_UFFD_WP bool arm_uffd_pte = false; + if (!uffd_supports_wp_marker()) + return false; + /* The current status of the pte should be "cleared" before calling */ WARN_ON_ONCE(!pte_none(ptep_get(pte))); @@ -602,7 +603,7 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, make_pte_marker(PTE_MARKER_UFFD_WP)); return true; } -#endif + return false; } @@ -616,6 +617,7 @@ static inline bool vma_has_recency(const struct vm_area_struct *vma) return true; } +#endif /** * num_pages_contiguous() - determine the number of contiguous pages diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3b7d05e7169c..9f6de068295d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -286,6 +286,31 @@ typedef struct { unsigned long val; } 
swp_entry_t; +/** + * typedef softleaf_t - Describes a page table software leaf entry, abstracted + * from its architecture-specific encoding. + * + * Page table leaf entries are those which do not reference any descendent page + * tables but rather either reference a data page, are an empty (or 'none' + * entry), or contain a non-present entry. + * + * If referencing another page table or a data page then the page table entry is + * pertinent to hardware - that is it tells the hardware how to decode the page + * table entry. + * + * Otherwise it is a software-defined leaf page table entry, which this type + * describes. See leafops.h and specifically @softleaf_type for a list of all + * possible kinds of software leaf entry. + * + * A softleaf_t entry is abstracted from the hardware page table entry, so is + * not architecture-specific. + * + * NOTE: While we transition from the confusing swp_entry_t type used for this + * purpose, we simply alias this type. This will be removed once the + * transition is complete. + */ +typedef swp_entry_t softleaf_t; + #if defined(CONFIG_MEMCG) || defined(CONFIG_SLAB_OBJ_EXT) /* We have some extra room after the refcount in tail pages. */ #define NR_PAGES_IN_LARGE_FOLIO @@ -774,6 +799,65 @@ struct pfnmap_track_ctx { }; #endif +/* What action should be taken after an .mmap_prepare call is complete? */ +enum mmap_action_type { + MMAP_NOTHING, /* Mapping is complete, no further action. */ + MMAP_REMAP_PFN, /* Remap PFN range. */ + MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */ +}; + +/* + * Describes an action an mmap_prepare hook can instruct to be taken to complete + * the mapping of a VMA. Specified in vm_area_desc. + */ +struct mmap_action { + union { + /* Remap range. */ + struct { + unsigned long start; + unsigned long start_pfn; + unsigned long size; + pgprot_t pgprot; + } remap; + }; + enum mmap_action_type type; + + /* + * If specified, this hook is invoked after the selected action has been + * successfully completed. 
Note that the VMA write lock is still held. + * + * The absolute minimum ought to be done here. + * + * Returns 0 on success, or an error code. + */ + int (*success_hook)(const struct vm_area_struct *vma); + + /* + * If specified, this hook is invoked when an error occurred when + * attempting the selected action. + * + * The hook can return an error code in order to filter the error, but + * it is not valid to clear the error here. + */ + int (*error_hook)(int err); + + /* + * This should be set in rare instances where the operation required + * that the rmap should not be able to access the VMA until + * completely set up. + */ + bool hide_from_rmap_until_complete :1; +}; + +/* + * Opaque type representing current VMA (vm_area_struct) flag state. Must be + * accessed via vma_flags_xxx() helper functions. + */ +#define NUM_VMA_FLAG_BITS BITS_PER_LONG +typedef struct { + DECLARE_BITMAP(__vma_flags, NUM_VMA_FLAG_BITS); +} __private vma_flags_t; + /* * Describes a VMA that is about to be mmap()'ed. Drivers may choose to * manipulate mutable fields which will cause those fields to be updated in the @@ -791,12 +875,18 @@ struct vm_area_desc { /* Mutable fields. Populated with initial state. */ pgoff_t pgoff; struct file *vm_file; - vm_flags_t vm_flags; + union { + vm_flags_t vm_flags; + vma_flags_t vma_flags; + }; pgprot_t page_prot; /* Write-only fields. */ const struct vm_operations_struct *vm_ops; void *private_data; + + /* Take further action? */ + struct mmap_action action; }; /* @@ -833,10 +923,12 @@ struct vm_area_struct { /* * Flags, see mm.h. * To modify use vm_flags_{init|reset|set|clear|mod} functions. + * Preferably, use vma_flags_xxx() functions. */ union { + /* Temporary while VMA flags are being converted. */ const vm_flags_t vm_flags; - vm_flags_t __private __vm_flags; + vma_flags_t flags; }; #ifdef CONFIG_PER_VMA_LOCK @@ -917,6 +1009,52 @@ struct vm_area_struct { #endif } __randomize_layout; +/* Clears all bits in the VMA flags bitmap, non-atomically. 
*/ +static inline void vma_flags_clear_all(vma_flags_t *flags) +{ + bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS); +} + +/* + * Copy value to the first system word of VMA flags, non-atomically. + * + * IMPORTANT: This does not overwrite bytes past the first system word. The + * caller must account for this. + */ +static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value) +{ + *ACCESS_PRIVATE(flags, __vma_flags) = value; +} + +/* + * Copy value to the first system word of VMA flags ONCE, non-atomically. + * + * IMPORTANT: This does not overwrite bytes past the first system word. The + * caller must account for this. + */ +static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value) +{ + unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); + + WRITE_ONCE(*bitmap, value); +} + +/* Update the first system word of VMA flags setting bits, non-atomically. */ +static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value) +{ + unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); + + *bitmap |= value; +} + +/* Update the first system word of VMA flags clearing bits, non-atomically. */ +static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value) +{ + unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); + + *bitmap &= ~value; +} + #ifdef CONFIG_NUMA #define vma_policy(vma) ((vma)->vm_policy) #else @@ -1194,15 +1332,13 @@ struct mm_struct { unsigned long cpu_bitmap[]; }; -/* Set the first system word of mm flags, non-atomically. */ -static inline void __mm_flags_set_word(struct mm_struct *mm, unsigned long value) +/* Copy value to the first system word of mm flags, non-atomically. 
*/ +static inline void __mm_flags_overwrite_word(struct mm_struct *mm, unsigned long value) { - unsigned long *bitmap = ACCESS_PRIVATE(&mm->flags, __mm_flags); - - bitmap_copy(bitmap, &value, BITS_PER_LONG); + *ACCESS_PRIVATE(&mm->flags, __mm_flags) = value; } -/* Obtain a read-only view of the bitmap. */ +/* Obtain a read-only view of the mm flags bitmap. */ static inline const unsigned long *__mm_flags_get_bitmap(const struct mm_struct *mm) { return (const unsigned long *)ACCESS_PRIVATE(&mm->flags, __mm_flags); @@ -1211,9 +1347,7 @@ static inline const unsigned long *__mm_flags_get_bitmap(const struct mm_struct /* Read the first system word of mm flags, non-atomically. */ static inline unsigned long __mm_flags_get_word(const struct mm_struct *mm) { - const unsigned long *bitmap = __mm_flags_get_bitmap(mm); - - return bitmap_read(bitmap, 0, BITS_PER_LONG); + return *__mm_flags_get_bitmap(mm); } /* diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 2c9fffa58714..d53f72dba7fe 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -130,7 +130,7 @@ static inline bool is_vma_writer_only(int refcnt) * a detached vma happens only in vma_mark_detached() and is a rare * case, therefore most of the time there will be no unnecessary wakeup. */ - return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1; + return (refcnt & VMA_LOCK_OFFSET) && refcnt <= VMA_LOCK_OFFSET + 1; } static inline void vma_refcount_put(struct vm_area_struct *vma) @@ -183,7 +183,7 @@ static inline void vma_end_read(struct vm_area_struct *vma) } /* WARNING! 
Can only be used if mmap_lock is expected to be write-locked */ -static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq) +static inline bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq) { mmap_assert_write_locked(vma->vm_mm); @@ -195,7 +195,8 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_l return (vma->vm_lock_seq == *mm_lock_seq); } -void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq); +int __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq, + int state); /* * Begin writing to a VMA. @@ -209,7 +210,30 @@ static inline void vma_start_write(struct vm_area_struct *vma) if (__is_vma_write_locked(vma, &mm_lock_seq)) return; - __vma_start_write(vma, mm_lock_seq); + __vma_start_write(vma, mm_lock_seq, TASK_UNINTERRUPTIBLE); +} + +/** + * vma_start_write_killable - Begin writing to a VMA. + * @vma: The VMA we are going to modify. + * + * Exclude concurrent readers under the per-VMA lock until the currently + * write-locked mmap_lock is dropped or downgraded. + * + * Context: May sleep while waiting for readers to drop the vma read lock. + * Caller must already hold the mmap_lock for write. + * + * Return: 0 for a successful acquisition. -EINTR if a fatal signal was + * received. 
+ */ +static inline __must_check +int vma_start_write_killable(struct vm_area_struct *vma) +{ + unsigned int mm_lock_seq; + + if (__is_vma_write_locked(vma, &mm_lock_seq)) + return 0; + return __vma_start_write(vma, mm_lock_seq, TASK_KILLABLE); } static inline void vma_assert_write_locked(struct vm_area_struct *vma) @@ -281,11 +305,10 @@ static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int return true; } static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {} -static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, - struct vm_area_struct *vma) - { return NULL; } static inline void vma_end_read(struct vm_area_struct *vma) {} static inline void vma_start_write(struct vm_area_struct *vma) {} +static inline __must_check +int vma_start_write_killable(struct vm_area_struct *vma) { return 0; } static inline void vma_assert_write_locked(struct vm_area_struct *vma) { mmap_assert_write_locked(vma->vm_mm); } static inline void vma_assert_attached(struct vm_area_struct *vma) {} diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7fb7331c5725..4398e027f450 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1060,10 +1060,6 @@ struct zone { } ____cacheline_internodealigned_in_smp; enum pgdat_flags { - PGDAT_DIRTY, /* reclaim scanning has recently found - * many dirty file pages at the tail - * of the LRU. 
- */ PGDAT_WRITEBACK, /* reclaim scanning has recently found * many pages under writeback */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 6077972e8b45..24eb5a88a5c5 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -867,7 +867,7 @@ struct mhi_device_id { kernel_ulong_t driver_data; }; -#define AUXILIARY_NAME_SIZE 32 +#define AUXILIARY_NAME_SIZE 40 #define AUXILIARY_MODULE_PREFIX "auxiliary:" struct auxiliary_device_id { diff --git a/include/linux/node.h b/include/linux/node.h index 866e3323f1fd..0269b064ba65 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -132,8 +132,6 @@ static inline void register_memory_blocks_under_nodes(void) } #endif -extern void unregister_node(struct node *node); - struct node_notify { int nid; }; @@ -176,8 +174,8 @@ static inline int hotplug_node_notifier(notifier_fn_t fn, int pri) #ifdef CONFIG_NUMA extern void node_dev_init(void); /* Core of the node registration - only memory hotplug should use this */ -extern int register_one_node(int nid); -extern void unregister_one_node(int nid); +int register_node(int nid); +void unregister_node(int nid); extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk); @@ -189,11 +187,11 @@ extern int register_memory_node_under_compute_node(unsigned int mem_nid, static inline void node_dev_init(void) { } -static inline int register_one_node(int nid) +static inline int register_node(int nid) { return 0; } -static inline int unregister_one_node(int nid) +static inline int unregister_node(int nid) { return 0; } diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 725f95f7e416..736f633b2d5f 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -459,6 +459,18 @@ static inline size_t __must_check size_sub(size_t minuend, 
size_t subtrahend) struct_size((type *)NULL, member, count) /** + * struct_offset() - Calculate the offset of a member within a struct + * @p: Pointer to the struct + * @member: Name of the member to get the offset of + * + * Calculates the offset of a particular @member of the structure pointed + * to by @p. + * + * Return: number of bytes to the location of @member. + */ +#define struct_offset(p, member) (offsetof(typeof(*(p)), member)) + +/** * __DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. * Enables caller macro to pass arbitrary trailing expressions * diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e601a3144f28..31a848485ad9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -651,9 +651,11 @@ static inline void *detach_page_private(struct page *page) } #ifdef CONFIG_NUMA -struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order); +struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order, + struct mempolicy *policy); #else -static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order) +static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order, + struct mempolicy *policy) { return folio_alloc_noprof(gfp, order); } @@ -664,7 +666,7 @@ static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int o static inline struct page *__page_cache_alloc(gfp_t gfp) { - return &filemap_alloc_folio(gfp, 0)->page; + return &filemap_alloc_folio(gfp, 0, NULL)->page; } static inline gfp_t readahead_gfp_mask(struct address_space *x) @@ -750,11 +752,17 @@ static inline fgf_t fgf_set_order(size_t size) } void *filemap_get_entry(struct address_space *mapping, pgoff_t index); -struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, - fgf_t fgp_flags, gfp_t gfp); +struct folio *__filemap_get_folio_mpol(struct address_space *mapping, + pgoff_t index, fgf_t fgf_flags, gfp_t gfp, struct mempolicy *policy); struct page 
*pagecache_get_page(struct address_space *mapping, pgoff_t index, fgf_t fgp_flags, gfp_t gfp); +static inline struct folio *__filemap_get_folio(struct address_space *mapping, + pgoff_t index, fgf_t fgf_flags, gfp_t gfp) +{ + return __filemap_get_folio_mpol(mapping, index, fgf_flags, gfp, NULL); +} + /** * write_begin_get_folio - Get folio for write_begin with flags. * @iocb: The kiocb passed from write_begin (may be NULL). diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index ee3148ef87f6..652f287c1ef6 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1557,6 +1557,18 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define arch_start_context_switch(prev) do {} while (0) #endif +/* + * Some platforms can customize the PTE soft-dirty bit making it unavailable + * even if the architecture provides the resource. + * Adding this API allows architectures to add their own checks for the + * devices on which the kernel is running. + * Note: When overriding it, please make sure the CONFIG_MEM_SOFT_DIRTY + * is part of this macro. 
+ */ +#ifndef pgtable_supports_soft_dirty +#define pgtable_supports_soft_dirty() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) +#endif + #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 93c945331f39..813da101b5bf 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -80,7 +80,7 @@ static inline void __iomem * devm_platform_get_and_ioremap_resource(struct platform_device *pdev, unsigned int index, struct resource **res) { - return ERR_PTR(-EINVAL); + return IOMEM_ERR_PTR(-EINVAL); } @@ -88,14 +88,14 @@ static inline void __iomem * devm_platform_ioremap_resource(struct platform_device *pdev, unsigned int index) { - return ERR_PTR(-EINVAL); + return IOMEM_ERR_PTR(-EINVAL); } static inline void __iomem * devm_platform_ioremap_resource_byname(struct platform_device *pdev, const char *name) { - return ERR_PTR(-EINVAL); + return IOMEM_ERR_PTR(-EINVAL); } #endif diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index f139377f4b31..19d1c5e5f335 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -66,8 +66,6 @@ enum proc_pidonly { struct proc_fs_info { struct pid_namespace *pid_ns; - struct dentry *proc_self; /* For /proc/self */ - struct dentry *proc_thread_self; /* For /proc/thread-self */ kgid_t pid_gid; enum proc_hidepid hide_pid; enum proc_pidonly pidonly; diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index e0dbcb4b4fd9..abcdee256c65 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -14,6 +14,39 @@ #include <uapi/linux/psp-sev.h> +/* As defined by SEV API, under "Guest Policy". 
*/ +#define SEV_POLICY_MASK_NODBG BIT(0) +#define SEV_POLICY_MASK_NOKS BIT(1) +#define SEV_POLICY_MASK_ES BIT(2) +#define SEV_POLICY_MASK_NOSEND BIT(3) +#define SEV_POLICY_MASK_DOMAIN BIT(4) +#define SEV_POLICY_MASK_SEV BIT(5) +#define SEV_POLICY_MASK_API_MAJOR GENMASK(23, 16) +#define SEV_POLICY_MASK_API_MINOR GENMASK(31, 24) + +/* As defined by SEV-SNP Firmware ABI, under "Guest Policy". */ +#define SNP_POLICY_MASK_API_MINOR GENMASK_ULL(7, 0) +#define SNP_POLICY_MASK_API_MAJOR GENMASK_ULL(15, 8) +#define SNP_POLICY_MASK_SMT BIT_ULL(16) +#define SNP_POLICY_MASK_RSVD_MBO BIT_ULL(17) +#define SNP_POLICY_MASK_MIGRATE_MA BIT_ULL(18) +#define SNP_POLICY_MASK_DEBUG BIT_ULL(19) +#define SNP_POLICY_MASK_SINGLE_SOCKET BIT_ULL(20) +#define SNP_POLICY_MASK_CXL_ALLOW BIT_ULL(21) +#define SNP_POLICY_MASK_MEM_AES_256_XTS BIT_ULL(22) +#define SNP_POLICY_MASK_RAPL_DIS BIT_ULL(23) +#define SNP_POLICY_MASK_CIPHERTEXT_HIDING_DRAM BIT_ULL(24) +#define SNP_POLICY_MASK_PAGE_SWAP_DISABLE BIT_ULL(25) + +/* Base SEV-SNP policy bitmask for minimum supported SEV firmware version */ +#define SNP_POLICY_MASK_BASE (SNP_POLICY_MASK_API_MINOR | \ + SNP_POLICY_MASK_API_MAJOR | \ + SNP_POLICY_MASK_SMT | \ + SNP_POLICY_MASK_RSVD_MBO | \ + SNP_POLICY_MASK_MIGRATE_MA | \ + SNP_POLICY_MASK_DEBUG | \ + SNP_POLICY_MASK_SINGLE_SOCKET) + #define SEV_FW_BLOB_MAX_SIZE 0x4000 /* 16KB */ /** @@ -849,7 +882,10 @@ struct snp_feature_info { u32 edx; } __packed; +#define SNP_RAPL_DISABLE_SUPPORTED BIT(2) #define SNP_CIPHER_TEXT_HIDING_SUPPORTED BIT(3) +#define SNP_AES_256_XTS_POLICY_SUPPORTED BIT(4) +#define SNP_CXL_ALLOW_POLICY_SUPPORTED BIT(5) #ifdef CONFIG_CRYPTO_DEV_SP_PSP @@ -995,6 +1031,7 @@ void *snp_alloc_firmware_page(gfp_t mask); void snp_free_firmware_page(void *addr); void sev_platform_shutdown(void); bool sev_is_snp_ciphertext_hiding_supported(void); +u64 sev_get_snp_policy_bits(void); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ diff --git a/include/linux/reset-controller.h 
b/include/linux/reset-controller.h index 357df16ede32..46514cb1b9e0 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -27,31 +27,6 @@ struct device_node; struct of_phandle_args; /** - * struct reset_control_lookup - represents a single lookup entry - * - * @list: internal list of all reset lookup entries - * @provider: name of the reset controller device controlling this reset line - * @index: ID of the reset controller in the reset controller device - * @dev_id: name of the device associated with this reset line - * @con_id: name of the reset line (can be NULL) - */ -struct reset_control_lookup { - struct list_head list; - const char *provider; - unsigned int index; - const char *dev_id; - const char *con_id; -}; - -#define RESET_LOOKUP(_provider, _index, _dev_id, _con_id) \ - { \ - .provider = _provider, \ - .index = _index, \ - .dev_id = _dev_id, \ - .con_id = _con_id, \ - } - -/** * struct reset_controller_dev - reset controller entity that might * provide multiple reset controls * @ops: a pointer to device specific struct reset_control_ops @@ -90,9 +65,6 @@ void reset_controller_unregister(struct reset_controller_dev *rcdev); struct device; int devm_reset_controller_register(struct device *dev, struct reset_controller_dev *rcdev); - -void reset_controller_add_lookup(struct reset_control_lookup *lookup, - unsigned int num_entries); #else static inline int reset_controller_register(struct reset_controller_dev *rcdev) { @@ -108,11 +80,6 @@ static inline int devm_reset_controller_register(struct device *dev, { return 0; } - -static inline void reset_controller_add_lookup(struct reset_control_lookup *lookup, - unsigned int num_entries) -{ -} #endif #endif diff --git a/include/linux/reset.h b/include/linux/reset.h index 840d75d172f6..44f9e3415f92 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -2,6 +2,7 @@ #ifndef _LINUX_RESET_H_ #define _LINUX_RESET_H_ +#include <linux/bits.h> #include <linux/err.h> #include 
<linux/errno.h> #include <linux/types.h> diff --git a/include/linux/rv.h b/include/linux/rv.h index 9520aab34bcb..92fd467547e7 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -88,7 +88,7 @@ union rv_task_monitor { struct rv_reactor { const char *name; const char *description; - __printf(1, 2) void (*react)(const char *msg, ...); + __printf(1, 0) void (*react)(const char *msg, va_list args); struct list_head list; }; #endif @@ -102,7 +102,7 @@ struct rv_monitor { void (*reset)(void); #ifdef CONFIG_RV_REACTORS struct rv_reactor *reactor; - __printf(1, 2) void (*react)(const char *msg, ...); + __printf(1, 0) void (*react)(const char *msg, va_list args); #endif struct list_head list; struct rv_monitor *parent; @@ -116,13 +116,14 @@ int rv_get_task_monitor_slot(void); void rv_put_task_monitor_slot(int slot); #ifdef CONFIG_RV_REACTORS -bool rv_reacting_on(void); int rv_unregister_reactor(struct rv_reactor *reactor); int rv_register_reactor(struct rv_reactor *reactor); +__printf(2, 3) +void rv_react(struct rv_monitor *monitor, const char *msg, ...); #else -static inline bool rv_reacting_on(void) +__printf(2, 3) +static inline void rv_react(struct rv_monitor *monitor, const char *msg, ...) 
{ - return false; } #endif /* CONFIG_RV_REACTORS */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 0232d983b715..0e1d73955fa5 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -189,12 +189,11 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t); -unsigned long mm_get_unmapped_area(struct mm_struct *mm, struct file *filp, - unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags); +unsigned long mm_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); -unsigned long mm_get_unmapped_area_vmflags(struct mm_struct *mm, - struct file *filp, +unsigned long mm_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, @@ -318,6 +317,9 @@ static inline void might_alloc(gfp_t gfp_mask) fs_reclaim_acquire(gfp_mask); fs_reclaim_release(gfp_mask); + if (current->flags & PF_MEMALLOC) + return; + might_sleep_if(gfpflags_allow_blocking(gfp_mask)); } diff --git a/include/linux/security.h b/include/linux/security.h index eb36451ce41f..83a646d72f6f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2257,8 +2257,6 @@ static inline void securityfs_remove(struct dentry *dentry) #endif -#define securityfs_recursive_remove securityfs_remove - #ifdef CONFIG_BPF_SYSCALL union bpf_attr; struct bpf_map; diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 52791e070506..9f2839e73f8a 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -149,6 +149,23 @@ static inline void seq_buf_commit(struct seq_buf *s, int num) } } +/** + * seq_buf_pop - pop off the last written character + * @s: the seq_buf handle + * + * Removes the last written character from the seq_buf @s. + * + * Returns the last character or -1 if it is empty.
+ */ +static inline int seq_buf_pop(struct seq_buf *s) +{ + if (!s->len) + return -1; + + s->len--; + return (unsigned int)s->buffer[s->len]; +} + extern __printf(2, 3) int seq_buf_printf(struct seq_buf *s, const char *fmt, ...); extern __printf(2, 0) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 774efe592a9a..5e4b3c1ae5c2 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -94,7 +94,8 @@ extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags); extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name, loff_t size, unsigned long flags); -extern int shmem_zero_setup(struct vm_area_struct *); +int shmem_zero_setup(struct vm_area_struct *vma); +int shmem_zero_setup_desc(struct vm_area_desc *desc); extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts); @@ -135,11 +136,16 @@ static inline bool shmem_hpage_pmd_enabled(void) #ifdef CONFIG_SHMEM extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); +extern void shmem_uncharge(struct inode *inode, long pages); #else static inline unsigned long shmem_swap_usage(struct vm_area_struct *vma) { return 0; } + +static inline void shmem_uncharge(struct inode *inode, long pages) +{ +} #endif extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, pgoff_t start, pgoff_t end); @@ -193,7 +199,6 @@ static inline pgoff_t shmem_fallocend(struct inode *inode, pgoff_t eof) } extern bool shmem_charge(struct inode *inode, long pages); -extern void shmem_uncharge(struct inode *inode, long pages); #ifdef CONFIG_USERFAULTFD #ifdef CONFIG_SHMEM diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 7a69210a250c..0287f9182c4d 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ 
b/include/linux/soc/qcom/llcc-qcom.h @@ -74,7 +74,14 @@ #define LLCC_CAMSRTIP 73 #define LLCC_CAMRTRF 74 #define LLCC_CAMSRTRF 75 +#define LLCC_VIDEO_APV 83 +#define LLCC_COMPUTE1 87 +#define LLCC_CPUSS_OPP 88 #define LLCC_CPUSSMPAM 89 +#define LLCC_CAM_IPE_STROV 92 +#define LLCC_CAM_OFE_STROV 93 +#define LLCC_CPUSS_HEU 94 +#define LLCC_MDM_PNG_FIXED 100 /** * struct llcc_slice_desc - Cache slice descriptor diff --git a/include/linux/soc/qcom/socinfo.h b/include/linux/soc/qcom/socinfo.h index 608950443eee..ba823a0013c5 100644 --- a/include/linux/soc/qcom/socinfo.h +++ b/include/linux/soc/qcom/socinfo.h @@ -82,6 +82,10 @@ struct socinfo { __le32 num_func_clusters; __le32 boot_cluster; __le32 boot_core; + /* Version 20 */ + __le32 raw_package_type; + /* Version 21, 22, 23 */ + __le32 reserve1[4]; }; /* Internal feature codes */ diff --git a/include/linux/soc/qcom/ubwc.h b/include/linux/soc/qcom/ubwc.h index 1ed8b1b16bc9..0a4edfe3d96d 100644 --- a/include/linux/soc/qcom/ubwc.h +++ b/include/linux/soc/qcom/ubwc.h @@ -52,6 +52,7 @@ struct qcom_ubwc_cfg_data { #define UBWC_4_0 0x40000000 #define UBWC_4_3 0x40030000 #define UBWC_5_0 0x50000000 +#define UBWC_6_0 0x60000000 #if IS_ENABLED(CONFIG_QCOM_UBWC_CONFIG) const struct qcom_ubwc_cfg_data *qcom_ubwc_config_get_data(void); diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 71e0c09a49eb..532c6c2d1195 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -672,14 +672,341 @@ /* For Tensor GS101 */ /* PMU ALIVE */ -#define GS101_SYSIP_DAT0 (0x810) -#define GS101_CPU0_INFORM (0x860) -#define GS101_CPU_INFORM(cpu) \ - (GS101_CPU0_INFORM + (cpu*4)) -#define GS101_SYSTEM_CONFIGURATION (0x3A00) -#define GS101_EINT_WAKEUP_MASK (0x3A80) -#define GS101_PHY_CTRL_USB20 (0x3EB0) -#define GS101_PHY_CTRL_USBDP (0x3EB4) +#define GS101_OM_STAT 0x0000 +#define GS101_VERSION 0x0004 +#define GS101_PORESET_CHECK 0x0008 +#define 
GS101_OTP_STATUS 0x000c +#define GS101_SYSTEM_INFO 0x0010 +#define GS101_IDLE_IP(n) (0x03e0 + ((n) & 3) * 4) +#define GS101_IDLE_IP_MASK(n) (0x03f0 + ((n) & 3) * 4) +#define GS101_SLC_CH_OFFSET(ch) (0x0400 + ((ch) & 3) * 0x10) +#define GS101_DATARAM_STATE_SLC_CH(ch) (GS101_SLC_CH_OFFSET(ch) + 0x00) +#define GS101_TAGRAM_STATE_SLC_CH(ch) (GS101_SLC_CH_OFFSET(ch) + 0x04) +#define GS101_LRURAM_STATE_SLC_CH(ch) (GS101_SLC_CH_OFFSET(ch) + 0x08) +#define GS101_PPMPURAM_STATE_SLC_CH(ch) (GS101_SLC_CH_OFFSET(ch) + 0x0c) +#define GS101_DATARAM_INFORM_SCL_CH(ch) (GS101_SLC_CH_OFFSET(ch) + 0x40) +#define GS101_TAGRAM_INFORM_SCL_CH(ch) (GS101_SLC_CH_OFFSET(ch) + 0x44) +#define GS101_LRURAM_INFORM_SCL_CH(ch) (GS101_SLC_CH_OFFSET(ch) + 0x48) +#define GS101_PPMPURAM_INFORM_SCL_CH(ch) (GS101_SLC_CH_OFFSET(ch) + 0x4c) +#define GS101_INFORM0 0x0800 +#define GS101_INFORM1 0x0804 +#define GS101_INFORM2 0x0808 +#define GS101_INFORM3 0x080c +#define GS101_SYSIP_DAT(n) (0x0810 + ((n) & 3) * 4) +#define GS101_PWR_HOLD_HW_TRIP 0x0820 +#define GS101_PWR_HOLD_SW_TRIP 0x0824 +#define GS101_GSA_INFORM(n) (0x0830 + ((n) & 1) * 4) +#define GS101_INFORM4 0x0840 +#define GS101_INFORM5 0x0844 +#define GS101_INFORM6 0x0848 +#define GS101_INFORM7 0x084c +#define GS101_INFORM8 0x0850 +#define GS101_INFORM9 0x0854 +#define GS101_INFORM10 0x0858 +#define GS101_INFORM11 0x085c +#define GS101_CPU_INFORM(cpu) (0x0860 + ((cpu) & 7) * 4) +#define GS101_IROM_INFORM 0x0880 +#define GS101_IROM_CPU_INFORM(cpu) (0x0890 + ((cpu) & 7) * 4) +#define GS101_PMU_SPARE(n) (0x0900 + ((n) & 3) * 4) +#define GS101_IROM_DATA_REG(n) (0x0980 + ((n) & 3) * 4) +#define GS101_IROM_PWRMODE 0x0990 +#define GS101_DREX_CALIBRATION(n) (0x09a0 + ((n) & 7) * 4) + +#define GS101_CLUSTER0_OFFSET 0x1000 +#define GS101_CLUSTER1_OFFSET 0x1300 +#define GS101_CLUSTER2_OFFSET 0x1500 +#define GS101_CLUSTER_CPU_OFFSET(cl, cpu) ((cl) + ((cpu) * 0x80)) +#define GS101_CLUSTER_CPU_CONFIGURATION(cl, cpu) \ + (GS101_CLUSTER_CPU_OFFSET(cl, cpu) + 0x00) 
+#define GS101_CLUSTER_CPU_STATUS(cl, cpu) \ + (GS101_CLUSTER_CPU_OFFSET(cl, cpu) + 0x04) +#define GS101_CLUSTER_CPU_STATES(cl, cpu) \ + (GS101_CLUSTER_CPU_OFFSET(cl, cpu) + 0x08) +#define GS101_CLUSTER_CPU_OPTION(cl, cpu) \ + (GS101_CLUSTER_CPU_OFFSET(cl, cpu) + 0x0c) +#define GS101_CLUSTER_CPU_OUT(cl, cpu) \ + (GS101_CLUSTER_CPU_OFFSET(cl, cpu) + 0x20) +#define GS101_CLUSTER_CPU_IN(cl, cpu) \ + (GS101_CLUSTER_CPU_OFFSET(cl, cpu) + 0x24) +#define GS101_CLUSTER_CPU_INT_IN(cl, cpu) \ + (GS101_CLUSTER_CPU_OFFSET(cl, cpu) + 0x40) +#define GS101_CLUSTER_CPU_INT_EN(cl, cpu) \ + (GS101_CLUSTER_CPU_OFFSET(cl, cpu) + 0x44) +#define GS101_CLUSTER_CPU_INT_TYPE(cl, cpu) \ + (GS101_CLUSTER_CPU_OFFSET(cl, cpu) + 0x48) +#define GS101_CLUSTER_CPU_INT_DIR(cl, cpu) \ + (GS101_CLUSTER_CPU_OFFSET(cl, cpu) + 0x4c) + +#define GS101_CLUSTER_NONCPU_OFFSET(cl) (0x1200 + ((cl) * 0x200)) +#define GS101_CLUSTER_NONCPU_CONFIGURATION(cl) \ + (GS101_CLUSTER_NONCPU_OFFSET(cl) + 0x00) +#define GS101_CLUSTER_NONCPU_STATUS(cl) \ + (GS101_CLUSTER_NONCPU_OFFSET(cl) + 0x04) +#define GS101_CLUSTER_NONCPU_STATES(cl) \ + (GS101_CLUSTER_NONCPU_OFFSET(cl) + 0x08) +#define GS101_CLUSTER_NONCPU_OPTION(cl) \ + (GS101_CLUSTER_NONCPU_OFFSET(cl) + 0x0c) +#define GS101_CLUSTER_NONCPU_OUT(cl) \ + (GS101_CLUSTER_NONCPU_OFFSET(cl) + 0x20) +#define GS101_CLUSTER_NONCPU_IN(cl) \ + (GS101_CLUSTER_NONCPU_OFFSET(cl) + 0x24) +#define GS101_CLUSTER_NONCPU_INT_IN(cl) \ + (GS101_CLUSTER_NONCPU_OFFSET(cl) + 0x40) +#define GS101_CLUSTER_NONCPU_INT_EN(cl) \ + (GS101_CLUSTER_NONCPU_OFFSET(cl) + 0x44) +#define GS101_CLUSTER_NONCPU_INT_TYPE(cl) \ + (GS101_CLUSTER_NONCPU_OFFSET(cl) + 0x48) +#define GS101_CLUSTER_NONCPU_INT_DIR(cl) \ + (GS101_CLUSTER_NONCPU_OFFSET(cl) + 0x4c) +#define GS101_CLUSTER_NONCPU_DUALRAIL_CTRL_OUT(cl) \ + (GS101_CLUSTER_NONCPU_OFFSET(cl) + 0x60) +#define GS101_CLUSTER_NONCPU_DUALRAIL_POS_OUT(cl) \ + (GS101_CLUSTER_NONCPU_OFFSET(cl) + 0x64) +#define GS101_CLUSTER_NONCPU_DUALRAIL_CTRL_IN(cl) \ + 
(GS101_CLUSTER_NONCPU_OFFSET(cl) + 0x6c) +#define GS101_CLUSTER0_NONCPU_DSU_PCH \ + (GS101_CLUSTER_NONCPU_OFFSET(0) + 0x80) + +#define GS101_SUBBBLK_OFFSET_ALIVE 0x1800 +#define GS101_SUBBBLK_OFFSET_AOC 0x1880 +#define GS101_SUBBBLK_OFFSET_APM 0x1900 +#define GS101_SUBBBLK_OFFSET_CMU 0x1980 +#define GS101_SUBBBLK_OFFSET_BUS0 0x1a00 +#define GS101_SUBBBLK_OFFSET_BUS1 0x1a80 +#define GS101_SUBBBLK_OFFSET_BUS2 0x1b00 +#define GS101_SUBBBLK_OFFSET_CORE 0x1b80 +#define GS101_SUBBBLK_OFFSET_EH 0x1c00 +#define GS101_SUBBBLK_OFFSET_CPUCL0 0x1c80 +#define GS101_SUBBBLK_OFFSET_CPUCL1 0x1d00 +#define GS101_SUBBBLK_OFFSET_CPUCL2 0x1d80 +#define GS101_SUBBBLK_OFFSET_G3D 0x1e00 +#define GS101_SUBBBLK_OFFSET_EMBEDDED_CPUCL0 0x1e80 +#define GS101_SUBBBLK_OFFSET_EMBEDDED_G3D 0x2000 +#define GS101_SUBBBLK_OFFSET_HSI0 0x2080 +#define GS101_SUBBBLK_OFFSET_HSI1 0x2100 +#define GS101_SUBBBLK_OFFSET_HSI2 0x2180 +#define GS101_SUBBBLK_OFFSET_DPU 0x2200 +#define GS101_SUBBBLK_OFFSET_DISP 0x2280 +#define GS101_SUBBBLK_OFFSET_G2D 0x2300 +#define GS101_SUBBBLK_OFFSET_MFC 0x2380 +#define GS101_SUBBBLK_OFFSET_CSIS 0x2400 +#define GS101_SUBBBLK_OFFSET_PDP 0x2480 +#define GS101_SUBBBLK_OFFSET_DNS 0x2500 +#define GS101_SUBBBLK_OFFSET_G3AA 0x2580 +#define GS101_SUBBBLK_OFFSET_IPP 0x2600 +#define GS101_SUBBBLK_OFFSET_ITP 0x2680 +#define GS101_SUBBBLK_OFFSET_MCSC 0x2700 +#define GS101_SUBBBLK_OFFSET_GDC 0x2780 +#define GS101_SUBBBLK_OFFSET_TNR 0x2800 +#define GS101_SUBBBLK_OFFSET_BO 0x2880 +#define GS101_SUBBBLK_OFFSET_TPU 0x2900 +#define GS101_SUBBBLK_OFFSET_MIF0 0x2980 +#define GS101_SUBBBLK_OFFSET_MIF1 0x2a00 +#define GS101_SUBBBLK_OFFSET_MIF2 0x2a80 +#define GS101_SUBBBLK_OFFSET_MIF3 0x2b00 +#define GS101_SUBBBLK_OFFSET_MISC 0x2b80 +#define GS101_SUBBBLK_OFFSET_PERIC0 0x2c00 +#define GS101_SUBBBLK_OFFSET_PERIC1 0x2c80 +#define GS101_SUBBBLK_OFFSET_S2D 0x2d00 +#define GS101_SUBBLK_CONFIGURATION(blk) ((blk) + 0x00) +#define GS101_SUBBLK_STATUS(blk) ((blk) + 0x04) +#define GS101_SUBBLK_STATES(blk) 
((blk) + 0x08) +#define GS101_SUBBLK_OPTION(blk) ((blk) + 0x0c) +#define GS101_SUBBLK_CTRL(blk) ((blk) + 0x10) +#define GS101_SUBBLK_OUT(blk) ((blk) + 0x20) +#define GS101_SUBBLK_IN(blk) ((blk) + 0x24) +#define GS101_SUBBLK_INT_IN(blk) ((blk) + 0x40) +#define GS101_SUBBLK_INT_EN(blk) ((blk) + 0x44) +#define GS101_SUBBLK_INT_TYPE(blk) ((blk) + 0x48) +#define GS101_SUBBLK_INT_DIR(blk) ((blk) + 0x4c) +#define GS101_SUBBLK_MEMORY_OUT(blk) ((blk) + 0x60) +#define GS101_SUBBLK_MEMORY_IN(blk) ((blk) + 0x64) + +#define GS101_SUBBBLK_CPU_OFFSET_APM 0x3000 +#define GS101_SUBBBLK_CPU_OFFSET_DBGCORE 0x3080 +#define GS101_SUBBBLK_CPU_OFFSET_SSS 0x3100 +#define GS101_SUBBLK_CPU_CONFIGURATION(blk) ((blk) + 0x00) +#define GS101_SUBBLK_CPU_STATUS(blk) ((blk) + 0x04) +#define GS101_SUBBLK_CPU_STATES(blk) ((blk) + 0x08) +#define GS101_SUBBLK_CPU_OPTION(blk) ((blk) + 0x0c) +#define GS101_SUBBLK_CPU_OUT(blk) ((blk) + 0x20) +#define GS101_SUBBLK_CPU_IN(blk) ((blk) + 0x24) +#define GS101_SUBBLK_CPU_INT_IN(blk) ((blk) + 0x40) +#define GS101_SUBBLK_CPU_INT_EN(blk) ((blk) + 0x44) +#define GS101_SUBBLK_CPU_INT_TYPE(blk) ((blk) + 0x48) +#define GS101_SUBBLK_CPU_INT_DIR(blk) ((blk) + 0x4c) + +#define GS101_MIF_CONFIGURATION 0x3800 +#define GS101_MIF_STATUS 0x3804 +#define GS101_MIF_STATES 0x3808 +#define GS101_MIF_OPTION 0x380c +#define GS101_MIF_CTRL 0x3810 +#define GS101_MIF_OUT 0x3820 +#define GS101_MIF_IN 0x3824 +#define GS101_MIF_INT_IN 0x3840 +#define GS101_MIF_INT_EN 0x3844 +#define GS101_MIF_INT_TYPE 0x3848 +#define GS101_MIF_INT_DIR 0x384c +#define GS101_TOP_CONFIGURATION 0x3900 +#define GS101_TOP_STATUS 0x3904 +#define GS101_TOP_STATES 0x3908 +#define GS101_TOP_OPTION 0x390c +#define GS101_TOP_OUT 0x3920 +#define GS101_TOP_IN 0x3924 +#define GS101_TOP_INT_IN 0x3940 +#define GS101_TOP_INT_EN 0x3944 +#define GS101_TOP_INT_TYPE 0x3948 +#define GS101_TOP_INT_DIR 0x394c +#define GS101_WAKEUP_STAT 0x3950 +#define GS101_WAKEUP2_STAT 0x3954 +#define GS101_WAKEUP2_INT_IN 0x3960 +#define 
GS101_WAKEUP2_INT_EN 0x3964 +#define GS101_WAKEUP2_INT_TYPE 0x3968 +#define GS101_WAKEUP2_INT_DIR 0x396c +#define GS101_SYSTEM_CONFIGURATION 0x3a00 +#define GS101_SYSTEM_STATUS 0x3a04 +#define GS101_SYSTEM_STATES 0x3a08 +#define GS101_SYSTEM_OPTION 0x3a0c +#define GS101_SYSTEM_CTRL 0x3a10 +#define GS101_SPARE_CTRL 0x3a14 +#define GS101_USER_DEFINED_OUT 0x3a18 +#define GS101_SYSTEM_OUT 0x3a20 +#define GS101_SYSTEM_IN 0x3a24 +#define GS101_SYSTEM_INT_IN 0x3a40 +#define GS101_SYSTEM_INT_EN 0x3a44 +#define GS101_SYSTEM_INT_TYPE 0x3a48 +#define GS101_SYSTEM_INT_DIR 0x3a4c +#define GS101_EINT_INT_IN 0x3a50 +#define GS101_EINT_INT_EN 0x3a54 +#define GS101_EINT_INT_TYPE 0x3a58 +#define GS101_EINT_INT_DIR 0x3a5c +#define GS101_EINT2_INT_IN 0x3a60 +#define GS101_EINT2_INT_EN 0x3a64 +#define GS101_EINT2_INT_TYPE 0x3a68 +#define GS101_EINT2_INT_DIR 0x3a6c +#define GS101_EINT3_INT_IN 0x3a70 +#define GS101_EINT3_INT_EN 0x3a74 +#define GS101_EINT3_INT_TYPE 0x3a78 +#define GS101_EINT3_INT_DIR 0x3a7c +#define GS101_EINT_WAKEUP_MASK 0x3a80 +#define GS101_EINT_WAKEUP_MASK2 0x3a84 +#define GS101_EINT_WAKEUP_MASK3 0x3a88 +#define GS101_USER_DEFINED_INT_IN 0x3a90 +#define GS101_USER_DEFINED_INT_EN 0x3a94 +#define GS101_USER_DEFINED_INT_TYPE 0x3a98 +#define GS101_USER_DEFINED_INT_DIR 0x3a9c +#define GS101_SCAN2DRAM_INT_IN 0x3aa0 +#define GS101_SCAN2DRAM_INT_EN 0x3aa4 +#define GS101_SCAN2DRAM_INT_TYPE 0x3aa8 +#define GS101_SCAN2DRAM_INT_DIR 0x3aac +#define GS101_HCU_START 0x3ab0 +#define GS101_CUSTOM_OUT 0x3ac0 +#define GS101_CUSTOM_IN 0x3ac4 +#define GS101_CUSTOM_INT_IN 0x3ad0 +#define GS101_CUSTOM_INT_EN 0x3ad4 +#define GS101_CUSTOM_INT_TYPE 0x3ad8 +#define GS101_CUSTOM_INT_DIR 0x3adc +#define GS101_ACK_LAST_CPU 0x3afc +#define GS101_HCU_R(n) (0x3b00 + ((n) & 3) * 4) +#define GS101_HCU_SP 0x3b14 +#define GS101_HCU_PC 0x3b18 +#define GS101_PMU_RAM_CTRL 0x3b20 +#define GS101_APM_HCU_CTRL 0x3b24 +#define GS101_APM_NMI_ENABLE 0x3b30 +#define GS101_DBGCORE_NMI_ENABLE 0x3b34 +#define 
GS101_HCU_NMI_ENABLE 0x3b38 +#define GS101_PWR_HOLD_WDT_ENABLE 0x3b3c +#define GS101_NMI_SRC_IN 0x3b40 +#define GS101_RST_STAT 0x3b44 +#define GS101_RST_STAT_PMU 0x3b48 +#define GS101_HPM_INT_IN 0x3b60 +#define GS101_HPM_INT_EN 0x3b64 +#define GS101_HPM_INT_TYPE 0x3b68 +#define GS101_HPM_INT_DIR 0x3b6c +#define GS101_S2D_AUTH 0x3b70 +#define GS101_BOOT_STAT 0x3b74 +#define GS101_PMLINK_OUT 0x3c00 +#define GS101_PMLINK_AOC_OUT 0x3c04 +#define GS101_PMLINK_AOC_CTRL 0x3c08 +#define GS101_TCXO_BUF_CTRL 0x3c10 +#define GS101_ADD_CTRL 0x3c14 +#define GS101_HCU_TIMEOUT_RESET 0x3c20 +#define GS101_HCU_TIMEOUT_SCAN2DRAM 0x3c24 +#define GS101_TIMER(n) (0x3c80 + ((n) & 3) * 4) +#define GS101_PPC_MIF(n) (0x3c90 + ((n) & 3) * 4) +#define GS101_PPC_CORE 0x3ca0 +#define GS101_PPC_EH 0x3ca4 +#define GS101_PPC_CPUCL1_0 0x3ca8 +#define GS101_PPC_CPUCL1_1 0x3cac +#define GS101_EXT_REGULATOR_MIF_DURATION 0x3cb0 +#define GS101_EXT_REGULATOR_TOP_DURATION 0x3cb4 +#define GS101_EXT_REGULATOR_CPUCL2_DURATION 0x3cb8 +#define GS101_EXT_REGULATOR_CPUCL1_DURATION 0x3cbc +#define GS101_EXT_REGULATOR_G3D_DURATION 0x3cc0 +#define GS101_EXT_REGULATOR_TPU_DURATION 0x3cc4 +#define GS101_TCXO_DURATION 0x3cc8 +#define GS101_BURNIN_CTRL 0x3cd0 +#define GS101_JTAG_DBG_DET 0x3cd4 +#define GS101_MMC_CONWKUP_CTRL 0x3cd8 +#define GS101_USBDPPHY0_USBDP_WAKEUP 0x3cdc +#define GS101_TMU_TOP_TRIP 0x3ce0 +#define GS101_TMU_SUB_TRIP 0x3ce4 +#define GS101_MEMORY_CEN 0x3d00 +#define GS101_MEMORY_PGEN 0x3d04 +#define GS101_MEMORY_RET 0x3d08 +#define GS101_MEMORY_PGEN_FEEDBACK 0x3d0c +#define GS101_MEMORY_SMX 0x3d10 +#define GS101_MEMORY_SMX_FEEDBACK 0x3d14 +#define GS101_SLC_PCH_CHANNEL 0x3d20 +#define GS101_SLC_PCH_CB 0x3d24 +#define GS101_FORCE_NOMC 0x3d3c +#define GS101_FORCE_BOOST 0x3d4c +#define GS101_PMLINK_SLC_REQ 0x3d50 +#define GS101_PMLINK_SLC_ACK 0x3d54 +#define GS101_PMLINK_SLC_BUSY 0x3d58 +#define GS101_BOOTSYNC_OUT 0x3d80 +#define GS101_BOOTSYNC_IN 0x3d84 +#define GS101_SCAN_READY_OUT 0x3d88 +#define 
GS101_SCAN_READY_IN 0x3d8c +#define GS101_GSA_RESTORE 0x3d90 +#define GS101_ALIVE_OTP_LATCH 0x3d94 +#define GS101_DEBUG_OVERRIDE 0x3d98 +#define GS101_WDT_OPTION 0x3d9c +#define GS101_AOC_WDT_CFG 0x3da0 +#define GS101_CTRL_SECJTAG_ALIVE 0x3da4 +#define GS101_CTRL_DIV_PLL_ALV_DIVLOW 0x3e00 +#define GS101_CTRL_MUX_CLK_APM_REFSRC_AUTORESTORE 0x3e04 +#define GS101_CTRL_MUX_CLK_APM_REFSRC 0x3e08 +#define GS101_CTRL_MUX_CLK_APM_REF 0x3e0c +#define GS101_CTRL_MUX_PLL_ALV_DIV4 0x3e10 +#define GS101_CTRL_PLL_ALV_DIV4 0x3e14 +#define GS101_CTRL_OSCCLK_APMGSA 0x3e18 +#define GS101_CTRL_BLK_AOC_CLKS 0x3e1c +#define GS101_CTRL_PLL_ALV_LOCK 0x3e20 +#define GS101_CTRL_CLKDIV__CLKRTC 0x3e24 +#define GS101_CTRL_SOC32K 0x3e30 +#define GS101_CTRL_STM_PMU 0x3e34 +#define GS101_CTRL_PMU_DEBUG 0x3e38 +#define GS101_CTRL_DEBUG_UART 0x3e3c +#define GS101_CTRL_TCK 0x3e40 +#define GS101_CTRL_SBU_SW_EN 0x3e44 +#define GS101_PAD_CTRL_CLKOUT0 0x3e80 +#define GS101_PAD_CTRL_CLKOUT1 0x3e84 +#define GS101_PAD_CTRL_APM_24MOUT_0 0x3e88 +#define GS101_PAD_CTRL_APM_24MOUT_1 0x3e8c +#define GS101_PAD_CTRL_IO_FORCE_RETENTION 0x3e90 +#define GS101_PAD_CTRL_APACTIVE_n 0x3e94 +#define GS101_PAD_CTRL_TCXO_ON 0x3e98 +#define GS101_PAD_CTRL_PWR_HOLD 0x3e9c +#define GS101_PAD_CTRL_RESETO_n 0x3ea0 +#define GS101_PAD_CTRL_WRESETO_n 0x3ea4 +#define GS101_PHY_CTRL_USB20 0x3eb0 +#define GS101_PHY_CTRL_USBDP 0x3eb4 +#define GS101_PHY_CTRL_MIPI_DCPHY_M4M4 0x3eb8 +#define GS101_PHY_CTRL_MIPI_DCPHY_S4S4S4S4 0x3ebc +#define GS101_PHY_CTRL_PCIE_GEN4_0 0x3ec0 +#define GS101_PHY_CTRL_PCIE_GEN4_1 0x3ec4 +#define GS101_PHY_CTRL_UFS 0x3ec8 /* PMU INTR GEN */ #define GS101_GRP1_INTR_BID_UPEND (0x0108) diff --git a/include/linux/string.h b/include/linux/string.h index 0266dbdaa4cd..1b564c36d721 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -371,6 +371,10 @@ static inline void memzero_explicit(void *s, size_t count) * kbasename - return the last part of a pathname. 
* * @path: path to extract the filename from. + * + * Returns: + * Pointer to the filename portion inside @path. If no '/' exists, + * returns @path unchanged. */ static inline const char *kbasename(const char *path) { @@ -556,6 +560,9 @@ static __always_inline size_t str_has_prefix(const char *str, const char *prefix * strstarts - does @str start with @prefix? * @str: string to examine * @prefix: prefix to look for. + * + * Returns: + * True if @str begins with @prefix. False in all other cases. */ static inline bool strstarts(const char *str, const char *prefix) { diff --git a/include/linux/swap.h b/include/linux/swap.h index e818fbade1e2..38ca3df68716 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -301,16 +301,7 @@ struct swap_info_struct { struct work_struct discard_work; /* discard worker */ struct work_struct reclaim_work; /* reclaim worker */ struct list_head discard_clusters; /* discard clusters list */ - struct plist_node avail_lists[]; /* - * entries in swap_avail_heads, one - * entry per node. - * Must be last as the number of the - * array is nr_node_ids, which is not - * a fixed value so have to allocate - * dynamically. - * And it has to be an array so that - * plist_for_each_* can work. 
- */ + struct plist_node avail_list; /* entry in swap_avail_head */ }; static inline swp_entry_t page_swap_entry(struct page *page) @@ -462,7 +453,7 @@ static inline long get_nr_swap_pages(void) } extern void si_swapinfo(struct sysinfo *); -int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask); +int folio_alloc_swap(struct folio *folio); bool folio_free_swap(struct folio *folio); void put_swap_folio(struct folio *folio, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); @@ -560,7 +551,7 @@ static inline int swp_swapcount(swp_entry_t entry) return 0; } -static inline int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask) +static inline int folio_alloc_swap(struct folio *folio) { return -EINVAL; } diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 64ea151a7ae3..8cfc966eae48 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -28,7 +28,7 @@ #define SWP_OFFSET_MASK ((1UL << SWP_TYPE_SHIFT) - 1) /* - * Definitions only for PFN swap entries (see is_pfn_swap_entry()). To + * Definitions only for PFN swap entries (see softleaf_has_pfn()). To * store PFN, we only need SWP_PFN_BITS bits. Each of the pfn swap entries * can use the extra bits to store other information besides PFN. */ @@ -66,8 +66,6 @@ #define SWP_MIG_YOUNG BIT(SWP_MIG_YOUNG_BIT) #define SWP_MIG_DIRTY BIT(SWP_MIG_DIRTY_BIT) -static inline bool is_pfn_swap_entry(swp_entry_t entry); - /* Clear all flags but only keep swp_entry_t related information */ static inline pte_t pte_swp_clear_flags(pte_t pte) { @@ -110,36 +108,6 @@ static inline pgoff_t swp_offset(swp_entry_t entry) } /* - * This should only be called upon a pfn swap entry to get the PFN stored - * in the swap entry. Please refers to is_pfn_swap_entry() for definition - * of pfn swap entry.
- */ -static inline unsigned long swp_offset_pfn(swp_entry_t entry) -{ - VM_BUG_ON(!is_pfn_swap_entry(entry)); - return swp_offset(entry) & SWP_PFN_MASK; -} - -/* check whether a pte points to a swap entry */ -static inline int is_swap_pte(pte_t pte) -{ - return !pte_none(pte) && !pte_present(pte); -} - -/* - * Convert the arch-dependent pte representation of a swp_entry_t into an - * arch-independent swp_entry_t. - */ -static inline swp_entry_t pte_to_swp_entry(pte_t pte) -{ - swp_entry_t arch_entry; - - pte = pte_swp_clear_flags(pte); - arch_entry = __pte_to_swp_entry(pte); - return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); -} - -/* * Convert the arch-independent representation of a swp_entry_t into the * arch-dependent pte representation. */ @@ -175,27 +143,11 @@ static inline swp_entry_t make_writable_device_private_entry(pgoff_t offset) return swp_entry(SWP_DEVICE_WRITE, offset); } -static inline bool is_device_private_entry(swp_entry_t entry) -{ - int type = swp_type(entry); - return type == SWP_DEVICE_READ || type == SWP_DEVICE_WRITE; -} - -static inline bool is_writable_device_private_entry(swp_entry_t entry) -{ - return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); -} - static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset) { return swp_entry(SWP_DEVICE_EXCLUSIVE, offset); } -static inline bool is_device_exclusive_entry(swp_entry_t entry) -{ - return swp_type(entry) == SWP_DEVICE_EXCLUSIVE; -} - #else /* CONFIG_DEVICE_PRIVATE */ static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) { @@ -207,50 +159,14 @@ static inline swp_entry_t make_writable_device_private_entry(pgoff_t offset) return swp_entry(0, 0); } -static inline bool is_device_private_entry(swp_entry_t entry) -{ - return false; -} - -static inline bool is_writable_device_private_entry(swp_entry_t entry) -{ - return false; -} - static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset) { return swp_entry(0, 0); } -static inline 
bool is_device_exclusive_entry(swp_entry_t entry) -{ - return false; -} - #endif /* CONFIG_DEVICE_PRIVATE */ #ifdef CONFIG_MIGRATION -static inline int is_migration_entry(swp_entry_t entry) -{ - return unlikely(swp_type(entry) == SWP_MIGRATION_READ || - swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE || - swp_type(entry) == SWP_MIGRATION_WRITE); -} - -static inline int is_writable_migration_entry(swp_entry_t entry) -{ - return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE); -} - -static inline int is_readable_migration_entry(swp_entry_t entry) -{ - return unlikely(swp_type(entry) == SWP_MIGRATION_READ); -} - -static inline int is_readable_exclusive_migration_entry(swp_entry_t entry) -{ - return unlikely(swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE); -} static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) { @@ -289,14 +205,6 @@ static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) return entry; } -static inline bool is_migration_entry_young(swp_entry_t entry) -{ - if (migration_entry_supports_ad()) - return swp_offset(entry) & SWP_MIG_YOUNG; - /* Keep the old behavior of aging page after migration */ - return false; -} - static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) { if (migration_entry_supports_ad()) @@ -305,14 +213,6 @@ static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) return entry; } -static inline bool is_migration_entry_dirty(swp_entry_t entry) -{ - if (migration_entry_supports_ad()) - return swp_offset(entry) & SWP_MIG_DIRTY; - /* Keep the old behavior of clean page after migration */ - return false; -} - extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); extern void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *pte); @@ -332,43 +232,21 @@ static inline swp_entry_t make_writable_migration_entry(pgoff_t offset) return swp_entry(0, 0); } -static inline int is_migration_entry(swp_entry_t swp) -{ - 
return 0; -} - static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } static inline void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { } -static inline int is_writable_migration_entry(swp_entry_t entry) -{ - return 0; -} -static inline int is_readable_migration_entry(swp_entry_t entry) -{ - return 0; -} static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) { return entry; } -static inline bool is_migration_entry_young(swp_entry_t entry) -{ - return false; -} - static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) { return entry; } -static inline bool is_migration_entry_dirty(swp_entry_t entry) -{ - return false; -} #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_MEMORY_FAILURE @@ -426,21 +304,6 @@ static inline swp_entry_t make_pte_marker_entry(pte_marker marker) return swp_entry(SWP_PTE_MARKER, marker); } -static inline bool is_pte_marker_entry(swp_entry_t entry) -{ - return swp_type(entry) == SWP_PTE_MARKER; -} - -static inline pte_marker pte_marker_get(swp_entry_t entry) -{ - return swp_offset(entry) & PTE_MARKER_MASK; -} - -static inline bool is_pte_marker(pte_t pte) -{ - return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte)); -} - static inline pte_t make_pte_marker(pte_marker marker) { return swp_entry_to_pte(make_pte_marker_entry(marker)); @@ -451,83 +314,11 @@ static inline swp_entry_t make_poisoned_swp_entry(void) return make_pte_marker_entry(PTE_MARKER_POISONED); } -static inline int is_poisoned_swp_entry(swp_entry_t entry) -{ - return is_pte_marker_entry(entry) && - (pte_marker_get(entry) & PTE_MARKER_POISONED); - -} - static inline swp_entry_t make_guard_swp_entry(void) { return make_pte_marker_entry(PTE_MARKER_GUARD); } -static inline int is_guard_swp_entry(swp_entry_t entry) -{ - return is_pte_marker_entry(entry) && - (pte_marker_get(entry) & PTE_MARKER_GUARD); -} - -/* - * This is a special version to check pte_none() 
just to cover the case when - * the pte is a pte marker. It existed because in many cases the pte marker - * should be seen as a none pte; it's just that we have stored some information - * onto the none pte so it becomes not-none any more. - * - * It should be used when the pte is file-backed, ram-based and backing - * userspace pages, like shmem. It is not needed upon pgtables that do not - * support pte markers at all. For example, it's not needed on anonymous - * memory, kernel-only memory (including when the system is during-boot), - * non-ram based generic file-system. It's fine to be used even there, but the - * extra pte marker check will be pure overhead. - */ -static inline int pte_none_mostly(pte_t pte) -{ - return pte_none(pte) || is_pte_marker(pte); -} - -static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) -{ - struct page *p = pfn_to_page(swp_offset_pfn(entry)); - - /* - * Any use of migration entries may only occur while the - * corresponding page is locked - */ - BUG_ON(is_migration_entry(entry) && !PageLocked(p)); - - return p; -} - -static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry) -{ - struct folio *folio = pfn_folio(swp_offset_pfn(entry)); - - /* - * Any use of migration entries may only occur while the - * corresponding folio is locked - */ - BUG_ON(is_migration_entry(entry) && !folio_test_locked(folio)); - - return folio; -} - -/* - * A pfn swap entry is a special type of swap entry that always has a pfn stored - * in the swap offset. They can either be used to represent unaddressable device - * memory, to restrict access to a page undergoing migration or to represent a - * pfn which has been hwpoisoned and unmapped. 
- */ -static inline bool is_pfn_swap_entry(swp_entry_t entry) -{ - /* Make sure the swp offset can always store the needed fields */ - BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS); - - return is_migration_entry(entry) || is_device_private_entry(entry) || - is_device_exclusive_entry(entry) || is_hwpoison_entry(entry); -} - struct page_vma_mapped_walk; #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION @@ -539,18 +330,6 @@ extern void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd); -static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) -{ - swp_entry_t arch_entry; - - if (pmd_swp_soft_dirty(pmd)) - pmd = pmd_swp_clear_soft_dirty(pmd); - if (pmd_swp_uffd_wp(pmd)) - pmd = pmd_swp_clear_uffd_wp(pmd); - arch_entry = __pmd_to_swp_entry(pmd); - return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); -} - static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) { swp_entry_t arch_entry; @@ -559,10 +338,6 @@ static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) return __swp_entry_to_pmd(arch_entry); } -static inline int is_pmd_migration_entry(pmd_t pmd) -{ - return is_swap_pmd(pmd) && is_migration_entry(pmd_to_swp_entry(pmd)); -} #else /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ static inline int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, struct page *page) @@ -578,26 +353,12 @@ static inline void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { } -static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) -{ - return swp_entry(0, 0); -} - static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) { return __pmd(0); } -static inline int is_pmd_migration_entry(pmd_t pmd) -{ - return 0; -} #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ -static inline int non_swap_entry(swp_entry_t entry) -{ - return swp_type(entry) >= MAX_SWAPFILES; -} - #endif /* CONFIG_MMU */ #endif /* _LINUX_SWAPOPS_H */ diff --git 
a/include/linux/syscore_ops.h b/include/linux/syscore_ops.h index ae4d48e4c970..ac6d71be5c38 100644 --- a/include/linux/syscore_ops.h +++ b/include/linux/syscore_ops.h @@ -11,14 +11,19 @@ #include <linux/list.h> struct syscore_ops { + int (*suspend)(void *data); + void (*resume)(void *data); + void (*shutdown)(void *data); +}; + +struct syscore { struct list_head node; - int (*suspend)(void); - void (*resume)(void); - void (*shutdown)(void); + const struct syscore_ops *ops; + void *data; }; -extern void register_syscore_ops(struct syscore_ops *ops); -extern void unregister_syscore_ops(struct syscore_ops *ops); +extern void register_syscore(struct syscore *syscore); +extern void unregister_syscore(struct syscore *syscore); #ifdef CONFIG_PM_SLEEP extern int syscore_suspend(void); extern void syscore_resume(void); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 92e9146b1104..288fe0055cd5 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -59,6 +59,121 @@ extern const int sysctl_vals[]; #define SYSCTL_LONG_ONE ((void *)&sysctl_long_vals[1]) #define SYSCTL_LONG_MAX ((void *)&sysctl_long_vals[2]) +#define SYSCTL_CONV_IDENTITY(val) (val) +/** + * + * "dir" originates from read_iter (dir = 0) or write_iter (dir = 1) + * in the file_operations struct at proc/proc_sysctl.c. Its value means + * one of two things for sysctl: + * 1. SYSCTL_USER_TO_KERN(dir) Writing to an internal kernel variable from user + * space (dir > 0) + * 2. SYSCTL_KERN_TO_USER(dir) Writing to a user space buffer from a kernel + * variable (dir == 0). 
+ */ +#define SYSCTL_USER_TO_KERN(dir) (!!(dir)) +#define SYSCTL_KERN_TO_USER(dir) (!dir) + +#define SYSCTL_USER_TO_KERN_INT_CONV(name, u_ptr_op) \ +int sysctl_user_to_kern_int_conv##name(const bool *negp, \ + const unsigned long *u_ptr,\ + int *k_ptr) \ +{ \ + unsigned long u = u_ptr_op(*u_ptr); \ + if (*negp) { \ + if (u > (unsigned long) INT_MAX + 1) \ + return -EINVAL; \ + WRITE_ONCE(*k_ptr, -u); \ + } else { \ + if (u > (unsigned long) INT_MAX) \ + return -EINVAL; \ + WRITE_ONCE(*k_ptr, u); \ + } \ + return 0; \ +} + +#define SYSCTL_KERN_TO_USER_INT_CONV(name, k_ptr_op) \ +int sysctl_kern_to_user_int_conv##name(bool *negp, \ + unsigned long *u_ptr, \ + const int *k_ptr) \ +{ \ + int val = READ_ONCE(*k_ptr); \ + if (val < 0) { \ + *negp = true; \ + *u_ptr = -k_ptr_op((unsigned long)val); \ + } else { \ + *negp = false; \ + *u_ptr = k_ptr_op((unsigned long)val); \ + } \ + return 0; \ +} + +/** + * To range check on a converted value, use a temp k_ptr + * When checking range, value should be within (tbl->extra1, tbl->extra2) + */ +#define SYSCTL_INT_CONV_CUSTOM(name, user_to_kern, kern_to_user, \ + k_ptr_range_check) \ +int do_proc_int_conv##name(bool *negp, unsigned long *u_ptr, int *k_ptr,\ + int dir, const struct ctl_table *tbl) \ +{ \ + if (SYSCTL_KERN_TO_USER(dir)) \ + return kern_to_user(negp, u_ptr, k_ptr); \ + \ + if (k_ptr_range_check) { \ + int tmp_k, ret; \ + if (!tbl) \ + return -EINVAL; \ + ret = user_to_kern(negp, u_ptr, &tmp_k); \ + if (ret) \ + return ret; \ + if ((tbl->extra1 && *(int *)tbl->extra1 > tmp_k) || \ + (tbl->extra2 && *(int *)tbl->extra2 < tmp_k)) \ + return -EINVAL; \ + WRITE_ONCE(*k_ptr, tmp_k); \ + } else \ + return user_to_kern(negp, u_ptr, k_ptr); \ + return 0; \ +} + +#define SYSCTL_USER_TO_KERN_UINT_CONV(name, u_ptr_op) \ +int sysctl_user_to_kern_uint_conv##name(const unsigned long *u_ptr,\ + unsigned int *k_ptr) \ +{ \ + unsigned long u = u_ptr_op(*u_ptr); \ + if (u > UINT_MAX) \ + return -EINVAL; \ + WRITE_ONCE(*k_ptr, u); \ 
+ return 0; \ +} + +#define SYSCTL_UINT_CONV_CUSTOM(name, user_to_kern, kern_to_user, \ + k_ptr_range_check) \ +int do_proc_uint_conv##name(unsigned long *u_ptr, unsigned int *k_ptr, \ + int dir, const struct ctl_table *tbl) \ +{ \ + if (SYSCTL_KERN_TO_USER(dir)) \ + return kern_to_user(u_ptr, k_ptr); \ + \ + if (k_ptr_range_check) { \ + unsigned int tmp_k; \ + int ret; \ + if (!tbl) \ + return -EINVAL; \ + ret = user_to_kern(u_ptr, &tmp_k); \ + if (ret) \ + return ret; \ + if ((tbl->extra1 && \ + *(unsigned int *)tbl->extra1 > tmp_k) || \ + (tbl->extra2 && \ + *(unsigned int *)tbl->extra2 < tmp_k)) \ + return -ERANGE; \ + WRITE_ONCE(*k_ptr, tmp_k); \ + } else \ + return user_to_kern(u_ptr, k_ptr); \ + return 0; \ +} + + extern const unsigned long sysctl_long_vals[]; typedef int proc_handler(const struct ctl_table *ctl, int write, void *buffer, @@ -68,25 +183,30 @@ int proc_dostring(const struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dobool(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int proc_dointvec(const struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec_minmax(const struct ctl_table *table, int dir, void *buffer, + size_t *lenp, loff_t *ppos); +int proc_dointvec_conv(const struct ctl_table *table, int dir, void *buffer, + size_t *lenp, loff_t *ppos, + int (*conv)(bool *negp, unsigned long *u_ptr, int *k_ptr, + int dir, const struct ctl_table *table)); int proc_douintvec(const struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_dointvec_minmax(const struct ctl_table *, int, void *, size_t *, loff_t *); int proc_douintvec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); +int proc_douintvec_conv(const struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos, + int (*conv)(unsigned long *lvalp, unsigned int *valp, + int write, const struct ctl_table *table)); + int proc_dou8vec_minmax(const struct ctl_table 
*table, int write, void *buffer, size_t *lenp, loff_t *ppos); -int proc_dointvec_jiffies(const struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_dointvec_ms_jiffies_minmax(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -int proc_dointvec_userhz_jiffies(const struct ctl_table *, int, void *, size_t *, - loff_t *); -int proc_dointvec_ms_jiffies(const struct ctl_table *, int, void *, size_t *, - loff_t *); int proc_doulongvec_minmax(const struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_doulongvec_ms_jiffies_minmax(const struct ctl_table *table, int, void *, - size_t *, loff_t *); +int proc_doulongvec_minmax_conv(const struct ctl_table *table, int dir, + void *buffer, size_t *lenp, loff_t *ppos, + unsigned long convmul, unsigned long convdiv); int proc_do_large_bitmap(const struct ctl_table *, int, void *, size_t *, loff_t *); int proc_do_static_key(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); +int sysctl_kern_to_user_uint_conv(unsigned long *u_ptr, const unsigned int *k_ptr); /* * Register a set of sysctl names by calling register_sysctl @@ -156,6 +276,10 @@ struct ctl_node { * @nreg: When nreg drops to 0 the ctl_table_header will be unregistered. * @rcu: Delays the freeing of the inode. 
Introduced with "unfuck proc_sysctl ->d_compare()" * + * @type: Enumeration to differentiate between ctl target types + * @type.SYSCTL_TABLE_TYPE_DEFAULT: ctl target with no special considerations + * @type.SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY: Identifies a permanently empty dir + * target to serve as a mount point */ struct ctl_table_header { union { @@ -175,13 +299,6 @@ struct ctl_table_header { struct ctl_dir *parent; struct ctl_node *node; struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */ - /** - * enum type - Enumeration to differentiate between ctl target types - * @SYSCTL_TABLE_TYPE_DEFAULT: ctl target with no special considerations - * @SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY: Used to identify a permanently - * empty directory target to serve - * as mount point. - */ enum { SYSCTL_TABLE_TYPE_DEFAULT, SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY, @@ -235,12 +352,6 @@ extern struct ctl_table_header *register_sysctl_mount_point(const char *path); void do_sysctl_args(void); bool sysctl_is_alias(char *param); -int do_proc_douintvec(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos, - int (*conv)(unsigned long *lvalp, - unsigned int *valp, - int write, void *data), - void *data); extern int unaligned_enabled; extern int no_unaligned_warning; diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 9a25a2911652..c33a96b7391a 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -58,6 +58,12 @@ do { \ #define sysfs_attr_init(attr) do {} while (0) #endif +#ifdef CONFIG_CFI +#define __SYSFS_FUNCTION_ALTERNATIVE(MEMBERS...) struct { MEMBERS } +#else +#define __SYSFS_FUNCTION_ALTERNATIVE(MEMBERS...) union { MEMBERS } +#endif + /** * struct attribute_group - data structure used to declare an attribute group. 
* @name: Optional: Attribute group name @@ -98,14 +104,21 @@ do { \ */ struct attribute_group { const char *name; - umode_t (*is_visible)(struct kobject *, - struct attribute *, int); + __SYSFS_FUNCTION_ALTERNATIVE( + umode_t (*is_visible)(struct kobject *, + struct attribute *, int); + umode_t (*is_visible_const)(struct kobject *, + const struct attribute *, int); + ); umode_t (*is_bin_visible)(struct kobject *, const struct bin_attribute *, int); size_t (*bin_size)(struct kobject *, const struct bin_attribute *, int); - struct attribute **attrs; + union { + struct attribute **attrs; + const struct attribute *const *attrs_const; + }; const struct bin_attribute *const *bin_attrs; }; @@ -238,28 +251,20 @@ struct attribute_group { .store = _store, \ } -#define __ATTR_RO(_name) { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ - .show = _name##_show, \ -} - #define __ATTR_RO_MODE(_name, _mode) { \ .attr = { .name = __stringify(_name), \ .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ .show = _name##_show, \ } -#define __ATTR_RW_MODE(_name, _mode) { \ - .attr = { .name = __stringify(_name), \ - .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ - .show = _name##_show, \ - .store = _name##_store, \ -} +#define __ATTR_RO(_name) \ + __ATTR_RO_MODE(_name, 0444) -#define __ATTR_WO(_name) { \ - .attr = { .name = __stringify(_name), .mode = 0200 }, \ - .store = _name##_store, \ -} +#define __ATTR_RW_MODE(_name, _mode) \ + __ATTR(_name, _mode, _name##_show, _name##_store) + +#define __ATTR_WO(_name) \ + __ATTR(_name, 0200, NULL, _name##_store) #define __ATTR_RW(_name) __ATTR(_name, 0644, _name##_show, _name##_store) @@ -284,7 +289,12 @@ static const struct attribute_group *_name##_groups[] = { \ #define ATTRIBUTE_GROUPS(_name) \ static const struct attribute_group _name##_group = { \ - .attrs = _name##_attrs, \ + .attrs = _Generic(_name##_attrs, \ + struct attribute **: \ + _name##_attrs, \ + const struct attribute *const *: \ + (void *)_name##_attrs \ + ), \ }; \ 
__ATTRIBUTE_GROUPS(_name) diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 3d8f7d1ce2b8..202da079d500 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -529,41 +529,18 @@ static inline struct tpm2_auth *tpm2_chip_auth(struct tpm_chip *chip) #endif } -void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, - u32 handle, u8 *name); +int tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, + u32 handle, u8 *name); void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, u8 attributes, u8 *passphrase, int passphraselen); void tpm_buf_append_auth(struct tpm_chip *chip, struct tpm_buf *buf, - u8 attributes, u8 *passphrase, int passphraselen); -static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip, - struct tpm_buf *buf, - u8 attributes, - u8 *passphrase, - int passphraselen) -{ - struct tpm_header *head; - int offset; - - if (tpm2_chip_auth(chip)) { - tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, passphraselen); - } else { - offset = buf->handles * 4 + TPM_HEADER_SIZE; - head = (struct tpm_header *)buf->data; - - /* - * If the only sessions are optional, the command tag must change to - * TPM2_ST_NO_SESSIONS. 
- */ - if (tpm_buf_length(buf) == offset) - head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS); - } -} + u8 *passphrase, int passphraselen); #ifdef CONFIG_TCG_TPM2_HMAC int tpm2_start_auth_session(struct tpm_chip *chip); -void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf); +int tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf); int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf, int rc); void tpm2_end_auth_session(struct tpm_chip *chip); @@ -577,10 +554,13 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip) static inline void tpm2_end_auth_session(struct tpm_chip *chip) { } -static inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip, - struct tpm_buf *buf) + +static inline int tpm_buf_fill_hmac_session(struct tpm_chip *chip, + struct tpm_buf *buf) { + return 0; } + static inline int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf, int rc) diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 557780fe1c77..4a0b8c172d27 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -80,6 +80,19 @@ static inline bool trace_seq_has_overflowed(struct trace_seq *s) return s->full || seq_buf_has_overflowed(&s->seq); } +/** + * trace_seq_pop - pop off the last written character + * @s: trace sequence descriptor + * + * Removes the last written character to the trace_seq @s. + * + * Returns the last character or -1 if it is empty. + */ +static inline int trace_seq_pop(struct trace_seq *s) +{ + return seq_buf_pop(&s->seq); +} + /* * Currently only defined when tracing is enabled. 
*/ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 826ce3f8e1f8..8a56f3278b1b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -222,6 +222,15 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) } /* + * When a tracepoint is used, it's name is added to the __tracepoint_check + * section. This section is only used at build time to make sure all + * defined tracepoints are used. It is discarded after the build. + */ +# define TRACEPOINT_CHECK(name) \ + static const char __used __section("__tracepoint_check") \ + __trace_check_##name[] = #name; + +/* * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. @@ -270,6 +279,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ static inline void __do_trace_##name(proto) \ { \ + TRACEPOINT_CHECK(name) \ if (cond) { \ guard(preempt_notrace)(); \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ @@ -289,6 +299,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ static inline void __do_trace_##name(proto) \ { \ + TRACEPOINT_CHECK(name) \ guard(rcu_tasks_trace)(); \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ } \ @@ -371,10 +382,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) __DEFINE_TRACE_EXT(_name, NULL, PARAMS(_proto), PARAMS(_args)); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ + TRACEPOINT_CHECK(name) \ EXPORT_SYMBOL_GPL(__tracepoint_##name); \ EXPORT_SYMBOL_GPL(__traceiter_##name); \ EXPORT_STATIC_CALL_GPL(tp_func_##name) #define EXPORT_TRACEPOINT_SYMBOL(name) \ + TRACEPOINT_CHECK(name) \ EXPORT_SYMBOL(__tracepoint_##name); \ 
EXPORT_SYMBOL(__traceiter_##name); \ EXPORT_STATIC_CALL(tp_func_##name) diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index c0e716aec26a..fd5f42765497 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -16,7 +16,7 @@ #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/swap.h> -#include <linux/swapops.h> +#include <linux/leafops.h> #include <asm-generic/pgtable_uffd.h> #include <linux/hugetlb_inline.h> @@ -228,15 +228,14 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, if (wp_async && (vm_flags == VM_UFFD_WP)) return true; -#ifndef CONFIG_PTE_MARKER_UFFD_WP /* * If user requested uffd-wp but not enabled pte markers for * uffd-wp, then shmem & hugetlbfs are not supported but only * anonymous. */ - if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) + if (!uffd_supports_wp_marker() && (vm_flags & VM_UFFD_WP) && + !vma_is_anonymous(vma)) return false; -#endif /* By default, allow any of anon|shmem|hugetlb */ return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) || @@ -291,6 +290,43 @@ void userfaultfd_release_new(struct userfaultfd_ctx *ctx); void userfaultfd_release_all(struct mm_struct *mm, struct userfaultfd_ctx *ctx); +static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) +{ + /* Only wr-protect mode uses pte markers */ + if (!userfaultfd_wp(vma)) + return false; + + /* File-based uffd-wp always need markers */ + if (!vma_is_anonymous(vma)) + return true; + + /* + * Anonymous uffd-wp only needs the markers if WP_UNPOPULATED + * enabled (to apply markers on zero pages). + */ + return userfaultfd_wp_unpopulated(vma); +} + +/* + * Returns true if this is a swap pte and was uffd-wp wr-protected in either + * forms (pte marker or a normal swap pte), false otherwise. 
+ */ +static inline bool pte_swp_uffd_wp_any(pte_t pte) +{ + if (!uffd_supports_wp_marker()) + return false; + + if (pte_present(pte)) + return false; + + if (pte_swp_uffd_wp(pte)) + return true; + + if (pte_is_uffd_wp_marker(pte)) + return true; + + return false; +} #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -415,49 +451,9 @@ static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) return false; } -#endif /* CONFIG_USERFAULTFD */ - static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) { - /* Only wr-protect mode uses pte markers */ - if (!userfaultfd_wp(vma)) - return false; - - /* File-based uffd-wp always need markers */ - if (!vma_is_anonymous(vma)) - return true; - - /* - * Anonymous uffd-wp only needs the markers if WP_UNPOPULATED - * enabled (to apply markers on zero pages). - */ - return userfaultfd_wp_unpopulated(vma); -} - -static inline bool pte_marker_entry_uffd_wp(swp_entry_t entry) -{ -#ifdef CONFIG_PTE_MARKER_UFFD_WP - return is_pte_marker_entry(entry) && - (pte_marker_get(entry) & PTE_MARKER_UFFD_WP); -#else - return false; -#endif -} - -static inline bool pte_marker_uffd_wp(pte_t pte) -{ -#ifdef CONFIG_PTE_MARKER_UFFD_WP - swp_entry_t entry; - - if (!is_swap_pte(pte)) - return false; - - entry = pte_to_swp_entry(pte); - - return pte_marker_entry_uffd_wp(entry); -#else return false; -#endif } /* @@ -466,17 +462,7 @@ static inline bool pte_marker_uffd_wp(pte_t pte) */ static inline bool pte_swp_uffd_wp_any(pte_t pte) { -#ifdef CONFIG_PTE_MARKER_UFFD_WP - if (!is_swap_pte(pte)) - return false; - - if (pte_swp_uffd_wp(pte)) - return true; - - if (pte_marker_uffd_wp(pte)) - return true; -#endif return false; } - +#endif /* CONFIG_USERFAULTFD */ #endif /* _LINUX_USERFAULTFD_K_H */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index eb54b7b3202f..e8e94f90d686 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -50,7 +50,11 @@ struct iov_iter; /* in uio.h */ #endif struct 
vm_struct { - struct vm_struct *next; + union { + struct vm_struct *next; /* Early registration of vm_areas. */ + struct llist_node llnode; /* Asynchronous freeing on error paths. */ + }; + void *addr; unsigned long size; unsigned long flags; @@ -328,4 +332,6 @@ bool vmalloc_dump_obj(void *object); static inline bool vmalloc_dump_obj(void *object) { return false; } #endif +unsigned int memalloc_apply_gfp_scope(gfp_t gfp_mask); +void memalloc_restore_scope(unsigned int flags); #endif /* _LINUX_VMALLOC_H */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index c287998908bf..3398a345bda8 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -520,32 +520,12 @@ static inline const char *vm_event_name(enum vm_event_item item) #ifdef CONFIG_MEMCG -void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, +void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); -static inline void mod_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ - unsigned long flags; - - local_irq_save(flags); - __mod_lruvec_state(lruvec, idx, val); - local_irq_restore(flags); -} - -void __lruvec_stat_mod_folio(struct folio *folio, +void lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val); -static inline void lruvec_stat_mod_folio(struct folio *folio, - enum node_stat_item idx, int val) -{ - unsigned long flags; - - local_irq_save(flags); - __lruvec_stat_mod_folio(folio, idx, val); - local_irq_restore(flags); -} - static inline void mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { @@ -554,24 +534,12 @@ static inline void mod_lruvec_page_state(struct page *page, #else -static inline void __mod_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ - __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); -} - static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { 
mod_node_page_state(lruvec_pgdat(lruvec), idx, val); } -static inline void __lruvec_stat_mod_folio(struct folio *folio, - enum node_stat_item idx, int val) -{ - __mod_node_page_state(folio_pgdat(folio), idx, val); -} - static inline void lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { @@ -586,18 +554,6 @@ static inline void mod_lruvec_page_state(struct page *page, #endif /* CONFIG_MEMCG */ -static inline void __lruvec_stat_add_folio(struct folio *folio, - enum node_stat_item idx) -{ - __lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); -} - -static inline void __lruvec_stat_sub_folio(struct folio *folio, - enum node_stat_item idx) -{ - __lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); -} - static inline void lruvec_stat_add_folio(struct folio *folio, enum node_stat_item idx) { diff --git a/include/net/sock.h b/include/net/sock.h index 02253c6a578b..aafe8bdb2c0f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2662,8 +2662,12 @@ static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk) #endif /* CONFIG_MEMCG_V1 */ do { - if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg))) + if (time_before64(get_jiffies_64(), + mem_cgroup_get_socket_pressure(memcg))) { + memcg_memory_event(mem_cgroup_from_sk(sk), + MEMCG_SOCK_THROTTLED); return true; + } } while ((memcg = parent_mem_cgroup(memcg))); return false; diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index c9f0b1018bcc..eaecc3c5f772 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -12,7 +12,6 @@ #include <linux/pci.h> #include <linux/aer.h> #include <linux/cper.h> -#include <linux/mm.h> /* * MCE Extended Error Log trace event @@ -378,91 +377,6 @@ TRACE_EVENT(aer_event, "Not available") ); #endif /* CONFIG_PCIEAER */ - -/* - * memory-failure recovery action result event - * - * unsigned long pfn - Page Frame Number of the corrupted page - * int type - Page types of the corrupted page 
- * int result - Result of recovery action - */ - -#ifdef CONFIG_MEMORY_FAILURE -#define MF_ACTION_RESULT \ - EM ( MF_IGNORED, "Ignored" ) \ - EM ( MF_FAILED, "Failed" ) \ - EM ( MF_DELAYED, "Delayed" ) \ - EMe ( MF_RECOVERED, "Recovered" ) - -#define MF_PAGE_TYPE \ - EM ( MF_MSG_KERNEL, "reserved kernel page" ) \ - EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \ - EM ( MF_MSG_HUGE, "huge page" ) \ - EM ( MF_MSG_FREE_HUGE, "free huge page" ) \ - EM ( MF_MSG_GET_HWPOISON, "get hwpoison page" ) \ - EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" ) \ - EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" ) \ - EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" ) \ - EM ( MF_MSG_DIRTY_MLOCKED_LRU, "dirty mlocked LRU page" ) \ - EM ( MF_MSG_CLEAN_MLOCKED_LRU, "clean mlocked LRU page" ) \ - EM ( MF_MSG_DIRTY_UNEVICTABLE_LRU, "dirty unevictable LRU page" ) \ - EM ( MF_MSG_CLEAN_UNEVICTABLE_LRU, "clean unevictable LRU page" ) \ - EM ( MF_MSG_DIRTY_LRU, "dirty LRU page" ) \ - EM ( MF_MSG_CLEAN_LRU, "clean LRU page" ) \ - EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" ) \ - EM ( MF_MSG_BUDDY, "free buddy page" ) \ - EM ( MF_MSG_DAX, "dax page" ) \ - EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \ - EM ( MF_MSG_ALREADY_POISONED, "already poisoned" ) \ - EMe ( MF_MSG_UNKNOWN, "unknown page" ) - -/* - * First define the enums in MM_ACTION_RESULT to be exported to userspace - * via TRACE_DEFINE_ENUM(). - */ -#undef EM -#undef EMe -#define EM(a, b) TRACE_DEFINE_ENUM(a); -#define EMe(a, b) TRACE_DEFINE_ENUM(a); - -MF_ACTION_RESULT -MF_PAGE_TYPE - -/* - * Now redefine the EM() and EMe() macros to map the enums to the strings - * that will be printed in the output. 
- */ -#undef EM -#undef EMe -#define EM(a, b) { a, b }, -#define EMe(a, b) { a, b } - -TRACE_EVENT(memory_failure_event, - TP_PROTO(unsigned long pfn, - int type, - int result), - - TP_ARGS(pfn, type, result), - - TP_STRUCT__entry( - __field(unsigned long, pfn) - __field(int, type) - __field(int, result) - ), - - TP_fast_assign( - __entry->pfn = pfn; - __entry->type = type; - __entry->result = result; - ), - - TP_printk("pfn %#lx: recovery action for %s: %s", - __entry->pfn, - __print_symbolic(__entry->type, MF_PAGE_TYPE), - __print_symbolic(__entry->result, MF_ACTION_RESULT) - ) -); -#endif /* CONFIG_MEMORY_FAILURE */ #endif /* _TRACE_HW_EVENT_MC_H */ /* This part must be outside protection */ diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h index 17fa4f6e5ea6..0cef64366538 100644 --- a/include/rv/da_monitor.h +++ b/include/rv/da_monitor.h @@ -16,34 +16,19 @@ #include <linux/bug.h> #include <linux/sched.h> -#ifdef CONFIG_RV_REACTORS - -#define DECLARE_RV_REACTING_HELPERS(name, type) \ -static void cond_react_##name(type curr_state, type event) \ -{ \ - if (!rv_reacting_on() || !rv_##name.react) \ - return; \ - rv_##name.react("rv: monitor %s does not allow event %s on state %s\n", \ - #name, \ - model_get_event_name_##name(event), \ - model_get_state_name_##name(curr_state)); \ -} - -#else /* CONFIG_RV_REACTOR */ - -#define DECLARE_RV_REACTING_HELPERS(name, type) \ -static void cond_react_##name(type curr_state, type event) \ -{ \ - return; \ -} -#endif - /* * Generic helpers for all types of deterministic automata monitors. 
*/ #define DECLARE_DA_MON_GENERIC_HELPERS(name, type) \ \ -DECLARE_RV_REACTING_HELPERS(name, type) \ +static void react_##name(type curr_state, type event) \ +{ \ + rv_react(&rv_##name, \ + "rv: monitor %s does not allow event %s on state %s\n", \ + #name, \ + model_get_event_name_##name(event), \ + model_get_state_name_##name(curr_state)); \ +} \ \ /* \ * da_monitor_reset_##name - reset a monitor and setting it to init state \ @@ -126,7 +111,7 @@ da_event_##name(struct da_monitor *da_mon, enum events_##name event) \ for (int i = 0; i < MAX_DA_RETRY_RACING_EVENTS; i++) { \ next_state = model_get_next_state_##name(curr_state, event); \ if (next_state == INVALID_STATE) { \ - cond_react_##name(curr_state, event); \ + react_##name(curr_state, event); \ trace_error_##name(model_get_state_name_##name(curr_state), \ model_get_event_name_##name(event)); \ return false; \ @@ -165,7 +150,7 @@ static inline bool da_event_##name(struct da_monitor *da_mon, struct task_struct for (int i = 0; i < MAX_DA_RETRY_RACING_EVENTS; i++) { \ next_state = model_get_next_state_##name(curr_state, event); \ if (next_state == INVALID_STATE) { \ - cond_react_##name(curr_state, event); \ + react_##name(curr_state, event); \ trace_error_##name(tsk->pid, \ model_get_state_name_##name(curr_state), \ model_get_event_name_##name(event)); \ diff --git a/include/rv/ltl_monitor.h b/include/rv/ltl_monitor.h index 5368cf5fd623..eff60cd61106 100644 --- a/include/rv/ltl_monitor.h +++ b/include/rv/ltl_monitor.h @@ -16,23 +16,9 @@ #error "Please include $(MODEL_NAME).h generated by rvgen" #endif -#ifdef CONFIG_RV_REACTORS #define RV_MONITOR_NAME CONCATENATE(rv_, MONITOR_NAME) static struct rv_monitor RV_MONITOR_NAME; -static void rv_cond_react(struct task_struct *task) -{ - if (!rv_reacting_on() || !RV_MONITOR_NAME.react) - return; - RV_MONITOR_NAME.react("rv: "__stringify(MONITOR_NAME)": %s[%d]: violation detected\n", - task->comm, task->pid); -} -#else -static void rv_cond_react(struct task_struct *task) -{ 
-} -#endif - static int ltl_monitor_slot = RV_PER_TASK_MONITOR_INIT; static void ltl_atoms_fetch(struct task_struct *task, struct ltl_monitor *mon); @@ -98,7 +84,8 @@ static void ltl_monitor_destroy(void) static void ltl_illegal_state(struct task_struct *task, struct ltl_monitor *mon) { CONCATENATE(trace_error_, MONITOR_NAME)(task); - rv_cond_react(task); + rv_react(&RV_MONITOR_NAME, "rv: "__stringify(MONITOR_NAME)": %s[%d]: violation detected\n", + task->comm, task->pid); } static void ltl_attempt_start(struct task_struct *task, struct ltl_monitor *mon) diff --git a/include/scsi/scsi_dbg.h b/include/scsi/scsi_dbg.h index bd29cdb513a5..efcdc78530d5 100644 --- a/include/scsi/scsi_dbg.h +++ b/include/scsi/scsi_dbg.h @@ -11,11 +11,11 @@ extern size_t __scsi_format_command(char *, size_t, const unsigned char *, size_t); extern void scsi_print_sense_hdr(const struct scsi_device *, const char *, const struct scsi_sense_hdr *); -extern void scsi_print_sense(const struct scsi_cmnd *); +extern void scsi_print_sense(struct scsi_cmnd *); extern void __scsi_print_sense(const struct scsi_device *, const char *name, const unsigned char *sense_buffer, int sense_len); -extern void scsi_print_result(const struct scsi_cmnd *, const char *, int); +extern void scsi_print_result(struct scsi_cmnd *, const char *, int); #ifdef CONFIG_SCSI_CONSTANTS extern bool scsi_opcode_sa_name(int, int, const char **, const char **); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 993008cdea65..d32f5841f4f8 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -179,6 +179,12 @@ struct scsi_device { unsigned manage_shutdown:1; /* + * If true, let the high-level device driver (sd) manage the device + * power state for system restart (reboot) operations. + */ + unsigned manage_restart:1; + + /* * If set and if the device is runtime suspended, ask the high-level * device driver (sd) to force a runtime resume of the device. 
*/ @@ -313,8 +319,8 @@ sdev_prefix_printk(const char *, const struct scsi_device *, const char *, #define sdev_printk(l, sdev, fmt, a...) \ sdev_prefix_printk(l, sdev, NULL, fmt, ##a) -__printf(3, 4) void -scmd_printk(const char *, const struct scsi_cmnd *, const char *, ...); +__printf(3, 4) void scmd_printk(const char *, struct scsi_cmnd *, const char *, + ...); #define scmd_dbg(scmd, fmt, a...) \ do { \ @@ -558,6 +564,10 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, const struct scsi_exec_args *args); void scsi_failures_reset_retries(struct scsi_failures *failures); +struct scsi_cmnd *scsi_get_internal_cmd(struct scsi_device *sdev, + enum dma_data_direction data_direction, + blk_mq_req_flags_t flags); +void scsi_put_internal_cmd(struct scsi_cmnd *scmd); extern void sdev_disable_disk_events(struct scsi_device *sdev); extern void sdev_enable_disk_events(struct scsi_device *sdev); extern int scsi_vpd_lun_id(struct scsi_device *, char *, size_t); @@ -589,6 +599,22 @@ static inline unsigned int sdev_id(struct scsi_device *sdev) #define scmd_id(scmd) sdev_id((scmd)->device) #define scmd_channel(scmd) sdev_channel((scmd)->device) +/** + * scsi_device_is_pseudo_dev() - Whether a device is a pseudo SCSI device. + * @sdev: SCSI device to examine + * + * A pseudo SCSI device can be used to allocate SCSI commands but does not show + * up in sysfs. Additionally, the logical unit information in *@sdev is made up. + * + * This function tests the LUN number instead of comparing @sdev with + * @sdev->host->pseudo_sdev because this function may be called before + * @sdev->host->pseudo_sdev has been initialized. 
+ */ +static inline bool scsi_device_is_pseudo_dev(struct scsi_device *sdev) +{ + return sdev->lun == U64_MAX; +} + /* * checks for positions of the SCSI state machine */ diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index f5a243261236..e87cf7eadd26 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -87,6 +87,12 @@ struct scsi_host_template { int (* queuecommand)(struct Scsi_Host *, struct scsi_cmnd *); /* + * Queue a reserved command (BLK_MQ_REQ_RESERVED). The .queuecommand() + * documentation also applies to the .queue_reserved_command() callback. + */ + int (*queue_reserved_command)(struct Scsi_Host *, struct scsi_cmnd *); + + /* * The commit_rqs function is used to trigger a hardware * doorbell after some requests have been queued with * queuecommand, when an error is encountered before sending @@ -375,11 +381,20 @@ struct scsi_host_template { /* * This determines if we will use a non-interrupt driven * or an interrupt driven scheme. It is set to the maximum number - * of simultaneous commands a single hw queue in HBA will accept. + * of simultaneous commands a single hw queue in HBA will accept + * excluding internal commands. */ int can_queue; /* + * This determines how many commands the HBA will set aside + * for internal commands. This number will be added to + * @can_queue to calculate the maximum number of simultaneous + * commands sent to the host. + */ + int nr_reserved_cmds; + + /* * In many instances, especially where disconnect / reconnect are * supported, our host also has an ID on the SCSI bus. If this is * the case, then it must be reserved. Please set this_id to -1 if @@ -611,7 +626,17 @@ struct Scsi_Host { unsigned short max_cmd_len; int this_id; + + /* + * Number of commands this host can handle at the same time. + * This excludes reserved commands as specified by nr_reserved_cmds. + */ int can_queue; + /* + * Number of reserved commands to allocate, if any. 
+ */ + unsigned int nr_reserved_cmds; + short cmd_per_lun; short unsigned int sg_tablesize; short unsigned int sg_prot_tablesize; @@ -703,6 +728,12 @@ struct Scsi_Host { struct device shost_gendev, shost_dev; /* + * A SCSI device structure used for sending internal commands to the + * HBA. There is no corresponding logical unit inside the SCSI device. + */ + struct scsi_device *pseudo_sdev; + + /* * Points to the transport data (if any) which is allocated * separately */ diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 4063a701081b..e32de80854b6 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -121,8 +121,10 @@ sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd, bool target_sense_desc_format(struct se_device *dev); sector_t target_to_linux_sector(struct se_device *dev, sector_t lb); -bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, - struct block_device *bdev); +bool target_configure_unmap_from_bdev(struct se_dev_attrib *attrib, + struct block_device *bdev); +void target_configure_write_atomic_from_bdev(struct se_dev_attrib *attrib, + struct block_device *bdev); static inline bool target_dev_configured(struct se_device *se_dev) { diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index c4d9116904aa..7016d93fa383 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -158,6 +158,7 @@ enum se_cmd_flags_table { SCF_TASK_ATTR_SET = (1 << 17), SCF_TREAT_READ_AS_NORMAL = (1 << 18), SCF_TASK_ORDERED_SYNC = (1 << 19), + SCF_ATOMIC = (1 << 20), }; /* @@ -671,9 +672,9 @@ struct se_lun_acl { }; struct se_dev_entry_io_stats { - u32 total_cmds; - u32 read_bytes; - u32 write_bytes; + u64 total_cmds; + u64 read_bytes; + u64 write_bytes; }; struct se_dev_entry { @@ -731,6 +732,11 @@ struct se_dev_attrib { u32 unmap_granularity; u32 unmap_granularity_alignment; u32 max_write_same_len; + u32 
atomic_max_len; + u32 atomic_alignment; + u32 atomic_granularity; + u32 atomic_max_with_boundary; + u32 atomic_max_boundary; u8 submit_type; struct se_device *da_dev; struct config_group da_group; @@ -744,9 +750,9 @@ struct se_port_stat_grps { }; struct scsi_port_stats { - atomic_long_t cmd_pdus; - atomic_long_t tx_data_octets; - atomic_long_t rx_data_octets; + u64 cmd_pdus; + u64 tx_data_octets; + u64 rx_data_octets; }; struct se_lun { @@ -773,7 +779,7 @@ struct se_lun { spinlock_t lun_tg_pt_gp_lock; struct se_portal_group *lun_tpg; - struct scsi_port_stats lun_stats; + struct scsi_port_stats __percpu *lun_stats; struct config_group lun_group; struct se_port_stat_grps port_stat_grps; struct completion lun_shutdown_comp; @@ -806,9 +812,9 @@ struct se_device_queue { }; struct se_dev_io_stats { - u32 total_cmds; - u32 read_bytes; - u32 write_bytes; + u64 total_cmds; + u64 read_bytes; + u64 write_bytes; }; struct se_device { diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index dd94d14a2427..4cde53b45a85 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -10,8 +10,7 @@ #define SCAN_STATUS \ EM( SCAN_FAIL, "failed") \ EM( SCAN_SUCCEED, "succeeded") \ - EM( SCAN_PMD_NULL, "pmd_null") \ - EM( SCAN_PMD_NONE, "pmd_none") \ + EM( SCAN_NO_PTE_TABLE, "no_pte_table") \ EM( SCAN_PMD_MAPPED, "page_pmd_mapped") \ EM( SCAN_EXCEED_NONE_PTE, "exceed_none_pte") \ EM( SCAN_EXCEED_SWAP_PTE, "exceed_swap_pte") \ diff --git a/include/trace/events/memory-failure.h b/include/trace/events/memory-failure.h new file mode 100644 index 000000000000..aa57cc8f896b --- /dev/null +++ b/include/trace/events/memory-failure.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM memory_failure +#define TRACE_INCLUDE_FILE memory-failure + +#if !defined(_TRACE_MEMORY_FAILURE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MEMORY_FAILURE_H + +#include <linux/tracepoint.h> 
+#include <linux/mm.h> + +/* + * memory-failure recovery action result event + * + * unsigned long pfn - Page Frame Number of the corrupted page + * int type - Page types of the corrupted page + * int result - Result of recovery action + */ + +#define MF_ACTION_RESULT \ + EM ( MF_IGNORED, "Ignored" ) \ + EM ( MF_FAILED, "Failed" ) \ + EM ( MF_DELAYED, "Delayed" ) \ + EMe ( MF_RECOVERED, "Recovered" ) + +#define MF_PAGE_TYPE \ + EM ( MF_MSG_KERNEL, "reserved kernel page" ) \ + EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \ + EM ( MF_MSG_HUGE, "huge page" ) \ + EM ( MF_MSG_FREE_HUGE, "free huge page" ) \ + EM ( MF_MSG_GET_HWPOISON, "get hwpoison page" ) \ + EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" ) \ + EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" ) \ + EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" ) \ + EM ( MF_MSG_DIRTY_MLOCKED_LRU, "dirty mlocked LRU page" ) \ + EM ( MF_MSG_CLEAN_MLOCKED_LRU, "clean mlocked LRU page" ) \ + EM ( MF_MSG_DIRTY_UNEVICTABLE_LRU, "dirty unevictable LRU page" ) \ + EM ( MF_MSG_CLEAN_UNEVICTABLE_LRU, "clean unevictable LRU page" ) \ + EM ( MF_MSG_DIRTY_LRU, "dirty LRU page" ) \ + EM ( MF_MSG_CLEAN_LRU, "clean LRU page" ) \ + EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" ) \ + EM ( MF_MSG_BUDDY, "free buddy page" ) \ + EM ( MF_MSG_DAX, "dax page" ) \ + EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \ + EM ( MF_MSG_ALREADY_POISONED, "already poisoned" ) \ + EM ( MF_MSG_PFN_MAP, "non struct page pfn" ) \ + EMe ( MF_MSG_UNKNOWN, "unknown page" ) + +/* + * First define the enums in MM_ACTION_RESULT to be exported to userspace + * via TRACE_DEFINE_ENUM(). + */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +MF_ACTION_RESULT +MF_PAGE_TYPE + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings + * that will be printed in the output. 
+ */ +#undef EM +#undef EMe +#define EM(a, b) { a, b }, +#define EMe(a, b) { a, b } + +TRACE_EVENT(memory_failure_event, + TP_PROTO(unsigned long pfn, + int type, + int result), + + TP_ARGS(pfn, type, result), + + TP_STRUCT__entry( + __field(unsigned long, pfn) + __field(int, type) + __field(int, result) + ), + + TP_fast_assign( + __entry->pfn = pfn; + __entry->type = type; + __entry->result = result; + ), + + TP_printk("pfn %#lx: recovery action for %s: %s", + __entry->pfn, + __print_symbolic(__entry->type, MF_PAGE_TYPE), + __print_symbolic(__entry->result, MF_ACTION_RESULT) + ) +); +#endif /* _TRACE_MEMORY_FAILURE_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index aa441f593e9a..a6e5a44c9b42 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -213,6 +213,7 @@ IF_HAVE_PG_ARCH_3(arch_3) {VM_UFFD_MISSING, "uffd_missing" }, \ IF_HAVE_UFFD_MINOR(VM_UFFD_MINOR, "uffd_minor" ) \ {VM_PFNMAP, "pfnmap" }, \ + {VM_MAYBE_GUARD, "maybe_guard" }, \ {VM_UFFD_WP, "uffd_wp" }, \ {VM_LOCKED, "locked" }, \ {VM_IO, "io" }, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 8e193f3a33b3..0dd7f2b33431 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -16,6 +16,9 @@ * @name: name of the syscall * @syscall_nr: number of the syscall * @nb_args: number of parameters it takes + * @user_arg_is_str: set if the arg for @user_arg_size is a string + * @user_arg_size: holds @arg that has size of the user space to read + * @user_mask: mask of @args that will read user space * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) * @enter_fields: list of fields for syscall_enter trace event @@ -25,7 +28,10 @@ struct syscall_metadata { const char *name; int syscall_nr; - int nb_args; + u8 nb_args:7; + u8 user_arg_is_str:1; + s8 user_arg_size; + short user_mask; const char 
**types; const char **args; struct list_head enter_fields; diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 5e277fd955aa..aadfbf6e0cb3 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -4,11 +4,7 @@ #include <asm/fcntl.h> #include <linux/openat2.h> -#ifdef __KERNEL__ #include <linux/types.h> -#else -#include <stdint.h> -#endif #define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0) #define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1) @@ -90,9 +86,9 @@ /* Argument structure for F_GETDELEG and F_SETDELEG */ struct delegation { - uint32_t d_flags; /* Must be 0 */ - uint16_t d_type; /* F_RDLCK, F_WRLCK, F_UNLCK */ - uint16_t __pad; /* Must be 0 */ + __u32 d_flags; /* Must be 0 */ + __u16 d_type; /* F_RDLCK, F_WRLCK, F_UNLCK */ + __u16 __pad; /* Must be 0 */ }; /* diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 52f6000ab020..dddb781b0507 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -179,6 +179,7 @@ struct kvm_xen_exit { #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 #define KVM_EXIT_TDX 40 +#define KVM_EXIT_ARM_SEA 41 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -473,6 +474,14 @@ struct kvm_run { } setup_event_notify; }; } tdx; + /* KVM_EXIT_ARM_SEA */ + struct { +#define KVM_EXIT_ARM_SEA_FLAG_GPA_VALID (1ULL << 0) + __u64 flags; + __u64 esr; + __u64 gva; + __u64 gpa; + } arm_sea; /* Fix the size of the union. 
*/ char padding[256]; }; @@ -963,6 +972,8 @@ struct kvm_enable_cap { #define KVM_CAP_RISCV_MP_STATE_RESET 242 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 #define KVM_CAP_GUEST_MEMFD_FLAGS 244 +#define KVM_CAP_ARM_SEA_TO_USER 245 +#define KVM_CAP_S390_USER_OPEREXEC 246 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index bb575f3ab45e..638ca21b7a90 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -103,5 +103,6 @@ #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ #define PID_FS_MAGIC 0x50494446 /* "PIDF" */ +#define GUEST_MEMFD_MAGIC 0x474d454d /* "GMEM" */ #endif /* __LINUX_MAGIC_H__ */ diff --git a/include/uapi/linux/media/amlogic/c3-isp-config.h b/include/uapi/linux/media/amlogic/c3-isp-config.h index 0a3c1cc55ccb..92db5dcdda18 100644 --- a/include/uapi/linux/media/amlogic/c3-isp-config.h +++ b/include/uapi/linux/media/amlogic/c3-isp-config.h @@ -186,7 +186,7 @@ enum c3_isp_params_block_type { #define C3_ISP_PARAMS_BLOCK_FL_ENABLE V4L2_ISP_PARAMS_FL_BLOCK_ENABLE /** - * struct c3_isp_params_block_header - C3 ISP parameter block header + * c3_isp_params_block_header - C3 ISP parameter block header * * This structure represents the common part of all the ISP configuration * blocks and is identical to :c:type:`v4l2_isp_params_block_header`. 
diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index 245a6a829ce9..ab8f6c07b5a2 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -651,6 +651,11 @@ struct ufs_dev_info { u8 rtt_cap; /* bDeviceRTTCap */ bool hid_sup; + + /* Unique device ID string (manufacturer+model+serial+version+date) */ + char *device_id; + u8 rpmb_io_size; + u8 rpmb_region_size[4]; }; #endif /* End of Header */ diff --git a/include/ufs/ufs_quirks.h b/include/ufs/ufs_quirks.h index 83563247c36c..e9c59ec1ceae 100644 --- a/include/ufs/ufs_quirks.h +++ b/include/ufs/ufs_quirks.h @@ -101,13 +101,6 @@ struct ufs_dev_quirk { #define UFS_DEVICE_QUIRK_SUPPORT_EXTENDED_FEATURES (1 << 10) /* - * Some UFS devices require delay after VCC power rail is turned-off. - * Enable this quirk to introduce 5ms delays after VCC power-off during - * suspend flow. - */ -#define UFS_DEVICE_QUIRK_DELAY_AFTER_LPM (1 << 11) - -/* * Some ufs devices may need more time to be in hibern8 before exiting. * Enable this quirk to give it an additional 100us. */ diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 0f95576bf1f6..19154228780b 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -78,7 +78,7 @@ struct uic_command { const u32 argument1; u32 argument2; u32 argument3; - int cmd_active; + bool cmd_active; struct completion done; }; @@ -161,7 +161,6 @@ struct ufs_pm_lvl_states { * @ucd_prdt_dma_addr: PRDT dma address for debug * @ucd_rsp_dma_addr: UPIU response dma address for debug * @ucd_req_dma_addr: UPIU request dma address for debug - * @cmd: pointer to SCSI command * @scsi_status: SCSI status of the command * @command_type: SCSI, UFS, Query. 
* @task_tag: Task tag of the command @@ -186,11 +185,9 @@ struct ufshcd_lrb { dma_addr_t ucd_rsp_dma_addr; dma_addr_t ucd_prdt_dma_addr; - struct scsi_cmnd *cmd; int scsi_status; int command_type; - int task_tag; u8 lun; /* UPIU LUN id field is only 8-bit wide */ bool intr_cmd; bool req_abort_skip; @@ -239,13 +236,11 @@ struct ufs_query { * struct ufs_dev_cmd - all assosiated fields with device management commands * @type: device management command type - Query, NOP OUT * @lock: lock to allow one command at a time - * @complete: internal commands completion * @query: Device management query information */ struct ufs_dev_cmd { enum dev_cmd_type type; struct mutex lock; - struct completion complete; struct ufs_query query; }; @@ -833,6 +828,7 @@ enum ufshcd_mcq_opr { * @host: Scsi_Host instance of the driver * @dev: device handle * @ufs_device_wlun: WLUN that controls the entire UFS device. + * @ufs_rpmb_wlun: RPMB WLUN SCSI device * @hwmon_device: device instance registered with the hwmon core. * @curr_dev_pwr_mode: active UFS device power mode. * @uic_link_state: active state of the link to the UFS device. @@ -840,7 +836,6 @@ enum ufshcd_mcq_opr { * @spm_lvl: desired UFS power management level during system PM. * @pm_op_in_progress: whether or not a PM operation is in progress. * @ahit: value of Auto-Hibernate Idle Timer register. - * @lrb: local reference block * @outstanding_tasks: Bits representing outstanding task requests * @outstanding_lock: Protects @outstanding_reqs. * @outstanding_reqs: Bits representing outstanding transfer requests @@ -849,7 +844,6 @@ enum ufshcd_mcq_opr { * @nutrs: Transfer Request Queue depth supported by controller * @nortt - Max outstanding RTTs supported by controller * @nutmrs: Task Management Queue depth supported by controller - * @reserved_slot: Used to submit device commands. Protected by @dev_cmd.lock. 
* @ufs_version: UFS Version to which controller complies * @vops: pointer to variant specific operations * @vps: pointer to variant specific parameters @@ -940,7 +934,6 @@ enum ufshcd_mcq_opr { * @res: array of resource info of MCQ registers * @mcq_base: Multi circular queue registers base address * @uhq: array of supported hardware queues - * @dev_cmd_queue: Queue for issuing device management commands * @mcq_opr: MCQ operation and runtime registers * @ufs_rtc_update_work: A work for UFS RTC periodic update * @pm_qos_req: PM QoS request handle @@ -948,8 +941,8 @@ enum ufshcd_mcq_opr { * @pm_qos_mutex: synchronizes PM QoS request and status updates * @critical_health_count: count of critical health exceptions * @dev_lvl_exception_count: count of device level exceptions since last reset - * @dev_lvl_exception_id: vendor specific information about the - * device level exception event. + * @dev_lvl_exception_id: vendor specific information about the device level exception event. + * @rpmbs: list of OP-TEE RPMB devices (one per RPMB region) */ struct ufs_hba { void __iomem *mmio_base; @@ -967,6 +960,7 @@ struct ufs_hba { struct Scsi_Host *host; struct device *dev; struct scsi_device *ufs_device_wlun; + struct scsi_device *ufs_rpmb_wlun; #ifdef CONFIG_SCSI_UFS_HWMON struct device *hwmon_device; @@ -983,8 +977,6 @@ struct ufs_hba { /* Auto-Hibernate Idle Timer register value */ u32 ahit; - struct ufshcd_lrb *lrb; - unsigned long outstanding_tasks; spinlock_t outstanding_lock; unsigned long outstanding_reqs; @@ -994,7 +986,6 @@ struct ufs_hba { int nortt; u32 mcq_capabilities; int nutmrs; - u32 reserved_slot; u32 ufs_version; const struct ufs_hba_variant_ops *vops; struct ufs_hba_variant_params *vps; @@ -1112,7 +1103,6 @@ struct ufs_hba { bool mcq_esi_enabled; void __iomem *mcq_base; struct ufs_hw_queue *uhq; - struct ufs_hw_queue *dev_cmd_queue; struct ufshcd_mcq_opr_info_t mcq_opr[OPR_MAX]; struct delayed_work ufs_rtc_update_work; @@ -1124,6 +1114,8 @@ struct ufs_hba { 
int critical_health_count; atomic_t dev_lvl_exception_count; u64 dev_lvl_exception_id; + u32 vcc_off_delay_us; + struct list_head rpmbs; }; /** @@ -1302,7 +1294,6 @@ static inline void ufshcd_rmwl(struct ufs_hba *hba, u32 mask, u32 val, u32 reg) void ufshcd_enable_irq(struct ufs_hba *hba); void ufshcd_disable_irq(struct ufs_hba *hba); -void ufshcd_enable_intr(struct ufs_hba *hba, u32 intrs); int ufshcd_alloc_host(struct device *, struct ufs_hba **); int ufshcd_hba_enable(struct ufs_hba *hba); int ufshcd_init(struct ufs_hba *, void __iomem *, unsigned int); @@ -1438,10 +1429,6 @@ static inline int ufshcd_disable_host_tx_lcc(struct ufs_hba *hba) void ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit); void ufshcd_fixup_dev_quirks(struct ufs_hba *hba, const struct ufs_dev_quirk *fixups); -#define SD_ASCII_STD true -#define SD_RAW false -int ufshcd_read_string_desc(struct ufs_hba *hba, u8 desc_index, - u8 **buf, bool ascii); void ufshcd_hold(struct ufs_hba *hba); void ufshcd_release(struct ufs_hba *hba); @@ -1494,5 +1481,7 @@ int ufshcd_write_ee_control(struct ufs_hba *hba); int ufshcd_update_ee_control(struct ufs_hba *hba, u16 *mask, const u16 *other_mask, u16 set, u16 clr); void ufshcd_force_error_recovery(struct ufs_hba *hba); +void ufshcd_pm_qos_update(struct ufs_hba *hba, bool on); +u32 ufshcd_us_to_ahit(unsigned int timer); #endif /* End of Header */ diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index e64b70132101..d36df24242a3 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -83,12 +83,14 @@ enum { }; enum { + /* Submission Queue (SQ) Configuration Registers */ REG_SQATTR = 0x0, REG_SQLBA = 0x4, REG_SQUBA = 0x8, REG_SQDAO = 0xC, REG_SQISAO = 0x10, + /* Completion Queue (CQ) Configuration Registers */ REG_CQATTR = 0x20, REG_CQLBA = 0x24, REG_CQUBA = 0x28, @@ -96,6 +98,7 @@ enum { REG_CQISAO = 0x30, }; +/* Operation and Runtime Registers - Submission Queues and Completion Queues */ enum { REG_SQHP = 0x0, REG_SQTP = 0x4, @@ -569,10 
+572,26 @@ struct cq_entry { __le16 prd_table_offset; /* DW 4 */ - __le32 status; + u8 overall_status; + u8 extended_error_code; + __le16 reserved_1; - /* DW 5-7 */ - __le32 reserved[3]; + /* DW 5 */ + u8 task_tag; + u8 lun; +#if defined(__BIG_ENDIAN) + u8 ext_iid:4; + u8 iid:4; +#elif defined(__LITTLE_ENDIAN) + u8 iid:4; + u8 ext_iid:4; +#else +#error +#endif + u8 reserved_2; + + /* DW 6-7 */ + __le32 reserved_3[2]; }; static_assert(sizeof(struct cq_entry) == 32); diff --git a/include/ufs/unipro.h b/include/ufs/unipro.h index 360e1245fb40..59de737490ca 100644 --- a/include/ufs/unipro.h +++ b/include/ufs/unipro.h @@ -111,6 +111,9 @@ #define PA_TXLINKSTARTUPHS 0x1544 #define PA_AVAILRXDATALANES 0x1540 #define PA_MINRXTRAILINGCLOCKS 0x1543 +#define PA_TXHSG1SYNCLENGTH 0x1552 +#define PA_TXHSG2SYNCLENGTH 0x1554 +#define PA_TXHSG3SYNCLENGTH 0x1556 #define PA_LOCAL_TX_LCC_ENABLE 0x155E #define PA_ACTIVETXDATALANES 0x1560 #define PA_CONNECTEDTXDATALANES 0x1561 @@ -160,7 +163,9 @@ #define PA_PACPFRAMECOUNT 0x15C0 #define PA_PACPERRORCOUNT 0x15C1 #define PA_PHYTESTCONTROL 0x15C2 -#define PA_TXHSADAPTTYPE 0x15D4 +#define PA_TXHSG4SYNCLENGTH 0x15D0 +#define PA_TXHSADAPTTYPE 0x15D4 +#define PA_TXHSG5SYNCLENGTH 0x15D6 /* Adpat type for PA_TXHSADAPTTYPE attribute */ #define PA_REFRESH_ADAPT 0x00 @@ -174,6 +179,7 @@ #define VS_POWERSTATE 0xD083 #define VS_MPHYCFGUPDT 0xD085 #define VS_DEBUGOMC 0xD09E +#define VS_MPHYDISABLE 0xD0C1 #define PA_GRANULARITY_MIN_VAL 1 #define PA_GRANULARITY_MAX_VAL 6 |
