From 2a2848e7c2fde1c26ff46998ac10f7bf9ca2de04 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 8 Apr 2022 09:40:09 +0530 Subject: arm64/mm: Compute PTRS_PER_[PMD|PUD] independently of PTRS_PER_PTE Possible page table entries (or pointers) on non-zero page table levels are dependent on a single page size i.e PAGE_SIZE and size required for each individual page table entry i.e 8 bytes. PTRS_PER_[PMD|PUD] as such are not related to PTRS_PER_PTE in any manner, as being implied currently. So lets just make this very explicit and compute these macros independently. Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20220408041009.1259701-1-anshuman.khandual@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable-hwdef.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 66671ff05183..dd3d12bce07b 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -49,7 +49,7 @@ #define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -#define PTRS_PER_PMD PTRS_PER_PTE +#define PTRS_PER_PMD (1 << (PAGE_SHIFT - 3)) #endif /* @@ -59,7 +59,7 @@ #define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) -#define PTRS_PER_PUD PTRS_PER_PTE +#define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3)) #endif /* -- cgit From 96bb1530c4f9039996bf95d28dcc957861558696 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 13 Apr 2022 15:59:07 +0100 Subject: arm64: stacktrace: make struct stackframe private to stacktrace.c Now that arm64 uses arch_stack_walk() consistently, struct stackframe is only used within stacktrace.c. To make it easier to read and maintain this code, it would be nicer if the definition were there too. Move the definition into stacktrace.c. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Madhavan T. Venkataraman Cc: Mark Brown Cc: Will Deacon Reviewed-by: Madhavan T. Venkataraman Reviwed-by: Mark Brown Reviewed-by: Kalesh Singh for the series. Link: https://lore.kernel.org/r/20220413145910.3060139-4-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/stacktrace.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index e77cdef9ca29..aec9315bf156 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -31,38 +31,6 @@ struct stack_info { enum stack_type type; }; -/* - * A snapshot of a frame record or fp/lr register values, along with some - * accounting information necessary for robust unwinding. - * - * @fp: The fp value in the frame record (or the real fp) - * @pc: The lr value in the frame record (or the real lr) - * - * @stacks_done: Stacks which have been entirely unwound, for which it is no - * longer valid to unwind to. - * - * @prev_fp: The fp that pointed to this frame record, or a synthetic value - * of 0. This is used to ensure that within a stack, each - * subsequent frame record is at an increasing address. - * @prev_type: The type of stack this frame record was on, or a synthetic - * value of STACK_TYPE_UNKNOWN. This is used to detect a - * transition from one stack to another. - * - * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance - * associated with the most recently encountered replacement lr - * value. - */ -struct stackframe { - unsigned long fp; - unsigned long pc; - DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES); - unsigned long prev_fp; - enum stack_type prev_type; -#ifdef CONFIG_KRETPROBES - struct llist_node *kr_cur; -#endif -}; - extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, const char *loglvl); -- cgit From b4adc83b07706042ad6e6a767f6c04636db69bcc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:13 +0100 Subject: arm64/sme: System register and exception syndrome definitions The arm64 Scalable Matrix Extension (SME) adds some new system registers, fields in existing system registers and exception syndromes. This patch adds definitions for these for use in future patches implementing support for this extension. Since SME will be the first user of FEAT_HCX in the kernel also include the definitions for enumerating it and the HCRX system register it adds. Signed-off-by: Mark Brown Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-6-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 12 ++++++- arch/arm64/include/asm/kvm_arm.h | 1 + arch/arm64/include/asm/sysreg.h | 67 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d52a0b269ee8..43872e0cfd1e 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -37,7 +37,8 @@ #define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ /* Unallocated EC: 0x1B */ #define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */ -/* Unallocated EC: 0x1D - 0x1E */ +#define ESR_ELx_EC_SME (0x1D) +/* Unallocated EC: 0x1E */ #define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ #define ESR_ELx_EC_IABT_LOW (0x20) #define ESR_ELx_EC_IABT_CUR (0x21) @@ -327,6 +328,15 @@ #define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ ESR_ELx_CP15_32_ISS_DIR_READ) +/* + * ISS values for SME traps + */ + +#define ESR_ELx_SME_ISS_SME_DISABLED 0 +#define ESR_ELx_SME_ISS_ILL 1 +#define ESR_ELx_SME_ISS_SM_DISABLED 2 +#define ESR_ELx_SME_ISS_ZA_DISABLED 3 + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 1767ded83888..13ae232ec4a1 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -279,6 +279,7 @@ #define CPTR_EL2_TCPAC (1U << 31) #define CPTR_EL2_TAM (1 << 30) #define CPTR_EL2_TTA (1 << 20) +#define CPTR_EL2_TSM (1 << 12) #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) #define CPTR_EL2_TZ (1 << 8) #define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index fbf5f8bb9055..bebfdd27296a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -118,6 +118,10 @@ * System registers, organised loosely by encoding but grouped together * where the architected name contains an index. e.g. ID_MMFR_EL1. */ +#define SYS_SVCR_SMSTOP_SM_EL0 sys_reg(0, 3, 4, 2, 3) +#define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3) +#define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3) + #define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) #define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) #define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) @@ -181,6 +185,7 @@ #define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) #define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) #define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4) +#define SYS_ID_AA64SMFR0_EL1 sys_reg(3, 0, 0, 4, 5) #define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) #define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) @@ -204,6 +209,8 @@ #define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0) #define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) +#define SYS_SMPRI_EL1 sys_reg(3, 0, 1, 2, 4) +#define SYS_SMCR_EL1 sys_reg(3, 0, 1, 2, 6) #define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0) #define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1) @@ -396,6 +403,8 @@ #define TRBIDR_ALIGN_MASK GENMASK(3, 0) #define TRBIDR_ALIGN_SHIFT 0 +#define SMPRI_EL1_PRIORITY_MASK 0xf + #define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) #define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) @@ -451,8 +460,13 @@ #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) #define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) #define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4) +#define SYS_SMIDR_EL1 sys_reg(3, 1, 0, 0, 6) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) +#define SYS_SMIDR_EL1_IMPLEMENTER_SHIFT 24 +#define SYS_SMIDR_EL1_SMPS_SHIFT 15 +#define SYS_SMIDR_EL1_AFFINITY_SHIFT 0 + #define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0) #define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) @@ -461,6 +475,10 @@ #define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) #define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) +#define SYS_SVCR_EL0 sys_reg(3, 3, 4, 2, 2) +#define SYS_SVCR_EL0_ZA_MASK 2 +#define SYS_SVCR_EL0_SM_MASK 1 + #define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0) #define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1) #define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) @@ -477,6 +495,7 @@ #define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) #define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) +#define SYS_TPIDR2_EL0 sys_reg(3, 3, 13, 0, 5) #define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7) @@ -546,6 +565,9 @@ #define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) +#define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2) +#define SYS_SMPRIMAP_EL2 sys_reg(3, 4, 1, 2, 5) +#define SYS_SMCR_EL2 sys_reg(3, 4, 1, 2, 6) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) #define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) #define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) @@ -605,6 +627,7 @@ #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) #define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) #define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) +#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) #define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) #define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) @@ -628,6 +651,7 @@ #define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2) /* Common SCTLR_ELx flags. */ +#define SCTLR_ELx_ENTP2 (BIT(60)) #define SCTLR_ELx_DSSBS (BIT(44)) #define SCTLR_ELx_ATA (BIT(43)) @@ -836,6 +860,7 @@ #define ID_AA64PFR0_ELx_32BIT_64BIT 0x2 /* id_aa64pfr1 */ +#define ID_AA64PFR1_SME_SHIFT 24 #define ID_AA64PFR1_MPAMFRAC_SHIFT 16 #define ID_AA64PFR1_RASFRAC_SHIFT 12 #define ID_AA64PFR1_MTE_SHIFT 8 @@ -846,6 +871,7 @@ #define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 #define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 #define ID_AA64PFR1_BT_BTI 0x1 +#define ID_AA64PFR1_SME 1 #define ID_AA64PFR1_MTE_NI 0x0 #define ID_AA64PFR1_MTE_EL0 0x1 @@ -874,6 +900,23 @@ #define ID_AA64ZFR0_AES_PMULL 0x2 #define ID_AA64ZFR0_SVEVER_SVE2 0x1 +/* id_aa64smfr0 */ +#define ID_AA64SMFR0_FA64_SHIFT 63 +#define ID_AA64SMFR0_I16I64_SHIFT 52 +#define ID_AA64SMFR0_F64F64_SHIFT 48 +#define ID_AA64SMFR0_I8I32_SHIFT 36 +#define ID_AA64SMFR0_F16F32_SHIFT 35 +#define ID_AA64SMFR0_B16F32_SHIFT 34 +#define ID_AA64SMFR0_F32F32_SHIFT 32 + +#define ID_AA64SMFR0_FA64 0x1 +#define ID_AA64SMFR0_I16I64 0x4 +#define ID_AA64SMFR0_F64F64 0x1 +#define ID_AA64SMFR0_I8I32 0x4 +#define ID_AA64SMFR0_F16F32 0x1 +#define ID_AA64SMFR0_B16F32 0x1 +#define ID_AA64SMFR0_F32F32 0x1 + /* id_aa64mmfr0 */ #define ID_AA64MMFR0_ECV_SHIFT 60 #define ID_AA64MMFR0_FGT_SHIFT 56 @@ -926,6 +969,7 @@ /* id_aa64mmfr1 */ #define ID_AA64MMFR1_ECBHB_SHIFT 60 +#define ID_AA64MMFR1_HCX_SHIFT 40 #define ID_AA64MMFR1_AFP_SHIFT 44 #define ID_AA64MMFR1_ETS_SHIFT 36 #define ID_AA64MMFR1_TWED_SHIFT 32 @@ -1119,9 +1163,24 @@ #define ZCR_ELx_LEN_SIZE 9 #define ZCR_ELx_LEN_MASK 0x1ff +#define SMCR_ELx_FA64_SHIFT 31 +#define SMCR_ELx_FA64_MASK (1 << SMCR_ELx_FA64_SHIFT) + +/* + * The SMCR_ELx_LEN_* definitions intentionally include bits [8:4] which + * are reserved by the SME architecture for future expansion of the LEN + * field, with compatible semantics. + */ +#define SMCR_ELx_LEN_SHIFT 0 +#define SMCR_ELx_LEN_SIZE 9 +#define SMCR_ELx_LEN_MASK 0x1ff + #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ +#define CPACR_EL1_SMEN_EL1EN (BIT(24)) /* enable EL1 access */ +#define CPACR_EL1_SMEN_EL0EN (BIT(25)) /* enable EL0 access, if EL1EN set */ + #define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ @@ -1170,6 +1229,8 @@ #define TRFCR_ELx_ExTRE BIT(1) #define TRFCR_ELx_E0TRE BIT(0) +/* HCRX_EL2 definitions */ +#define HCRX_EL2_SMPME_MASK (1 << 5) /* GIC Hypervisor interface registers */ /* ICH_MISR_EL2 bit definitions */ @@ -1233,6 +1294,12 @@ #define ICH_VTR_TDS_SHIFT 19 #define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT) +/* HFG[WR]TR_EL2 bit definitions */ +#define HFGxTR_EL2_nTPIDR2_EL0_SHIFT 55 +#define HFGxTR_EL2_nTPIDR2_EL0_MASK BIT_MASK(HFGxTR_EL2_nTPIDR2_EL0_SHIFT) +#define HFGxTR_EL2_nSMPRI_EL1_SHIFT 54 +#define HFGxTR_EL2_nSMPRI_EL1_MASK BIT_MASK(HFGxTR_EL2_nSMPRI_EL1_SHIFT) + #define ARM64_FEATURE_FIELD_BITS 4 /* Create a mask for the feature bits of the specified feature. */ -- cgit From ca8a4ebcff4465f0272637433c789a5e4a272626 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:14 +0100 Subject: arm64/sme: Manually encode SME instructions As with SVE rather than impose ambitious toolchain requirements for SME we manually encode the few instructions which we require in order to perform the work the kernel needs to do. The instructions used to save and restore context are provided as assembler macros while those for entering and leaving streaming mode are done in asm volatile blocks since they are expected to be used from C. We could do the SMSTART and SMSTOP operations with read/modify/write cycles on SVCR but using the aliases provided for individual field accesses should be slightly faster. These instructions are aliases for MSR but since our minimum toolchain requirements are old enough to mean that we can't use the sX_X_cX_cX_X form and they always use xzr rather than taking a value like write_sysreg_s() wants we just use .inst. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-7-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 25 ++++++++++++++++ arch/arm64/include/asm/fpsimdmacros.h | 54 +++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index cb24385e3632..6e2dc9dcbf49 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -249,6 +249,31 @@ static inline void sve_setup(void) { } #endif /* ! CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +static inline void sme_smstart_sm(void) +{ + asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr")); +} + +static inline void sme_smstop_sm(void) +{ + asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr")); +} + +static inline void sme_smstop(void) +{ + asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr")); +} + +#else + +static inline void sme_smstart_sm(void) { } +static inline void sme_smstop_sm(void) { } +static inline void sme_smstop(void) { } + +#endif /* ! CONFIG_ARM64_SME */ + /* For use by EFI runtime services calls only */ extern void __efi_fpsimd_begin(void); extern void __efi_fpsimd_end(void); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 2509d7dde55a..2e9a33155081 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -93,6 +93,12 @@ .endif .endm +.macro _sme_check_wv v + .if (\v) < 12 || (\v) > 15 + .error "Bad vector select register \v." + .endif +.endm + /* SVE instruction encodings for non-SVE-capable assemblers */ /* (pre binutils 2.28, all kernel capable clang versions support SVE) */ @@ -174,6 +180,54 @@ | (\np) .endm +/* SME instruction encodings for non-SME-capable assemblers */ +/* (pre binutils 2.38/LLVM 13) */ + +/* RDSVL X\nx, #\imm */ +.macro _sme_rdsvl nx, imm + _check_general_reg \nx + _check_num (\imm), -0x20, 0x1f + .inst 0x04bf5800 \ + | (\nx) \ + | (((\imm) & 0x3f) << 5) +.endm + +/* + * STR (vector from ZA array): + * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _sme_str_zav nw, nxbase, offset=0 + _sme_check_wv \nw + _check_general_reg \nxbase + _check_num (\offset), -0x100, 0xff + .inst 0xe1200000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +/* + * LDR (vector to ZA array): + * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _sme_ldr_zav nw, nxbase, offset=0 + _sme_check_wv \nw + _check_general_reg \nxbase + _check_num (\offset), -0x100, 0xff + .inst 0xe1000000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +/* + * Zero the entire ZA array + * ZERO ZA + */ +.macro zero_za + .inst 0xc00800ff +.endm + .macro __for from:req, to:req .if (\from) == (\to) _for__body %\from -- cgit From b2cf6a23289b3268cc7915a09c0c8372147b2727 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:15 +0100 Subject: arm64/sme: Early CPU setup for SME SME requires similar setup to that for SVE: disable traps to EL2 and make sure that the maximum vector length is available to EL1, for SME we have two traps - one for SME itself and one for TPIDR2. In addition since we currently make no active use of priority control for SCMUs we map all SME priorities lower ELs may configure to 0, the architecture specified minimum priority, to ensure that nothing we manage is able to configure itself to consume excessive resources. This will need to be revisited should there be a need to manage SME priorities at runtime. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-8-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/el2_setup.h | 64 +++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 4 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index c31be7eda9df..fabdbde0fe02 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -143,6 +143,50 @@ .Lskip_sve_\@: .endm +/* SME register access and priority mapping */ +.macro __init_el2_nvhe_sme + mrs x1, id_aa64pfr1_el1 + ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4 + cbz x1, .Lskip_sme_\@ + + bic x0, x0, #CPTR_EL2_TSM // Also disable SME traps + msr cptr_el2, x0 // Disable copro. traps to EL2 + isb + + mrs x1, sctlr_el2 + orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps + msr sctlr_el2, x1 + isb + + mov x1, #0 // SMCR controls + + mrs_s x2, SYS_ID_AA64SMFR0_EL1 + ubfx x2, x2, #ID_AA64SMFR0_FA64_SHIFT, #1 // Full FP in SM? + cbz x2, .Lskip_sme_fa64_\@ + + orr x1, x1, SMCR_ELx_FA64_MASK +.Lskip_sme_fa64_\@: + + orr x1, x1, #SMCR_ELx_LEN_MASK // Enable full SME vector + msr_s SYS_SMCR_EL2, x1 // length for EL1. + + mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported? + ubfx x1, x1, #SYS_SMIDR_EL1_SMPS_SHIFT, #1 + cbz x1, .Lskip_sme_\@ + + msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal + + mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? + ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4 + cbz x1, .Lskip_sme_\@ + + mrs_s x1, SYS_HCRX_EL2 + orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping + msr_s SYS_HCRX_EL2, x1 + +.Lskip_sme_\@: +.endm + /* Disable any fine grained traps */ .macro __init_el2_fgt mrs x1, id_aa64mmfr0_el1 @@ -153,15 +197,26 @@ mrs x1, id_aa64dfr0_el1 ubfx x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 cmp x1, #3 - b.lt .Lset_fgt_\@ + b.lt .Lset_debug_fgt_\@ /* Disable PMSNEVFR_EL1 read and write traps */ orr x0, x0, #(1 << 62) -.Lset_fgt_\@: +.Lset_debug_fgt_\@: msr_s SYS_HDFGRTR_EL2, x0 msr_s SYS_HDFGWTR_EL2, x0 - msr_s SYS_HFGRTR_EL2, xzr - msr_s SYS_HFGWTR_EL2, xzr + + mov x0, xzr + mrs x1, id_aa64pfr1_el1 + ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4 + cbz x1, .Lset_fgt_\@ + + /* Disable nVHE traps of TPIDR2 and SMPRI */ + orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK + orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK + +.Lset_fgt_\@: + msr_s SYS_HFGRTR_EL2, x0 + msr_s SYS_HFGWTR_EL2, x0 msr_s SYS_HFGITR_EL2, xzr mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU @@ -196,6 +251,7 @@ __init_el2_nvhe_idregs __init_el2_nvhe_cptr __init_el2_nvhe_sve + __init_el2_nvhe_sme __init_el2_fgt __init_el2_nvhe_prepare_eret .endm -- cgit From 5e64b862c4823ab53aac028042abd918c2f27041 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:16 +0100 Subject: arm64/sme: Basic enumeration support This patch introduces basic cpufeature support for discovering the presence of the Scalable Matrix Extension. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-9-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpu.h | 1 + arch/arm64/include/asm/cpufeature.h | 12 ++++++++++++ arch/arm64/include/asm/fpsimd.h | 2 ++ arch/arm64/include/asm/hwcap.h | 8 ++++++++ 4 files changed, 23 insertions(+) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index a58e366f0b07..d08062bcb9c1 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -58,6 +58,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; u64 reg_id_aa64zfr0; + u64 reg_id_aa64smfr0; struct cpuinfo_32bit aarch32; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index c62e7e5e2f0c..8ac12e4094aa 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -759,6 +759,18 @@ static __always_inline bool system_supports_sve(void) cpus_have_const_cap(ARM64_SVE); } +static __always_inline bool system_supports_sme(void) +{ + return IS_ENABLED(CONFIG_ARM64_SME) && + cpus_have_const_cap(ARM64_SME); +} + +static __always_inline bool system_supports_fa64(void) +{ + return IS_ENABLED(CONFIG_ARM64_SME) && + cpus_have_const_cap(ARM64_SME_FA64); +} + static __always_inline bool system_supports_cnp(void) { return IS_ENABLED(CONFIG_ARM64_CNP) && diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 6e2dc9dcbf49..2e8ef00e7520 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -74,6 +74,8 @@ extern void sve_set_vq(unsigned long vq_minus_1); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); +extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused); +extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern u64 read_zcr_features(void); diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 8db5ec0089db..9f0ce004fdbc 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -109,6 +109,14 @@ #define KERNEL_HWCAP_AFP __khwcap2_feature(AFP) #define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES) #define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3) +#define KERNEL_HWCAP_SME __khwcap2_feature(SME) +#define KERNEL_HWCAP_SME_I16I64 __khwcap2_feature(SME_I16I64) +#define KERNEL_HWCAP_SME_F64F64 __khwcap2_feature(SME_F64F64) +#define KERNEL_HWCAP_SME_I8I32 __khwcap2_feature(SME_I8I32) +#define KERNEL_HWCAP_SME_F16F32 __khwcap2_feature(SME_F16F32) +#define KERNEL_HWCAP_SME_B16F32 __khwcap2_feature(SME_B16F32) +#define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32) +#define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64) /* * This yields a mask that user programs can use to figure out what -- cgit From b42990d3bf77cc29d7c33e21518c1f806dae6b21 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:17 +0100 Subject: arm64/sme: Identify supported SME vector lengths at boot The vector lengths used for SME are controlled through a similar set of registers to those for SVE and enumerated using a similar algorithm with some slight differences due to the fact that unlike SVE there are no restrictions on which combinations of vector lengths can be supported nor any mandatory vector lengths which must be implemented. Add a new vector type and implement support for enumerating it. One slightly awkward feature is that we need to read the current vector length using a different instruction (or enter streaming mode which would have the same issue and be higher cost). Rather than add an ops structure we add special cases directly in the otherwise generic vec_probe_vqs() function, this is a bit inelegant but it's the only place where this is an issue. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-10-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpu.h | 3 +++ arch/arm64/include/asm/cpufeature.h | 7 +++++++ arch/arm64/include/asm/fpsimd.h | 26 ++++++++++++++++++++++++++ arch/arm64/include/asm/processor.h | 1 + 4 files changed, 37 insertions(+) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index d08062bcb9c1..115cdec1ae87 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -64,6 +64,9 @@ struct cpuinfo_arm64 { /* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */ u64 reg_zcr; + + /* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */ + u64 reg_smcr; }; DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8ac12e4094aa..5ddfae233ea5 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -622,6 +622,13 @@ static inline bool id_aa64pfr0_sve(u64 pfr0) return val > 0; } +static inline bool id_aa64pfr1_sme(u64 pfr1) +{ + u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT); + + return val > 0; +} + static inline bool id_aa64pfr1_mte(u64 pfr1) { u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT); diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 2e8ef00e7520..32cd682258d9 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -78,6 +78,7 @@ extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern u64 read_zcr_features(void); +extern u64 read_smcr_features(void); /* * Helpers to translate bit indices in sve_vq_map to VQ values (and @@ -172,6 +173,12 @@ static inline void write_vl(enum vec_type type, u64 val) tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK; write_sysreg_s(tmp | val, SYS_ZCR_EL1); break; +#endif +#ifdef CONFIG_ARM64_SME + case ARM64_VEC_SME: + tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK; + write_sysreg_s(tmp | val, SYS_SMCR_EL1); + break; #endif default: WARN_ON_ONCE(1); @@ -268,12 +275,31 @@ static inline void sme_smstop(void) asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr")); } +extern void __init sme_setup(void); + +static inline int sme_max_vl(void) +{ + return vec_max_vl(ARM64_VEC_SME); +} + +static inline int sme_max_virtualisable_vl(void) +{ + return vec_max_virtualisable_vl(ARM64_VEC_SME); +} + +extern unsigned int sme_get_vl(void); + #else static inline void sme_smstart_sm(void) { } static inline void sme_smstop_sm(void) { } static inline void sme_smstop(void) { } +static inline void sme_setup(void) { } +static inline unsigned int sme_get_vl(void) { return 0; } +static inline int sme_max_vl(void) { return 0; } +static inline int sme_max_virtualisable_vl(void) { return 0; } + #endif /* ! CONFIG_ARM64_SME */ /* For use by EFI runtime services calls only */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 73e38d9a540c..abf34a9c2eab 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -118,6 +118,7 @@ struct debug_info { enum vec_type { ARM64_VEC_SVE = 0, + ARM64_VEC_SME, ARM64_VEC_MAX, }; -- cgit From 9e4ab6c89109472082616f8d2f6ada7deaffe161 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:19 +0100 Subject: arm64/sme: Implement vector length configuration prctl()s As for SVE provide a prctl() interface which allows processes to configure their SME vector length. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-12-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 4 ++++ arch/arm64/include/asm/processor.h | 4 +++- arch/arm64/include/asm/thread_info.h | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 32cd682258d9..38fd6aab7feb 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -288,6 +288,8 @@ static inline int sme_max_virtualisable_vl(void) } extern unsigned int sme_get_vl(void); +extern int sme_set_current_vl(unsigned long arg); +extern int sme_get_current_vl(void); #else @@ -299,6 +301,8 @@ static inline void sme_setup(void) { } static inline unsigned int sme_get_vl(void) { return 0; } static inline int sme_max_vl(void) { return 0; } static inline int sme_max_virtualisable_vl(void) { return 0; } +static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } +static inline int sme_get_current_vl(void) { return -EINVAL; } #endif /* ! CONFIG_ARM64_SME */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index abf34a9c2eab..7a57cbff8a03 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -355,9 +355,11 @@ extern void __init minsigstksz_setup(void); */ #include -/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */ +/* Userspace interface for PR_S[MV]E_{SET,GET}_VL prctl()s: */ #define SVE_SET_VL(arg) sve_set_current_vl(arg) #define SVE_GET_VL() sve_get_current_vl() +#define SME_SET_VL(arg) sme_set_current_vl(arg) +#define SME_GET_VL() sme_get_current_vl() /* PR_PAC_RESET_KEYS prctl */ #define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index e1317b7c4525..4e6b58dcd6f9 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -82,6 +82,7 @@ int arch_dup_task_struct(struct task_struct *dst, #define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */ #define TIF_SSBD 25 /* Wants SSB mitigation */ #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ +#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -- cgit From a9d69158595017d260ab37bf88b8f125e5e8144c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:20 +0100 Subject: arm64/sme: Implement support for TPIDR2 The Scalable Matrix Extension introduces support for a new thread specific data register TPIDR2 intended for use by libc. The kernel must save the value of TPIDR2 on context switch and should ensure that all new threads start off with a default value of 0. Add a field to the thread_struct to store TPIDR2 and context switch it with the other thread specific data. In case there are future extensions which also use TPIDR2 we introduce system_supports_tpidr2() and use that rather than system_supports_sme() for TPIDR2 handling. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-13-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 5 +++++ arch/arm64/include/asm/processor.h | 1 + 2 files changed, 6 insertions(+) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 5ddfae233ea5..14a8f3d93add 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -778,6 +778,11 @@ static __always_inline bool system_supports_fa64(void) cpus_have_const_cap(ARM64_SME_FA64); } +static __always_inline bool system_supports_tpidr2(void) +{ + return system_supports_sme(); +} + static __always_inline bool system_supports_cnp(void) { return IS_ENABLED(CONFIG_ARM64_CNP) && diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 7a57cbff8a03..849e97d418a8 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -169,6 +169,7 @@ struct thread_struct { u64 mte_ctrl; #endif u64 sctlr_user; + u64 tpidr2_el0; }; static inline unsigned int thread_get_vl(struct thread_struct *thread, -- cgit From b40c559b45bec736f588c57dd5be967fe573058b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:21 +0100 Subject: arm64/sme: Implement SVCR context switching In SME the use of both streaming SVE mode and ZA are tracked through PSTATE.SM and PSTATE.ZA, visible through the system register SVCR. In order to context switch the floating point state for SME we need to context switch the contents of this register as part of context switching the floating point state. Since changing the vector length exits streaming SVE mode and disables ZA we also make sure we update SVCR appropriately when setting vector length, and similarly ensure that new threads have streaming SVE mode and ZA disabled. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-14-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 3 ++- arch/arm64/include/asm/processor.h | 1 + arch/arm64/include/asm/thread_info.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 38fd6aab7feb..821d270980da 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -46,7 +46,8 @@ extern void fpsimd_restore_current_state(void); extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, - void *sve_state, unsigned int sve_vl); + void *sve_state, unsigned int sve_vl, + u64 *svcr); extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_save_and_flush_cpu_state(void); diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 849e97d418a8..22cd11e86854 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -169,6 +169,7 @@ struct thread_struct { u64 mte_ctrl; #endif u64 sctlr_user; + u64 svcr; u64 tpidr2_el0; }; diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 4e6b58dcd6f9..848739c15de8 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -82,6 +82,7 @@ int arch_dup_task_struct(struct task_struct *dst, #define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */ #define TIF_SSBD 25 /* Wants SSB mitigation */ #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ +#define TIF_SME 27 /* SME in use */ #define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -- cgit From af7167d6d2675f3343eff3ad6c9b4a8e30122e2c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:22 +0100 Subject: arm64/sme: Implement streaming SVE context switching When in streaming mode we need to save and restore the streaming mode SVE register state rather than the regular SVE register state. This uses the streaming mode vector length and omits FFR but is otherwise identical, if TIF_SVE is enabled when we are in streaming mode then streaming mode takes precedence. This does not handle use of streaming SVE state with KVM, ptrace or signals. This will be updated in further patches. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-15-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 22 ++++++++++++++++++++-- arch/arm64/include/asm/fpsimdmacros.h | 11 +++++++++++ arch/arm64/include/asm/processor.h | 10 ++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 821d270980da..cd94f5c5b516 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -47,11 +47,21 @@ extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, void *sve_state, unsigned int sve_vl, - u64 *svcr); + unsigned int sme_vl, u64 *svcr); extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_save_and_flush_cpu_state(void); +static inline bool thread_sm_enabled(struct thread_struct *thread) +{ + return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_SM_MASK); +} + +static inline bool thread_za_enabled(struct thread_struct *thread) +{ + return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_ZA_MASK); +} + /* Maximum VL that SVE/SME VL-agnostic software can transparently support */ #define VL_ARCH_MAX 0x100 @@ -63,7 +73,14 @@ static inline size_t sve_ffr_offset(int vl) static inline void *sve_pffr(struct thread_struct *thread) { - return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread)); + unsigned int vl; + + if (system_supports_sme() && thread_sm_enabled(thread)) + vl = thread_get_sme_vl(thread); + else + vl = thread_get_sve_vl(thread); + + return (char *)thread->sve_state + sve_ffr_offset(vl); } extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); @@ -72,6 +89,7 @@ extern void sve_load_state(void const *state, u32 const *pfpsr, extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); +extern void sme_set_vq(unsigned long vq_minus_1); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 2e9a33155081..f6ab36e0cd8d 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -262,6 +262,17 @@ 921: .endm +/* Update SMCR_EL1.LEN with the new VQ */ +.macro sme_load_vq xvqminus1, xtmp, xtmp2 + mrs_s \xtmp, SYS_SMCR_EL1 + bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK + orr \xtmp2, \xtmp2, \xvqminus1 + cmp \xtmp2, \xtmp + b.eq 921f + msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising +921: +.endm + /* Preserve the first 128-bits of Znz and zero the rest. */ .macro _sve_flush_z nz _sve_check_zreg \nz diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 22cd11e86854..7542310b4e6b 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -184,6 +184,11 @@ static inline unsigned int thread_get_sve_vl(struct thread_struct *thread) return thread_get_vl(thread, ARM64_VEC_SVE); } +static inline unsigned int thread_get_sme_vl(struct thread_struct *thread) +{ + return thread_get_vl(thread, ARM64_VEC_SME); +} + unsigned int task_get_vl(const struct task_struct *task, enum vec_type type); void task_set_vl(struct task_struct *task, enum vec_type type, unsigned long vl); @@ -197,6 +202,11 @@ static inline unsigned int task_get_sve_vl(const struct task_struct *task) return task_get_vl(task, ARM64_VEC_SVE); } +static inline unsigned int task_get_sme_vl(const struct task_struct *task) +{ + return task_get_vl(task, ARM64_VEC_SME); +} + static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl) { task_set_vl(task, ARM64_VEC_SVE, vl); -- cgit From 0033cd9339642f9b7bef23f96aa2e7277ab51cce Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:23 +0100 Subject: arm64/sme: Implement ZA context switching Allocate space for storing ZA on first access to SME and use that to save and restore ZA state when context switching. We do this by using the vector form of the LDR and STR ZA instructions, these do not require streaming mode and have implementation recommendations that they avoid contention issues in shared SMCU implementations. Since ZA is architecturally guaranteed to be zeroed when enabled we do not need to explicitly zero ZA, either we will be restoring from a saved copy or trapping on first use of SME so we know that ZA must be disabled. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-16-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 5 ++++- arch/arm64/include/asm/fpsimdmacros.h | 22 ++++++++++++++++++++++ arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/include/asm/processor.h | 1 + 4 files changed, 30 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index cd94f5c5b516..1a709c03bb6c 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -47,7 +47,8 @@ extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, void *sve_state, unsigned int sve_vl, - unsigned int sme_vl, u64 *svcr); + void *za_state, unsigned int sme_vl, + u64 *svcr); extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_save_and_flush_cpu_state(void); @@ -90,6 +91,8 @@ extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); extern void sme_set_vq(unsigned long vq_minus_1); +extern void za_save_state(void *state); +extern void za_load_state(void const *state); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index f6ab36e0cd8d..5e0910cf4832 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -319,3 +319,25 @@ ldr w\nxtmp, [\xpfpsr, #4] msr fpcr, x\nxtmp .endm + +.macro sme_save_za nxbase, xvl, nw + mov w\nw, #0 + +423: + _sme_str_zav \nw, \nxbase + add x\nxbase, x\nxbase, \xvl + add x\nw, x\nw, #1 + cmp \xvl, x\nw + bne 423b +.endm + +.macro sme_load_za nxbase, xvl, nw + mov w\nw, #0 + +423: + _sme_ldr_zav \nw, \nxbase + add x\nxbase, x\nxbase, \xvl + add x\nw, x\nw, #1 + cmp \xvl, x\nw + bne 423b +.endm diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 94a27a7520f4..8a7c442d5b57 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -295,8 +295,11 @@ struct vcpu_reset_state { struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; + + /* Guest floating point state */ void *sve_state; unsigned int sve_max_vl; + u64 svcr; /* Stage 2 paging state used by the hardware on next switch */ struct kvm_s2_mmu *hw_mmu; diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 7542310b4e6b..6a3a6c3dec90 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -154,6 +154,7 @@ struct thread_struct { unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ + void *za_state; /* ZA register, if any */ unsigned int vl[ARM64_VEC_MAX]; /* vector length */ unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */ unsigned long fault_address; /* fault info */ -- cgit From 8bd7f91c03d886f41d35f6108078d20be5a4a1bd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:24 +0100 Subject: arm64/sme: Implement traps and syscall handling for SME By default all SME operations in userspace will trap. When this happens we allocate storage space for the SME register state, set up the SVE registers and disable traps. We do not need to initialize ZA since the architecture guarantees that it will be zeroed when enabled and when we trap ZA is disabled. On syscall we exit streaming mode if we were previously in it and ensure that all but the lower 128 bits of the registers are zeroed while preserving the state of ZA. This follows the aarch64 PCS for SME, ZA state is preserved over a function call and streaming mode is exited. Since the traps for SME do not distinguish between streaming mode SVE and ZA usage if ZA is in use rather than reenabling traps we instead zero the parts of the SVE registers not shared with FPSIMD and leave SME enabled, this simplifies handling SME traps. If ZA is not in use then we reenable SME traps and fall through to normal handling of SVE. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-17-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 1 + arch/arm64/include/asm/exception.h | 1 + arch/arm64/include/asm/fpsimd.h | 39 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 43872e0cfd1e..0467837fd66b 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -76,6 +76,7 @@ #define ESR_ELx_IL_SHIFT (25) #define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) +#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK) /* ISS field definitions shared by different classes */ #define ESR_ELx_WNR_SHIFT (6) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 339477dca551..2add7f33b7c2 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -64,6 +64,7 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, struct pt_regs *regs); void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs); void do_sve_acc(unsigned int esr, struct pt_regs *regs); +void do_sme_acc(unsigned int esr, struct pt_regs *regs); void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs); void do_sysinstr(unsigned int esr, struct pt_regs *regs); void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 1a709c03bb6c..6c33bc832ed4 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -239,6 +239,8 @@ static inline bool sve_vq_available(unsigned int vq) return vq_available(ARM64_VEC_SVE, vq); } +size_t sve_state_size(struct task_struct const *task); + #else /* ! CONFIG_ARM64_SVE */ static inline void sve_alloc(struct task_struct *task) { } @@ -278,10 +280,25 @@ static inline void vec_update_vq_map(enum vec_type t) { } static inline int vec_verify_vq_map(enum vec_type t) { return 0; } static inline void sve_setup(void) { } +static inline size_t sve_state_size(struct task_struct const *task) +{ + return 0; +} + #endif /* ! CONFIG_ARM64_SVE */ #ifdef CONFIG_ARM64_SME +static inline void sme_user_disable(void) +{ + sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0); +} + +static inline void sme_user_enable(void) +{ + sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN); +} + static inline void sme_smstart_sm(void) { asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr")); @@ -309,16 +326,33 @@ static inline int sme_max_virtualisable_vl(void) return vec_max_virtualisable_vl(ARM64_VEC_SME); } +extern void sme_alloc(struct task_struct *task); extern unsigned int sme_get_vl(void); extern int sme_set_current_vl(unsigned long arg); extern int sme_get_current_vl(void); +/* + * Return how many bytes of memory are required to store the full SME + * specific state (currently just ZA) for task, given task's currently + * configured vector length. + */ +static inline size_t za_state_size(struct task_struct const *task) +{ + unsigned int vl = task_get_sme_vl(task); + + return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl)); +} + #else +static inline void sme_user_disable(void) { BUILD_BUG(); } +static inline void sme_user_enable(void) { BUILD_BUG(); } + static inline void sme_smstart_sm(void) { } static inline void sme_smstop_sm(void) { } static inline void sme_smstop(void) { } +static inline void sme_alloc(struct task_struct *task) { } static inline void sme_setup(void) { } static inline unsigned int sme_get_vl(void) { return 0; } static inline int sme_max_vl(void) { return 0; } @@ -326,6 +360,11 @@ static inline int sme_max_virtualisable_vl(void) { return 0; } static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } static inline int sme_get_current_vl(void) { return -EINVAL; } +static inline size_t za_state_size(struct task_struct const *task) +{ + return 0; +} + #endif /* ! CONFIG_ARM64_SME */ /* For use by EFI runtime services calls only */ -- cgit From 85ed24dad2904f7c141911d91b7807ab02694b5e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:26 +0100 Subject: arm64/sme: Implement streaming SVE signal handling When in streaming mode we have the same set of SVE registers as we do in regular SVE mode with the exception of FFR and the use of the SME vector length. Provide signal handling for these registers by taking one of the reserved words in the SVE signal context as a flags field and defining a flag which is set for streaming mode. When the flag is set the vector length is set to the streaming mode vector length and we save and restore streaming mode data. We support entering or leaving streaming mode based on the value of the flag but do not support changing the vector length, this is not currently supported SVE signal handling. We could instead allocate a separate record in the signal frame for the streaming mode SVE context but this inflates the size of the maximal signal frame required and adds complication when validating signal frames from userspace, especially given the current structure of the code. Any implementation of support for streaming mode vectors in signals will have some potential for causing issues for applications that attempt to handle SVE vectors in signals, use streaming mode but do not understand streaming mode in their signal handling code, it is hard to identify a case that is clearly better than any other - they all have cases where they could cause unexpected register corruption or faults. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-19-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 6a3a6c3dec90..1d2ca4870b84 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -190,6 +190,14 @@ static inline unsigned int thread_get_sme_vl(struct thread_struct *thread) return thread_get_vl(thread, ARM64_VEC_SME); } +static inline unsigned int thread_get_cur_vl(struct thread_struct *thread) +{ + if (system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_SM_MASK)) + return thread_get_sme_vl(thread); + else + return thread_get_sve_vl(thread); +} + unsigned int task_get_vl(const struct task_struct *task, enum vec_type type); void task_set_vl(struct task_struct *task, enum vec_type type, unsigned long vl); -- cgit From e12310a0d30f260b26297bc8d7c95769489af038 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:28 +0100 Subject: arm64/sme: Implement ptrace support for streaming mode SVE registers The streaming mode SVE registers are represented using the same data structures as for SVE but since the vector lengths supported and in use may not be the same as SVE we represent them with a new type NT_ARM_SSVE. Unfortunately we only have a single 16 bit reserved field available in the header so there is no space to fit the current and maximum vector length for both standard and streaming SVE mode without redefining the structure in a way the creates a complicatd and fragile ABI. Since FFR is not present in streaming mode it is read and written as zero. Setting NT_ARM_SSVE registers will put the task into streaming mode, similarly setting NT_ARM_SVE registers will exit it. Reads that do not correspond to the current mode of the task will return the header with no register data. For compatibility reasons on write setting no flag for the register type will be interpreted as setting SVE registers, though users can provide no register data as an alternative mechanism for doing so. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-21-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 6c33bc832ed4..5afcd0709aae 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -144,6 +144,7 @@ struct vl_info { extern void sve_alloc(struct task_struct *task); extern void fpsimd_release_task(struct task_struct *task); extern void fpsimd_sync_to_sve(struct task_struct *task); +extern void fpsimd_force_sync_to_sve(struct task_struct *task); extern void sve_sync_to_fpsimd(struct task_struct *task); extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task); -- cgit From 861262ab862702061ae3355b811a07b15d1b2fc0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:34 +0100 Subject: KVM: arm64: Handle SME host state when running guests While we don't currently support SME in guests we do currently support it for the host system so we need to take care of SME's impact, including the floating point register state, when running guests. Simiarly to SVE we need to manage the traps in CPACR_RL1, what is new is the handling of streaming mode and ZA. Normally we defer any handling of the floating point register state until the guest first uses it however if the system is in streaming mode FPSIMD and SVE operations may generate SME traps which we would need to distinguish from actual attempts by the guest to use SME. Rather than do this for the time being if we are in streaming mode when entering the guest we force the floating point state to be saved immediately and exit streaming mode, meaning that the guest won't generate SME traps for supported operations. We could handle ZA in the access trap similarly to the FPSIMD/SVE state without the disruption caused by streaming mode but for simplicity handle it the same way as streaming mode for now. This will be revisited when we support SME for guests (hopefully before SME hardware becomes available), for now it will only incur additional cost on systems with SME and even there only if streaming mode or ZA are enabled. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419112247.711548-27-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8a7c442d5b57..f8f0d30dd1a2 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -454,6 +454,7 @@ struct kvm_vcpu_arch { #define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */ #define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14) #define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */ +#define KVM_ARM64_HOST_SME_ENABLED (1 << 16) /* SME enabled for EL0 */ #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ KVM_GUESTDBG_USE_SW_BP | \ -- cgit From f3ba50a7a100e91b0b13ca43190a66c1bfdb9993 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Sat, 23 Apr 2022 11:07:50 +0100 Subject: arm64: Add support for user sub-page fault probing With MTE, even if the pte allows an access, a mismatched tag somewhere within a page can still cause a fault. Select ARCH_HAS_SUBPAGE_FAULTS if MTE is enabled and implement the probe_subpage_writeable() function. Note that get_user() is sufficient for the writeable MTE check since the same tag mismatch fault would be triggered by a read. The caller of probe_subpage_writeable() will need to check the pte permissions (put_user, GUP). Signed-off-by: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20220423100751.1870771-3-catalin.marinas@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mte.h | 1 + arch/arm64/include/asm/uaccess.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index adcb937342f1..aa523591a44e 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -47,6 +47,7 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg); long get_mte_ctrl(struct task_struct *task); int mte_ptrace_copy_tags(struct task_struct *child, long request, unsigned long addr, unsigned long data); +size_t mte_probe_user_range(const char __user *uaddr, size_t size); #else /* CONFIG_ARM64_MTE */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index e8dce0cc5eaa..63f9c828f1a7 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -460,4 +460,19 @@ static inline int __copy_from_user_flushcache(void *dst, const void __user *src, } #endif +#ifdef CONFIG_ARCH_HAS_SUBPAGE_FAULTS + +/* + * Return 0 on success, the number of bytes not probed otherwise. + */ +static inline size_t probe_subpage_writeable(const char __user *uaddr, + size_t size) +{ + if (!system_supports_mte()) + return 0; + return mte_probe_user_range(uaddr, size); +} + +#endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */ + #endif /* __ASM_UACCESS_H */ -- cgit From c4a0ebf87cebbfa28d56e7d93b2536e2311e30c9 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Thu, 21 Apr 2022 00:00:06 +0800 Subject: arm64/ftrace: Make function graph use ftrace directly As we do in commit 0c0593b45c9b ("x86/ftrace: Make function graph use ftrace directly"), we don't need special hook for graph tracer, but instead we use graph_ops:func function to install return_hooker. Since commit 3b23e4991fb6 ("arm64: implement ftrace with regs") add implementation for FTRACE_WITH_REGS on arm64, we can easily adopt the same cleanup on arm64. And this cleanup only changes the FTRACE_WITH_REGS implementation, so the mcount-based implementation is unaffected. While in theory it would be possible to make a similar cleanup for !FTRACE_WITH_REGS, this will require rework of the core code, and so for now we only change the FTRACE_WITH_REGS implementation. Tested-by: Mark Rutland Reviewed-by: Mark Rutland Signed-off-by: Chengming Zhou Link: https://lore.kernel.org/r/20220420160006.17880-2-zhouchengming@bytedance.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/ftrace.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 1494cfa8639b..dbc45a4157fa 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -80,8 +80,15 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS struct dyn_ftrace; +struct ftrace_ops; +struct ftrace_regs; + int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop + +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#define ftrace_graph_func ftrace_graph_func #endif #define ftrace_return_address(n) return_address(n) -- cgit From 48e6f22e25a44e43952db5fbb767dea0c9319cb2 Mon Sep 17 00:00:00 2001 From: Michal Orzel Date: Tue, 26 Apr 2022 09:06:03 +0200 Subject: arm64: cputype: Avoid overflow using MIDR_IMPLEMENTOR_MASK Value of macro MIDR_IMPLEMENTOR_MASK exceeds the range of integer and can lead to overflow. Currently there is no issue as it is used in expressions implicitly casting it to u32. To avoid possible problems, fix the macro. Signed-off-by: Michal Orzel Link: https://lore.kernel.org/r/20220426070603.56031-1-michal.orzel@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ff8f4511df71..92331c07c2d1 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -36,7 +36,7 @@ #define MIDR_VARIANT(midr) \ (((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT) #define MIDR_IMPLEMENTOR_SHIFT 24 -#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT) +#define MIDR_IMPLEMENTOR_MASK (0xffU << MIDR_IMPLEMENTOR_SHIFT) #define MIDR_IMPLEMENTOR(midr) \ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) -- cgit From d158a0608eb85f29f98357b97d01b80250636613 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 5 May 2022 23:15:17 +0100 Subject: arm64/sme: More sensibly define the size for the ZA register set Since the vector length configuration mechanism is identical between SVE and SME we share large elements of the code including the definition for the maximum vector length. Unfortunately when we were defining the ABI for SVE we included not only the actual maximum vector length of 2048 bits but also the value possible if all the bits reserved in the architecture for expansion of the LEN field were used, 16384 bits. This starts creating problems if we try to allocate anything for the ZA matrix based on the maximum possible vector length, as we do for the regset used with ptrace during the process of generating a core dump. While the maximum potential size for ZA with the current architecture is a reasonably managable 64K with the higher reserved limit ZA would be 64M which leads to entirely reasonable complaints from the memory management code when we try to allocate a buffer of that size. Avoid these issues by defining the actual maximum vector length for the architecture and using it for the SME regsets. Also use the full ZA_PT_SIZE() with the header rather than just the actual register payload when specifying the size, fixing support for the largest vector lengths now that we have this new, lower define. With the SVE maximum this did not cause problems due to the extra headroom we had. While we're at it add a comment clarifying why even though ZA is a single register we tell the regset code that it is a multi-register regset. Reported-by: Qian Cai Signed-off-by: Mark Brown Tested-by: Naresh Kamboju Link: https://lore.kernel.org/r/20220505221517.1642014-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 5afcd0709aae..75caa2098d5b 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -32,6 +32,18 @@ #define VFP_STATE_SIZE ((32 * 8) + 4) #endif +/* + * When we defined the maximum SVE vector length we defined the ABI so + * that the maximum vector length included all the reserved for future + * expansion bits in ZCR rather than those just currently defined by + * the architecture. While SME follows a similar pattern the fact that + * it includes a square matrix means that any allocations that attempt + * to cover the maximum potential vector length (such as happen with + * the regset used for ptrace) end up being extremely large. Define + * the much lower actual limit for use in such situations. + */ +#define SME_VQ_MAX 16 + struct task_struct; extern void fpsimd_save_state(struct user_fpsimd_state *state); -- cgit From c733812dd77350ba0da18cd6e474e5a2e6461b49 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 5 May 2022 18:32:07 +0200 Subject: arm64: mm: Make arch_faults_on_old_pte() check for migratability arch_faults_on_old_pte() relies on the calling context being non-preemptible. CONFIG_PREEMPT_RT turns the PTE lock into a sleepable spinlock, which doesn't disable preemption once acquired, triggering the warning in arch_faults_on_old_pte(). It does however disable migration, ensuring the task remains on the same CPU during the entirety of the critical section, making the read of cpu_has_hw_af() safe and stable. Make arch_faults_on_old_pte() check cant_migrate() instead of preemptible(). Cc: Valentin Schneider Suggested-by: Sebastian Andrzej Siewior Signed-off-by: Valentin Schneider Link: https://lore.kernel.org/r/20220127192437.1192957-1-valentin.schneider@arm.com Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220505163207.85751-4-bigeasy@linutronix.de Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 94e147e5456c..9c0a9bfd6b07 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1001,7 +1001,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, */ static inline bool arch_faults_on_old_pte(void) { - WARN_ON(preemptible()); + /* The register read below requires a stable CPU to make any sense */ + cant_migrate(); return !cpu_has_hw_af(); } -- cgit From bc5dfb4fd7bd471c77ea48143159eb5e1308d636 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 16 May 2022 08:55:58 +0800 Subject: arm64/hugetlb: Implement arm64 specific huge_ptep_get() Now we use huge_ptep_get() to get the pte value of a hugetlb page, however it will only return one specific pte value for the CONT-PTE or CONT-PMD size hugetlb on ARM64 system, which can contain several continuous pte or pmd entries with same page table attributes. And it will not take into account the subpages' dirty or young bits of a CONT-PTE/PMD size hugetlb page. So the huge_ptep_get() is inconsistent with huge_ptep_get_and_clear(), which already takes account the dirty or young bits for any subpages in this CONT-PTE/PMD size hugetlb [1]. Meanwhile we can miss dirty or young flags statistics for hugetlb pages with current huge_ptep_get(), such as the gather_hugetlb_stats() function, and CONT-PTE/PMD hugetlb monitoring with DAMON. Thus define an ARM64 specific huge_ptep_get() implementation as well as enabling __HAVE_ARCH_HUGE_PTEP_GET, that will take into account any subpages' dirty or young bits for CONT-PTE/PMD size hugetlb page, for those functions that want to check the dirty and young flags of a hugetlb page. [1] https://lore.kernel.org/linux-mm/85bd80b4-b4fd-0d3f-a2e5-149559f2f387@oracle.com/ Suggested-by: Muchun Song Signed-off-by: Baolin Wang Reviewed-by: Muchun Song Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/624109a80ac4bbdf1e462dfa0b49e9f7c31a7c0d.1652496622.git.baolin.wang@linux.alibaba.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/hugetlb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm64/include/asm') diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 1242f71937f8..d656822b13f1 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -44,6 +44,8 @@ extern void huge_ptep_clear_flush(struct vm_area_struct *vma, #define __HAVE_ARCH_HUGE_PTE_CLEAR extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz); +#define __HAVE_ARCH_HUGE_PTEP_GET +extern pte_t huge_ptep_get(pte_t *ptep); extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz); #define set_huge_swap_pte_at set_huge_swap_pte_at -- cgit