diff options
author | Oliver Upton <oliver.upton@linux.dev> | 2023-02-13 22:30:07 +0000 |
---|---|---|
committer | Oliver Upton <oliver.upton@linux.dev> | 2023-02-13 22:30:17 +0000 |
commit | 619cec00857f21dbc6db5ef9e0b9c613479f3745 (patch) | |
tree | 046deb5e9b2056f4284681699b3c877f36b09843 /arch/arm64 | |
parent | 92425e058ab6ab1a4c4a9d384d2d86b810f57e23 (diff) | |
parent | 9442d05bba6c12749fdc4039eddcf801398ec82b (diff) |
Merge branch arm64/for-next/sme2 into kvmarm/next
Merge the SME2 branch to fix up a rather annoying conflict due to the
EL2 finalization refactor.
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Diffstat (limited to 'arch/arm64')
-rw-r--r-- | arch/arm64/include/asm/cpufeature.h | 6 | ||||
-rw-r--r-- | arch/arm64/include/asm/esr.h | 1 | ||||
-rw-r--r-- | arch/arm64/include/asm/fpsimd.h | 30 | ||||
-rw-r--r-- | arch/arm64/include/asm/fpsimdmacros.h | 22 | ||||
-rw-r--r-- | arch/arm64/include/asm/hwcap.h | 6 | ||||
-rw-r--r-- | arch/arm64/include/asm/processor.h | 2 | ||||
-rw-r--r-- | arch/arm64/include/uapi/asm/hwcap.h | 6 | ||||
-rw-r--r-- | arch/arm64/include/uapi/asm/sigcontext.h | 19 | ||||
-rw-r--r-- | arch/arm64/kernel/cpufeature.c | 28 | ||||
-rw-r--r-- | arch/arm64/kernel/cpuinfo.c | 6 | ||||
-rw-r--r-- | arch/arm64/kernel/entry-fpsimd.S | 30 | ||||
-rw-r--r-- | arch/arm64/kernel/fpsimd.c | 47 | ||||
-rw-r--r-- | arch/arm64/kernel/hyp-stub.S | 7 | ||||
-rw-r--r-- | arch/arm64/kernel/idreg-override.c | 1 | ||||
-rw-r--r-- | arch/arm64/kernel/process.c | 21 | ||||
-rw-r--r-- | arch/arm64/kernel/ptrace.c | 60 | ||||
-rw-r--r-- | arch/arm64/kernel/signal.c | 113 | ||||
-rw-r--r-- | arch/arm64/kvm/fpsimd.c | 2 | ||||
-rw-r--r-- | arch/arm64/tools/cpucaps | 1 | ||||
-rw-r--r-- | arch/arm64/tools/sysreg | 27 |
20 files changed, 377 insertions, 58 deletions
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 03d1c9d7af82..fc2c739f48f1 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -769,6 +769,12 @@ static __always_inline bool system_supports_sme(void) cpus_have_const_cap(ARM64_SME); } +static __always_inline bool system_supports_sme2(void) +{ + return IS_ENABLED(CONFIG_ARM64_SME) && + cpus_have_const_cap(ARM64_SME2); +} + static __always_inline bool system_supports_fa64(void) { return IS_ENABLED(CONFIG_ARM64_SME) && diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 206de10524e3..c9f15b9e3c71 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -350,6 +350,7 @@ #define ESR_ELx_SME_ISS_ILL 1 #define ESR_ELx_SME_ISS_SM_DISABLED 2 #define ESR_ELx_SME_ISS_ZA_DISABLED 3 +#define ESR_ELx_SME_ISS_ZT_DISABLED 4 #ifndef __ASSEMBLY__ #include <asm/types.h> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index e6fa1e2982c8..67f2fb781f59 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -61,7 +61,7 @@ extern void fpsimd_kvm_prepare(void); struct cpu_fp_state { struct user_fpsimd_state *st; void *sve_state; - void *za_state; + void *sme_state; u64 *svcr; unsigned int sve_vl; unsigned int sme_vl; @@ -105,6 +105,13 @@ static inline void *sve_pffr(struct thread_struct *thread) return (char *)thread->sve_state + sve_ffr_offset(vl); } +static inline void *thread_zt_state(struct thread_struct *thread) +{ + /* The ZT register state is stored immediately after the ZA state */ + unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread)); + return thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq); +} + extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); extern void sve_load_state(void const *state, u32 const *pfpsr, int restore_ffr); @@ -112,12 +119,13 @@ extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); extern void sme_set_vq(unsigned long vq_minus_1); -extern void za_save_state(void *state); -extern void za_load_state(void const *state); +extern void sme_save_state(void *state, int zt); +extern void sme_load_state(void const *state, int zt); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused); +extern void sme2_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern u64 read_zcr_features(void); @@ -355,14 +363,20 @@ extern int sme_get_current_vl(void); /* * Return how many bytes of memory are required to store the full SME - * specific state (currently just ZA) for task, given task's currently - * configured vector length. + * specific state for task, given task's currently configured vector + * length. */ -static inline size_t za_state_size(struct task_struct const *task) +static inline size_t sme_state_size(struct task_struct const *task) { unsigned int vl = task_get_sme_vl(task); + size_t size; + + size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl)); + + if (system_supports_sme2()) + size += ZT_SIG_REG_SIZE; - return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl)); + return size; } #else @@ -382,7 +396,7 @@ static inline int sme_max_virtualisable_vl(void) { return 0; } static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } static inline int sme_get_current_vl(void) { return -EINVAL; } -static inline size_t za_state_size(struct task_struct const *task) +static inline size_t sme_state_size(struct task_struct const *task) { return 0; } diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 5e0910cf4832..cd03819a3b68 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -221,6 +221,28 @@ .endm /* + * LDR (ZT0) + * + * LDR ZT0, nx + */ +.macro _ldr_zt nx + _check_general_reg \nx + .inst 0xe11f8000 \ + | (\nx << 5) +.endm + +/* + * STR (ZT0) + * + * STR ZT0, nx + */ +.macro _str_zt nx + _check_general_reg \nx + .inst 0xe13f8000 \ + | (\nx << 5) +.endm + +/* * Zero the entire ZA array * ZERO ZA */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 06dd12c514e6..475c803ecf42 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -123,6 +123,12 @@ #define KERNEL_HWCAP_CSSC __khwcap2_feature(CSSC) #define KERNEL_HWCAP_RPRFM __khwcap2_feature(RPRFM) #define KERNEL_HWCAP_SVE2P1 __khwcap2_feature(SVE2P1) +#define KERNEL_HWCAP_SME2 __khwcap2_feature(SME2) +#define KERNEL_HWCAP_SME2P1 __khwcap2_feature(SME2P1) +#define KERNEL_HWCAP_SME_I16I32 __khwcap2_feature(SME_I16I32) +#define KERNEL_HWCAP_SME_BI32I32 __khwcap2_feature(SME_BI32I32) +#define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16) +#define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index d51b32a69309..3918f2a67970 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -161,7 +161,7 @@ struct thread_struct { enum fp_type fp_type; /* registers FPSIMD or SVE? */ unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ - void *za_state; /* ZA register, if any */ + void *sme_state; /* ZA and ZT state, if any */ unsigned int vl[ARM64_VEC_MAX]; /* vector length */ unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */ unsigned long fault_address; /* fault info */ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index b713d30544f1..69a4fb749c65 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -96,5 +96,11 @@ #define HWCAP2_CSSC (1UL << 34) #define HWCAP2_RPRFM (1UL << 35) #define HWCAP2_SVE2P1 (1UL << 36) +#define HWCAP2_SME2 (1UL << 37) +#define HWCAP2_SME2P1 (1UL << 38) +#define HWCAP2_SME_I16I32 (1UL << 39) +#define HWCAP2_SME_BI32I32 (1UL << 40) +#define HWCAP2_SME_B16B16 (1UL << 41) +#define HWCAP2_SME_F16F16 (1UL << 42) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 9525041e4a14..46e9072985a5 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -152,6 +152,14 @@ struct za_context { __u16 __reserved[3]; }; +#define ZT_MAGIC 0x5a544e01 + +struct zt_context { + struct _aarch64_ctx head; + __u16 nregs; + __u16 __reserved[3]; +}; + #endif /* !__ASSEMBLY__ */ #include <asm/sve_context.h> @@ -304,4 +312,15 @@ struct za_context { #define ZA_SIG_CONTEXT_SIZE(vq) \ (ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq)) +#define ZT_SIG_REG_SIZE 512 + +#define ZT_SIG_REG_BYTES (ZT_SIG_REG_SIZE / 8) + +#define ZT_SIG_REGS_OFFSET sizeof(struct zt_context) + +#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * n) + +#define ZT_SIG_CONTEXT_SIZE(n) \ + (sizeof(struct zt_context) + ZT_SIG_REGS_SIZE(n)) + #endif /* _UAPI__ASM_SIGCONTEXT_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a77315b338e6..5bd959bd9a1f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -283,16 +283,26 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0), ARM64_FTR_END, }; @@ -2649,6 +2659,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .cpu_enable = fa64_kernel_enable, }, + { + .desc = "SME2", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_SME2, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR1_EL1_SME_SHIFT, + .field_width = ID_AA64PFR1_EL1_SME_WIDTH, + .min_field_value = ID_AA64PFR1_EL1_SME_SME2, + .matches = has_cpuid_feature, + .cpu_enable = sme2_kernel_enable, + }, #endif /* CONFIG_ARM64_SME */ { .desc = "WFx with timeout", @@ -2827,11 +2849,17 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_SME HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SME_IMP, CAP_HWCAP, KERNEL_HWCAP_SME), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_SMEver_SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_SMEver_SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16B16_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F16_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I8I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_BI32I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F32F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), #endif /* CONFIG_ARM64_SME */ {}, diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 379695262b77..85e54417d141 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -119,6 +119,12 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_CSSC] = "cssc", [KERNEL_HWCAP_RPRFM] = "rprfm", [KERNEL_HWCAP_SVE2P1] = "sve2p1", + [KERNEL_HWCAP_SME2] = "sme2", + [KERNEL_HWCAP_SME2P1] = "sme2p1", + [KERNEL_HWCAP_SME_I16I32] = "smei16i32", + [KERNEL_HWCAP_SME_BI32I32] = "smebi32i32", + [KERNEL_HWCAP_SME_B16B16] = "smeb16b16", + [KERNEL_HWCAP_SME_F16F16] = "smef16f16", }; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 229436f33df5..6325db1a2179 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -100,25 +100,35 @@ SYM_FUNC_START(sme_set_vq) SYM_FUNC_END(sme_set_vq) /* - * Save the SME state + * Save the ZA and ZT state * * x0 - pointer to buffer for state + * x1 - number of ZT registers to save */ -SYM_FUNC_START(za_save_state) - _sme_rdsvl 1, 1 // x1 = VL/8 - sme_save_za 0, x1, 12 +SYM_FUNC_START(sme_save_state) + _sme_rdsvl 2, 1 // x2 = VL/8 + sme_save_za 0, x2, 12 // Leaves x0 pointing to the end of ZA + + cbz x1, 1f + _str_zt 0 +1: ret -SYM_FUNC_END(za_save_state) +SYM_FUNC_END(sme_save_state) /* - * Load the SME state + * Load the ZA and ZT state * * x0 - pointer to buffer for state + * x1 - number of ZT registers to save */ -SYM_FUNC_START(za_load_state) - _sme_rdsvl 1, 1 // x1 = VL/8 - sme_load_za 0, x1, 12 +SYM_FUNC_START(sme_load_state) + _sme_rdsvl 2, 1 // x2 = VL/8 + sme_load_za 0, x2, 12 // Leaves x0 pointing to the end of ZA + + cbz x1, 1f + _ldr_zt 0 +1: ret -SYM_FUNC_END(za_load_state) +SYM_FUNC_END(sme_load_state) #endif /* CONFIG_ARM64_SME */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index b6ef1af0122e..7c67190c44e4 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -299,7 +299,7 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, /* * TIF_SME controls whether a task can use SME without trapping while * in userspace, when TIF_SME is set then we must have storage - * alocated in sve_state and za_state to store the contents of both ZA + * alocated in sve_state and sme_state to store the contents of both ZA * and the SVE registers for both streaming and non-streaming modes. * * If both SVCR.ZA and SVCR.SM are disabled then at any point we @@ -429,7 +429,8 @@ static void task_fpsimd_load(void) write_sysreg_s(current->thread.svcr, SYS_SVCR); if (thread_za_enabled(¤t->thread)) - za_load_state(current->thread.za_state); + sme_load_state(current->thread.sme_state, + system_supports_sme2()); if (thread_sm_enabled(¤t->thread)) restore_ffr = system_supports_fa64(); @@ -490,7 +491,8 @@ static void fpsimd_save(void) *svcr = read_sysreg_s(SYS_SVCR); if (*svcr & SVCR_ZA_MASK) - za_save_state(last->za_state); + sme_save_state(last->sme_state, + system_supports_sme2()); /* If we are in streaming mode override regular SVE. */ if (*svcr & SVCR_SM_MASK) { @@ -1257,30 +1259,30 @@ void fpsimd_release_task(struct task_struct *dead_task) #ifdef CONFIG_ARM64_SME /* - * Ensure that task->thread.za_state is allocated and sufficiently large. + * Ensure that task->thread.sme_state is allocated and sufficiently large. * * This function should be used only in preparation for replacing - * task->thread.za_state with new data. The memory is always zeroed + * task->thread.sme_state with new data. The memory is always zeroed * here to prevent stale data from showing through: this is done in * the interest of testability and predictability, the architecture * guarantees that when ZA is enabled it will be zeroed. */ void sme_alloc(struct task_struct *task) { - if (task->thread.za_state) { - memset(task->thread.za_state, 0, za_state_size(task)); + if (task->thread.sme_state) { + memset(task->thread.sme_state, 0, sme_state_size(task)); return; } /* This could potentially be up to 64K. */ - task->thread.za_state = - kzalloc(za_state_size(task), GFP_KERNEL); + task->thread.sme_state = + kzalloc(sme_state_size(task), GFP_KERNEL); } static void sme_free(struct task_struct *task) { - kfree(task->thread.za_state); - task->thread.za_state = NULL; + kfree(task->thread.sme_state); + task->thread.sme_state = NULL; } void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) @@ -1302,6 +1304,17 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) * This must be called after sme_kernel_enable(), we rely on the * feature table being sorted to ensure this. */ +void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) +{ + /* Allow use of ZT0 */ + write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK, + SYS_SMCR_EL1); +} + +/* + * This must be called after sme_kernel_enable(), we rely on the + * feature table being sorted to ensure this. + */ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) { /* Allow use of FA64 */ @@ -1488,7 +1501,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) sve_alloc(current, false); sme_alloc(current); - if (!current->thread.sve_state || !current->thread.za_state) { + if (!current->thread.sve_state || !current->thread.sme_state) { force_sig(SIGKILL); return; } @@ -1609,7 +1622,7 @@ static void fpsimd_flush_thread_vl(enum vec_type type) void fpsimd_flush_thread(void) { void *sve_state = NULL; - void *za_state = NULL; + void *sme_state = NULL; if (!system_supports_fpsimd()) return; @@ -1634,8 +1647,8 @@ void fpsimd_flush_thread(void) clear_thread_flag(TIF_SME); /* Defer kfree() while in atomic context */ - za_state = current->thread.za_state; - current->thread.za_state = NULL; + sme_state = current->thread.sme_state; + current->thread.sme_state = NULL; fpsimd_flush_thread_vl(ARM64_VEC_SME); current->thread.svcr = 0; @@ -1645,7 +1658,7 @@ void fpsimd_flush_thread(void) put_cpu_fpsimd_context(); kfree(sve_state); - kfree(za_state); + kfree(sme_state); } /* @@ -1711,7 +1724,7 @@ static void fpsimd_bind_task_to_cpu(void) WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; - last->za_state = current->thread.za_state; + last->sme_state = current->thread.sme_state; last->sve_vl = task_get_sve_vl(current); last->sme_vl = task_get_sme_vl(current); last->svcr = ¤t->thread.svcr; diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 2ee18c860f2a..111ff33d93ee 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -132,6 +132,13 @@ SYM_CODE_START_LOCAL(__finalise_el2) orr x0, x0, SMCR_ELx_FA64_MASK .Lskip_sme_fa64: + // ZT0 available? + mrs_s x1, SYS_ID_AA64SMFR0_EL1 + __check_override id_aa64smfr0 ID_AA64SMFR0_EL1_SMEver_SHIFT 4 .Linit_sme_zt0 .Lskip_sme_zt0 +.Linit_sme_zt0: + orr x0, x0, SMCR_ELx_EZT0_MASK +.Lskip_sme_zt0: + orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector msr_s SYS_SMCR_EL2, x0 // length for EL1. diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 95133765ed29..d833d78a7f31 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -131,6 +131,7 @@ static const struct ftr_set_desc smfr0 __initconst = { .name = "id_aa64smfr0", .override = &id_aa64smfr0_override, .fields = { + FIELD("smever", ID_AA64SMFR0_EL1_SMEver_SHIFT, NULL), /* FA64 is a one bit field... :-/ */ { "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, }, {} diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 269ac1c25ae2..71d59b5abede 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -307,27 +307,28 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) /* * In the unlikely event that we create a new thread with ZA - * enabled we should retain the ZA state so duplicate it here. - * This may be shortly freed if we exec() or if CLONE_SETTLS - * but it's simpler to do it here. To avoid confusing the rest - * of the code ensure that we have a sve_state allocated - * whenever za_state is allocated. + * enabled we should retain the ZA and ZT state so duplicate + * it here. This may be shortly freed if we exec() or if + * CLONE_SETTLS but it's simpler to do it here. To avoid + * confusing the rest of the code ensure that we have a + * sve_state allocated whenever sme_state is allocated. */ if (thread_za_enabled(&src->thread)) { dst->thread.sve_state = kzalloc(sve_state_size(src), GFP_KERNEL); if (!dst->thread.sve_state) return -ENOMEM; - dst->thread.za_state = kmemdup(src->thread.za_state, - za_state_size(src), - GFP_KERNEL); - if (!dst->thread.za_state) { + + dst->thread.sme_state = kmemdup(src->thread.sme_state, + sme_state_size(src), + GFP_KERNEL); + if (!dst->thread.sme_state) { kfree(dst->thread.sve_state); dst->thread.sve_state = NULL; return -ENOMEM; } } else { - dst->thread.za_state = NULL; + dst->thread.sme_state = NULL; clear_tsk_thread_flag(dst, TIF_SME); } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 0c321ad23cd3..89b87f1021ed 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1045,7 +1045,7 @@ static int za_get(struct task_struct *target, if (thread_za_enabled(&target->thread)) { start = end; end = ZA_PT_SIZE(vq); - membuf_write(&to, target->thread.za_state, end - start); + membuf_write(&to, target->thread.sme_state, end - start); } /* Zero any trailing padding */ @@ -1099,7 +1099,7 @@ static int za_set(struct task_struct *target, /* Allocate/reinit ZA storage */ sme_alloc(target); - if (!target->thread.za_state) { + if (!target->thread.sme_state) { ret = -ENOMEM; goto out; } @@ -1124,7 +1124,7 @@ static int za_set(struct task_struct *target, start = ZA_PT_ZA_OFFSET; end = ZA_PT_SIZE(vq); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - target->thread.za_state, + target->thread.sme_state, start, end); if (ret) goto out; @@ -1138,6 +1138,51 @@ out: return ret; } +static int zt_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + if (!system_supports_sme2()) + return -EINVAL; + + /* + * If PSTATE.ZA is not set then ZT will be zeroed when it is + * enabled so report the current register value as zero. + */ + if (thread_za_enabled(&target->thread)) + membuf_write(&to, thread_zt_state(&target->thread), + ZT_SIG_REG_BYTES); + else + membuf_zero(&to, ZT_SIG_REG_BYTES); + + return 0; +} + +static int zt_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!system_supports_sme2()) + return -EINVAL; + + if (!thread_za_enabled(&target->thread)) { + sme_alloc(target); + if (!target->thread.sme_state) + return -ENOMEM; + } + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + thread_zt_state(&target->thread), + 0, ZT_SIG_REG_BYTES); + if (ret == 0) + target->thread.svcr |= SVCR_ZA_MASK; + + return ret; +} + #endif /* CONFIG_ARM64_SME */ #ifdef CONFIG_ARM64_PTR_AUTH @@ -1360,6 +1405,7 @@ enum aarch64_regset { #ifdef CONFIG_ARM64_SME REGSET_SSVE, REGSET_ZA, + REGSET_ZT, #endif #ifdef CONFIG_ARM64_PTR_AUTH REGSET_PAC_MASK, @@ -1467,6 +1513,14 @@ static const struct user_regset aarch64_regsets[] = { .regset_get = za_get, .set = za_set, }, + [REGSET_ZT] = { /* SME ZT */ + .core_note_type = NT_ARM_ZT, + .n = 1, + .size = ZT_SIG_REG_BYTES, + .align = sizeof(u64), + .regset_get = zt_get, + .set = zt_set, + }, #endif #ifdef CONFIG_ARM64_PTR_AUTH [REGSET_PAC_MASK] = { diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index be279fd48248..14779619375b 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -57,6 +57,7 @@ struct rt_sigframe_user_layout { unsigned long esr_offset; unsigned long sve_offset; unsigned long za_offset; + unsigned long zt_offset; unsigned long extra_offset; unsigned long end_offset; }; @@ -221,6 +222,7 @@ struct user_ctxs { struct fpsimd_context __user *fpsimd; struct sve_context __user *sve; struct za_context __user *za; + struct zt_context __user *zt; }; #ifdef CONFIG_ARM64_SVE @@ -394,7 +396,7 @@ static int preserve_za_context(struct za_context __user *ctx) * fpsimd_signal_preserve_current_state(). */ err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET, - current->thread.za_state, + current->thread.sme_state, ZA_SIG_REGS_SIZE(vq)); } @@ -425,7 +427,7 @@ static int restore_za_context(struct user_ctxs *user) /* * Careful: we are about __copy_from_user() directly into - * thread.za_state with preemption enabled, so protection is + * thread.sme_state with preemption enabled, so protection is * needed to prevent a racing context switch from writing stale * registers back over the new data. */ @@ -434,13 +436,13 @@ static int restore_za_context(struct user_ctxs *user) /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ sme_alloc(current); - if (!current->thread.za_state) { + if (!current->thread.sme_state) { current->thread.svcr &= ~SVCR_ZA_MASK; clear_thread_flag(TIF_SME); return -ENOMEM; } - err = __copy_from_user(current->thread.za_state, + err = __copy_from_user(current->thread.sme_state, (char __user const *)user->za + ZA_SIG_REGS_OFFSET, ZA_SIG_REGS_SIZE(vq)); @@ -452,11 +454,81 @@ static int restore_za_context(struct user_ctxs *user) return 0; } + +static int preserve_zt_context(struct zt_context __user *ctx) +{ + int err = 0; + u16 reserved[ARRAY_SIZE(ctx->__reserved)]; + + if (WARN_ON(!thread_za_enabled(¤t->thread))) + return -EINVAL; + + memset(reserved, 0, sizeof(reserved)); + + __put_user_error(ZT_MAGIC, &ctx->head.magic, err); + __put_user_error(round_up(ZT_SIG_CONTEXT_SIZE(1), 16), + &ctx->head.size, err); + __put_user_error(1, &ctx->nregs, err); + BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved)); + err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); + + /* + * This assumes that the ZT state has already been saved to + * the task struct by calling the function + * fpsimd_signal_preserve_current_state(). + */ + err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET, + thread_zt_state(¤t->thread), + ZT_SIG_REGS_SIZE(1)); + + return err ? -EFAULT : 0; +} + +static int restore_zt_context(struct user_ctxs *user) +{ + int err; + struct zt_context zt; + + /* ZA must be restored first for this check to be valid */ + if (!thread_za_enabled(¤t->thread)) + return -EINVAL; + + if (__copy_from_user(&zt, user->zt, sizeof(zt))) + return -EFAULT; + + if (zt.nregs != 1) + return -EINVAL; + + if (zt.head.size != ZT_SIG_CONTEXT_SIZE(zt.nregs)) + return -EINVAL; + + /* + * Careful: we are about __copy_from_user() directly into + * thread.zt_state with preemption enabled, so protection is + * needed to prevent a racing context switch from writing stale + * registers back over the new data. + */ + + fpsimd_flush_task_state(current); + /* From now, fpsimd_thread_switch() won't touch ZT in thread state */ + + err = __copy_from_user(thread_zt_state(¤t->thread), + (char __user const *)user->zt + + ZT_SIG_REGS_OFFSET, + ZT_SIG_REGS_SIZE(1)); + if (err) + return -EFAULT; + + return 0; +} + #else /* ! CONFIG_ARM64_SME */ /* Turn any non-optimised out attempts to use these into a link error: */ extern int preserve_za_context(void __user *ctx); extern int restore_za_context(struct user_ctxs *user); +extern int preserve_zt_context(void __user *ctx); +extern int restore_zt_context(struct user_ctxs *user); #endif /* ! CONFIG_ARM64_SME */ @@ -474,6 +546,7 @@ static int parse_user_sigframe(struct user_ctxs *user, user->fpsimd = NULL; user->sve = NULL; user->za = NULL; + user->zt = NULL; if (!IS_ALIGNED((unsigned long)base, 16)) goto invalid; @@ -552,6 +625,19 @@ static int parse_user_sigframe(struct user_ctxs *user, user->za = (struct za_context __user *)head; break; + case ZT_MAGIC: + if (!system_supports_sme2()) + goto invalid; + + if (user->zt) + goto invalid; + + if (size < sizeof(*user->zt)) + goto invalid; + + user->zt = (struct zt_context __user *)head; + break; + case EXTRA_MAGIC: if (have_extra_context) goto invalid; @@ -674,6 +760,9 @@ static int restore_sigframe(struct pt_regs *regs, if (err == 0 && system_supports_sme() && user.za) err = restore_za_context(&user); + if (err == 0 && system_supports_sme2() && user.zt) + err = restore_zt_context(&user); + return err; } @@ -774,6 +863,15 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, return err; } + if (system_supports_sme2()) { + if (add_all || thread_za_enabled(¤t->thread)) { + err = sigframe_alloc(user, &user->zt_offset, + ZT_SIG_CONTEXT_SIZE(1)); + if (err) + return err; + } + } + return sigframe_alloc_end(user); } @@ -829,6 +927,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, err |= preserve_za_context(za_ctx); } + /* ZT state if present */ + if (system_supports_sme2() && err == 0 && user->zt_offset) { + struct zt_context __user *zt_ctx = + apply_user_offset(user, user->zt_offset); + err |= preserve_zt_context(zt_ctx); + } + if (err == 0 && user->extra_offset) { char __user *sfp = (char __user *)user->sigframe; char __user *userp = diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 02dd7e9ebd39..235775d0c825 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -143,7 +143,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) fp_state.st = &vcpu->arch.ctxt.fp_regs; fp_state.sve_state = vcpu->arch.sve_state; fp_state.sve_vl = vcpu->arch.sve_max_vl; - fp_state.za_state = NULL; + fp_state.sme_state = NULL; fp_state.svcr = &vcpu->arch.svcr; fp_state.fp_type = &vcpu->arch.fp_type; diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index dfeb2c51e257..37b6a1d8d8b0 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -50,6 +50,7 @@ MTE MTE_ASYMM SME SME_FA64 +SME2 SPECTRE_V2 SPECTRE_V3A SPECTRE_V4 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 184e58fd5631..13c87d8a42f1 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -894,6 +894,7 @@ EndEnum Enum 27:24 SME 0b0000 NI 0b0001 IMP + 0b0010 SME2 EndEnum Res0 23:20 Enum 19:16 MPAM_frac @@ -975,7 +976,9 @@ Enum 63 FA64 EndEnum Res0 62:60 Enum 59:56 SMEver - 0b0000 IMP + 0b0000 SME + 0b0001 SME2 + 0b0010 SME2p1 EndEnum Enum 55:52 I16I64 0b0000 NI @@ -986,7 +989,19 @@ Enum 48 F64F64 0b0 NI 0b1 IMP EndEnum -Res0 47:40 +Enum 47:44 I16I32 + 0b0000 NI + 0b0101 IMP +EndEnum +Enum 43 B16B16 + 0b0 NI + 0b1 IMP +EndEnum +Enum 42 F16F16 + 0b0 NI + 0b1 IMP +EndEnum +Res0 41:40 Enum 39:36 I8I32 0b0000 NI 0b1111 IMP @@ -999,7 +1014,10 @@ Enum 34 B16F32 0b0 NI 0b1 IMP EndEnum -Res0 33 +Enum 33 BI32I32 + 0b0 NI + 0b1 IMP +EndEnum Enum 32 F32F32 0b0 NI 0b1 IMP @@ -1599,7 +1617,8 @@ EndSysreg SysregFields SMCR_ELx Res0 63:32 Field 31 FA64 -Res0 30:9 +Field 30 EZT0 +Res0 29:9 Raz 8:4 Field 3:0 LEN EndSysregFields |