From bc9bbb78801af1bd155cda6a30ecaa720ef2bab7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Oct 2023 11:24:33 +0100 Subject: arm64: Explicitly save/restore CPACR when probing SVE and SME When a CPUs onlined we first probe for supported features and propetites, and then we subsequently enable features that have been detected. This is a little problematic for SVE and SME, as some properties (e.g. vector lengths) cannot be probed while they are disabled. Due to this, the code probing for SVE properties has to enable SVE for EL1 prior to proving, and the code probing for SME properties has to enable SME for EL1 prior to probing. We never disable SVE or SME for EL1 after probing. It would be a little nicer to transiently enable SVE and SME during probing, leaving them both disabled unless explicitly enabled, as this would make it much easier to catch unintentional usage (e.g. when they are not present system-wide). This patch reworks the SVE and SME feature probing code to only transiently enable support at EL1, disabling after probing is complete. Signed-off-by: Mark Rutland Cc: Suzuki K Poulose Cc: Will Deacon Reviewed-by: Mark Brown Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/arm64/kernel/fpsimd.c') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 91e44ac7150f..601b973f90ad 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1174,7 +1174,7 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE * vector length. * - * Use only if SVE is present. + * Use only if SVE is present and enabled. * This function clobbers the SVE vector length. */ u64 read_zcr_features(void) @@ -1183,7 +1183,6 @@ u64 read_zcr_features(void) * Set the maximum possible VL, and write zeroes to all other * bits to see if they stick. */ - sve_kernel_enable(NULL); write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1); /* Return LEN value that would be written to get the maximum VL */ @@ -1337,13 +1336,11 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) * Read the pseudo-SMCR used by cpufeatures to identify the supported * vector length. * - * Use only if SME is present. + * Use only if SME is present and enabled. * This function clobbers the SME vector length. */ u64 read_smcr_features(void) { - sme_kernel_enable(NULL); - /* * Set the maximum possible VL. */ -- cgit From 907722917002ed06b8f75e119b83842d5f63b15e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Oct 2023 11:24:34 +0100 Subject: arm64: Use build-time assertions for cpucap ordering Both sme2_kernel_enable() and fa64_kernel_enable() need to run after sme_kernel_enable(). This happens to be true today as ARM64_SME has a lower index than either ARM64_SME2 or ARM64_SME_FA64, and both functions have a comment to this effect. It would be nicer to have a build-time assertion like we for for can_use_gic_priorities() and has_gic_prio_relaxed_sync(), as that way it will be harder to miss any potential breakage. This patch replaces the comments with build-time assertions. Signed-off-by: Mark Rutland Cc: Mark Brown Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/arm64/kernel/fpsimd.c') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 601b973f90ad..48338cf8f90b 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1310,23 +1310,21 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) isb(); } -/* - * This must be called after sme_kernel_enable(), we rely on the - * feature table being sorted to ensure this. - */ void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) { + /* This must be enabled after SME */ + BUILD_BUG_ON(ARM64_SME2 <= ARM64_SME); + /* Allow use of ZT0 */ write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK, SYS_SMCR_EL1); } -/* - * This must be called after sme_kernel_enable(), we rely on the - * feature table being sorted to ensure this. - */ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) { + /* This must be enabled after SME */ + BUILD_BUG_ON(ARM64_SME_FA64 <= ARM64_SME); + /* Allow use of FA64 */ write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK, SYS_SMCR_EL1); -- cgit From 14567ba42c5747f50584eb463ac8029f2a30e0d1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Oct 2023 11:24:35 +0100 Subject: arm64: Rename SVE/SME cpu_enable functions The arm64_cpu_capabilities::cpu_enable() callbacks for SVE, SME, SME2, and FA64 are named with an unusual "${feature}_kernel_enable" pattern rather than the much more common "cpu_enable_${feature}". Now that we only use these as cpu_enable() callbacks, it would be nice to have them match the usual scheme. This patch renames the cpu_enable() callbacks to match this scheme. At the same time, the comment above cpu_enable_sve() is removed for consistency with the other cpu_enable() callbacks. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch/arm64/kernel/fpsimd.c') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 48338cf8f90b..45ea9cabbaa4 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1160,11 +1160,7 @@ fail: panic("Cannot allocate percpu memory for EFI SVE save/restore"); } -/* - * Enable SVE for EL1. - * Intended for use by the cpufeatures code during CPU boot. - */ -void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) +void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p) { write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1); isb(); @@ -1295,7 +1291,7 @@ static void sme_free(struct task_struct *task) task->thread.sme_state = NULL; } -void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) +void cpu_enable_sme(const struct arm64_cpu_capabilities *__always_unused p) { /* Set priority for all PEs to architecturally defined minimum */ write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK, @@ -1310,7 +1306,7 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) isb(); } -void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) +void cpu_enable_sme2(const struct arm64_cpu_capabilities *__always_unused p) { /* This must be enabled after SME */ BUILD_BUG_ON(ARM64_SME2 <= ARM64_SME); @@ -1320,7 +1316,7 @@ void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) SYS_SMCR_EL1); } -void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) +void cpu_enable_fa64(const struct arm64_cpu_capabilities *__always_unused p) { /* This must be enabled after SME */ BUILD_BUG_ON(ARM64_SME_FA64 <= ARM64_SME); -- cgit From 34f66c4c4d5518c11bfb7d10defff8f814c9f28a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Oct 2023 11:24:36 +0100 Subject: arm64: Use a positive cpucap for FP/SIMD Currently we have a negative cpucap which describes the *absence* of FP/SIMD rather than *presence* of FP/SIMD. This largely works, but is somewhat awkward relative to other cpucaps that describe the presence of a feature, and it would be nicer to have a cpucap which describes the presence of FP/SIMD: * This will allow the cpucap to be treated as a standard ARM64_CPUCAP_SYSTEM_FEATURE, which can be detected with the standard has_cpuid_feature() function and ARM64_CPUID_FIELDS() description. * This ensures that the cpucap will only transition from not-present to present, reducing the risk of unintentional and/or unsafe usage of FP/SIMD before cpucaps are finalized. * This will allow using arm64_cpu_capabilities::cpu_enable() to enable the use of FP/SIMD later, with FP/SIMD being disabled at boot time otherwise. This will ensure that any unintentional and/or unsafe usage of FP/SIMD prior to this is trapped, and will ensure that FP/SIMD is never unintentionally enabled for userspace in mismatched big.LITTLE systems. This patch replaces the negative ARM64_HAS_NO_FPSIMD cpucap with a positive ARM64_HAS_FPSIMD cpucap, making changes as described above. Note that as FP/SIMD will now be trapped when not supported system-wide, do_fpsimd_acc() must handle these traps in the same way as for SVE and SME. The commentary in fpsimd_restore_current_state() is updated to describe the new scheme. No users of system_supports_fpsimd() need to know that FP/SIMD is available prior to alternatives being patched, so this is updated to use alternative_has_cap_likely() to check for the ARM64_HAS_FPSIMD cpucap, without generating code to test the system_cpucaps bitmap. Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) (limited to 'arch/arm64/kernel/fpsimd.c') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 45ea9cabbaa4..d286bcfc3f41 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1520,8 +1520,17 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) */ void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs) { - /* TODO: implement lazy context saving/restoring */ - WARN_ON(1); + /* Even if we chose not to use FPSIMD, the hardware could still trap: */ + if (!system_supports_fpsimd()) { + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); + return; + } + + /* + * When FPSIMD is enabled, we should never take a trap unless something + * has gone very wrong. + */ + BUG(); } /* @@ -1762,13 +1771,23 @@ void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state) void fpsimd_restore_current_state(void) { /* - * For the tasks that were created before we detected the absence of - * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(), - * e.g, init. This could be then inherited by the children processes. - * If we later detect that the system doesn't support FP/SIMD, - * we must clear the flag for all the tasks to indicate that the - * FPSTATE is clean (as we can't have one) to avoid looping for ever in - * do_notify_resume(). + * TIF_FOREIGN_FPSTATE is set on the init task and copied by + * arch_dup_task_struct() regardless of whether FP/SIMD is detected. + * Thus user threads can have this set even when FP/SIMD hasn't been + * detected. + * + * When FP/SIMD is detected, begin_new_exec() will set + * TIF_FOREIGN_FPSTATE via flush_thread() -> fpsimd_flush_thread(), + * and fpsimd_thread_switch() will set TIF_FOREIGN_FPSTATE when + * switching tasks. We detect FP/SIMD before we exec the first user + * process, ensuring this has TIF_FOREIGN_FPSTATE set and + * do_notify_resume() will call fpsimd_restore_current_state() to + * install the user FP/SIMD context. + * + * When FP/SIMD is not detected, nothing else will clear or set + * TIF_FOREIGN_FPSTATE prior to the first return to userspace, and + * we must clear TIF_FOREIGN_FPSTATE to avoid do_notify_resume() + * looping forever calling fpsimd_restore_current_state(). */ if (!system_supports_fpsimd()) { clear_thread_flag(TIF_FOREIGN_FPSTATE); @@ -2101,6 +2120,13 @@ static inline void fpsimd_hotplug_init(void) static inline void fpsimd_hotplug_init(void) { } #endif +void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__always_unused p) +{ + unsigned long enable = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN; + write_sysreg(read_sysreg(CPACR_EL1) | enable, CPACR_EL1); + isb(); +} + /* * FP/SIMD support code initialisation. */ -- cgit From a76521d160284a1edcb866e6fc6727d9f10ccb68 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Oct 2023 11:24:52 +0100 Subject: arm64: Avoid cpus_have_const_cap() for ARM64_{SVE,SME,SME2,FA64} In system_supports_{sve,sme,sme2,fa64}() we use cpus_have_const_cap() to check for the relevant cpucaps, but this is only necessary so that sve_setup() and sme_setup() can run prior to alternatives being patched, and otherwise alternative_has_cap_*() would be preferable. For historical reasons, cpus_have_const_cap() is more complicated than it needs to be. Before cpucaps are finalized, it will perform a bitmap test of the system_cpucaps bitmap, and once cpucaps are finalized it will use an alternative branch. This used to be necessary to handle some race conditions in the window between cpucap detection and the subsequent patching of alternatives and static branches, where different branches could be out-of-sync with one another (or w.r.t. alternative sequences). Now that we use alternative branches instead of static branches, these are all patched atomically w.r.t. one another, and there are only a handful of cases that need special care in the window between cpucap detection and alternative patching. Due to the above, it would be nice to remove cpus_have_const_cap(), and migrate callers over to alternative_has_cap_*(), cpus_have_final_cap(), or cpus_have_cap() depending on when their requirements. This will remove redundant instructions and improve code generation, and will make it easier to determine how each callsite will behave before, during, and after alternative patching. All of system_supports_{sve,sme,sme2,fa64}() will return false prior to system cpucaps being detected. In the window between system cpucaps being detected and patching alternatives, we need system_supports_sve() and system_supports_sme() to run to initialize SVE and SME properties, but all other users of system_supports_{sve,sme,sme2,fa64}() don't depend on the relevant cpucap becoming true until alternatives are patched: * No KVM code runs until after alternatives are patched, and so this can safely use cpus_have_final_cap() or alternative_has_cap_*(). * The cpuid_cpu_online() callback in arch/arm64/kernel/cpuinfo.c is registered later from cpuinfo_regs_init() as a device_initcall, and so this can safely use cpus_have_final_cap() or alternative_has_cap_*(). * The entry, signal, and ptrace code isn't reachable until userspace has run, and so this can safely use cpus_have_final_cap() or alternative_has_cap_*(). * Currently perf_reg_validate() will un-reserve the PERF_REG_ARM64_VG pseudo-register before alternatives are patched, and before sve_setup() has run. If a sampling event is created early enough, this would allow perf_ext_reg_value() to sample (the as-yet uninitialized) thread_struct::vl[] prior to alternatives being patched. It would be preferable to defer this until alternatives are patched, and this can safely use alternative_has_cap_*(). * The context-switch code will run during this window as part of stop_machine() used during alternatives_patch_all(), and potentially for other work if other kernel threads are created early. No threads require the use of SVE/SME/SME2/FA64 prior to alternatives being patched, and it would be preferable for the related context-switch logic to take effect after alternatives are patched so that ths is guaranteed to see a consistent system-wide state (e.g. anything initialized by sve_setup() and sme_setup(). This can safely ues alternative_has_cap_*(). This patch replaces the use of cpus_have_const_cap() with alternative_has_cap_unlikely(), which will avoid generating code to test the system_cpucaps bitmap and should be better for all subsequent calls at runtime. The sve_setup() and sme_setup() functions are modified to use cpus_have_cap() directly so that they can observe the cpucaps being set prior to alternatives being patched. Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kernel/fpsimd.c') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index d286bcfc3f41..d0d28bc069d2 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1192,7 +1192,7 @@ void __init sve_setup(void) DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); unsigned long b; - if (!system_supports_sve()) + if (!cpus_have_cap(ARM64_SVE)) return; /* @@ -1351,7 +1351,7 @@ void __init sme_setup(void) u64 smcr; int min_bit; - if (!system_supports_sme()) + if (!cpus_have_cap(ARM64_SME)) return; /* -- cgit