summaryrefslogtreecommitdiff
path: root/arch/arm64/include
diff options
context:
space:
mode:
authorDave Martin <Dave.Martin@arm.com>2017-08-03 17:23:23 +0100
committerCatalin Marinas <catalin.marinas@arm.com>2017-08-04 15:00:57 +0100
commitcb84d11e1625aa3a081d898ca2640bf3a9ca0e96 (patch)
tree514af7380143b4950fd3a158d5c0b0d2a90cc56b /arch/arm64/include
parent4328825d4fdc185d365d8e858cace8b324198a70 (diff)
arm64: neon: Remove support for nested or hardirq kernel-mode NEON
Support for kernel-mode NEON to be nested and/or used in hardirq context adds significant complexity, and the benefits may be marginal. In practice, kernel-mode NEON is not used in hardirq context, and is rarely used in softirq context (by certain mac80211 drivers). This patch implements an arm64 may_use_simd() function to allow clients to check whether kernel-mode NEON is usable in the current context, and simplifies kernel_neon_{begin,end}() to handle only saving of the task FPSIMD state (if any). Without nesting, there is no other state to save. The partial fpsimd save/restore functions become redundant as a result of these changes, so they are removed too. The save/restore model is changed to operate directly on task_struct without additional percpu storage. This simplifies the code and saves a bit of memory, but means that softirqs must now be disabled when manipulating the task fpsimd state from task context: correspondingly, preempt_{en,dis}sable() calls are upgraded to local_bh_{en,dis}able() as appropriate. fpsimd_thread_switch() already runs with hardirqs disabled and so is already protected from softirqs. These changes should make it easier to support kernel-mode NEON in the presence of the Scalable Vector extension in the future. Signed-off-by: Dave Martin <Dave.Martin@arm.com> Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Diffstat (limited to 'arch/arm64/include')
-rw-r--r--arch/arm64/include/asm/fpsimd.h14
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h56
-rw-r--r--arch/arm64/include/asm/neon.h4
-rw-r--r--arch/arm64/include/asm/simd.h33
4 files changed, 33 insertions, 74 deletions
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 5155f21e15e3..410c48163c6a 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -41,16 +41,6 @@ struct fpsimd_state {
unsigned int cpu;
};
-/*
- * Struct for stacking the bottom 'n' FP/SIMD registers.
- */
-struct fpsimd_partial_state {
- u32 fpsr;
- u32 fpcr;
- u32 num_regs;
- __uint128_t vregs[32];
-};
-
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */
@@ -77,10 +67,6 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
extern void fpsimd_flush_task_state(struct task_struct *target);
-extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
- u32 num_regs);
-extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
-
/* For use by EFI runtime services calls only */
extern void __efi_fpsimd_begin(void);
extern void __efi_fpsimd_end(void);
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index a2daf1293028..0f5fdd388b0d 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -75,59 +75,3 @@
ldr w\tmpnr, [\state, #16 * 2 + 4]
fpsimd_restore_fpcr x\tmpnr, \state
.endm
-
-.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
- mrs x\tmpnr1, fpsr
- str w\numnr, [\state, #8]
- mrs x\tmpnr2, fpcr
- stp w\tmpnr1, w\tmpnr2, [\state]
- adr x\tmpnr1, 0f
- add \state, \state, x\numnr, lsl #4
- sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
- br x\tmpnr1
- stp q30, q31, [\state, #-16 * 30 - 16]
- stp q28, q29, [\state, #-16 * 28 - 16]
- stp q26, q27, [\state, #-16 * 26 - 16]
- stp q24, q25, [\state, #-16 * 24 - 16]
- stp q22, q23, [\state, #-16 * 22 - 16]
- stp q20, q21, [\state, #-16 * 20 - 16]
- stp q18, q19, [\state, #-16 * 18 - 16]
- stp q16, q17, [\state, #-16 * 16 - 16]
- stp q14, q15, [\state, #-16 * 14 - 16]
- stp q12, q13, [\state, #-16 * 12 - 16]
- stp q10, q11, [\state, #-16 * 10 - 16]
- stp q8, q9, [\state, #-16 * 8 - 16]
- stp q6, q7, [\state, #-16 * 6 - 16]
- stp q4, q5, [\state, #-16 * 4 - 16]
- stp q2, q3, [\state, #-16 * 2 - 16]
- stp q0, q1, [\state, #-16 * 0 - 16]
-0:
-.endm
-
-.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
- ldp w\tmpnr1, w\tmpnr2, [\state]
- msr fpsr, x\tmpnr1
- fpsimd_restore_fpcr x\tmpnr2, x\tmpnr1
- adr x\tmpnr1, 0f
- ldr w\tmpnr2, [\state, #8]
- add \state, \state, x\tmpnr2, lsl #4
- sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
- br x\tmpnr1
- ldp q30, q31, [\state, #-16 * 30 - 16]
- ldp q28, q29, [\state, #-16 * 28 - 16]
- ldp q26, q27, [\state, #-16 * 26 - 16]
- ldp q24, q25, [\state, #-16 * 24 - 16]
- ldp q22, q23, [\state, #-16 * 22 - 16]
- ldp q20, q21, [\state, #-16 * 20 - 16]
- ldp q18, q19, [\state, #-16 * 18 - 16]
- ldp q16, q17, [\state, #-16 * 16 - 16]
- ldp q14, q15, [\state, #-16 * 14 - 16]
- ldp q12, q13, [\state, #-16 * 12 - 16]
- ldp q10, q11, [\state, #-16 * 10 - 16]
- ldp q8, q9, [\state, #-16 * 8 - 16]
- ldp q6, q7, [\state, #-16 * 6 - 16]
- ldp q4, q5, [\state, #-16 * 4 - 16]
- ldp q2, q3, [\state, #-16 * 2 - 16]
- ldp q0, q1, [\state, #-16 * 0 - 16]
-0:
-.endm
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index 5368bd04fe7b..fb9d137256a6 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -16,9 +16,7 @@
#define cpu_has_neon() system_supports_fpsimd()
-#define kernel_neon_begin() kernel_neon_begin_partial(32)
-
-void kernel_neon_begin_partial(u32 num_regs);
+void kernel_neon_begin(void);
void kernel_neon_end(void);
#endif /* ! __ASM_NEON_H */
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 96959b52afae..5a1a927b74a2 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -9,15 +9,46 @@
#ifndef __ASM_SIMD_H
#define __ASM_SIMD_H
+#include <linux/compiler.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
#include <linux/types.h>
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+DECLARE_PER_CPU(bool, kernel_neon_busy);
+
/*
* may_use_simd - whether it is allowable at this time to issue SIMD
* instructions or access the SIMD register file
+ *
+ * Callers must not assume that the result remains true beyond the next
+ * preempt_enable() or return from softirq context.
*/
static __must_check inline bool may_use_simd(void)
{
- return true;
+ /*
+ * The raw_cpu_read() is racy if called with preemption enabled.
+ * This is not a bug: kernel_neon_busy is only set when
+ * preemption is disabled, so we cannot migrate to another CPU
+ * while it is set, nor can we migrate to a CPU where it is set.
+ * So, if we find it clear on some CPU then we're guaranteed to
+ * find it clear on any CPU we could migrate to.
+ *
+ * If we are in between kernel_neon_begin()...kernel_neon_end(),
+ * the flag will be set, but preemption is also disabled, so we
+ * can't migrate to another CPU and spuriously see it become
+ * false.
+ */
+ return !in_irq() && !in_nmi() && !raw_cpu_read(kernel_neon_busy);
}
+#else /* ! CONFIG_KERNEL_MODE_NEON */
+
+static __must_check inline bool may_use_simd(void) {
+ return false;
+}
+
+#endif /* ! CONFIG_KERNEL_MODE_NEON */
+
#endif