summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorWill Deacon <will@kernel.org>2021-06-24 14:07:04 +0100
committerWill Deacon <will@kernel.org>2021-06-24 14:07:04 +0100
commit3d1bf78c7b13a0ad88e77dce94df88400f47e0dd (patch)
tree8175a9c64a9dbfb47f73dd059a9f8233a1ce14ff /arch
parenta4a49140ae84e74f5d831c2416fe4860c8ff9d34 (diff)
parentad4711f962e08eff8d6e9b03f9670b1af6ea9395 (diff)
Merge branch 'for-next/sve' into for-next/core
Optimise SVE switching for CPUs with 128-bit implementations. * for-next/sve: arm64/sve: Skip flushing Z registers with 128 bit vectors arm64/sve: Use the sve_flush macros in sve_load_from_fpsimd_state() arm64/sve: Split _sve_flush macro into separate Z and predicate flushes
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/include/asm/fpsimd.h2
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h4
-rw-r--r--arch/arm64/kernel/entry-fpsimd.S22
-rw-r--r--arch/arm64/kernel/fpsimd.c6
4 files changed, 23 insertions, 11 deletions
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 2599504674b5..c072161d5c65 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -69,7 +69,7 @@ static inline void *sve_pffr(struct thread_struct *thread)
extern void sve_save_state(void *state, u32 *pfpsr);
extern void sve_load_state(void const *state, u32 const *pfpsr,
unsigned long vq_minus_1);
-extern void sve_flush_live(void);
+extern void sve_flush_live(unsigned long vq_minus_1);
extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state,
unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index a2563992d2dc..059204477ce6 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -213,8 +213,10 @@
mov v\nz\().16b, v\nz\().16b
.endm
-.macro sve_flush
+.macro sve_flush_z
_for n, 0, 31, _sve_flush_z \n
+.endm
+.macro sve_flush_p_ffr
_for n, 0, 15, _sve_pfalse \n
_sve_wrffr 0
.endm
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 3ecec60d3295..0a7a64753878 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -63,16 +63,24 @@ SYM_FUNC_END(sve_set_vq)
* and the rest zeroed. All the other SVE registers will be zeroed.
*/
SYM_FUNC_START(sve_load_from_fpsimd_state)
- sve_load_vq x1, x2, x3
- fpsimd_restore x0, 8
- _for n, 0, 15, _sve_pfalse \n
- _sve_wrffr 0
- ret
+ sve_load_vq x1, x2, x3
+ fpsimd_restore x0, 8
+ sve_flush_p_ffr
+ ret
SYM_FUNC_END(sve_load_from_fpsimd_state)
-/* Zero all SVE registers but the first 128-bits of each vector */
+/*
+ * Zero all SVE registers but the first 128-bits of each vector
+ *
+ * VQ must already be configured by caller, any further updates of VQ
+ * will need to ensure that the register state remains valid.
+ *
+ * x0 = VQ - 1
+ */
SYM_FUNC_START(sve_flush_live)
- sve_flush
+ cbz x0, 1f // A VQ-1 of 0 is 128 bits so no extra Z state
+ sve_flush_z
+1: sve_flush_p_ffr
ret
SYM_FUNC_END(sve_flush_live)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index ad3dd34a83cf..e57b23f95284 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -957,8 +957,10 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
* disabling the trap, otherwise update our in-memory copy.
*/
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
- sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1);
- sve_flush_live();
+ unsigned long vq_minus_one =
+ sve_vq_from_vl(current->thread.sve_vl) - 1;
+ sve_set_vq(vq_minus_one);
+ sve_flush_live(vq_minus_one);
fpsimd_bind_task_to_cpu();
} else {
fpsimd_to_sve(current);