summaryrefslogtreecommitdiff
path: root/arch/arm/vfp
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/vfp')
-rw-r--r--arch/arm/vfp/Makefile4
-rw-r--r--arch/arm/vfp/entry.S56
-rw-r--r--arch/arm/vfp/vfp.h1
-rw-r--r--arch/arm/vfp/vfphw.S236
-rw-r--r--arch/arm/vfp/vfpinstr.h27
-rw-r--r--arch/arm/vfp/vfpmodule.c326
6 files changed, 336 insertions, 314 deletions
diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile
index 9975b63ac3b0..dfd64bc2b2fb 100644
--- a/arch/arm/vfp/Makefile
+++ b/arch/arm/vfp/Makefile
@@ -8,6 +8,4 @@
# ccflags-y := -DDEBUG
# asflags-y := -DDEBUG
-KBUILD_AFLAGS :=$(KBUILD_AFLAGS:-msoft-float=-Wa,-mfpu=softvfp+vfp -mfloat-abi=soft)
-
-obj-y += vfpmodule.o entry.o vfphw.o vfpsingle.o vfpdouble.o
+obj-y += vfpmodule.o vfphw.o vfpsingle.o vfpdouble.o
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
deleted file mode 100644
index 0186cf9da890..000000000000
--- a/arch/arm/vfp/entry.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/arm/vfp/entry.S
- *
- * Copyright (C) 2004 ARM Limited.
- * Written by Deep Blue Solutions Limited.
- */
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/thread_info.h>
-#include <asm/vfpmacros.h>
-#include <asm/assembler.h>
-#include <asm/asm-offsets.h>
-
-@ VFP entry point.
-@
-@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb)
-@ r2 = PC value to resume execution after successful emulation
-@ r9 = normal "successful" return address
-@ r10 = this threads thread_info structure
-@ lr = unrecognised instruction return address
-@ IRQs enabled.
-@
-ENTRY(do_vfp)
- inc_preempt_count r10, r4
- ldr r4, .LCvfp
- ldr r11, [r10, #TI_CPU] @ CPU number
- add r10, r10, #TI_VFPSTATE @ r10 = workspace
- ldr pc, [r4] @ call VFP entry point
-ENDPROC(do_vfp)
-
-ENTRY(vfp_null_entry)
- dec_preempt_count_ti r10, r4
- ret lr
-ENDPROC(vfp_null_entry)
-
- .align 2
-.LCvfp:
- .word vfp_vector
-
-@ This code is called if the VFP does not exist. It needs to flag the
-@ failure to the VFP initialisation code.
-
- __INIT
-ENTRY(vfp_testing_entry)
- dec_preempt_count_ti r10, r4
- ldr r0, VFP_arch_address
- str r0, [r0] @ set to non-zero value
- ret r9 @ we have handled the fault
-ENDPROC(vfp_testing_entry)
-
- .align 2
-VFP_arch_address:
- .word VFP_arch
-
- __FINIT
diff --git a/arch/arm/vfp/vfp.h b/arch/arm/vfp/vfp.h
index 5cd6d5053271..e43a630f8a16 100644
--- a/arch/arm/vfp/vfp.h
+++ b/arch/arm/vfp/vfp.h
@@ -375,3 +375,4 @@ struct op {
};
asmlinkage void vfp_save_state(void *location, u32 fpexc);
+asmlinkage u32 vfp_load_state(const void *location);
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index b2e560290860..d5a03f3c10c5 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -4,12 +4,6 @@
*
* Copyright (C) 2004 ARM Limited.
* Written by Deep Blue Solutions Limited.
- *
- * This code is called from the kernel's undefined instruction trap.
- * r9 holds the return address for successful handling.
- * lr holds the return address for unrecognised instructions.
- * r10 points at the start of the private FP workspace in the thread structure
- * sp points to a struct pt_regs (as defined in include/asm/proc/ptrace.h)
*/
#include <linux/init.h>
#include <linux/linkage.h>
@@ -19,43 +13,12 @@
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
- .macro DBGSTR, str
-#ifdef DEBUG
- stmfd sp!, {r0-r3, ip, lr}
- ldr r0, =1f
- bl printk
- ldmfd sp!, {r0-r3, ip, lr}
-
- .pushsection .rodata, "a"
-1: .ascii KERN_DEBUG "VFP: \str\n"
- .byte 0
- .previous
-#endif
- .endm
-
.macro DBGSTR1, str, arg
#ifdef DEBUG
stmfd sp!, {r0-r3, ip, lr}
mov r1, \arg
ldr r0, =1f
- bl printk
- ldmfd sp!, {r0-r3, ip, lr}
-
- .pushsection .rodata, "a"
-1: .ascii KERN_DEBUG "VFP: \str\n"
- .byte 0
- .previous
-#endif
- .endm
-
- .macro DBGSTR3, str, arg1, arg2, arg3
-#ifdef DEBUG
- stmfd sp!, {r0-r3, ip, lr}
- mov r3, \arg3
- mov r2, \arg2
- mov r1, \arg1
- ldr r0, =1f
- bl printk
+ bl _printk
ldmfd sp!, {r0-r3, ip, lr}
.pushsection .rodata, "a"
@@ -65,162 +28,25 @@
#endif
.endm
-
-@ VFP hardware support entry point.
-@
-@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb)
-@ r2 = PC value to resume execution after successful emulation
-@ r9 = normal "successful" return address
-@ r10 = vfp_state union
-@ r11 = CPU number
-@ lr = unrecognised instruction return address
-@ IRQs enabled.
-ENTRY(vfp_support_entry)
- DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10
-
- ldr r3, [sp, #S_PSR] @ Neither lazy restore nor FP exceptions
- and r3, r3, #MODE_MASK @ are supported in kernel mode
- teq r3, #USR_MODE
- bne vfp_kmode_exception @ Returns through lr
-
- VFPFMRX r1, FPEXC @ Is the VFP enabled?
- DBGSTR1 "fpexc %08x", r1
- tst r1, #FPEXC_EN
- bne look_for_VFP_exceptions @ VFP is already enabled
-
- DBGSTR1 "enable %x", r10
- ldr r3, vfp_current_hw_state_address
- orr r1, r1, #FPEXC_EN @ user FPEXC has the enable bit set
- ldr r4, [r3, r11, lsl #2] @ vfp_current_hw_state pointer
- bic r5, r1, #FPEXC_EX @ make sure exceptions are disabled
- cmp r4, r10 @ this thread owns the hw context?
-#ifndef CONFIG_SMP
- @ For UP, checking that this thread owns the hw context is
- @ sufficient to determine that the hardware state is valid.
- beq vfp_hw_state_valid
-
- @ On UP, we lazily save the VFP context. As a different
- @ thread wants ownership of the VFP hardware, save the old
- @ state if there was a previous (valid) owner.
-
- VFPFMXR FPEXC, r5 @ enable VFP, disable any pending
- @ exceptions, so we can get at the
- @ rest of it
-
- DBGSTR1 "save old state %p", r4
- cmp r4, #0 @ if the vfp_current_hw_state is NULL
- beq vfp_reload_hw @ then the hw state needs reloading
- VFPFSTMIA r4, r5 @ save the working registers
- VFPFMRX r5, FPSCR @ current status
-#ifndef CONFIG_CPU_FEROCEON
- tst r1, #FPEXC_EX @ is there additional state to save?
- beq 1f
- VFPFMRX r6, FPINST @ FPINST (only if FPEXC.EX is set)
- tst r1, #FPEXC_FP2V @ is there an FPINST2 to read?
- beq 1f
- VFPFMRX r8, FPINST2 @ FPINST2 if needed (and present)
-1:
-#endif
- stmia r4, {r1, r5, r6, r8} @ save FPEXC, FPSCR, FPINST, FPINST2
-vfp_reload_hw:
-
-#else
- @ For SMP, if this thread does not own the hw context, then we
- @ need to reload it. No need to save the old state as on SMP,
- @ we always save the state when we switch away from a thread.
- bne vfp_reload_hw
-
- @ This thread has ownership of the current hardware context.
- @ However, it may have been migrated to another CPU, in which
- @ case the saved state is newer than the hardware context.
- @ Check this by looking at the CPU number which the state was
- @ last loaded onto.
- ldr ip, [r10, #VFP_CPU]
- teq ip, r11
- beq vfp_hw_state_valid
-
-vfp_reload_hw:
- @ We're loading this threads state into the VFP hardware. Update
- @ the CPU number which contains the most up to date VFP context.
- str r11, [r10, #VFP_CPU]
-
- VFPFMXR FPEXC, r5 @ enable VFP, disable any pending
- @ exceptions, so we can get at the
- @ rest of it
-#endif
-
- DBGSTR1 "load state %p", r10
- str r10, [r3, r11, lsl #2] @ update the vfp_current_hw_state pointer
+ENTRY(vfp_load_state)
+ @ Load the current VFP state
+ @ r0 - load location
+ @ returns FPEXC
+ DBGSTR1 "load VFP state %p", r0
@ Load the saved state back into the VFP
- VFPFLDMIA r10, r5 @ reload the working registers while
+ VFPFLDMIA r0, r1 @ reload the working registers while
@ FPEXC is in a safe state
- ldmia r10, {r1, r5, r6, r8} @ load FPEXC, FPSCR, FPINST, FPINST2
-#ifndef CONFIG_CPU_FEROCEON
- tst r1, #FPEXC_EX @ is there additional state to restore?
+ ldmia r0, {r0-r3} @ load FPEXC, FPSCR, FPINST, FPINST2
+ tst r0, #FPEXC_EX @ is there additional state to restore?
beq 1f
- VFPFMXR FPINST, r6 @ restore FPINST (only if FPEXC.EX is set)
- tst r1, #FPEXC_FP2V @ is there an FPINST2 to write?
+ VFPFMXR FPINST, r2 @ restore FPINST (only if FPEXC.EX is set)
+ tst r0, #FPEXC_FP2V @ is there an FPINST2 to write?
beq 1f
- VFPFMXR FPINST2, r8 @ FPINST2 if needed (and present)
+ VFPFMXR FPINST2, r3 @ FPINST2 if needed (and present)
1:
-#endif
- VFPFMXR FPSCR, r5 @ restore status
-
-@ The context stored in the VFP hardware is up to date with this thread
-vfp_hw_state_valid:
- tst r1, #FPEXC_EX
- bne process_exception @ might as well handle the pending
- @ exception before retrying branch
- @ out before setting an FPEXC that
- @ stops us reading stuff
- VFPFMXR FPEXC, r1 @ Restore FPEXC last
- sub r2, r2, #4 @ Retry current instruction - if Thumb
- str r2, [sp, #S_PC] @ mode it's two 16-bit instructions,
- @ else it's one 32-bit instruction, so
- @ always subtract 4 from the following
- @ instruction address.
- dec_preempt_count_ti r10, r4
- ret r9 @ we think we have handled things
-
-
-look_for_VFP_exceptions:
- @ Check for synchronous or asynchronous exception
- tst r1, #FPEXC_EX | FPEXC_DEX
- bne process_exception
- @ On some implementations of the VFP subarch 1, setting FPSCR.IXE
- @ causes all the CDP instructions to be bounced synchronously without
- @ setting the FPEXC.EX bit
- VFPFMRX r5, FPSCR
- tst r5, #FPSCR_IXE
- bne process_exception
-
- tst r5, #FPSCR_LENGTH_MASK
- beq skip
- orr r1, r1, #FPEXC_DEX
- b process_exception
-skip:
-
- @ Fall into hand on to next handler - appropriate coproc instr
- @ not recognised by VFP
-
- DBGSTR "not VFP"
- dec_preempt_count_ti r10, r4
+ VFPFMXR FPSCR, r1 @ restore status
ret lr
-
-process_exception:
- DBGSTR "bounce"
- mov r2, sp @ nothing stacked - regdump is at TOS
- mov lr, r9 @ setup for a return to the user code.
-
- @ Now call the C code to package up the bounce to the support code
- @ r0 holds the trigger instruction
- @ r1 holds the FPEXC value
- @ r2 pointer to register dump
- b VFP_bounce @ we have handled this - the support
- @ code will raise an exception if
- @ required. If not, the user code will
- @ retry the faulted instruction
-ENDPROC(vfp_support_entry)
+ENDPROC(vfp_load_state)
ENTRY(vfp_save_state)
@ Save the current VFP state
@@ -240,10 +66,6 @@ ENTRY(vfp_save_state)
ret lr
ENDPROC(vfp_save_state)
- .align
-vfp_current_hw_state_address:
- .word vfp_current_hw_state
-
.macro tbl_branch, base, tmp, shift
#ifdef CONFIG_THUMB2_KERNEL
adr \tmp, 1f
@@ -258,11 +80,14 @@ vfp_current_hw_state_address:
ENTRY(vfp_get_float)
tbl_branch r0, r3, #3
+ .fpu vfpv2
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
-1: mrc p10, 0, r0, c\dr, c0, 0 @ fmrs r0, s0
+1: vmov r0, s\dr
ret lr
.org 1b + 8
-1: mrc p10, 0, r0, c\dr, c0, 4 @ fmrs r0, s1
+ .endr
+ .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+1: vmov r0, s\dr
ret lr
.org 1b + 8
.endr
@@ -270,11 +95,14 @@ ENDPROC(vfp_get_float)
ENTRY(vfp_put_float)
tbl_branch r1, r3, #3
+ .fpu vfpv2
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
-1: mcr p10, 0, r0, c\dr, c0, 0 @ fmsr r0, s0
+1: vmov s\dr, r0
ret lr
.org 1b + 8
-1: mcr p10, 0, r0, c\dr, c0, 4 @ fmsr r0, s1
+ .endr
+ .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+1: vmov s\dr, r0
ret lr
.org 1b + 8
.endr
@@ -282,15 +110,17 @@ ENDPROC(vfp_put_float)
ENTRY(vfp_get_double)
tbl_branch r0, r3, #3
+ .fpu vfpv2
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
-1: fmrrd r0, r1, d\dr
+1: vmov r0, r1, d\dr
ret lr
.org 1b + 8
.endr
#ifdef CONFIG_VFPv3
@ d16 - d31 registers
- .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
-1: mrrc p11, 3, r0, r1, c\dr @ fmrrd r0, r1, d\dr
+ .fpu vfpv3
+ .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+1: vmov r0, r1, d\dr
ret lr
.org 1b + 8
.endr
@@ -304,15 +134,17 @@ ENDPROC(vfp_get_double)
ENTRY(vfp_put_double)
tbl_branch r2, r3, #3
+ .fpu vfpv2
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
-1: fmdrr d\dr, r0, r1
+1: vmov d\dr, r0, r1
ret lr
.org 1b + 8
.endr
#ifdef CONFIG_VFPv3
+ .fpu vfpv3
@ d16 - d31 registers
- .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
-1: mcrr p11, 3, r0, r1, c\dr @ fmdrr r0, r1, d\dr
+ .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+1: vmov d\dr, r0, r1
ret lr
.org 1b + 8
.endr
diff --git a/arch/arm/vfp/vfpinstr.h b/arch/arm/vfp/vfpinstr.h
index 38dc154e39ff..a2f0c47e0ce7 100644
--- a/arch/arm/vfp/vfpinstr.h
+++ b/arch/arm/vfp/vfpinstr.h
@@ -62,22 +62,19 @@
#define FPSCR_C (1 << 29)
#define FPSCR_V (1 << 28)
-/*
- * Since we aren't building with -mfpu=vfp, we need to code
- * these instructions using their MRC/MCR equivalents.
- */
-#define vfpreg(_vfp_) #_vfp_
-
-#define fmrx(_vfp_) ({ \
- u32 __v; \
- asm("mrc p10, 7, %0, " vfpreg(_vfp_) ", cr0, 0 @ fmrx %0, " #_vfp_ \
- : "=r" (__v) : : "cc"); \
- __v; \
- })
+#define fmrx(_vfp_) ({ \
+ u32 __v; \
+ asm volatile (".fpu vfpv2\n" \
+ "vmrs %0, " #_vfp_ \
+ : "=r" (__v) : : "cc"); \
+ __v; \
+})
-#define fmxr(_vfp_,_var_) \
- asm("mcr p10, 7, %0, " vfpreg(_vfp_) ", cr0, 0 @ fmxr " #_vfp_ ", %0" \
- : : "r" (_var_) : "cc")
+#define fmxr(_vfp_, _var_) ({ \
+ asm volatile (".fpu vfpv2\n" \
+ "vmsr " #_vfp_ ", %0" \
+ : : "r" (_var_) : "cc"); \
+})
u32 vfp_single_cpdo(u32 inst, u32 fpscr);
u32 vfp_single_cprt(u32 inst, u32 fpscr, struct pt_regs *regs);
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 8c9e7f9f0277..e559ad3cd148 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -18,31 +18,31 @@
#include <linux/uaccess.h>
#include <linux/user.h>
#include <linux/export.h>
+#include <linux/perf_event.h>
#include <asm/cp15.h>
#include <asm/cputype.h>
#include <asm/system_info.h>
#include <asm/thread_notify.h>
+#include <asm/traps.h>
#include <asm/vfp.h>
+#include <asm/neon.h>
#include "vfpinstr.h"
#include "vfp.h"
-/*
- * Our undef handlers (in entry.S)
- */
-asmlinkage void vfp_testing_entry(void);
-asmlinkage void vfp_support_entry(void);
-asmlinkage void vfp_null_entry(void);
-
-asmlinkage void (*vfp_vector)(void) = vfp_null_entry;
+static bool have_vfp __ro_after_init;
/*
* Dual-use variable.
* Used in startup: set to non-zero if VFP checks fail
* After startup, holds VFP architecture
*/
-unsigned int VFP_arch;
+static unsigned int VFP_arch;
+
+#ifdef CONFIG_CPU_FEROCEON
+extern unsigned int VFP_arch_feroceon __alias(VFP_arch);
+#endif
/*
* The pointer to the vfpstate structure of the thread which currently
@@ -56,6 +56,34 @@ unsigned int VFP_arch;
union vfp_state *vfp_current_hw_state[NR_CPUS];
/*
+ * Claim ownership of the VFP unit.
+ *
+ * The caller may change VFP registers until vfp_state_release() is called.
+ *
+ * local_bh_disable() is used to disable preemption and to disable VFP
+ * processing in softirq context. On PREEMPT_RT kernels local_bh_disable() is
+ * not sufficient because it only serializes soft interrupt related sections
+ * via a local lock, but stays preemptible. Disabling preemption is the right
+ * choice here as bottom half processing is always in thread context on RT
+ * kernels so it implicitly prevents bottom half processing as well.
+ */
+static void vfp_state_hold(void)
+{
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_disable();
+ else
+ preempt_disable();
+}
+
+static void vfp_state_release(void)
+{
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_enable();
+ else
+ preempt_enable();
+}
+
+/*
* Is 'thread's most up to date state stored in this CPUs hardware?
* Must be called from non-preemptible context.
*/
@@ -140,7 +168,7 @@ static void vfp_thread_copy(struct thread_info *thread)
/*
* When this function is called with the following 'cmd's, the following
* is true while this function is being run:
- * THREAD_NOFTIFY_SWTICH:
+ * THREAD_NOTIFY_SWITCH:
* - the previously running thread will not be scheduled onto another CPU.
* - the next thread to be run (v) will not be running on another CPU.
* - thread->cpu is the local CPU number
@@ -240,7 +268,7 @@ static void vfp_panic(char *reason, u32 inst)
/*
* Process bitmask of exception conditions.
*/
-static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_regs *regs)
+static int vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr)
{
int si_code = 0;
@@ -248,8 +276,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_
if (exceptions == VFP_EXCEPTION_ERROR) {
vfp_panic("unhandled bounce", inst);
- vfp_raise_sigfpe(FPE_FLTINV, regs);
- return;
+ return FPE_FLTINV;
}
/*
@@ -277,8 +304,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_
RAISE(FPSCR_OFC, FPSCR_OFE, FPE_FLTOVF);
RAISE(FPSCR_IOC, FPSCR_IOE, FPE_FLTINV);
- if (si_code)
- vfp_raise_sigfpe(si_code, regs);
+ return si_code;
}
/*
@@ -314,15 +340,18 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs)
* emulate it.
*/
}
+ perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);
return exceptions & ~VFP_NAN_FLAG;
}
/*
* Package up a bounce condition.
*/
-void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
+static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
{
u32 fpscr, orig_fpscr, fpsid, exceptions;
+ int si_code2 = 0;
+ int si_code = 0;
pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc);
@@ -356,21 +385,19 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
}
if (fpexc & FPEXC_EX) {
-#ifndef CONFIG_CPU_FEROCEON
/*
* Asynchronous exception. The instruction is read from FPINST
* and the interrupted instruction has to be restarted.
*/
trigger = fmrx(FPINST);
regs->ARM_pc -= 4;
-#endif
} else if (!(fpexc & FPEXC_DEX)) {
/*
* Illegal combination of bits. It can be caused by an
* unallocated VFP instruction but with FPSCR.IXE set and not
* on VFP subarch 1.
*/
- vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
+ si_code = vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr);
goto exit;
}
@@ -395,7 +422,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
*/
exceptions = vfp_emulate_instruction(trigger, fpscr, regs);
if (exceptions)
- vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
+ si_code2 = vfp_raise_exceptions(exceptions, trigger, orig_fpscr);
/*
* If there isn't a second FP instruction, exit now. Note that
@@ -414,9 +441,13 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
emulate:
exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
if (exceptions)
- vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
- exit:
- preempt_enable();
+ si_code = vfp_raise_exceptions(exceptions, trigger, orig_fpscr);
+exit:
+ vfp_state_release();
+ if (si_code2)
+ vfp_raise_sigfpe(si_code2, regs);
+ if (si_code)
+ vfp_raise_sigfpe(si_code, regs);
}
static void vfp_enable(void *unused)
@@ -436,7 +467,7 @@ static void vfp_enable(void *unused)
* present on all CPUs within a SMP complex. Needs to be called prior to
* vfp_init().
*/
-void vfp_disable(void)
+void __init vfp_disable(void)
{
if (VFP_arch) {
pr_debug("%s: should be called prior to vfp_init\n", __func__);
@@ -515,9 +546,9 @@ static inline void vfp_pm_init(void) { }
*/
void vfp_sync_hwstate(struct thread_info *thread)
{
- unsigned int cpu = get_cpu();
+ vfp_state_hold();
- if (vfp_state_in_hw(cpu, thread)) {
+ if (vfp_state_in_hw(raw_smp_processor_id(), thread)) {
u32 fpexc = fmrx(FPEXC);
/*
@@ -528,7 +559,7 @@ void vfp_sync_hwstate(struct thread_info *thread)
fmxr(FPEXC, fpexc);
}
- put_cpu();
+ vfp_state_release();
}
/* Ensure that the thread reloads the hardware VFP state on the next use. */
@@ -642,7 +673,7 @@ static int vfp_starting_cpu(unsigned int unused)
return 0;
}
-void vfp_kmode_exception(void)
+static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr)
{
/*
* If we reach this point, a floating point exception has been raised
@@ -660,8 +691,173 @@ void vfp_kmode_exception(void)
pr_crit("BUG: unsupported FP instruction in kernel mode\n");
else
pr_crit("BUG: FP instruction issued in kernel mode with FP unit disabled\n");
+ pr_crit("FPEXC == 0x%08x\n", fmrx(FPEXC));
+ return 1;
+}
+
+/*
+ * vfp_support_entry - Handle VFP exception
+ *
+ * @regs: pt_regs structure holding the register state at exception entry
+ * @trigger: The opcode of the instruction that triggered the exception
+ *
+ * Returns 0 if the exception was handled, or an error code otherwise.
+ */
+static int vfp_support_entry(struct pt_regs *regs, u32 trigger)
+{
+ struct thread_info *ti = current_thread_info();
+ u32 fpexc;
+
+ if (unlikely(!have_vfp))
+ return -ENODEV;
+
+ if (!user_mode(regs))
+ return vfp_kmode_exception(regs, trigger);
+
+ vfp_state_hold();
+ fpexc = fmrx(FPEXC);
+
+ /*
+ * If the VFP unit was not enabled yet, we have to check whether the
+ * VFP state in the CPU's registers is the most recent VFP state
+ * associated with the process. On UP systems, we don't save the VFP
+ * state eagerly on a context switch, so we may need to save the
+ * VFP state to memory first, as it may belong to another process.
+ */
+ if (!(fpexc & FPEXC_EN)) {
+ /*
+ * Enable the VFP unit but mask the FP exception flag for the
+ * time being, so we can access all the registers.
+ */
+ fpexc |= FPEXC_EN;
+ fmxr(FPEXC, fpexc & ~FPEXC_EX);
+
+ /*
+ * Check whether or not the VFP state in the CPU's registers is
+ * the most recent VFP state associated with this task. On SMP,
+ * migration may result in multiple CPUs holding VFP states
+ * that belong to the same task, but only the most recent one
+ * is valid.
+ */
+ if (!vfp_state_in_hw(ti->cpu, ti)) {
+ if (!IS_ENABLED(CONFIG_SMP) &&
+ vfp_current_hw_state[ti->cpu] != NULL) {
+ /*
+ * This CPU is currently holding the most
+ * recent VFP state associated with another
+ * task, and we must save that to memory first.
+ */
+ vfp_save_state(vfp_current_hw_state[ti->cpu],
+ fpexc);
+ }
+
+ /*
+ * We can now proceed with loading the task's VFP state
+ * from memory into the CPU registers.
+ */
+ fpexc = vfp_load_state(&ti->vfpstate);
+ vfp_current_hw_state[ti->cpu] = &ti->vfpstate;
+#ifdef CONFIG_SMP
+ /*
+ * Record that this CPU is now the one holding the most
+ * recent VFP state of the task.
+ */
+ ti->vfpstate.hard.cpu = ti->cpu;
+#endif
+ }
+
+ if (fpexc & FPEXC_EX)
+ /*
+ * Might as well handle the pending exception before
+ * retrying branch out before setting an FPEXC that
+ * stops us reading stuff.
+ */
+ goto bounce;
+
+ /*
+ * No FP exception is pending: just enable the VFP and
+ * replay the instruction that trapped.
+ */
+ fmxr(FPEXC, fpexc);
+ vfp_state_release();
+ } else {
+ /* Check for synchronous or asynchronous exceptions */
+ if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) {
+ u32 fpscr = fmrx(FPSCR);
+
+ /*
+ * On some implementations of the VFP subarch 1,
+ * setting FPSCR.IXE causes all the CDP instructions to
+ * be bounced synchronously without setting the
+ * FPEXC.EX bit
+ */
+ if (!(fpscr & FPSCR_IXE)) {
+ if (!(fpscr & FPSCR_LENGTH_MASK)) {
+ pr_debug("not VFP\n");
+ vfp_state_release();
+ return -ENOEXEC;
+ }
+ fpexc |= FPEXC_DEX;
+ }
+ }
+bounce: regs->ARM_pc += 4;
+ /* VFP_bounce() will invoke vfp_state_release() */
+ VFP_bounce(trigger, fpexc, regs);
+ }
+
+ return 0;
}
+static struct undef_hook neon_support_hook[] = {{
+ .instr_mask = 0xfe000000,
+ .instr_val = 0xf2000000,
+ .cpsr_mask = PSR_T_BIT,
+ .cpsr_val = 0,
+ .fn = vfp_support_entry,
+}, {
+ .instr_mask = 0xff100000,
+ .instr_val = 0xf4000000,
+ .cpsr_mask = PSR_T_BIT,
+ .cpsr_val = 0,
+ .fn = vfp_support_entry,
+}, {
+ .instr_mask = 0xef000000,
+ .instr_val = 0xef000000,
+ .cpsr_mask = PSR_T_BIT,
+ .cpsr_val = PSR_T_BIT,
+ .fn = vfp_support_entry,
+}, {
+ .instr_mask = 0xff100000,
+ .instr_val = 0xf9000000,
+ .cpsr_mask = PSR_T_BIT,
+ .cpsr_val = PSR_T_BIT,
+ .fn = vfp_support_entry,
+}, {
+ .instr_mask = 0xff000800,
+ .instr_val = 0xfc000800,
+ .cpsr_mask = 0,
+ .cpsr_val = 0,
+ .fn = vfp_support_entry,
+}, {
+ .instr_mask = 0xff000800,
+ .instr_val = 0xfd000800,
+ .cpsr_mask = 0,
+ .cpsr_val = 0,
+ .fn = vfp_support_entry,
+}, {
+ .instr_mask = 0xff000800,
+ .instr_val = 0xfe000800,
+ .cpsr_mask = 0,
+ .cpsr_val = 0,
+ .fn = vfp_support_entry,
+}};
+
+static struct undef_hook vfp_support_hook = {
+ .instr_mask = 0x0c000e00,
+ .instr_val = 0x0c000a00,
+ .fn = vfp_support_entry,
+};
+
#ifdef CONFIG_KERNEL_MODE_NEON
/*
@@ -673,13 +869,16 @@ void kernel_neon_begin(void)
unsigned int cpu;
u32 fpexc;
+ vfp_state_hold();
+
/*
- * Kernel mode NEON is only allowed outside of interrupt context
- * with preemption disabled. This will make sure that the kernel
- * mode NEON register contents never need to be preserved.
+ * Kernel mode NEON is only allowed outside of hardirq context with
+ * preemption and softirq processing disabled. This will make sure that
+ * the kernel mode NEON register contents never need to be preserved.
*/
- BUG_ON(in_interrupt());
- cpu = get_cpu();
+ BUG_ON(in_hardirq());
+ BUG_ON(irqs_disabled());
+ cpu = __smp_processor_id();
fpexc = fmrx(FPEXC) | FPEXC_EN;
fmxr(FPEXC, fpexc);
@@ -702,12 +901,27 @@ void kernel_neon_end(void)
{
/* Disable the NEON/VFP unit. */
fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
- put_cpu();
+ vfp_state_release();
}
EXPORT_SYMBOL(kernel_neon_end);
#endif /* CONFIG_KERNEL_MODE_NEON */
+static int __init vfp_detect(struct pt_regs *regs, unsigned int instr)
+{
+ VFP_arch = UINT_MAX; /* mark as not present */
+ regs->ARM_pc += 4;
+ return 0;
+}
+
+static struct undef_hook vfp_detect_hook __initdata = {
+ .instr_mask = 0x0c000e00,
+ .instr_val = 0x0c000a00,
+ .cpsr_mask = MODE_MASK,
+ .cpsr_val = SVC_MODE,
+ .fn = vfp_detect,
+};
+
/*
* VFP support code initialisation.
*/
@@ -715,6 +929,7 @@ static int __init vfp_init(void)
{
unsigned int vfpsid;
unsigned int cpu_arch = cpu_architecture();
+ unsigned int isar6;
/*
* Enable the access to the VFP on all online CPUs so the
@@ -728,11 +943,11 @@ static int __init vfp_init(void)
* The handler is already setup to just log calls, so
* we just need to read the VFPSID register.
*/
- vfp_vector = vfp_testing_entry;
+ register_undef_hook(&vfp_detect_hook);
barrier();
vfpsid = fmrx(FPSID);
barrier();
- vfp_vector = vfp_null_entry;
+ unregister_undef_hook(&vfp_detect_hook);
pr_info("VFP support v0.3: ");
if (VFP_arch) {
@@ -749,8 +964,11 @@ static int __init vfp_init(void)
* for NEON if the hardware has the MVFR registers.
*/
if (IS_ENABLED(CONFIG_NEON) &&
- (fmrx(MVFR1) & 0x000fff00) == 0x00011100)
+ (fmrx(MVFR1) & 0x000fff00) == 0x00011100) {
elf_hwcap |= HWCAP_NEON;
+ for (int i = 0; i < ARRAY_SIZE(neon_support_hook); i++)
+ register_undef_hook(&neon_support_hook[i]);
+ }
if (IS_ENABLED(CONFIG_VFPv3)) {
u32 mvfr0 = fmrx(MVFR0);
@@ -771,7 +989,38 @@ static int __init vfp_init(void)
if ((fmrx(MVFR1) & 0xf0000000) == 0x10000000)
elf_hwcap |= HWCAP_VFPv4;
+ if (((fmrx(MVFR1) & MVFR1_ASIMDHP_MASK) >> MVFR1_ASIMDHP_BIT) == 0x2)
+ elf_hwcap |= HWCAP_ASIMDHP;
+ if (((fmrx(MVFR1) & MVFR1_FPHP_MASK) >> MVFR1_FPHP_BIT) == 0x3)
+ elf_hwcap |= HWCAP_FPHP;
}
+
+ /*
+ * Check for the presence of Advanced SIMD Dot Product
+ * instructions.
+ */
+ isar6 = read_cpuid_ext(CPUID_EXT_ISAR6);
+ if (cpuid_feature_extract_field(isar6, 4) == 0x1)
+ elf_hwcap |= HWCAP_ASIMDDP;
+ /*
+ * Check for the presence of Advanced SIMD Floating point
+ * half-precision multiplication instructions.
+ */
+ if (cpuid_feature_extract_field(isar6, 8) == 0x1)
+ elf_hwcap |= HWCAP_ASIMDFHM;
+ /*
+ * Check for the presence of Advanced SIMD Bfloat16
+ * floating point instructions.
+ */
+ if (cpuid_feature_extract_field(isar6, 20) == 0x1)
+ elf_hwcap |= HWCAP_ASIMDBF16;
+ /*
+ * Check for the presence of Advanced SIMD and floating point
+ * Int8 matrix multiplication instructions instructions.
+ */
+ if (cpuid_feature_extract_field(isar6, 24) == 0x1)
+ elf_hwcap |= HWCAP_I8MM;
+
/* Extract the architecture version on pre-cpuid scheme */
} else {
if (vfpsid & FPSID_NODOUBLE) {
@@ -786,8 +1035,9 @@ static int __init vfp_init(void)
"arm/vfp:starting", vfp_starting_cpu,
vfp_dying_cpu);
- vfp_vector = vfp_support_entry;
+ have_vfp = true;
+ register_undef_hook(&vfp_support_hook);
thread_register_notifier(&vfp_notifier_block);
vfp_pm_init();