summaryrefslogtreecommitdiff
path: root/arch/arm
diff options
context:
space:
mode:
authorRussell King (Oracle) <rmk+kernel@armlinux.org.uk>2023-06-27 14:17:19 +0100
committerRussell King (Oracle) <rmk+kernel@armlinux.org.uk>2023-06-27 14:17:19 +0100
commit53ae158f6ddc14df5c44d62c06e33fdb66de1196 (patch)
treed0d0485f5f614e0070bab6ff1b53d56aec7c2d8e /arch/arm
parentac9a78681b921877518763ba0e89202254349d1b (diff)
parent47ba5f39eab3c2a9a1ba878159a6050f2bbfc0e2 (diff)
Merge tag 'arm-vfp-refactor-for-rmk' of git://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux into devel-stabledevel-stable
Refactor VFP support code and reimplement in C The VFP related changes to permit kernel mode NEON in softirq context resulted in some issues regarding en/disabling of sofirqs from asm code, and this made it clear that it would be better to handle more of it from C code. Given that we already have infrastructure that associates undefined instruction exceptions with handler code based on value/mask pairs, we can easily move the dispatch of VFP and NEON instructions to C code once we reimplement the actual VFP support routine (which reasons about how to deal with the exception and whether any emulation is needed) in C code first. With those out of the way, we can drop the partial decoding logic in asm that reasons about which ISA is being used by user space, as the remaining cases are all 32-bit ARM only. This leaves a FPE specific routine with some iWMMXT logic that is easily duplicated in C as well, allowing us to move the FPE asm code into the FPE asm source file, and out of the shared entry code.
Diffstat (limited to 'arch/arm')
-rw-r--r--arch/arm/include/asm/thread_info.h17
-rw-r--r--arch/arm/kernel/asm-offsets.c1
-rw-r--r--arch/arm/kernel/entry-armv.S252
-rw-r--r--arch/arm/kernel/iwmmxt.S18
-rw-r--r--arch/arm/kernel/pj4-cp0.c1
-rw-r--r--arch/arm/kernel/process.c1
-rw-r--r--arch/arm/kernel/ptrace.c2
-rw-r--r--arch/arm/kernel/unwind.c25
-rw-r--r--arch/arm/kernel/xscale-cp0.c1
-rw-r--r--arch/arm/mach-sa1100/jornada720_ssp.c5
-rw-r--r--arch/arm/mm/proc-feroceon.S4
-rw-r--r--arch/arm/nwfpe/entry.S77
-rw-r--r--arch/arm/vfp/Makefile2
-rw-r--r--arch/arm/vfp/entry.S28
-rw-r--r--arch/arm/vfp/vfp.h1
-rw-r--r--arch/arm/vfp/vfphw.S206
-rw-r--r--arch/arm/vfp/vfpmodule.c208
17 files changed, 313 insertions, 536 deletions
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 7f092cb55a41..943ffcf069d2 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -40,6 +40,7 @@ struct task_struct;
DECLARE_PER_CPU(struct task_struct *, __entry_task);
#include <asm/types.h>
+#include <asm/traps.h>
struct cpu_context_save {
__u32 r4;
@@ -66,7 +67,6 @@ struct thread_info {
__u32 cpu_domain; /* cpu domain */
struct cpu_context_save cpu_context; /* cpu context */
__u32 abi_syscall; /* ABI type and syscall nr */
- __u8 used_cp[16]; /* thread used copro */
unsigned long tp_value[2]; /* TLS registers */
union fp_state fpstate __attribute__((aligned(8)));
union vfp_state vfpstate;
@@ -105,6 +105,21 @@ extern void iwmmxt_task_restore(struct thread_info *, void *);
extern void iwmmxt_task_release(struct thread_info *);
extern void iwmmxt_task_switch(struct thread_info *);
+extern int iwmmxt_undef_handler(struct pt_regs *, u32);
+
+static inline void register_iwmmxt_undef_handler(void)
+{
+ static struct undef_hook iwmmxt_undef_hook = {
+ .instr_mask = 0x0c000e00,
+ .instr_val = 0x0c000000,
+ .cpsr_mask = MODE_MASK | PSR_T_BIT,
+ .cpsr_val = USR_MODE,
+ .fn = iwmmxt_undef_handler,
+ };
+
+ register_undef_hook(&iwmmxt_undef_hook);
+}
+
extern void vfp_sync_hwstate(struct thread_info *);
extern void vfp_flush_hwstate(struct thread_info *);
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 38121c59cbc2..f9c7111c1d65 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -47,7 +47,6 @@ int main(void)
DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain));
DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context));
DEFINE(TI_ABI_SYSCALL, offsetof(struct thread_info, abi_syscall));
- DEFINE(TI_USED_CP, offsetof(struct thread_info, used_cp));
DEFINE(TI_TP_VALUE, offsetof(struct thread_info, tp_value));
DEFINE(TI_FPSTATE, offsetof(struct thread_info, fpstate));
#ifdef CONFIG_VFP
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index c39303e5c234..682e92664b07 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -446,258 +446,26 @@ ENDPROC(__irq_usr)
__und_usr:
usr_entry uaccess=0
- mov r2, r4
- mov r3, r5
-
- @ r2 = regs->ARM_pc, which is either 2 or 4 bytes ahead of the
- @ faulting instruction depending on Thumb mode.
- @ r3 = regs->ARM_cpsr
- @
- @ The emulation code returns using r9 if it has emulated the
- @ instruction, or the more conventional lr if we are to treat
- @ this as a real undefined instruction
- @
- badr r9, ret_from_exception
-
@ IRQs must be enabled before attempting to read the instruction from
@ user space since that could cause a page/translation fault if the
@ page table was modified by another CPU.
enable_irq
- tst r3, #PSR_T_BIT @ Thumb mode?
- bne __und_usr_thumb
- sub r4, r2, #4 @ ARM instr at LR - 4
-1: ldrt r0, [r4]
- ARM_BE8(rev r0, r0) @ little endian instruction
-
- uaccess_disable ip
-
- @ r0 = 32-bit ARM instruction which caused the exception
- @ r2 = PC value for the following instruction (:= regs->ARM_pc)
- @ r4 = PC value for the faulting instruction
- @ lr = 32-bit undefined instruction function
- badr lr, __und_usr_fault_32
- b call_fpe
-
-__und_usr_thumb:
- @ Thumb instruction
- sub r4, r2, #2 @ First half of thumb instr at LR - 2
-#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7
-/*
- * Thumb-2 instruction handling. Note that because pre-v6 and >= v6 platforms
- * can never be supported in a single kernel, this code is not applicable at
- * all when __LINUX_ARM_ARCH__ < 6. This allows simplifying assumptions to be
- * made about .arch directives.
- */
-#if __LINUX_ARM_ARCH__ < 7
-/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
- ldr_va r5, cpu_architecture
- cmp r5, #CPU_ARCH_ARMv7
- blo __und_usr_fault_16 @ 16bit undefined instruction
-/*
- * The following code won't get run unless the running CPU really is v7, so
- * coding round the lack of ldrht on older arches is pointless. Temporarily
- * override the assembler target arch with the minimum required instead:
- */
- .arch armv6t2
+ tst r5, #PSR_T_BIT @ Thumb mode?
+ mov r1, #2 @ set insn size to 2 for Thumb
+ bne 0f @ handle as Thumb undef exception
+#ifdef CONFIG_FPE_NWFPE
+ adr r9, ret_from_exception
+ bl call_fpe @ returns via R9 on success
#endif
-2: ldrht r5, [r4]
-ARM_BE8(rev16 r5, r5) @ little endian instruction
- cmp r5, #0xe800 @ 32bit instruction if xx != 0
- blo __und_usr_fault_16_pan @ 16bit undefined instruction
-3: ldrht r0, [r2]
-ARM_BE8(rev16 r0, r0) @ little endian instruction
+ mov r1, #4 @ set insn size to 4 for ARM
+0: mov r0, sp
uaccess_disable ip
- add r2, r2, #2 @ r2 is PC + 2, make it PC + 4
- str r2, [sp, #S_PC] @ it's a 2x16bit instr, update
- orr r0, r0, r5, lsl #16
- badr lr, __und_usr_fault_32
- @ r0 = the two 16-bit Thumb instructions which caused the exception
- @ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc)
- @ r4 = PC value for the first 16-bit Thumb instruction
- @ lr = 32bit undefined instruction function
-
-#if __LINUX_ARM_ARCH__ < 7
-/* If the target arch was overridden, change it back: */
-#ifdef CONFIG_CPU_32v6K
- .arch armv6k
-#else
- .arch armv6
-#endif
-#endif /* __LINUX_ARM_ARCH__ < 7 */
-#else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */
- b __und_usr_fault_16
-#endif
+ bl __und_fault
+ b ret_from_exception
UNWIND(.fnend)
ENDPROC(__und_usr)
-/*
- * The out of line fixup for the ldrt instructions above.
- */
- .pushsection .text.fixup, "ax"
- .align 2
-4: str r4, [sp, #S_PC] @ retry current instruction
- ret r9
- .popsection
- .pushsection __ex_table,"a"
- .long 1b, 4b
-#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7
- .long 2b, 4b
- .long 3b, 4b
-#endif
- .popsection
-
-/*
- * Check whether the instruction is a co-processor instruction.
- * If yes, we need to call the relevant co-processor handler.
- *
- * Note that we don't do a full check here for the co-processor
- * instructions; all instructions with bit 27 set are well
- * defined. The only instructions that should fault are the
- * co-processor instructions. However, we have to watch out
- * for the ARM6/ARM7 SWI bug.
- *
- * NEON is a special case that has to be handled here. Not all
- * NEON instructions are co-processor instructions, so we have
- * to make a special case of checking for them. Plus, there's
- * five groups of them, so we have a table of mask/opcode pairs
- * to check against, and if any match then we branch off into the
- * NEON handler code.
- *
- * Emulators may wish to make use of the following registers:
- * r0 = instruction opcode (32-bit ARM or two 16-bit Thumb)
- * r2 = PC value to resume execution after successful emulation
- * r9 = normal "successful" return address
- * r10 = this threads thread_info structure
- * lr = unrecognised instruction return address
- * IRQs enabled, FIQs enabled.
- */
- @
- @ Fall-through from Thumb-2 __und_usr
- @
-#ifdef CONFIG_NEON
- get_thread_info r10 @ get current thread
- adr r6, .LCneon_thumb_opcodes
- b 2f
-#endif
-call_fpe:
- get_thread_info r10 @ get current thread
-#ifdef CONFIG_NEON
- adr r6, .LCneon_arm_opcodes
-2: ldr r5, [r6], #4 @ mask value
- ldr r7, [r6], #4 @ opcode bits matching in mask
- cmp r5, #0 @ end mask?
- beq 1f
- and r8, r0, r5
- cmp r8, r7 @ NEON instruction?
- bne 2b
- mov r7, #1
- strb r7, [r10, #TI_USED_CP + 10] @ mark CP#10 as used
- strb r7, [r10, #TI_USED_CP + 11] @ mark CP#11 as used
- b do_vfp @ let VFP handler handle this
-1:
-#endif
- tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27
- tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2
- reteq lr
- and r8, r0, #0x00000f00 @ mask out CP number
- mov r7, #1
- add r6, r10, r8, lsr #8 @ add used_cp[] array offset first
- strb r7, [r6, #TI_USED_CP] @ set appropriate used_cp[]
-#ifdef CONFIG_IWMMXT
- @ Test if we need to give access to iWMMXt coprocessors
- ldr r5, [r10, #TI_FLAGS]
- rsbs r7, r8, #(1 << 8) @ CP 0 or 1 only
- movscs r7, r5, lsr #(TIF_USING_IWMMXT + 1)
- bcs iwmmxt_task_enable
-#endif
- ARM( add pc, pc, r8, lsr #6 )
- THUMB( lsr r8, r8, #6 )
- THUMB( add pc, r8 )
- nop
-
- ret.w lr @ CP#0
- W(b) do_fpe @ CP#1 (FPE)
- W(b) do_fpe @ CP#2 (FPE)
- ret.w lr @ CP#3
- ret.w lr @ CP#4
- ret.w lr @ CP#5
- ret.w lr @ CP#6
- ret.w lr @ CP#7
- ret.w lr @ CP#8
- ret.w lr @ CP#9
-#ifdef CONFIG_VFP
- W(b) do_vfp @ CP#10 (VFP)
- W(b) do_vfp @ CP#11 (VFP)
-#else
- ret.w lr @ CP#10 (VFP)
- ret.w lr @ CP#11 (VFP)
-#endif
- ret.w lr @ CP#12
- ret.w lr @ CP#13
- ret.w lr @ CP#14 (Debug)
- ret.w lr @ CP#15 (Control)
-
-#ifdef CONFIG_NEON
- .align 6
-
-.LCneon_arm_opcodes:
- .word 0xfe000000 @ mask
- .word 0xf2000000 @ opcode
-
- .word 0xff100000 @ mask
- .word 0xf4000000 @ opcode
-
- .word 0x00000000 @ mask
- .word 0x00000000 @ opcode
-
-.LCneon_thumb_opcodes:
- .word 0xef000000 @ mask
- .word 0xef000000 @ opcode
-
- .word 0xff100000 @ mask
- .word 0xf9000000 @ opcode
-
- .word 0x00000000 @ mask
- .word 0x00000000 @ opcode
-#endif
-
-do_fpe:
- add r10, r10, #TI_FPSTATE @ r10 = workspace
- ldr_va pc, fp_enter, tmp=r4 @ Call FP module USR entry point
-
-/*
- * The FP module is called with these registers set:
- * r0 = instruction
- * r2 = PC+4
- * r9 = normal "successful" return address
- * r10 = FP workspace
- * lr = unrecognised FP instruction return address
- */
-
- .pushsection .data
- .align 2
-ENTRY(fp_enter)
- .word no_fp
- .popsection
-
-ENTRY(no_fp)
- ret lr
-ENDPROC(no_fp)
-
-__und_usr_fault_32:
- mov r1, #4
- b 1f
-__und_usr_fault_16_pan:
- uaccess_disable ip
-__und_usr_fault_16:
- mov r1, #2
-1: mov r0, sp
- badr lr, ret_from_exception
- b __und_fault
-ENDPROC(__und_usr_fault_32)
-ENDPROC(__und_usr_fault_16)
-
.align 5
__pabt_usr:
usr_entry
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
index d2b4ac06e4ed..a0218c4867b9 100644
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -58,9 +58,19 @@
.text
.arm
+ENTRY(iwmmxt_undef_handler)
+ push {r9, r10, lr}
+ get_thread_info r10
+ mov r9, pc
+ b iwmmxt_task_enable
+ mov r0, #0
+ pop {r9, r10, pc}
+ENDPROC(iwmmxt_undef_handler)
+
/*
* Lazy switching of Concan coprocessor context
*
+ * r0 = struct pt_regs pointer
* r10 = struct thread_info pointer
* r9 = ret_from_exception
* lr = undefined instr exit
@@ -84,12 +94,12 @@ ENTRY(iwmmxt_task_enable)
PJ4(mcr p15, 0, r2, c1, c0, 2)
ldr r3, =concan_owner
- add r0, r10, #TI_IWMMXT_STATE @ get task Concan save area
- ldr r2, [sp, #60] @ current task pc value
+ ldr r2, [r0, #S_PC] @ current task pc value
ldr r1, [r3] @ get current Concan owner
- str r0, [r3] @ this task now owns Concan regs
sub r2, r2, #4 @ adjust pc back
- str r2, [sp, #60]
+ str r2, [r0, #S_PC]
+ add r0, r10, #TI_IWMMXT_STATE @ get task Concan save area
+ str r0, [r3] @ this task now owns Concan regs
mrc p15, 0, r2, c2, c0, 0
mov r2, r2 @ cpwait
diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c
index 1d1fb22f44f3..4bca8098c4ff 100644
--- a/arch/arm/kernel/pj4-cp0.c
+++ b/arch/arm/kernel/pj4-cp0.c
@@ -126,6 +126,7 @@ static int __init pj4_cp0_init(void)
pr_info("PJ4 iWMMXt v%d coprocessor enabled.\n", vers);
elf_hwcap |= HWCAP_IWMMXT;
thread_register_notifier(&iwmmxt_notifier_block);
+ register_iwmmxt_undef_handler();
#endif
return 0;
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 0e8ff85890ad..e16ed102960c 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -222,7 +222,6 @@ void flush_thread(void)
flush_ptrace_hw_breakpoint(tsk);
- memset(thread->used_cp, 0, sizeof(thread->used_cp));
memset(&tsk->thread.debug, 0, sizeof(struct debug_info));
memset(&thread->fpstate, 0, sizeof(union fp_state));
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 2d8e2516906b..2b945b9bd366 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -584,8 +584,6 @@ static int fpa_set(struct task_struct *target,
{
struct thread_info *thread = task_thread_info(target);
- thread->used_cp[1] = thread->used_cp[2] = 1;
-
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&thread->fpstate,
0, sizeof(struct user_fp));
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index 53be7ea6181b..9d2192156087 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -308,6 +308,29 @@ static int unwind_exec_pop_subset_r0_to_r3(struct unwind_ctrl_block *ctrl,
return URC_OK;
}
+static unsigned long unwind_decode_uleb128(struct unwind_ctrl_block *ctrl)
+{
+ unsigned long bytes = 0;
+ unsigned long insn;
+ unsigned long result = 0;
+
+ /*
+ * unwind_get_byte() will advance `ctrl` one instruction at a time, so
+ * loop until we get an instruction byte where bit 7 is not set.
+ *
+ * Note: This decodes a maximum of 4 bytes to output 28 bits data where
+ * max is 0xfffffff: that will cover a vsp increment of 1073742336, hence
+ * it is sufficient for unwinding the stack.
+ */
+ do {
+ insn = unwind_get_byte(ctrl);
+ result |= (insn & 0x7f) << (bytes * 7);
+ bytes++;
+ } while (!!(insn & 0x80) && (bytes != sizeof(result)));
+
+ return result;
+}
+
/*
* Execute the current unwind instruction.
*/
@@ -361,7 +384,7 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
if (ret)
goto error;
} else if (insn == 0xb2) {
- unsigned long uleb128 = unwind_get_byte(ctrl);
+ unsigned long uleb128 = unwind_decode_uleb128(ctrl);
ctrl->vrs[SP] += 0x204 + (uleb128 << 2);
} else {
diff --git a/arch/arm/kernel/xscale-cp0.c b/arch/arm/kernel/xscale-cp0.c
index ed4f6e77616d..00d00d3aae97 100644
--- a/arch/arm/kernel/xscale-cp0.c
+++ b/arch/arm/kernel/xscale-cp0.c
@@ -166,6 +166,7 @@ static int __init xscale_cp0_init(void)
pr_info("XScale iWMMXt coprocessor detected.\n");
elf_hwcap |= HWCAP_IWMMXT;
thread_register_notifier(&iwmmxt_notifier_block);
+ register_iwmmxt_undef_handler();
#endif
} else {
pr_info("XScale DSP coprocessor detected.\n");
diff --git a/arch/arm/mach-sa1100/jornada720_ssp.c b/arch/arm/mach-sa1100/jornada720_ssp.c
index 67f72ca984b2..1956b095e699 100644
--- a/arch/arm/mach-sa1100/jornada720_ssp.c
+++ b/arch/arm/mach-sa1100/jornada720_ssp.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/**
+/*
* arch/arm/mac-sa1100/jornada720_ssp.c
*
* Copyright (C) 2006/2007 Kristoffer Ericson <Kristoffer.Ericson@gmail.com>
@@ -26,6 +26,7 @@ static unsigned long jornada_ssp_flags;
/**
* jornada_ssp_reverse - reverses input byte
+ * @byte: input byte to reverse
*
* we need to reverse all data we receive from the mcu due to its physical location
* returns : 01110111 -> 11101110
@@ -46,6 +47,7 @@ EXPORT_SYMBOL(jornada_ssp_reverse);
/**
* jornada_ssp_byte - waits for ready ssp bus and sends byte
+ * @byte: input byte to transmit
*
* waits for fifo buffer to clear and then transmits, if it doesn't then we will
* timeout after <timeout> rounds. Needs mcu running before its called.
@@ -77,6 +79,7 @@ EXPORT_SYMBOL(jornada_ssp_byte);
/**
* jornada_ssp_inout - decide if input is command or trading byte
+ * @byte: input byte to send (may be %TXDUMMY)
*
* returns : (jornada_ssp_byte(byte)) on success
* : %-ETIMEDOUT on timeout failure
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 61ce82aca6f0..072ff9b451f8 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -56,6 +56,10 @@ ENTRY(cpu_feroceon_proc_init)
movne r2, r2, lsr #2 @ turned into # of sets
sub r2, r2, #(1 << 5)
stmia r1, {r2, r3}
+#ifdef CONFIG_VFP
+ mov r1, #1 @ disable quirky VFP
+ str_l r1, VFP_arch_feroceon, r2
+#endif
ret lr
/*
diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S
index d8f9915566e1..354d297a193b 100644
--- a/arch/arm/nwfpe/entry.S
+++ b/arch/arm/nwfpe/entry.S
@@ -7,6 +7,7 @@
Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
*/
+#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/opcodes.h>
@@ -104,6 +105,7 @@ next:
@ plain LDR instruction. Weird, but it seems harmless.
.pushsection .text.fixup,"ax"
.align 2
+.Lrep: str r4, [sp, #S_PC] @ retry current instruction
.Lfix: ret r9 @ let the user eat segfaults
.popsection
@@ -111,3 +113,78 @@ next:
.align 3
.long .Lx1, .Lfix
.popsection
+
+ @
+ @ Check whether the instruction is a co-processor instruction.
+ @ If yes, we need to call the relevant co-processor handler.
+ @ Only FPE instructions are dispatched here, everything else
+ @ is handled by undef hooks.
+ @
+ @ Emulators may wish to make use of the following registers:
+ @ r4 = PC value to resume execution after successful emulation
+ @ r9 = normal "successful" return address
+ @ lr = unrecognised instruction return address
+ @ IRQs enabled, FIQs enabled.
+ @
+ENTRY(call_fpe)
+ mov r2, r4
+ sub r4, r4, #4 @ ARM instruction at user PC - 4
+USERL( .Lrep, ldrt r0, [r4]) @ load opcode from user space
+ARM_BE8(rev r0, r0) @ little endian instruction
+
+ uaccess_disable ip
+
+ get_thread_info r10 @ get current thread
+ tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27
+ reteq lr
+ and r8, r0, #0x00000f00 @ mask out CP number
+#ifdef CONFIG_IWMMXT
+ @ Test if we need to give access to iWMMXt coprocessors
+ ldr r5, [r10, #TI_FLAGS]
+ rsbs r7, r8, #(1 << 8) @ CP 0 or 1 only
+ movscs r7, r5, lsr #(TIF_USING_IWMMXT + 1)
+ movcs r0, sp @ pass struct pt_regs
+ bcs iwmmxt_task_enable
+#endif
+ add pc, pc, r8, lsr #6
+ nop
+
+ ret lr @ CP#0
+ b do_fpe @ CP#1 (FPE)
+ b do_fpe @ CP#2 (FPE)
+ ret lr @ CP#3
+ ret lr @ CP#4
+ ret lr @ CP#5
+ ret lr @ CP#6
+ ret lr @ CP#7
+ ret lr @ CP#8
+ ret lr @ CP#9
+ ret lr @ CP#10 (VFP)
+ ret lr @ CP#11 (VFP)
+ ret lr @ CP#12
+ ret lr @ CP#13
+ ret lr @ CP#14 (Debug)
+ ret lr @ CP#15 (Control)
+
+do_fpe:
+ add r10, r10, #TI_FPSTATE @ r10 = workspace
+ ldr_va pc, fp_enter, tmp=r4 @ Call FP module USR entry point
+
+ @
+ @ The FP module is called with these registers set:
+ @ r0 = instruction
+ @ r2 = PC+4
+ @ r9 = normal "successful" return address
+ @ r10 = FP workspace
+ @ lr = unrecognised FP instruction return address
+ @
+
+ .pushsection .data
+ .align 2
+ENTRY(fp_enter)
+ .word no_fp
+ .popsection
+
+no_fp:
+ ret lr
+ENDPROC(no_fp)
diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile
index 749901a72d6d..dfd64bc2b2fb 100644
--- a/arch/arm/vfp/Makefile
+++ b/arch/arm/vfp/Makefile
@@ -8,4 +8,4 @@
# ccflags-y := -DDEBUG
# asflags-y := -DDEBUG
-obj-y += vfpmodule.o entry.o vfphw.o vfpsingle.o vfpdouble.o
+obj-y += vfpmodule.o vfphw.o vfpsingle.o vfpdouble.o
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
deleted file mode 100644
index 7483ef8bccda..000000000000
--- a/arch/arm/vfp/entry.S
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/arm/vfp/entry.S
- *
- * Copyright (C) 2004 ARM Limited.
- * Written by Deep Blue Solutions Limited.
- */
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/thread_info.h>
-#include <asm/vfpmacros.h>
-#include <asm/assembler.h>
-#include <asm/asm-offsets.h>
-
-@ VFP entry point.
-@
-@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb)
-@ r2 = PC value to resume execution after successful emulation
-@ r9 = normal "successful" return address
-@ r10 = this threads thread_info structure
-@ lr = unrecognised instruction return address
-@ IRQs enabled.
-@
-ENTRY(do_vfp)
- mov r1, r10
- mov r3, r9
- b vfp_entry
-ENDPROC(do_vfp)
diff --git a/arch/arm/vfp/vfp.h b/arch/arm/vfp/vfp.h
index 5cd6d5053271..e43a630f8a16 100644
--- a/arch/arm/vfp/vfp.h
+++ b/arch/arm/vfp/vfp.h
@@ -375,3 +375,4 @@ struct op {
};
asmlinkage void vfp_save_state(void *location, u32 fpexc);
+asmlinkage u32 vfp_load_state(const void *location);
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 4d8478264d82..d5a03f3c10c5 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -4,12 +4,6 @@
*
* Copyright (C) 2004 ARM Limited.
* Written by Deep Blue Solutions Limited.
- *
- * This code is called from the kernel's undefined instruction trap.
- * r1 holds the thread_info pointer
- * r3 holds the return address for successful handling.
- * lr holds the return address for unrecognised instructions.
- * sp points to a struct pt_regs (as defined in include/asm/proc/ptrace.h)
*/
#include <linux/init.h>
#include <linux/linkage.h>
@@ -19,20 +13,6 @@
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
- .macro DBGSTR, str
-#ifdef DEBUG
- stmfd sp!, {r0-r3, ip, lr}
- ldr r0, =1f
- bl _printk
- ldmfd sp!, {r0-r3, ip, lr}
-
- .pushsection .rodata, "a"
-1: .ascii KERN_DEBUG "VFP: \str\n"
- .byte 0
- .previous
-#endif
- .endm
-
.macro DBGSTR1, str, arg
#ifdef DEBUG
stmfd sp!, {r0-r3, ip, lr}
@@ -48,179 +28,25 @@
#endif
.endm
- .macro DBGSTR3, str, arg1, arg2, arg3
-#ifdef DEBUG
- stmfd sp!, {r0-r3, ip, lr}
- mov r3, \arg3
- mov r2, \arg2
- mov r1, \arg1
- ldr r0, =1f
- bl _printk
- ldmfd sp!, {r0-r3, ip, lr}
-
- .pushsection .rodata, "a"
-1: .ascii KERN_DEBUG "VFP: \str\n"
- .byte 0
- .previous
-#endif
- .endm
-
-
-@ VFP hardware support entry point.
-@
-@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb)
-@ r1 = thread_info pointer
-@ r2 = PC value to resume execution after successful emulation
-@ r3 = normal "successful" return address
-@ lr = unrecognised instruction return address
-@ IRQs enabled.
-ENTRY(vfp_support_entry)
- ldr r11, [r1, #TI_CPU] @ CPU number
- add r10, r1, #TI_VFPSTATE @ r10 = workspace
-
- DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10
-
- .fpu vfpv2
- VFPFMRX r1, FPEXC @ Is the VFP enabled?
- DBGSTR1 "fpexc %08x", r1
- tst r1, #FPEXC_EN
- bne look_for_VFP_exceptions @ VFP is already enabled
-
- DBGSTR1 "enable %x", r10
- ldr r9, vfp_current_hw_state_address
- orr r1, r1, #FPEXC_EN @ user FPEXC has the enable bit set
- ldr r4, [r9, r11, lsl #2] @ vfp_current_hw_state pointer
- bic r5, r1, #FPEXC_EX @ make sure exceptions are disabled
- cmp r4, r10 @ this thread owns the hw context?
-#ifndef CONFIG_SMP
- @ For UP, checking that this thread owns the hw context is
- @ sufficient to determine that the hardware state is valid.
- beq vfp_hw_state_valid
-
- @ On UP, we lazily save the VFP context. As a different
- @ thread wants ownership of the VFP hardware, save the old
- @ state if there was a previous (valid) owner.
-
- VFPFMXR FPEXC, r5 @ enable VFP, disable any pending
- @ exceptions, so we can get at the
- @ rest of it
-
- DBGSTR1 "save old state %p", r4
- cmp r4, #0 @ if the vfp_current_hw_state is NULL
- beq vfp_reload_hw @ then the hw state needs reloading
- VFPFSTMIA r4, r5 @ save the working registers
- VFPFMRX r5, FPSCR @ current status
-#ifndef CONFIG_CPU_FEROCEON
- tst r1, #FPEXC_EX @ is there additional state to save?
- beq 1f
- VFPFMRX r6, FPINST @ FPINST (only if FPEXC.EX is set)
- tst r1, #FPEXC_FP2V @ is there an FPINST2 to read?
- beq 1f
- VFPFMRX r8, FPINST2 @ FPINST2 if needed (and present)
-1:
-#endif
- stmia r4, {r1, r5, r6, r8} @ save FPEXC, FPSCR, FPINST, FPINST2
-vfp_reload_hw:
-
-#else
- @ For SMP, if this thread does not own the hw context, then we
- @ need to reload it. No need to save the old state as on SMP,
- @ we always save the state when we switch away from a thread.
- bne vfp_reload_hw
-
- @ This thread has ownership of the current hardware context.
- @ However, it may have been migrated to another CPU, in which
- @ case the saved state is newer than the hardware context.
- @ Check this by looking at the CPU number which the state was
- @ last loaded onto.
- ldr ip, [r10, #VFP_CPU]
- teq ip, r11
- beq vfp_hw_state_valid
-
-vfp_reload_hw:
- @ We're loading this threads state into the VFP hardware. Update
- @ the CPU number which contains the most up to date VFP context.
- str r11, [r10, #VFP_CPU]
-
- VFPFMXR FPEXC, r5 @ enable VFP, disable any pending
- @ exceptions, so we can get at the
- @ rest of it
-#endif
-
- DBGSTR1 "load state %p", r10
- str r10, [r9, r11, lsl #2] @ update the vfp_current_hw_state pointer
+ENTRY(vfp_load_state)
+ @ Load the current VFP state
+ @ r0 - load location
+ @ returns FPEXC
+ DBGSTR1 "load VFP state %p", r0
@ Load the saved state back into the VFP
- VFPFLDMIA r10, r5 @ reload the working registers while
+ VFPFLDMIA r0, r1 @ reload the working registers while
@ FPEXC is in a safe state
- ldmia r10, {r1, r5, r6, r8} @ load FPEXC, FPSCR, FPINST, FPINST2
-#ifndef CONFIG_CPU_FEROCEON
- tst r1, #FPEXC_EX @ is there additional state to restore?
+ ldmia r0, {r0-r3} @ load FPEXC, FPSCR, FPINST, FPINST2
+ tst r0, #FPEXC_EX @ is there additional state to restore?
beq 1f
- VFPFMXR FPINST, r6 @ restore FPINST (only if FPEXC.EX is set)
- tst r1, #FPEXC_FP2V @ is there an FPINST2 to write?
+ VFPFMXR FPINST, r2 @ restore FPINST (only if FPEXC.EX is set)
+ tst r0, #FPEXC_FP2V @ is there an FPINST2 to write?
beq 1f
- VFPFMXR FPINST2, r8 @ FPINST2 if needed (and present)
+ VFPFMXR FPINST2, r3 @ FPINST2 if needed (and present)
1:
-#endif
- VFPFMXR FPSCR, r5 @ restore status
-
-@ The context stored in the VFP hardware is up to date with this thread
-vfp_hw_state_valid:
- tst r1, #FPEXC_EX
- bne process_exception @ might as well handle the pending
- @ exception before retrying branch
- @ out before setting an FPEXC that
- @ stops us reading stuff
- VFPFMXR FPEXC, r1 @ Restore FPEXC last
- sub r2, r2, #4 @ Retry current instruction - if Thumb
- str r2, [sp, #S_PC] @ mode it's two 16-bit instructions,
- @ else it's one 32-bit instruction, so
- @ always subtract 4 from the following
- @ instruction address.
-
- mov lr, r3 @ we think we have handled things
-local_bh_enable_and_ret:
- adr r0, .
- mov r1, #SOFTIRQ_DISABLE_OFFSET
- b __local_bh_enable_ip @ tail call
-
-look_for_VFP_exceptions:
- @ Check for synchronous or asynchronous exception
- tst r1, #FPEXC_EX | FPEXC_DEX
- bne process_exception
- @ On some implementations of the VFP subarch 1, setting FPSCR.IXE
- @ causes all the CDP instructions to be bounced synchronously without
- @ setting the FPEXC.EX bit
- VFPFMRX r5, FPSCR
- tst r5, #FPSCR_IXE
- bne process_exception
-
- tst r5, #FPSCR_LENGTH_MASK
- beq skip
- orr r1, r1, #FPEXC_DEX
- b process_exception
-skip:
-
- @ Fall into hand on to next handler - appropriate coproc instr
- @ not recognised by VFP
-
- DBGSTR "not VFP"
- b local_bh_enable_and_ret
-
-process_exception:
- DBGSTR "bounce"
- mov r2, sp @ nothing stacked - regdump is at TOS
- mov lr, r3 @ setup for a return to the user code.
-
- @ Now call the C code to package up the bounce to the support code
- @ r0 holds the trigger instruction
- @ r1 holds the FPEXC value
- @ r2 pointer to register dump
- b VFP_bounce @ we have handled this - the support
- @ code will raise an exception if
- @ required. If not, the user code will
- @ retry the faulted instruction
-ENDPROC(vfp_support_entry)
+ VFPFMXR FPSCR, r1 @ restore status
+ ret lr
+ENDPROC(vfp_load_state)
ENTRY(vfp_save_state)
@ Save the current VFP state
@@ -240,10 +66,6 @@ ENTRY(vfp_save_state)
ret lr
ENDPROC(vfp_save_state)
- .align
-vfp_current_hw_state_address:
- .word vfp_current_hw_state
-
.macro tbl_branch, base, tmp, shift
#ifdef CONFIG_THUMB2_KERNEL
adr \tmp, 1f
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 349dcb944a93..58a9442add24 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -18,6 +18,7 @@
#include <linux/uaccess.h>
#include <linux/user.h>
#include <linux/export.h>
+#include <linux/perf_event.h>
#include <asm/cp15.h>
#include <asm/cputype.h>
@@ -29,11 +30,6 @@
#include "vfpinstr.h"
#include "vfp.h"
-/*
- * Our undef handlers (in entry.S)
- */
-asmlinkage void vfp_support_entry(u32, void *, u32, u32);
-
static bool have_vfp __ro_after_init;
/*
@@ -41,7 +37,11 @@ static bool have_vfp __ro_after_init;
* Used in startup: set to non-zero if VFP checks fail
* After startup, holds VFP architecture
*/
-static unsigned int __initdata VFP_arch;
+static unsigned int VFP_arch;
+
+#ifdef CONFIG_CPU_FEROCEON
+extern unsigned int VFP_arch_feroceon __alias(VFP_arch);
+#endif
/*
* The pointer to the vfpstate structure of the thread which currently
@@ -313,13 +313,14 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs)
* emulate it.
*/
}
+ perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);
return exceptions & ~VFP_NAN_FLAG;
}
/*
* Package up a bounce condition.
*/
-void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
+static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
{
u32 fpscr, orig_fpscr, fpsid, exceptions;
@@ -355,14 +356,12 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
}
if (fpexc & FPEXC_EX) {
-#ifndef CONFIG_CPU_FEROCEON
/*
* Asynchronous exception. The instruction is read from FPINST
* and the interrupted instruction has to be restarted.
*/
trigger = fmrx(FPINST);
regs->ARM_pc -= 4;
-#endif
} else if (!(fpexc & FPEXC_DEX)) {
/*
* Illegal combination of bits. It can be caused by an
@@ -370,7 +369,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
* on VFP subarch 1.
*/
vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
- goto exit;
+ return;
}
/*
@@ -401,7 +400,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
* the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
*/
if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V))
- goto exit;
+ return;
/*
* The barrier() here prevents fpinst2 being read
@@ -414,8 +413,6 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
if (exceptions)
vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
- exit:
- local_bh_enable();
}
static void vfp_enable(void *unused)
@@ -644,27 +641,6 @@ static int vfp_starting_cpu(unsigned int unused)
return 0;
}
-/*
- * Entered with:
- *
- * r0 = instruction opcode (32-bit ARM or two 16-bit Thumb)
- * r1 = thread_info pointer
- * r2 = PC value to resume execution after successful emulation
- * r3 = normal "successful" return address
- * lr = unrecognised instruction return address
- */
-asmlinkage void vfp_entry(u32 trigger, struct thread_info *ti, u32 resume_pc,
- u32 resume_return_address)
-{
- if (unlikely(!have_vfp))
- return;
-
- local_bh_disable();
- vfp_support_entry(trigger, ti, resume_pc, resume_return_address);
-}
-
-#ifdef CONFIG_KERNEL_MODE_NEON
-
static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr)
{
/*
@@ -687,47 +663,151 @@ static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr)
return 1;
}
-static struct undef_hook vfp_kmode_exception_hook[] = {{
+/*
+ * vfp_support_entry - Handle VFP exception
+ *
+ * @regs: pt_regs structure holding the register state at exception entry
+ * @trigger: The opcode of the instruction that triggered the exception
+ *
+ * Returns 0 if the exception was handled, or an error code otherwise.
+ */
+static int vfp_support_entry(struct pt_regs *regs, u32 trigger)
+{
+ struct thread_info *ti = current_thread_info();
+ u32 fpexc;
+
+ if (unlikely(!have_vfp))
+ return -ENODEV;
+
+ if (!user_mode(regs))
+ return vfp_kmode_exception(regs, trigger);
+
+ local_bh_disable();
+ fpexc = fmrx(FPEXC);
+
+ /*
+ * If the VFP unit was not enabled yet, we have to check whether the
+ * VFP state in the CPU's registers is the most recent VFP state
+ * associated with the process. On UP systems, we don't save the VFP
+ * state eagerly on a context switch, so we may need to save the
+ * VFP state to memory first, as it may belong to another process.
+ */
+ if (!(fpexc & FPEXC_EN)) {
+ /*
+ * Enable the VFP unit but mask the FP exception flag for the
+ * time being, so we can access all the registers.
+ */
+ fpexc |= FPEXC_EN;
+ fmxr(FPEXC, fpexc & ~FPEXC_EX);
+
+ /*
+ * Check whether or not the VFP state in the CPU's registers is
+ * the most recent VFP state associated with this task. On SMP,
+ * migration may result in multiple CPUs holding VFP states
+ * that belong to the same task, but only the most recent one
+ * is valid.
+ */
+ if (!vfp_state_in_hw(ti->cpu, ti)) {
+ if (!IS_ENABLED(CONFIG_SMP) &&
+ vfp_current_hw_state[ti->cpu] != NULL) {
+ /*
+ * This CPU is currently holding the most
+ * recent VFP state associated with another
+ * task, and we must save that to memory first.
+ */
+ vfp_save_state(vfp_current_hw_state[ti->cpu],
+ fpexc);
+ }
+
+ /*
+ * We can now proceed with loading the task's VFP state
+ * from memory into the CPU registers.
+ */
+ fpexc = vfp_load_state(&ti->vfpstate);
+ vfp_current_hw_state[ti->cpu] = &ti->vfpstate;
+#ifdef CONFIG_SMP
+ /*
+ * Record that this CPU is now the one holding the most
+ * recent VFP state of the task.
+ */
+ ti->vfpstate.hard.cpu = ti->cpu;
+#endif
+ }
+
+ if (fpexc & FPEXC_EX)
+ /*
+ * Might as well handle the pending exception before
+ * retrying branch out before setting an FPEXC that
+ * stops us reading stuff.
+ */
+ goto bounce;
+
+ /*
+ * No FP exception is pending: just enable the VFP and
+ * replay the instruction that trapped.
+ */
+ fmxr(FPEXC, fpexc);
+ } else {
+ /* Check for synchronous or asynchronous exceptions */
+ if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) {
+ u32 fpscr = fmrx(FPSCR);
+
+ /*
+ * On some implementations of the VFP subarch 1,
+ * setting FPSCR.IXE causes all the CDP instructions to
+ * be bounced synchronously without setting the
+ * FPEXC.EX bit
+ */
+ if (!(fpscr & FPSCR_IXE)) {
+ if (!(fpscr & FPSCR_LENGTH_MASK)) {
+ pr_debug("not VFP\n");
+ local_bh_enable();
+ return -ENOEXEC;
+ }
+ fpexc |= FPEXC_DEX;
+ }
+ }
+bounce: regs->ARM_pc += 4;
+ VFP_bounce(trigger, fpexc, regs);
+ }
+
+ local_bh_enable();
+ return 0;
+}
+
+static struct undef_hook neon_support_hook[] = {{
.instr_mask = 0xfe000000,
.instr_val = 0xf2000000,
- .cpsr_mask = MODE_MASK | PSR_T_BIT,
- .cpsr_val = SVC_MODE,
- .fn = vfp_kmode_exception,
+ .cpsr_mask = PSR_T_BIT,
+ .cpsr_val = 0,
+ .fn = vfp_support_entry,
}, {
.instr_mask = 0xff100000,
.instr_val = 0xf4000000,
- .cpsr_mask = MODE_MASK | PSR_T_BIT,
- .cpsr_val = SVC_MODE,
- .fn = vfp_kmode_exception,
+ .cpsr_mask = PSR_T_BIT,
+ .cpsr_val = 0,
+ .fn = vfp_support_entry,
}, {
.instr_mask = 0xef000000,
.instr_val = 0xef000000,
- .cpsr_mask = MODE_MASK | PSR_T_BIT,
- .cpsr_val = SVC_MODE | PSR_T_BIT,
- .fn = vfp_kmode_exception,
+ .cpsr_mask = PSR_T_BIT,
+ .cpsr_val = PSR_T_BIT,
+ .fn = vfp_support_entry,
}, {
.instr_mask = 0xff100000,
.instr_val = 0xf9000000,
- .cpsr_mask = MODE_MASK | PSR_T_BIT,
- .cpsr_val = SVC_MODE | PSR_T_BIT,
- .fn = vfp_kmode_exception,
-}, {
- .instr_mask = 0x0c000e00,
- .instr_val = 0x0c000a00,
- .cpsr_mask = MODE_MASK,
- .cpsr_val = SVC_MODE,
- .fn = vfp_kmode_exception,
+ .cpsr_mask = PSR_T_BIT,
+ .cpsr_val = PSR_T_BIT,
+ .fn = vfp_support_entry,
}};
-static int __init vfp_kmode_exception_hook_init(void)
-{
- int i;
+static struct undef_hook vfp_support_hook = {
+ .instr_mask = 0x0c000e00,
+ .instr_val = 0x0c000a00,
+ .fn = vfp_support_entry,
+};
- for (i = 0; i < ARRAY_SIZE(vfp_kmode_exception_hook); i++)
- register_undef_hook(&vfp_kmode_exception_hook[i]);
- return 0;
-}
-subsys_initcall(vfp_kmode_exception_hook_init);
+#ifdef CONFIG_KERNEL_MODE_NEON
/*
* Kernel-side NEON support functions
@@ -832,8 +912,11 @@ static int __init vfp_init(void)
* for NEON if the hardware has the MVFR registers.
*/
if (IS_ENABLED(CONFIG_NEON) &&
- (fmrx(MVFR1) & 0x000fff00) == 0x00011100)
+ (fmrx(MVFR1) & 0x000fff00) == 0x00011100) {
elf_hwcap |= HWCAP_NEON;
+ for (int i = 0; i < ARRAY_SIZE(neon_support_hook); i++)
+ register_undef_hook(&neon_support_hook[i]);
+ }
if (IS_ENABLED(CONFIG_VFPv3)) {
u32 mvfr0 = fmrx(MVFR0);
@@ -902,6 +985,7 @@ static int __init vfp_init(void)
have_vfp = true;
+ register_undef_hook(&vfp_support_hook);
thread_register_notifier(&vfp_notifier_block);
vfp_pm_init();