From 4708fb041346fa9cc6745dafb8c248a3e2f1075b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 16 Mar 2023 00:51:42 +0100 Subject: ARM: vfp: Reimplement VFP exception entry in C code En/disabling softirqs from asm code turned out to be trickier than expected, so vfp_support_entry now returns by tail calling __local_enable_bh_ip() and passing the same arguments that a C call to local_bh_enable() would pass. However, this is slightly hacky, as we don't want to carry our own implementation of local_bh_enable(). So let's bite the bullet, and get rid of the asm logic in vfp_support_entry that reasons about whether or not to save and/or reload the VFP state, and about whether or not an FP exception is pending, and only keep the VFP loading logic as a function that is callable from C. Replicate the removed logic in vfp_entry(), and use the exact same reasoning as in the asm code. To emphasize the correspondence, retain some of the asm comments in the C version as well. Signed-off-by: Ard Biesheuvel Acked-by: Linus Walleij --- arch/arm/vfp/entry.S | 12 +-- arch/arm/vfp/vfp.h | 1 + arch/arm/vfp/vfphw.S | 204 ++++------------------------------------------- arch/arm/vfp/vfpmodule.c | 123 +++++++++++++++++++++++----- 4 files changed, 124 insertions(+), 216 deletions(-) (limited to 'arch/arm/vfp') diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S index 62206ef25037..547c94c62cd3 100644 --- a/arch/arm/vfp/entry.S +++ b/arch/arm/vfp/entry.S @@ -22,10 +22,10 @@ @ IRQs enabled. @ ENTRY(do_vfp) - mov r1, r10 - str lr, [sp, #-8]! - add r3, sp, #4 - str r9, [r3] - bl vfp_entry - ldr pc, [sp], #8 + mov r1, r0 @ pass trigger opcode via R1 + mov r0, sp @ pass struct pt_regs via R0 + bl vfp_support_entry @ dispatch the VFP exception + cmp r0, #0 @ handled successfully? + reteq r9 @ then use R9 as return address + ret lr @ pass to undef handler ENDPROC(do_vfp) diff --git a/arch/arm/vfp/vfp.h b/arch/arm/vfp/vfp.h index 5cd6d5053271..e43a630f8a16 100644 --- a/arch/arm/vfp/vfp.h +++ b/arch/arm/vfp/vfp.h @@ -375,3 +375,4 @@ struct op { }; asmlinkage void vfp_save_state(void *location, u32 fpexc); +asmlinkage u32 vfp_load_state(const void *location); diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index 0aeb60ac3b53..d5a03f3c10c5 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S @@ -4,12 +4,6 @@ * * Copyright (C) 2004 ARM Limited. * Written by Deep Blue Solutions Limited. - * - * This code is called from the kernel's undefined instruction trap. - * r1 holds the thread_info pointer - * r3 holds the return address for successful handling. - * lr holds the return address for unrecognised instructions. - * sp points to a struct pt_regs (as defined in include/asm/proc/ptrace.h) */ #include #include @@ -19,20 +13,6 @@ #include #include - .macro DBGSTR, str -#ifdef DEBUG - stmfd sp!, {r0-r3, ip, lr} - ldr r0, =1f - bl _printk - ldmfd sp!, {r0-r3, ip, lr} - - .pushsection .rodata, "a" -1: .ascii KERN_DEBUG "VFP: \str\n" - .byte 0 - .previous -#endif - .endm - .macro DBGSTR1, str, arg #ifdef DEBUG stmfd sp!, {r0-r3, ip, lr} @@ -48,177 +28,25 @@ #endif .endm - .macro DBGSTR3, str, arg1, arg2, arg3 -#ifdef DEBUG - stmfd sp!, {r0-r3, ip, lr} - mov r3, \arg3 - mov r2, \arg2 - mov r1, \arg1 - ldr r0, =1f - bl _printk - ldmfd sp!, {r0-r3, ip, lr} - - .pushsection .rodata, "a" -1: .ascii KERN_DEBUG "VFP: \str\n" - .byte 0 - .previous -#endif - .endm - - -@ VFP hardware support entry point. -@ -@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) -@ r1 = thread_info pointer -@ r2 = PC value to resume execution after successful emulation -@ r3 = normal "successful" return address -@ lr = unrecognised instruction return address -@ IRQs enabled. -ENTRY(vfp_support_entry) - ldr r11, [r1, #TI_CPU] @ CPU number - add r10, r1, #TI_VFPSTATE @ r10 = workspace - - DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10 - - .fpu vfpv2 - VFPFMRX r1, FPEXC @ Is the VFP enabled? - DBGSTR1 "fpexc %08x", r1 - tst r1, #FPEXC_EN - bne look_for_VFP_exceptions @ VFP is already enabled - - DBGSTR1 "enable %x", r10 - ldr r9, vfp_current_hw_state_address - orr r1, r1, #FPEXC_EN @ user FPEXC has the enable bit set - ldr r4, [r9, r11, lsl #2] @ vfp_current_hw_state pointer - bic r5, r1, #FPEXC_EX @ make sure exceptions are disabled - cmp r4, r10 @ this thread owns the hw context? -#ifndef CONFIG_SMP - @ For UP, checking that this thread owns the hw context is - @ sufficient to determine that the hardware state is valid. - beq vfp_hw_state_valid - - @ On UP, we lazily save the VFP context. As a different - @ thread wants ownership of the VFP hardware, save the old - @ state if there was a previous (valid) owner. - - VFPFMXR FPEXC, r5 @ enable VFP, disable any pending - @ exceptions, so we can get at the - @ rest of it - - DBGSTR1 "save old state %p", r4 - cmp r4, #0 @ if the vfp_current_hw_state is NULL - beq vfp_reload_hw @ then the hw state needs reloading - VFPFSTMIA r4, r5 @ save the working registers - VFPFMRX r5, FPSCR @ current status - tst r1, #FPEXC_EX @ is there additional state to save? - beq 1f - VFPFMRX r6, FPINST @ FPINST (only if FPEXC.EX is set) - tst r1, #FPEXC_FP2V @ is there an FPINST2 to read? - beq 1f - VFPFMRX r8, FPINST2 @ FPINST2 if needed (and present) -1: - stmia r4, {r1, r5, r6, r8} @ save FPEXC, FPSCR, FPINST, FPINST2 -vfp_reload_hw: - -#else - @ For SMP, if this thread does not own the hw context, then we - @ need to reload it. No need to save the old state as on SMP, - @ we always save the state when we switch away from a thread. - bne vfp_reload_hw - - @ This thread has ownership of the current hardware context. - @ However, it may have been migrated to another CPU, in which - @ case the saved state is newer than the hardware context. - @ Check this by looking at the CPU number which the state was - @ last loaded onto. - ldr ip, [r10, #VFP_CPU] - teq ip, r11 - beq vfp_hw_state_valid - -vfp_reload_hw: - @ We're loading this threads state into the VFP hardware. Update - @ the CPU number which contains the most up to date VFP context. - str r11, [r10, #VFP_CPU] - - VFPFMXR FPEXC, r5 @ enable VFP, disable any pending - @ exceptions, so we can get at the - @ rest of it -#endif - - DBGSTR1 "load state %p", r10 - str r10, [r9, r11, lsl #2] @ update the vfp_current_hw_state pointer +ENTRY(vfp_load_state) + @ Load the current VFP state + @ r0 - load location + @ returns FPEXC + DBGSTR1 "load VFP state %p", r0 @ Load the saved state back into the VFP - VFPFLDMIA r10, r5 @ reload the working registers while + VFPFLDMIA r0, r1 @ reload the working registers while @ FPEXC is in a safe state - ldmia r10, {r1, r5, r6, r8} @ load FPEXC, FPSCR, FPINST, FPINST2 - tst r1, #FPEXC_EX @ is there additional state to restore? + ldmia r0, {r0-r3} @ load FPEXC, FPSCR, FPINST, FPINST2 + tst r0, #FPEXC_EX @ is there additional state to restore? beq 1f - VFPFMXR FPINST, r6 @ restore FPINST (only if FPEXC.EX is set) - tst r1, #FPEXC_FP2V @ is there an FPINST2 to write? + VFPFMXR FPINST, r2 @ restore FPINST (only if FPEXC.EX is set) + tst r0, #FPEXC_FP2V @ is there an FPINST2 to write? beq 1f - VFPFMXR FPINST2, r8 @ FPINST2 if needed (and present) + VFPFMXR FPINST2, r3 @ FPINST2 if needed (and present) 1: - VFPFMXR FPSCR, r5 @ restore status - -@ The context stored in the VFP hardware is up to date with this thread -vfp_hw_state_valid: - tst r1, #FPEXC_EX - bne process_exception @ might as well handle the pending - @ exception before retrying branch - @ out before setting an FPEXC that - @ stops us reading stuff - VFPFMXR FPEXC, r1 @ Restore FPEXC last - mov sp, r3 @ we think we have handled things - pop {lr} - sub r2, r2, #4 @ Retry current instruction - if Thumb - str r2, [sp, #S_PC] @ mode it's two 16-bit instructions, - @ else it's one 32-bit instruction, so - @ always subtract 4 from the following - @ instruction address. - -local_bh_enable_and_ret: - adr r0, . - mov r1, #SOFTIRQ_DISABLE_OFFSET - b __local_bh_enable_ip @ tail call - -look_for_VFP_exceptions: - @ Check for synchronous or asynchronous exception - tst r1, #FPEXC_EX | FPEXC_DEX - bne process_exception - @ On some implementations of the VFP subarch 1, setting FPSCR.IXE - @ causes all the CDP instructions to be bounced synchronously without - @ setting the FPEXC.EX bit - VFPFMRX r5, FPSCR - tst r5, #FPSCR_IXE - bne process_exception - - tst r5, #FPSCR_LENGTH_MASK - beq skip - orr r1, r1, #FPEXC_DEX - b process_exception -skip: - - @ Fall into hand on to next handler - appropriate coproc instr - @ not recognised by VFP - - DBGSTR "not VFP" - b local_bh_enable_and_ret - -process_exception: - DBGSTR "bounce" - mov sp, r3 @ setup for a return to the user code. - pop {lr} - mov r2, sp @ nothing stacked - regdump is at TOS - - @ Now call the C code to package up the bounce to the support code - @ r0 holds the trigger instruction - @ r1 holds the FPEXC value - @ r2 pointer to register dump - b VFP_bounce @ we have handled this - the support - @ code will raise an exception if - @ required. If not, the user code will - @ retry the faulted instruction -ENDPROC(vfp_support_entry) + VFPFMXR FPSCR, r1 @ restore status + ret lr +ENDPROC(vfp_load_state) ENTRY(vfp_save_state) @ Save the current VFP state @@ -238,10 +66,6 @@ ENTRY(vfp_save_state) ret lr ENDPROC(vfp_save_state) - .align -vfp_current_hw_state_address: - .word vfp_current_hw_state - .macro tbl_branch, base, tmp, shift #ifdef CONFIG_THUMB2_KERNEL adr \tmp, 1f diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 95628e57807b..7572cb5b28a2 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -30,11 +30,6 @@ #include "vfpinstr.h" #include "vfp.h" -/* - * Our undef handlers (in entry.S) - */ -asmlinkage void vfp_support_entry(u32, void *, u32, u32); - static bool have_vfp __ro_after_init; /* @@ -325,7 +320,7 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs) /* * Package up a bounce condition. */ -void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) +static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) { u32 fpscr, orig_fpscr, fpsid, exceptions; @@ -374,7 +369,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) * on VFP subarch 1. */ vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs); - goto exit; + return; } /* @@ -405,7 +400,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1. */ if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V)) - goto exit; + return; /* * The barrier() here prevents fpinst2 being read @@ -418,8 +413,6 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs); if (exceptions) vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); - exit: - local_bh_enable(); } static void vfp_enable(void *unused) @@ -649,22 +642,112 @@ static int vfp_starting_cpu(unsigned int unused) } /* - * Entered with: + * vfp_support_entry - Handle VFP exception from user mode * - * r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) - * r1 = thread_info pointer - * r2 = PC value to resume execution after successful emulation - * r3 = normal "successful" return address - * lr = unrecognised instruction return address + * @regs: pt_regs structure holding the register state at exception entry + * @trigger: The opcode of the instruction that triggered the exception + * + * Returns 0 if the exception was handled, or an error code otherwise. */ -asmlinkage void vfp_entry(u32 trigger, struct thread_info *ti, u32 resume_pc, - u32 resume_return_address) +asmlinkage int vfp_support_entry(struct pt_regs *regs, u32 trigger) { + struct thread_info *ti = current_thread_info(); + u32 fpexc; + if (unlikely(!have_vfp)) - return; + return -ENODEV; local_bh_disable(); - vfp_support_entry(trigger, ti, resume_pc, resume_return_address); + fpexc = fmrx(FPEXC); + + /* + * If the VFP unit was not enabled yet, we have to check whether the + * VFP state in the CPU's registers is the most recent VFP state + * associated with the process. On UP systems, we don't save the VFP + * state eagerly on a context switch, so we may need to save the + * VFP state to memory first, as it may belong to another process. + */ + if (!(fpexc & FPEXC_EN)) { + /* + * Enable the VFP unit but mask the FP exception flag for the + * time being, so we can access all the registers. + */ + fpexc |= FPEXC_EN; + fmxr(FPEXC, fpexc & ~FPEXC_EX); + + /* + * Check whether or not the VFP state in the CPU's registers is + * the most recent VFP state associated with this task. On SMP, + * migration may result in multiple CPUs holding VFP states + * that belong to the same task, but only the most recent one + * is valid. + */ + if (!vfp_state_in_hw(ti->cpu, ti)) { + if (!IS_ENABLED(CONFIG_SMP) && + vfp_current_hw_state[ti->cpu] != NULL) { + /* + * This CPU is currently holding the most + * recent VFP state associated with another + * task, and we must save that to memory first. + */ + vfp_save_state(vfp_current_hw_state[ti->cpu], + fpexc); + } + + /* + * We can now proceed with loading the task's VFP state + * from memory into the CPU registers. + */ + fpexc = vfp_load_state(&ti->vfpstate); + vfp_current_hw_state[ti->cpu] = &ti->vfpstate; +#ifdef CONFIG_SMP + /* + * Record that this CPU is now the one holding the most + * recent VFP state of the task. + */ + ti->vfpstate.hard.cpu = ti->cpu; +#endif + } + + if (fpexc & FPEXC_EX) + /* + * Might as well handle the pending exception before + * retrying branch out before setting an FPEXC that + * stops us reading stuff. + */ + goto bounce; + + /* + * No FP exception is pending: just enable the VFP and + * replay the instruction that trapped. + */ + fmxr(FPEXC, fpexc); + regs->ARM_pc -= 4; + } else { + /* Check for synchronous or asynchronous exceptions */ + if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) { + u32 fpscr = fmrx(FPSCR); + + /* + * On some implementations of the VFP subarch 1, + * setting FPSCR.IXE causes all the CDP instructions to + * be bounced synchronously without setting the + * FPEXC.EX bit + */ + if (!(fpscr & FPSCR_IXE)) { + if (!(fpscr & FPSCR_LENGTH_MASK)) { + pr_debug("not VFP\n"); + local_bh_enable(); + return -ENOEXEC; + } + fpexc |= FPEXC_DEX; + } + } +bounce: VFP_bounce(trigger, fpexc, regs); + } + + local_bh_enable(); + return 0; } #ifdef CONFIG_KERNEL_MODE_NEON -- cgit