Diffstat (limited to 'arch/powerpc/kernel/interrupt.c')
-rw-r--r--  arch/powerpc/kernel/interrupt.c | 209
1 file changed, 42 insertions(+), 167 deletions(-)
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 21bbd615ca41..aea6f7e8e9c6 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -3,12 +3,11 @@
 #include <linux/context_tracking.h>
 #include <linux/err.h>
 #include <linux/compat.h>
+#include <linux/rseq.h>
 #include <linux/sched/debug.h> /* for show_regs */
 
-#include <asm/asm-prototypes.h>
 #include <asm/kup.h>
 #include <asm/cputime.h>
-#include <asm/interrupt.h>
 #include <asm/hw_irq.h>
 #include <asm/interrupt.h>
 #include <asm/kprobes.h>
@@ -19,13 +18,16 @@
 #include <asm/switch_to.h>
 #include <asm/syscall.h>
 #include <asm/time.h>
+#include <asm/tm.h>
 #include <asm/unistd.h>
 
 #if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32)
 unsigned long global_dbcr0[NR_CPUS];
 #endif
 
-typedef long (*syscall_fn)(long, long, long, long, long, long);
+#if defined(CONFIG_PREEMPT_DYNAMIC)
+DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+#endif
 
 #ifdef CONFIG_PPC_BOOK3S_64
 DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
@@ -36,7 +38,7 @@ static inline bool exit_must_hard_disable(void)
 #else
 static inline bool exit_must_hard_disable(void)
 {
-	return true;
+	return false;
 }
 #endif
 
@@ -53,16 +55,18 @@ static inline bool exit_must_hard_disable(void)
  */
 static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
 {
+	bool must_hard_disable = (exit_must_hard_disable() || !restartable);
+
 	/* This must be done with RI=1 because tracing may touch vmaps */
 	trace_hardirqs_on();
 
-	if (exit_must_hard_disable() || !restartable)
+	if (must_hard_disable)
 		__hard_EE_RI_disable();
 
 #ifdef CONFIG_PPC64
 	/* This pattern matches prep_irq_for_idle */
 	if (unlikely(lazy_irq_pending_nocheck())) {
-		if (exit_must_hard_disable() || !restartable) {
+		if (must_hard_disable) {
 			local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 			__hard_RI_enable();
 		}
@@ -74,124 +78,6 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
 	return true;
 }
 
-/* Has to run notrace because it is entered not completely "reconciled" */
-notrace long system_call_exception(long r3, long r4, long r5,
-				   long r6, long r7, long r8,
-				   unsigned long r0, struct pt_regs *regs)
-{
-	syscall_fn f;
-
-	kuep_lock();
-
-	regs->orig_gpr3 = r3;
-
-	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
-		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
-
-	trace_hardirqs_off(); /* finish reconciling */
-
-	CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
-	user_exit_irqoff();
-
-	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
-		BUG_ON(!(regs->msr & MSR_RI));
-	BUG_ON(!(regs->msr & MSR_PR));
-	BUG_ON(arch_irq_disabled_regs(regs));
-
-#ifdef CONFIG_PPC_PKEY
-	if (mmu_has_feature(MMU_FTR_PKEY)) {
-		unsigned long amr, iamr;
-		bool flush_needed = false;
-		/*
-		 * When entering from userspace we mostly have the AMR/IAMR
-		 * different from kernel default values. Hence don't compare.
-		 */
-		amr = mfspr(SPRN_AMR);
-		iamr = mfspr(SPRN_IAMR);
-		regs->amr = amr;
-		regs->iamr = iamr;
-		if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
-			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
-			flush_needed = true;
-		}
-		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
-			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
-			flush_needed = true;
-		}
-		if (flush_needed)
-			isync();
-	} else
-#endif
-		kuap_assert_locked();
-
-	booke_restore_dbcr0();
-
-	account_cpu_user_entry();
-
-	account_stolen_time();
-
-	/*
-	 * This is not required for the syscall exit path, but makes the
-	 * stack frame look nicer. If this was initialised in the first stack
-	 * frame, or if the unwinder was taught the first stack frame always
-	 * returns to user with IRQS_ENABLED, this store could be avoided!
-	 */
-	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
-
-	local_irq_enable();
-
-	if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
-		if (unlikely(trap_is_unsupported_scv(regs))) {
-			/* Unsupported scv vector */
-			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-			return regs->gpr[3];
-		}
-		/*
-		 * We use the return value of do_syscall_trace_enter() as the
-		 * syscall number. If the syscall was rejected for any reason
-		 * do_syscall_trace_enter() returns an invalid syscall number
-		 * and the test against NR_syscalls will fail and the return
-		 * value to be used is in regs->gpr[3].
-		 */
-		r0 = do_syscall_trace_enter(regs);
-		if (unlikely(r0 >= NR_syscalls))
-			return regs->gpr[3];
-		r3 = regs->gpr[3];
-		r4 = regs->gpr[4];
-		r5 = regs->gpr[5];
-		r6 = regs->gpr[6];
-		r7 = regs->gpr[7];
-		r8 = regs->gpr[8];
-
-	} else if (unlikely(r0 >= NR_syscalls)) {
-		if (unlikely(trap_is_unsupported_scv(regs))) {
-			/* Unsupported scv vector */
-			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-			return regs->gpr[3];
-		}
-		return -ENOSYS;
-	}
-
-	/* May be faster to do array_index_nospec? */
-	barrier_nospec();
-
-	if (unlikely(is_compat_task())) {
-		f = (void *)compat_sys_call_table[r0];
-
-		r3 &= 0x00000000ffffffffULL;
-		r4 &= 0x00000000ffffffffULL;
-		r5 &= 0x00000000ffffffffULL;
-		r6 &= 0x00000000ffffffffULL;
-		r7 &= 0x00000000ffffffffULL;
-		r8 &= 0x00000000ffffffffULL;
-
-	} else {
-		f = (void *)sys_call_table[r0];
-	}
-
-	return f(r3, r4, r5, r6, r7, r8);
-}
-
 static notrace void booke_load_dbcr0(void)
 {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -214,7 +100,7 @@ static notrace void booke_load_dbcr0(void)
 #endif
 }
 
-static void check_return_regs_valid(struct pt_regs *regs)
+static notrace void check_return_regs_valid(struct pt_regs *regs)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
 	unsigned long trap, srr0, srr1;
@@ -225,7 +111,7 @@ static void check_return_regs_valid(struct pt_regs *regs)
 	if (trap_is_scv(regs))
 		return;
 
-	trap = regs->trap;
+	trap = TRAP(regs);
 	// EE in HV mode sets HSRRs like 0xea0
 	if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
 		trap = 0xea0;
@@ -244,7 +130,7 @@ static void check_return_regs_valid(struct pt_regs *regs)
 	case 0x1600:
 	case 0x1800:
 		validp = &local_paca->hsrr_valid;
-		if (!*validp)
+		if (!READ_ONCE(*validp))
 			return;
 
 		srr0 = mfspr(SPRN_HSRR0);
@@ -254,7 +140,7 @@
 		break;
 	default:
 		validp = &local_paca->srr_valid;
-		if (!*validp)
+		if (!READ_ONCE(*validp))
 			return;
 
 		srr0 = mfspr(SPRN_SRR0);
@@ -280,19 +166,17 @@
 	 * such things will get caught most of the time, statistically
	 * enough to be able to get a warning out.
 	 */
-	barrier();
-
-	if (!*validp)
+	if (!READ_ONCE(*validp))
 		return;
 
-	if (!warned) {
-		warned = true;
+	if (!data_race(warned)) {
+		data_race(warned = true);
 		printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
 		printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
 		show_regs(regs);
 	}
 
-	*validp = 0; /* fixup */
+	WRITE_ONCE(*validp, 0); /* fixup */
 #endif
 }
 
@@ -302,10 +186,10 @@ interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
 	unsigned long ti_flags;
 
 again:
-	ti_flags = READ_ONCE(current_thread_info()->flags);
+	ti_flags = read_thread_flags();
 	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
 		local_irq_enable();
-		if (ti_flags & _TIF_NEED_RESCHED) {
+		if (ti_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) {
 			schedule();
 		} else {
 			/*
@@ -318,7 +202,7 @@ again:
 			do_notify_resume(regs, ti_flags);
 		}
 		local_irq_disable();
-		ti_flags = READ_ONCE(current_thread_info()->flags);
+		ti_flags = read_thread_flags();
 	}
 
 	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
@@ -365,7 +249,6 @@ again:
 
 	/* Restore user access locks last */
 	kuap_user_restore(regs);
-	kuep_unlock();
 
 	return ret;
 }
@@ -387,7 +270,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 	unsigned long ret = 0;
 	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
 
-	CT_WARN_ON(ct_state() == CONTEXT_USER);
+	CT_WARN_ON(ct_state() == CT_STATE_USER);
 
 	kuap_assert_locked();
 
@@ -396,7 +279,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 	/* Check whether the syscall is issued inside a restartable sequence */
 	rseq_syscall(regs);
 
-	ti_flags = current_thread_info()->flags;
+	ti_flags = read_thread_flags();
 
 	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
 		if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
@@ -463,11 +346,9 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
 {
 	unsigned long ret;
 
-	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
-		BUG_ON(!(regs->msr & MSR_RI));
-	BUG_ON(!(regs->msr & MSR_PR));
+	BUG_ON(regs_is_unrecoverable(regs));
 	BUG_ON(arch_irq_disabled_regs(regs));
-	CT_WARN_ON(ct_state() == CONTEXT_USER);
+	CT_WARN_ON(ct_state() == CT_STATE_USER);
 
 	/*
 	 * We don't need to restore AMR on the way back to userspace for KUAP.
@@ -490,34 +371,38 @@ void preempt_schedule_irq(void);
 
 notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
 {
-	unsigned long flags;
 	unsigned long ret = 0;
 	unsigned long kuap;
-	bool stack_store = current_thread_info()->flags &
-			   _TIF_EMULATE_STACK_STORE;
+	bool stack_store = read_thread_flags() & _TIF_EMULATE_STACK_STORE;
 
-	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
-	    unlikely(!(regs->msr & MSR_RI)))
+	if (regs_is_unrecoverable(regs))
 		unrecoverable_exception(regs);
-	BUG_ON(regs->msr & MSR_PR);
 	/*
-	 * CT_WARN_ON comes here via program_check_exception,
-	 * so avoid recursion.
+	 * CT_WARN_ON comes here via program_check_exception, so avoid
+	 * recursion.
+	 *
+	 * Skip the assertion on PMIs on 64e to work around a problem caused
+	 * by NMI PMIs incorrectly taking this interrupt return path, it's
+	 * possible for this to hit after interrupt exit to user switches
+	 * context to user. See also the comment in the performance monitor
+	 * handler in exceptions-64e.S
 	 */
-	if (TRAP(regs) != INTERRUPT_PROGRAM)
-		CT_WARN_ON(ct_state() == CONTEXT_USER);
+	if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64) &&
+	    TRAP(regs) != INTERRUPT_PROGRAM &&
+	    TRAP(regs) != INTERRUPT_PERFMON)
+		CT_WARN_ON(ct_state() == CT_STATE_USER);
 
 	kuap = kuap_get_and_assert_locked();
 
-	local_irq_save(flags);
+	local_irq_disable();
 
 	if (!arch_irq_disabled_regs(regs)) {
 		/* Returning to a kernel context with local irqs enabled. */
 		WARN_ON_ONCE(!(regs->msr & MSR_EE));
 again:
-		if (IS_ENABLED(CONFIG_PREEMPT)) {
+		if (need_irq_preemption()) {
 			/* Return to preemptible kernel context */
-			if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) {
+			if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) {
 				if (preempt_count() == 0)
 					preempt_schedule_irq();
 			}
@@ -558,16 +443,6 @@ again:
 		if (unlikely(stack_store))
 			__hard_EE_RI_disable();
 
-		/*
-		 * Returning to a kernel context with local irqs disabled.
-		 * Here, if EE was enabled in the interrupted context, enable
-		 * it on return as well. A problem exists here where a soft
-		 * masked interrupt may have cleared MSR[EE] and set HARD_DIS
-		 * here, and it will still exist on return to the caller. This
-		 * will be resolved by the masked interrupt firing again.
-		 */
-		if (regs->msr & MSR_EE)
-			local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
 #endif /* CONFIG_PPC64 */
 	}
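Note on the check_return_regs_valid() hunks above: the diff replaces plain accesses to the SRR-valid flag with READ_ONCE()/WRITE_ONCE() and marks the one-shot "warned" latch with data_race(), i.e. it tolerates a benign race on the diagnostic rather than ordering it. The following is a minimal userspace sketch of that "check, warn once, fix up" pattern, using C11 relaxed atomics in place of the kernel macros; the function and variable names (check_srr0, srr_valid) and the values passed in are illustrative stand-ins, not kernel code.

/*
 * Userspace analog of the lockless pattern in check_return_regs_valid().
 * Relaxed C11 atomics play the role of READ_ONCE()/WRITE_ONCE()/data_race().
 */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic int srr_valid = 1;	/* stands in for local_paca->srr_valid */
static _Atomic int warned;		/* one-shot latch, like the static bool */

static void check_srr0(unsigned long srr0, unsigned long nip)
{
	/* READ_ONCE()-style check: the flag may change under us at any time. */
	if (!atomic_load_explicit(&srr_valid, memory_order_relaxed))
		return;

	if (srr0 == nip)
		return;

	/*
	 * data_race()-style racy test-and-set: two racing callers may both
	 * warn, which is acceptable for a best-effort diagnostic.
	 */
	if (!atomic_load_explicit(&warned, memory_order_relaxed)) {
		atomic_store_explicit(&warned, 1, memory_order_relaxed);
		printf("SRR0 was: %lx should be: %lx\n", srr0, nip);
	}

	/* WRITE_ONCE()-style fixup: mark the saved state stale. */
	atomic_store_explicit(&srr_valid, 0, memory_order_relaxed);
}

int main(void)
{
	check_srr0(0x1000, 0x2000);	/* mismatch: warns once, clears the flag */
	check_srr0(0x1000, 0x2000);	/* flag now clear: returns silently */
	return 0;
}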