Diffstat (limited to 'arch/powerpc/kernel/interrupt.c')
-rw-r--r--   arch/powerpc/kernel/interrupt.c   209
1 file changed, 42 insertions(+), 167 deletions(-)
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 21bbd615ca41..aea6f7e8e9c6 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -3,12 +3,11 @@
#include <linux/context_tracking.h>
#include <linux/err.h>
#include <linux/compat.h>
+#include <linux/rseq.h>
#include <linux/sched/debug.h> /* for show_regs */
-#include <asm/asm-prototypes.h>
#include <asm/kup.h>
#include <asm/cputime.h>
-#include <asm/interrupt.h>
#include <asm/hw_irq.h>
#include <asm/interrupt.h>
#include <asm/kprobes.h>
@@ -19,13 +18,16 @@
#include <asm/switch_to.h>
#include <asm/syscall.h>
#include <asm/time.h>
+#include <asm/tm.h>
#include <asm/unistd.h>
#if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32)
unsigned long global_dbcr0[NR_CPUS];
#endif
-typedef long (*syscall_fn)(long, long, long, long, long, long);
+#if defined(CONFIG_PREEMPT_DYNAMIC)
+DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+#endif
#ifdef CONFIG_PPC_BOOK3S_64
DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
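For context, the new sk_dynamic_irqentry_exit_cond_resched key lets CONFIG_PREEMPT_DYNAMIC kernels flip the interrupt-exit resched check at run time instead of compile time. A minimal sketch of a helper that consults it; the name need_irq_preemption() matches the call site further down in this diff, but the body shown here is an assumption, not part of the patch:
static inline bool need_irq_preemption(void)
{
	/* With PREEMPT_DYNAMIC the decision is patched in at run time. */
	if (IS_ENABLED(CONFIG_PREEMPT_DYNAMIC))
		return static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched);
	/* Otherwise it is a plain compile-time choice. */
	return IS_ENABLED(CONFIG_PREEMPTION);
}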
@@ -36,7 +38,7 @@ static inline bool exit_must_hard_disable(void)
#else
static inline bool exit_must_hard_disable(void)
{
- return true;
+ return false;
}
#endif
@@ -53,16 +55,18 @@ static inline bool exit_must_hard_disable(void)
*/
static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
{
+ bool must_hard_disable = (exit_must_hard_disable() || !restartable);
+
/* This must be done with RI=1 because tracing may touch vmaps */
trace_hardirqs_on();
- if (exit_must_hard_disable() || !restartable)
+ if (must_hard_disable)
__hard_EE_RI_disable();
#ifdef CONFIG_PPC64
/* This pattern matches prep_irq_for_idle */
if (unlikely(lazy_irq_pending_nocheck())) {
- if (exit_must_hard_disable() || !restartable) {
+ if (must_hard_disable) {
local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
__hard_RI_enable();
}
@@ -74,124 +78,6 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
return true;
}
-/* Has to run notrace because it is entered not completely "reconciled" */
-notrace long system_call_exception(long r3, long r4, long r5,
- long r6, long r7, long r8,
- unsigned long r0, struct pt_regs *regs)
-{
- syscall_fn f;
-
- kuep_lock();
-
- regs->orig_gpr3 = r3;
-
- if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
- BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
-
- trace_hardirqs_off(); /* finish reconciling */
-
- CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
- user_exit_irqoff();
-
- if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
- BUG_ON(!(regs->msr & MSR_RI));
- BUG_ON(!(regs->msr & MSR_PR));
- BUG_ON(arch_irq_disabled_regs(regs));
-
-#ifdef CONFIG_PPC_PKEY
- if (mmu_has_feature(MMU_FTR_PKEY)) {
- unsigned long amr, iamr;
- bool flush_needed = false;
- /*
- * When entering from userspace we mostly have the AMR/IAMR
- * different from kernel default values. Hence don't compare.
- */
- amr = mfspr(SPRN_AMR);
- iamr = mfspr(SPRN_IAMR);
- regs->amr = amr;
- regs->iamr = iamr;
- if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
- mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
- flush_needed = true;
- }
- if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
- mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
- flush_needed = true;
- }
- if (flush_needed)
- isync();
- } else
-#endif
- kuap_assert_locked();
-
- booke_restore_dbcr0();
-
- account_cpu_user_entry();
-
- account_stolen_time();
-
- /*
- * This is not required for the syscall exit path, but makes the
- * stack frame look nicer. If this was initialised in the first stack
- * frame, or if the unwinder was taught the first stack frame always
- * returns to user with IRQS_ENABLED, this store could be avoided!
- */
- irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
-
- local_irq_enable();
-
- if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
- if (unlikely(trap_is_unsupported_scv(regs))) {
- /* Unsupported scv vector */
- _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
- return regs->gpr[3];
- }
- /*
- * We use the return value of do_syscall_trace_enter() as the
- * syscall number. If the syscall was rejected for any reason
- * do_syscall_trace_enter() returns an invalid syscall number
- * and the test against NR_syscalls will fail and the return
- * value to be used is in regs->gpr[3].
- */
- r0 = do_syscall_trace_enter(regs);
- if (unlikely(r0 >= NR_syscalls))
- return regs->gpr[3];
- r3 = regs->gpr[3];
- r4 = regs->gpr[4];
- r5 = regs->gpr[5];
- r6 = regs->gpr[6];
- r7 = regs->gpr[7];
- r8 = regs->gpr[8];
-
- } else if (unlikely(r0 >= NR_syscalls)) {
- if (unlikely(trap_is_unsupported_scv(regs))) {
- /* Unsupported scv vector */
- _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
- return regs->gpr[3];
- }
- return -ENOSYS;
- }
-
- /* May be faster to do array_index_nospec? */
- barrier_nospec();
-
- if (unlikely(is_compat_task())) {
- f = (void *)compat_sys_call_table[r0];
-
- r3 &= 0x00000000ffffffffULL;
- r4 &= 0x00000000ffffffffULL;
- r5 &= 0x00000000ffffffffULL;
- r6 &= 0x00000000ffffffffULL;
- r7 &= 0x00000000ffffffffULL;
- r8 &= 0x00000000ffffffffULL;
-
- } else {
- f = (void *)sys_call_table[r0];
- }
-
- return f(r3, r4, r5, r6, r7, r8);
-}
-
static notrace void booke_load_dbcr0(void)
{
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -214,7 +100,7 @@ static notrace void booke_load_dbcr0(void)
#endif
}
-static void check_return_regs_valid(struct pt_regs *regs)
+static notrace void check_return_regs_valid(struct pt_regs *regs)
{
#ifdef CONFIG_PPC_BOOK3S_64
unsigned long trap, srr0, srr1;
@@ -225,7 +111,7 @@ static void check_return_regs_valid(struct pt_regs *regs)
if (trap_is_scv(regs))
return;
- trap = regs->trap;
+ trap = TRAP(regs);
// EE in HV mode sets HSRRs like 0xea0
if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
trap = 0xea0;
@@ -244,7 +130,7 @@ static void check_return_regs_valid(struct pt_regs *regs)
case 0x1600:
case 0x1800:
validp = &local_paca->hsrr_valid;
- if (!*validp)
+ if (!READ_ONCE(*validp))
return;
srr0 = mfspr(SPRN_HSRR0);
@@ -254,7 +140,7 @@ static void check_return_regs_valid(struct pt_regs *regs)
break;
default:
validp = &local_paca->srr_valid;
- if (!*validp)
+ if (!READ_ONCE(*validp))
return;
srr0 = mfspr(SPRN_SRR0);
@@ -280,19 +166,17 @@ static void check_return_regs_valid(struct pt_regs *regs)
* such things will get caught most of the time, statistically
* enough to be able to get a warning out.
*/
- barrier();
-
- if (!*validp)
+ if (!READ_ONCE(*validp))
return;
- if (!warned) {
- warned = true;
+ if (!data_race(warned)) {
+ data_race(warned = true);
printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
show_regs(regs);
}
- *validp = 0; /* fixup */
+ WRITE_ONCE(*validp, 0); /* fixup */
#endif
}
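The READ_ONCE()/WRITE_ONCE()/data_race() changes above mark the deliberately racy accesses to the srr_valid/hsrr_valid flags and the warned flag so KCSAN treats them as intended. A minimal sketch of the same annotation pattern on a stand-in flag; the names are illustrative, not from the patch:
#include <linux/compiler.h>
static bool once_flag;	/* stand-in for the annotated flags */
static void fixup_once(void)
{
	/* Racy read, annotated so KCSAN does not report it. */
	if (!READ_ONCE(once_flag))
		return;
	/* Tolerated racy write, wrapped in data_race(). */
	data_race(once_flag = false);
}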
@@ -302,10 +186,10 @@ interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
unsigned long ti_flags;
again:
- ti_flags = READ_ONCE(current_thread_info()->flags);
+ ti_flags = read_thread_flags();
while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
local_irq_enable();
- if (ti_flags & _TIF_NEED_RESCHED) {
+ if (ti_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) {
schedule();
} else {
/*
@@ -318,7 +202,7 @@ again:
do_notify_resume(regs, ti_flags);
}
local_irq_disable();
- ti_flags = READ_ONCE(current_thread_info()->flags);
+ ti_flags = read_thread_flags();
}
if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
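The ti_flags changes in the two hunks above replace open-coded READ_ONCE(current_thread_info()->flags) with the generic read_thread_flags() helper. For reference, the generic helper is essentially a thin wrapper around the same access (paraphrased, not part of this patch):
static __always_inline unsigned long read_thread_flags(void)
{
	return READ_ONCE(current_thread_info()->flags);
}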
@@ -365,7 +249,6 @@ again:
/* Restore user access locks last */
kuap_user_restore(regs);
- kuep_unlock();
return ret;
}
@@ -387,7 +270,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
unsigned long ret = 0;
bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
- CT_WARN_ON(ct_state() == CONTEXT_USER);
+ CT_WARN_ON(ct_state() == CT_STATE_USER);
kuap_assert_locked();
@@ -396,7 +279,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
/* Check whether the syscall is issued inside a restartable sequence */
rseq_syscall(regs);
- ti_flags = current_thread_info()->flags;
+ ti_flags = read_thread_flags();
if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
@@ -463,11 +346,9 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
{
unsigned long ret;
- if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
- BUG_ON(!(regs->msr & MSR_RI));
- BUG_ON(!(regs->msr & MSR_PR));
+ BUG_ON(regs_is_unrecoverable(regs));
BUG_ON(arch_irq_disabled_regs(regs));
- CT_WARN_ON(ct_state() == CONTEXT_USER);
+ CT_WARN_ON(ct_state() == CT_STATE_USER);
/*
* We don't need to restore AMR on the way back to userspace for KUAP.
@@ -490,34 +371,38 @@ void preempt_schedule_irq(void);
notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
{
- unsigned long flags;
unsigned long ret = 0;
unsigned long kuap;
- bool stack_store = current_thread_info()->flags &
- _TIF_EMULATE_STACK_STORE;
+ bool stack_store = read_thread_flags() & _TIF_EMULATE_STACK_STORE;
- if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
- unlikely(!(regs->msr & MSR_RI)))
+ if (regs_is_unrecoverable(regs))
unrecoverable_exception(regs);
- BUG_ON(regs->msr & MSR_PR);
/*
- * CT_WARN_ON comes here via program_check_exception,
- * so avoid recursion.
+ * CT_WARN_ON comes here via program_check_exception, so avoid
+ * recursion.
+ *
+ * Skip the assertion on PMIs on 64e to work around a problem caused
+ * by NMI PMIs incorrectly taking this interrupt return path, it's
+ * possible for this to hit after interrupt exit to user switches
+ * context to user. See also the comment in the performance monitor
+ * handler in exceptions-64e.S
*/
- if (TRAP(regs) != INTERRUPT_PROGRAM)
- CT_WARN_ON(ct_state() == CONTEXT_USER);
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64) &&
+ TRAP(regs) != INTERRUPT_PROGRAM &&
+ TRAP(regs) != INTERRUPT_PERFMON)
+ CT_WARN_ON(ct_state() == CT_STATE_USER);
kuap = kuap_get_and_assert_locked();
- local_irq_save(flags);
+ local_irq_disable();
if (!arch_irq_disabled_regs(regs)) {
/* Returning to a kernel context with local irqs enabled. */
WARN_ON_ONCE(!(regs->msr & MSR_EE));
again:
- if (IS_ENABLED(CONFIG_PREEMPT)) {
+ if (need_irq_preemption()) {
/* Return to preemptible kernel context */
- if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) {
+ if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) {
if (preempt_count() == 0)
preempt_schedule_irq();
}
@@ -558,16 +443,6 @@ again:
if (unlikely(stack_store))
__hard_EE_RI_disable();
- /*
- * Returning to a kernel context with local irqs disabled.
- * Here, if EE was enabled in the interrupted context, enable
- * it on return as well. A problem exists here where a soft
- * masked interrupt may have cleared MSR[EE] and set HARD_DIS
- * here, and it will still exist on return to the caller. This
- * will be resolved by the masked interrupt firing again.
- */
- if (regs->msr & MSR_EE)
- local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
#endif /* CONFIG_PPC64 */
}