summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kernel/nmi.c6
-rw-r--r--arch/x86/kernel/reboot.c20
-rw-r--r--include/linux/kernel.h16
-rw-r--r--kernel/panic.c9
-rw-r--r--kernel/watchdog.c2
5 files changed, 45 insertions, 8 deletions
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index fca87938d739..424aec4a4c71 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -231,7 +231,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
#endif
if (panic_on_unrecovered_nmi)
- nmi_panic("NMI: Not continuing");
+ nmi_panic(regs, "NMI: Not continuing");
pr_emerg("Dazed and confused, but trying to continue\n");
@@ -256,7 +256,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
show_regs(regs);
if (panic_on_io_nmi) {
- nmi_panic("NMI IOCK error: Not continuing");
+ nmi_panic(regs, "NMI IOCK error: Not continuing");
/*
* If we end up here, it means we have received an NMI while
@@ -305,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
pr_emerg("Do you have a strange power saving mode enabled?\n");
if (unknown_nmi_panic || panic_on_unrecovered_nmi)
- nmi_panic("NMI: Not continuing");
+ nmi_panic(regs, "NMI: Not continuing");
pr_emerg("Dazed and confused, but trying to continue\n");
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 02693dd9a079..1da13022d544 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -718,6 +718,7 @@ static int crashing_cpu;
static nmi_shootdown_cb shootdown_callback;
static atomic_t waiting_for_crash_ipi;
+static int crash_ipi_issued;
static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
{
@@ -780,6 +781,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
smp_send_nmi_allbutself();
+ /* Kick CPUs looping in NMI context. */
+ WRITE_ONCE(crash_ipi_issued, 1);
+
msecs = 1000; /* Wait at most a second for the other cpus to stop */
while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
mdelay(1);
@@ -788,6 +792,22 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
/* Leave the nmi callback set */
}
+
+/* Override the weak function in kernel/panic.c */
+void nmi_panic_self_stop(struct pt_regs *regs)
+{
+ while (1) {
+ /*
+ * Wait for the crash dumping IPI to be issued, and then
+ * call its callback directly.
+ */
+ if (READ_ONCE(crash_ipi_issued))
+ crash_nmi_callback(0, regs); /* Don't return */
+
+ cpu_relax();
+ }
+}
+
#else /* !CONFIG_SMP */
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 750cc5c7c999..7311c3294e25 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -255,6 +255,7 @@ extern long (*panic_blink)(int state);
__printf(1, 2)
void panic(const char *fmt, ...)
__noreturn __cold;
+void nmi_panic_self_stop(struct pt_regs *);
extern void oops_enter(void);
extern void oops_exit(void);
void print_oops_end_marker(void);
@@ -455,14 +456,21 @@ extern atomic_t panic_cpu;
/*
* A variant of panic() called from NMI context. We return if we've already
- * panicked on this CPU.
+ * panicked on this CPU. If another CPU already panicked, loop in
+ * nmi_panic_self_stop() which can provide architecture dependent code such
+ * as saving register state for crash dump.
*/
-#define nmi_panic(fmt, ...) \
+#define nmi_panic(regs, fmt, ...) \
do { \
- int cpu = raw_smp_processor_id(); \
+ int old_cpu, cpu; \
\
- if (atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu) != cpu) \
+ cpu = raw_smp_processor_id(); \
+ old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu); \
+ \
+ if (old_cpu == PANIC_CPU_INVALID) \
panic(fmt, ##__VA_ARGS__); \
+ else if (old_cpu != cpu) \
+ nmi_panic_self_stop(regs); \
} while (0)
/*
diff --git a/kernel/panic.c b/kernel/panic.c
index 3344524cf6ff..06f31b49b3b4 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -61,6 +61,15 @@ void __weak panic_smp_self_stop(void)
cpu_relax();
}
+/*
+ * Stop ourselves in NMI context if another CPU has already panicked. Arch code
+ * may override this to prepare for crash dumping, e.g. save regs info.
+ */
+void __weak nmi_panic_self_stop(struct pt_regs *regs)
+{
+ panic_smp_self_stop();
+}
+
atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
/**
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index b9be18fae154..84b5035cb6a5 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -351,7 +351,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
trigger_allbutself_cpu_backtrace();
if (hardlockup_panic)
- nmi_panic("Hard LOCKUP");
+ nmi_panic(regs, "Hard LOCKUP");
__this_cpu_write(hard_watchdog_warn, true);
return;