From 05c6257433b7212f07a7e53479a8ab038fc1666a Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 30 Jun 2022 23:32:58 +0100 Subject: panic, kexec: make __crash_kexec() NMI safe Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI panic() doesn't work. The cause of that lies in the PREEMPT_RT definition of mutex_trylock(): if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) return 0; This prevents an nmi_panic() from executing the main body of __crash_kexec() which does the actual kexec into the kdump kernel. The warning and return are explained by: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context") [...] The reasons for this are: 1) There is a potential deadlock in the slowpath 2) Another cpu which blocks on the rtmutex will boost the task which allegedly locked the rtmutex, but that cannot work because the hard/softirq context borrows the task context. Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex and replace it with an atomic variable. This is somewhat overzealous as *some* callsites could keep using a mutex (e.g. the sysfs-facing ones like crash_shrink_memory()), but this has the benefit of involving a single unified lock and preventing any future NMI-related surprises. Tested by triggering NMI panics via: $ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi $ echo 1 > /proc/sys/kernel/unknown_nmi_panic $ echo 1 > /proc/sys/kernel/panic $ ipmitool power diag Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com Fixes: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context") Signed-off-by: Valentin Schneider Cc: Arnd Bergmann Cc: Baoquan He Cc: "Eric W . Biederman" Cc: Juri Lelli Cc: Luis Claudio R. Goncalves Cc: Miaohe Lin Cc: Petr Mladek Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- kernel/kexec_core.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'kernel/kexec_core.c') diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 8fa4eeb95b30..5ca4d40c9ec1 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -46,7 +46,7 @@ #include #include "kexec_internal.h" -DEFINE_MUTEX(kexec_mutex); +atomic_t __kexec_lock = ATOMIC_INIT(0); /* Per cpu memory for storing cpu states in case of system crash. */ note_buf_t __percpu *crash_notes; @@ -959,7 +959,7 @@ late_initcall(kexec_core_sysctl_init); */ void __noclone __crash_kexec(struct pt_regs *regs) { - /* Take the kexec_mutex here to prevent sys_kexec_load + /* Take the kexec_lock here to prevent sys_kexec_load * running on one cpu from replacing the crash kernel * we are using after a panic on a different cpu. * @@ -967,7 +967,7 @@ void __noclone __crash_kexec(struct pt_regs *regs) * of memory the xchg(&kexec_crash_image) would be * sufficient. But since I reuse the memory... */ - if (mutex_trylock(&kexec_mutex)) { + if (kexec_trylock()) { if (kexec_crash_image) { struct pt_regs fixed_regs; @@ -976,7 +976,7 @@ void __noclone __crash_kexec(struct pt_regs *regs) machine_crash_shutdown(&fixed_regs); machine_kexec(kexec_crash_image); } - mutex_unlock(&kexec_mutex); + kexec_unlock(); } } STACK_FRAME_NON_STANDARD(__crash_kexec); @@ -1008,13 +1008,13 @@ ssize_t crash_get_memory_size(void) { ssize_t size = 0; - if (!mutex_trylock(&kexec_mutex)) + if (!kexec_trylock()) return -EBUSY; if (crashk_res.end != crashk_res.start) size = resource_size(&crashk_res); - mutex_unlock(&kexec_mutex); + kexec_unlock(); return size; } @@ -1025,7 +1025,7 @@ int crash_shrink_memory(unsigned long new_size) unsigned long old_size; struct resource *ram_res; - if (!mutex_trylock(&kexec_mutex)) + if (!kexec_trylock()) return -EBUSY; if (kexec_crash_image) { @@ -1064,7 +1064,7 @@ int crash_shrink_memory(unsigned long new_size) insert_resource(&iomem_resource, ram_res); unlock: - mutex_unlock(&kexec_mutex); + kexec_unlock(); return ret; } @@ -1136,7 +1136,7 @@ int kernel_kexec(void) { int error = 0; - if (!mutex_trylock(&kexec_mutex)) + if (!kexec_trylock()) return -EBUSY; if (!kexec_image) { error = -EINVAL; @@ -1212,6 +1212,6 @@ int kernel_kexec(void) #endif Unlock: - mutex_unlock(&kexec_mutex); + kexec_unlock(); return error; } -- cgit