From bd66a89249892acc9d938ba4956066b21403fa5f Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 27 Dec 2016 23:16:04 +0900 Subject: printk: use vprintk_func in vprintk() vprintk(), just like printk(), better be using per-cpu printk_func instead of direct vprintk_emit() call. Just in case if vprintk() will ever be called from NMI, or from any other context that can deadlock in printk(). Link: http://lkml.kernel.org/r/20161227141611.940-2-sergey.senozhatsky@gmail.com Cc: Andrew Morton Cc: Linus Torvalds Cc: Jan Kara Cc: Tejun Heo Cc: Calvin Owens Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Peter Hurley Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Reviewed-by: Steven Rostedt Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/printk') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 8b2696420abb..106843a83b63 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1803,7 +1803,7 @@ EXPORT_SYMBOL(vprintk_emit); asmlinkage int vprintk(const char *fmt, va_list args) { - return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); + return vprintk_func(fmt, args); } EXPORT_SYMBOL(vprintk); -- cgit From f92bac3b141b8233e34ddf32d227e12bfba07b48 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 27 Dec 2016 23:16:05 +0900 Subject: printk: rename nmi.c and exported api A preparation patch for printk_safe work. No functional change. - rename nmi.c to print_safe.c - add `printk_safe' prefix to some (which used both by printk-safe and printk-nmi) of the exported functions. Link: http://lkml.kernel.org/r/20161227141611.940-3-sergey.senozhatsky@gmail.com Cc: Andrew Morton Cc: Linus Torvalds Cc: Jan Kara Cc: Tejun Heo Cc: Calvin Owens Cc: Steven Rostedt Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Peter Hurley Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/Makefile | 2 +- kernel/printk/nmi.c | 291 -------------------------------------------- kernel/printk/printk_safe.c | 291 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 292 insertions(+), 292 deletions(-) delete mode 100644 kernel/printk/nmi.c create mode 100644 kernel/printk/printk_safe.c (limited to 'kernel/printk') diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index abb0042a427b..607928119f26 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,3 +1,3 @@ obj-y = printk.o -obj-$(CONFIG_PRINTK_NMI) += nmi.o +obj-$(CONFIG_PRINTK_NMI) += printk_safe.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c deleted file mode 100644 index f011aaef583c..000000000000 --- a/kernel/printk/nmi.c +++ /dev/null @@ -1,291 +0,0 @@ -/* - * nmi.c - Safe printk in NMI context - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "internal.h" - -/* - * printk() could not take logbuf_lock in NMI context. Instead, - * it uses an alternative implementation that temporary stores - * the strings into a per-CPU buffer. The content of the buffer - * is later flushed into the main ring buffer via IRQ work. - * - * The alternative implementation is chosen transparently - * via @printk_func per-CPU variable. - * - * The implementation allows to flush the strings also from another CPU. - * There are situations when we want to make sure that all buffers - * were handled or when IRQs are blocked. - */ -DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; -static int printk_nmi_irq_ready; -atomic_t nmi_message_lost; - -#define NMI_LOG_BUF_LEN ((1 << CONFIG_NMI_LOG_BUF_SHIFT) - \ - sizeof(atomic_t) - sizeof(struct irq_work)) - -struct nmi_seq_buf { - atomic_t len; /* length of written data */ - struct irq_work work; /* IRQ work that flushes the buffer */ - unsigned char buffer[NMI_LOG_BUF_LEN]; -}; -static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); - -/* - * Safe printk() for NMI context. It uses a per-CPU buffer to - * store the message. NMIs are not nested, so there is always only - * one writer running. But the buffer might get flushed from another - * CPU, so we need to be careful. - */ -static int vprintk_nmi(const char *fmt, va_list args) -{ - struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); - int add = 0; - size_t len; - -again: - len = atomic_read(&s->len); - - /* The trailing '\0' is not counted into len. */ - if (len >= sizeof(s->buffer) - 1) { - atomic_inc(&nmi_message_lost); - return 0; - } - - /* - * Make sure that all old data have been read before the buffer was - * reseted. This is not needed when we just append data. - */ - if (!len) - smp_rmb(); - - add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args); - - /* - * Do it once again if the buffer has been flushed in the meantime. - * Note that atomic_cmpxchg() is an implicit memory barrier that - * makes sure that the data were written before updating s->len. - */ - if (atomic_cmpxchg(&s->len, len, len + add) != len) - goto again; - - /* Get flushed in a more safe context. */ - if (add && printk_nmi_irq_ready) { - /* Make sure that IRQ work is really initialized. */ - smp_rmb(); - irq_work_queue(&s->work); - } - - return add; -} - -static void printk_nmi_flush_line(const char *text, int len) -{ - /* - * The buffers are flushed in NMI only on panic. The messages must - * go only into the ring buffer at this stage. Consoles will get - * explicitly called later when a crashdump is not generated. - */ - if (in_nmi()) - printk_deferred("%.*s", len, text); - else - printk("%.*s", len, text); - -} - -/* printk part of the temporary buffer line by line */ -static int printk_nmi_flush_buffer(const char *start, size_t len) -{ - const char *c, *end; - bool header; - - c = start; - end = start + len; - header = true; - - /* Print line by line. */ - while (c < end) { - if (*c == '\n') { - printk_nmi_flush_line(start, c - start + 1); - start = ++c; - header = true; - continue; - } - - /* Handle continuous lines or missing new line. */ - if ((c + 1 < end) && printk_get_level(c)) { - if (header) { - c = printk_skip_level(c); - continue; - } - - printk_nmi_flush_line(start, c - start); - start = c++; - header = true; - continue; - } - - header = false; - c++; - } - - /* Check if there was a partial line. Ignore pure header. */ - if (start < end && !header) { - static const char newline[] = KERN_CONT "\n"; - - printk_nmi_flush_line(start, end - start); - printk_nmi_flush_line(newline, strlen(newline)); - } - - return len; -} - -/* - * Flush data from the associated per_CPU buffer. The function - * can be called either via IRQ work or independently. - */ -static void __printk_nmi_flush(struct irq_work *work) -{ - static raw_spinlock_t read_lock = - __RAW_SPIN_LOCK_INITIALIZER(read_lock); - struct nmi_seq_buf *s = container_of(work, struct nmi_seq_buf, work); - unsigned long flags; - size_t len; - int i; - - /* - * The lock has two functions. First, one reader has to flush all - * available message to make the lockless synchronization with - * writers easier. Second, we do not want to mix messages from - * different CPUs. This is especially important when printing - * a backtrace. - */ - raw_spin_lock_irqsave(&read_lock, flags); - - i = 0; -more: - len = atomic_read(&s->len); - - /* - * This is just a paranoid check that nobody has manipulated - * the buffer an unexpected way. If we printed something then - * @len must only increase. Also it should never overflow the - * buffer size. - */ - if ((i && i >= len) || len > sizeof(s->buffer)) { - const char *msg = "printk_nmi_flush: internal error\n"; - - printk_nmi_flush_line(msg, strlen(msg)); - len = 0; - } - - if (!len) - goto out; /* Someone else has already flushed the buffer. */ - - /* Make sure that data has been written up to the @len */ - smp_rmb(); - i += printk_nmi_flush_buffer(s->buffer + i, len - i); - - /* - * Check that nothing has got added in the meantime and truncate - * the buffer. Note that atomic_cmpxchg() is an implicit memory - * barrier that makes sure that the data were copied before - * updating s->len. - */ - if (atomic_cmpxchg(&s->len, len, 0) != len) - goto more; - -out: - raw_spin_unlock_irqrestore(&read_lock, flags); -} - -/** - * printk_nmi_flush - flush all per-cpu nmi buffers. - * - * The buffers are flushed automatically via IRQ work. This function - * is useful only when someone wants to be sure that all buffers have - * been flushed at some point. - */ -void printk_nmi_flush(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - __printk_nmi_flush(&per_cpu(nmi_print_seq, cpu).work); -} - -/** - * printk_nmi_flush_on_panic - flush all per-cpu nmi buffers when the system - * goes down. - * - * Similar to printk_nmi_flush() but it can be called even in NMI context when - * the system goes down. It does the best effort to get NMI messages into - * the main ring buffer. - * - * Note that it could try harder when there is only one CPU online. - */ -void printk_nmi_flush_on_panic(void) -{ - /* - * Make sure that we could access the main ring buffer. - * Do not risk a double release when more CPUs are up. - */ - if (in_nmi() && raw_spin_is_locked(&logbuf_lock)) { - if (num_online_cpus() > 1) - return; - - debug_locks_off(); - raw_spin_lock_init(&logbuf_lock); - } - - printk_nmi_flush(); -} - -void __init printk_nmi_init(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct nmi_seq_buf *s = &per_cpu(nmi_print_seq, cpu); - - init_irq_work(&s->work, __printk_nmi_flush); - } - - /* Make sure that IRQ works are initialized before enabling. */ - smp_wmb(); - printk_nmi_irq_ready = 1; - - /* Flush pending messages that did not have scheduled IRQ works. */ - printk_nmi_flush(); -} - -void printk_nmi_enter(void) -{ - this_cpu_write(printk_func, vprintk_nmi); -} - -void printk_nmi_exit(void) -{ - this_cpu_write(printk_func, vprintk_default); -} diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c new file mode 100644 index 000000000000..fc80359dcd78 --- /dev/null +++ b/kernel/printk/printk_safe.c @@ -0,0 +1,291 @@ +/* + * printk_safe.c - Safe printk in NMI context + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +/* + * printk() could not take logbuf_lock in NMI context. Instead, + * it uses an alternative implementation that temporary stores + * the strings into a per-CPU buffer. The content of the buffer + * is later flushed into the main ring buffer via IRQ work. + * + * The alternative implementation is chosen transparently + * via @printk_func per-CPU variable. + * + * The implementation allows to flush the strings also from another CPU. + * There are situations when we want to make sure that all buffers + * were handled or when IRQs are blocked. + */ +DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; +static int printk_safe_irq_ready; +atomic_t nmi_message_lost; + +#define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ + sizeof(atomic_t) - sizeof(struct irq_work)) + +struct printk_safe_seq_buf { + atomic_t len; /* length of written data */ + struct irq_work work; /* IRQ work that flushes the buffer */ + unsigned char buffer[SAFE_LOG_BUF_LEN]; +}; +static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); + +/* + * Safe printk() for NMI context. It uses a per-CPU buffer to + * store the message. NMIs are not nested, so there is always only + * one writer running. But the buffer might get flushed from another + * CPU, so we need to be careful. + */ +static int vprintk_nmi(const char *fmt, va_list args) +{ + struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); + int add = 0; + size_t len; + +again: + len = atomic_read(&s->len); + + /* The trailing '\0' is not counted into len. */ + if (len >= sizeof(s->buffer) - 1) { + atomic_inc(&nmi_message_lost); + return 0; + } + + /* + * Make sure that all old data have been read before the buffer was + * reseted. This is not needed when we just append data. + */ + if (!len) + smp_rmb(); + + add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args); + + /* + * Do it once again if the buffer has been flushed in the meantime. + * Note that atomic_cmpxchg() is an implicit memory barrier that + * makes sure that the data were written before updating s->len. + */ + if (atomic_cmpxchg(&s->len, len, len + add) != len) + goto again; + + /* Get flushed in a more safe context. */ + if (add && printk_safe_irq_ready) { + /* Make sure that IRQ work is really initialized. */ + smp_rmb(); + irq_work_queue(&s->work); + } + + return add; +} + +static void printk_safe_flush_line(const char *text, int len) +{ + /* + * The buffers are flushed in NMI only on panic. The messages must + * go only into the ring buffer at this stage. Consoles will get + * explicitly called later when a crashdump is not generated. + */ + if (in_nmi()) + printk_deferred("%.*s", len, text); + else + printk("%.*s", len, text); +} + +/* printk part of the temporary buffer line by line */ +static int printk_safe_flush_buffer(const char *start, size_t len) +{ + const char *c, *end; + bool header; + + c = start; + end = start + len; + header = true; + + /* Print line by line. */ + while (c < end) { + if (*c == '\n') { + printk_safe_flush_line(start, c - start + 1); + start = ++c; + header = true; + continue; + } + + /* Handle continuous lines or missing new line. */ + if ((c + 1 < end) && printk_get_level(c)) { + if (header) { + c = printk_skip_level(c); + continue; + } + + printk_safe_flush_line(start, c - start); + start = c++; + header = true; + continue; + } + + header = false; + c++; + } + + /* Check if there was a partial line. Ignore pure header. */ + if (start < end && !header) { + static const char newline[] = KERN_CONT "\n"; + + printk_safe_flush_line(start, end - start); + printk_safe_flush_line(newline, strlen(newline)); + } + + return len; +} + +/* + * Flush data from the associated per_CPU buffer. The function + * can be called either via IRQ work or independently. + */ +static void __printk_safe_flush(struct irq_work *work) +{ + static raw_spinlock_t read_lock = + __RAW_SPIN_LOCK_INITIALIZER(read_lock); + struct printk_safe_seq_buf *s = + container_of(work, struct printk_safe_seq_buf, work); + unsigned long flags; + size_t len; + int i; + + /* + * The lock has two functions. First, one reader has to flush all + * available message to make the lockless synchronization with + * writers easier. Second, we do not want to mix messages from + * different CPUs. This is especially important when printing + * a backtrace. + */ + raw_spin_lock_irqsave(&read_lock, flags); + + i = 0; +more: + len = atomic_read(&s->len); + + /* + * This is just a paranoid check that nobody has manipulated + * the buffer an unexpected way. If we printed something then + * @len must only increase. Also it should never overflow the + * buffer size. + */ + if ((i && i >= len) || len > sizeof(s->buffer)) { + const char *msg = "printk_safe_flush: internal error\n"; + + printk_safe_flush_line(msg, strlen(msg)); + len = 0; + } + + if (!len) + goto out; /* Someone else has already flushed the buffer. */ + + /* Make sure that data has been written up to the @len */ + smp_rmb(); + i += printk_safe_flush_buffer(s->buffer + i, len - i); + + /* + * Check that nothing has got added in the meantime and truncate + * the buffer. Note that atomic_cmpxchg() is an implicit memory + * barrier that makes sure that the data were copied before + * updating s->len. + */ + if (atomic_cmpxchg(&s->len, len, 0) != len) + goto more; + +out: + raw_spin_unlock_irqrestore(&read_lock, flags); +} + +/** + * printk_safe_flush - flush all per-cpu nmi buffers. + * + * The buffers are flushed automatically via IRQ work. This function + * is useful only when someone wants to be sure that all buffers have + * been flushed at some point. + */ +void printk_safe_flush(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + __printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work); +} + +/** + * printk_safe_flush_on_panic - flush all per-cpu nmi buffers when the system + * goes down. + * + * Similar to printk_safe_flush() but it can be called even in NMI context when + * the system goes down. It does the best effort to get NMI messages into + * the main ring buffer. + * + * Note that it could try harder when there is only one CPU online. + */ +void printk_safe_flush_on_panic(void) +{ + /* + * Make sure that we could access the main ring buffer. + * Do not risk a double release when more CPUs are up. + */ + if (in_nmi() && raw_spin_is_locked(&logbuf_lock)) { + if (num_online_cpus() > 1) + return; + + debug_locks_off(); + raw_spin_lock_init(&logbuf_lock); + } + + printk_safe_flush(); +} + +void __init printk_safe_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct printk_safe_seq_buf *s = &per_cpu(nmi_print_seq, cpu); + + init_irq_work(&s->work, __printk_safe_flush); + } + + /* Make sure that IRQ works are initialized before enabling. */ + smp_wmb(); + printk_safe_irq_ready = 1; + + /* Flush pending messages that did not have scheduled IRQ works. */ + printk_safe_flush(); +} + +void printk_nmi_enter(void) +{ + this_cpu_write(printk_func, vprintk_nmi); +} + +void printk_nmi_exit(void) +{ + this_cpu_write(printk_func, vprintk_default); +} -- cgit From 099f1c84c0052ec1b27f1c3942eed5830d86bdbb Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 27 Dec 2016 23:16:06 +0900 Subject: printk: introduce per-cpu safe_print seq buffer This patch extends the idea of NMI per-cpu buffers to regions that may cause recursive printk() calls and possible deadlocks. Namely, printk() can't handle printk calls from schedule code or printk() calls from lock debugging code (spin_dump() for instance); because those may be called with `sem->lock' already taken or any other `critical' locks (p->pi_lock, etc.). An example of deadlock can be vprintk_emit() console_unlock() up() << raw_spin_lock_irqsave(&sem->lock, flags); wake_up_process() try_to_wake_up() ttwu_queue() ttwu_activate() activate_task() enqueue_task() enqueue_task_fair() cfs_rq_of() task_of() WARN_ON_ONCE(!entity_is_task(se)) vprintk_emit() console_trylock() down_trylock() raw_spin_lock_irqsave(&sem->lock, flags) ^^^^ deadlock and some other cases. Just like in NMI implementation, the solution uses a per-cpu `printk_func' pointer to 'redirect' printk() calls to a 'safe' callback, that store messages in a per-cpu buffer and flushes them back to logbuf buffer later. Usage example: printk() printk_safe_enter_irqsave(flags) // // any printk() call from here will endup in vprintk_safe(), // that stores messages in a special per-CPU buffer. // printk_safe_exit_irqrestore(flags) The 'redirection' mechanism, though, has been reworked, as suggested by Petr Mladek. Instead of using a per-cpu @print_func callback we now keep a per-cpu printk-context variable and call either default or nmi vprintk function depending on its value. printk_nmi_entrer/exit and printk_safe_enter/exit, thus, just set/celar corresponding bits in printk-context functions. The patch only adds printk_safe support, we don't use it yet. Link: http://lkml.kernel.org/r/20161227141611.940-4-sergey.senozhatsky@gmail.com Cc: Andrew Morton Cc: Linus Torvalds Cc: Jan Kara Cc: Tejun Heo Cc: Calvin Owens Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Peter Hurley Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Reviewed-by: Steven Rostedt (VMware) --- kernel/printk/Makefile | 2 +- kernel/printk/internal.h | 76 ++++++++++++++++++-------- kernel/printk/printk.c | 3 -- kernel/printk/printk_safe.c | 128 +++++++++++++++++++++++++++++++++++--------- 4 files changed, 157 insertions(+), 52 deletions(-) (limited to 'kernel/printk') diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index 607928119f26..4a2ffc39eb95 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,3 +1,3 @@ obj-y = printk.o -obj-$(CONFIG_PRINTK_NMI) += printk_safe.o +obj-$(CONFIG_PRINTK) += printk_safe.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 7fd2838fa417..c65e36509f3b 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -16,26 +16,8 @@ */ #include -typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); - -int __printf(1, 0) vprintk_default(const char *fmt, va_list args); - #ifdef CONFIG_PRINTK_NMI -extern raw_spinlock_t logbuf_lock; - -/* - * printk() could not take logbuf_lock in NMI context. Instead, - * it temporary stores the strings into a per-CPU buffer. - * The alternative implementation is chosen transparently - * via per-CPU variable. - */ -DECLARE_PER_CPU(printk_func_t, printk_func); -static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) -{ - return this_cpu_read(printk_func)(fmt, args); -} - extern atomic_t nmi_message_lost; static inline int get_nmi_message_lost(void) { @@ -44,14 +26,62 @@ static inline int get_nmi_message_lost(void) #else /* CONFIG_PRINTK_NMI */ -static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) -{ - return vprintk_default(fmt, args); -} - static inline int get_nmi_message_lost(void) { return 0; } #endif /* CONFIG_PRINTK_NMI */ + +#ifdef CONFIG_PRINTK + +#define PRINTK_SAFE_CONTEXT_MASK 0x7fffffff +#define PRINTK_NMI_CONTEXT_MASK 0x80000000 + +extern raw_spinlock_t logbuf_lock; + +__printf(1, 0) int vprintk_default(const char *fmt, va_list args); +__printf(1, 0) int vprintk_func(const char *fmt, va_list args); +void __printk_safe_enter(void); +void __printk_safe_exit(void); + +#define printk_safe_enter_irqsave(flags) \ + do { \ + local_irq_save(flags); \ + __printk_safe_enter(); \ + } while (0) + +#define printk_safe_exit_irqrestore(flags) \ + do { \ + __printk_safe_exit(); \ + local_irq_restore(flags); \ + } while (0) + +#define printk_safe_enter_irq() \ + do { \ + local_irq_disable(); \ + __printk_safe_enter(); \ + } while (0) + +#define printk_safe_exit_irq() \ + do { \ + __printk_safe_exit(); \ + local_irq_enable(); \ + } while (0) + +#else + +__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } + +/* + * In !PRINTK builds we still export logbuf_lock spin_lock, console_sem + * semaphore and some of console functions (console_unlock()/etc.), so + * printk-safe must preserve the existing local IRQ guarantees. + */ +#define printk_safe_enter_irqsave(flags) local_irq_save(flags) +#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) + +#define printk_safe_enter_irq() local_irq_disable() +#define printk_safe_exit_irq() local_irq_enable() + +#endif /* CONFIG_PRINTK */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 106843a83b63..f73046f7a6df 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1902,9 +1902,6 @@ static size_t msg_print_text(const struct printk_log *msg, bool syslog, char *buf, size_t size) { return 0; } static bool suppress_message_printing(int level) { return false; } -/* Still needs to be defined for users */ -DEFINE_PER_CPU(printk_func_t, printk_func); - #endif /* CONFIG_PRINTK */ #ifdef CONFIG_EARLY_PRINTK diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index fc80359dcd78..efc89a4e9df5 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -1,5 +1,5 @@ /* - * printk_safe.c - Safe printk in NMI context + * printk_safe.c - Safe printk for printk-deadlock-prone contexts * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -32,13 +32,13 @@ * is later flushed into the main ring buffer via IRQ work. * * The alternative implementation is chosen transparently - * via @printk_func per-CPU variable. + * by examinig current printk() context mask stored in @printk_context + * per-CPU variable. * * The implementation allows to flush the strings also from another CPU. * There are situations when we want to make sure that all buffers * were handled or when IRQs are blocked. */ -DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; static int printk_safe_irq_ready; atomic_t nmi_message_lost; @@ -50,18 +50,28 @@ struct printk_safe_seq_buf { struct irq_work work; /* IRQ work that flushes the buffer */ unsigned char buffer[SAFE_LOG_BUF_LEN]; }; + +static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq); +static DEFINE_PER_CPU(int, printk_context); + +#ifdef CONFIG_PRINTK_NMI static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); +#endif /* - * Safe printk() for NMI context. It uses a per-CPU buffer to - * store the message. NMIs are not nested, so there is always only - * one writer running. But the buffer might get flushed from another - * CPU, so we need to be careful. + * Add a message to per-CPU context-dependent buffer. NMI and printk-safe + * have dedicated buffers, because otherwise printk-safe preempted by + * NMI-printk would have overwritten the NMI messages. + * + * The messages are fushed from irq work (or from panic()), possibly, + * from other CPU, concurrently with printk_safe_log_store(). Should this + * happen, printk_safe_log_store() will notice the buffer->len mismatch + * and repeat the write. */ -static int vprintk_nmi(const char *fmt, va_list args) +static int printk_safe_log_store(struct printk_safe_seq_buf *s, + const char *fmt, va_list args) { - struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); - int add = 0; + int add; size_t len; again: @@ -74,8 +84,8 @@ again: } /* - * Make sure that all old data have been read before the buffer was - * reseted. This is not needed when we just append data. + * Make sure that all old data have been read before the buffer + * was reset. This is not needed when we just append data. */ if (!len) smp_rmb(); @@ -161,7 +171,7 @@ static int printk_safe_flush_buffer(const char *start, size_t len) } /* - * Flush data from the associated per_CPU buffer. The function + * Flush data from the associated per-CPU buffer. The function * can be called either via IRQ work or independently. */ static void __printk_safe_flush(struct irq_work *work) @@ -231,8 +241,12 @@ void printk_safe_flush(void) { int cpu; - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { +#ifdef CONFIG_PRINTK_NMI __printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work); +#endif + __printk_safe_flush(&per_cpu(safe_print_seq, cpu).work); + } } /** @@ -262,14 +276,88 @@ void printk_safe_flush_on_panic(void) printk_safe_flush(); } +#ifdef CONFIG_PRINTK_NMI +/* + * Safe printk() for NMI context. It uses a per-CPU buffer to + * store the message. NMIs are not nested, so there is always only + * one writer running. But the buffer might get flushed from another + * CPU, so we need to be careful. + */ +static int vprintk_nmi(const char *fmt, va_list args) +{ + struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); + + return printk_safe_log_store(s, fmt, args); +} + +void printk_nmi_enter(void) +{ + this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); +} + +void printk_nmi_exit(void) +{ + this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK); +} + +#else + +static int vprintk_nmi(const char *fmt, va_list args) +{ + return 0; +} + +#endif /* CONFIG_PRINTK_NMI */ + +/* + * Lock-less printk(), to avoid deadlocks should the printk() recurse + * into itself. It uses a per-CPU buffer to store the message, just like + * NMI. + */ +static int vprintk_safe(const char *fmt, va_list args) +{ + struct printk_safe_seq_buf *s = this_cpu_ptr(&safe_print_seq); + + return printk_safe_log_store(s, fmt, args); +} + +/* Can be preempted by NMI. */ +void __printk_safe_enter(void) +{ + this_cpu_inc(printk_context); +} + +/* Can be preempted by NMI. */ +void __printk_safe_exit(void) +{ + this_cpu_dec(printk_context); +} + +__printf(1, 0) int vprintk_func(const char *fmt, va_list args) +{ + if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) + return vprintk_nmi(fmt, args); + + if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) + return vprintk_safe(fmt, args); + + return vprintk_default(fmt, args); +} + void __init printk_safe_init(void) { int cpu; for_each_possible_cpu(cpu) { - struct printk_safe_seq_buf *s = &per_cpu(nmi_print_seq, cpu); + struct printk_safe_seq_buf *s; + + s = &per_cpu(safe_print_seq, cpu); + init_irq_work(&s->work, __printk_safe_flush); +#ifdef CONFIG_PRINTK_NMI + s = &per_cpu(nmi_print_seq, cpu); init_irq_work(&s->work, __printk_safe_flush); +#endif } /* Make sure that IRQ works are initialized before enabling. */ @@ -279,13 +367,3 @@ void __init printk_safe_init(void) /* Flush pending messages that did not have scheduled IRQ works. */ printk_safe_flush(); } - -void printk_nmi_enter(void) -{ - this_cpu_write(printk_func, vprintk_nmi); -} - -void printk_nmi_exit(void) -{ - this_cpu_write(printk_func, vprintk_default); -} -- cgit From 7acac3445acde1c94054cde69ab53503d296c393 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 7 Feb 2017 01:42:53 +0900 Subject: printk: always use deferred printk when flush printk_safe lines Always use printk_deferred() in printk_safe_flush_line(). Flushing can be done from NMI or printk_safe contexts (when we are in panic), so we can't call console drivers, yet still want to store the messages in the logbuf buffer. Therefore we use a deferred printk version. Link: http://lkml.kernel.org/r/20170206164253.GA463@tigerII.localdomain Cc: Andrew Morton Cc: Linus Torvalds Cc: Jan Kara Cc: Tejun Heo Cc: Calvin Owens Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Peter Hurley Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Suggested-by: Petr Mladek Signed-off-by: Petr Mladek Reviewed-by: Steven Rostedt (VMware) --- kernel/printk/printk_safe.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'kernel/printk') diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index efc89a4e9df5..5214d326d3ba 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -110,17 +110,15 @@ again: return add; } -static void printk_safe_flush_line(const char *text, int len) +static inline void printk_safe_flush_line(const char *text, int len) { /* - * The buffers are flushed in NMI only on panic. The messages must - * go only into the ring buffer at this stage. Consoles will get - * explicitly called later when a crashdump is not generated. + * Avoid any console drivers calls from here, because we may be + * in NMI or printk_safe context (when in panic). The messages + * must go only into the ring buffer at this stage. Consoles will + * get explicitly called later when a crashdump is not generated. */ - if (in_nmi()) - printk_deferred("%.*s", len, text); - else - printk("%.*s", len, text); + printk_deferred("%.*s", len, text); } /* printk part of the temporary buffer line by line */ -- cgit From ddb9baa822265b55afffd9815a2758a4b70006c1 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 27 Dec 2016 23:16:08 +0900 Subject: printk: report lost messages in printk safe/nmi contexts Account lost messages in pritk-safe and printk-safe-nmi contexts and report those numbers during printk_safe_flush(). The patch also moves lost message counter to struct `printk_safe_seq_buf' instead of having dedicated static counters - this simplifies the code. Link: http://lkml.kernel.org/r/20161227141611.940-6-sergey.senozhatsky@gmail.com Cc: Andrew Morton Cc: Linus Torvalds Cc: Steven Rostedt Cc: Jan Kara Cc: Tejun Heo Cc: Calvin Owens Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Peter Hurley Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/internal.h | 17 ----------------- kernel/printk/printk.c | 10 ---------- kernel/printk/printk_safe.c | 38 ++++++++++++++++++++++++++++---------- 3 files changed, 28 insertions(+), 37 deletions(-) (limited to 'kernel/printk') diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index c65e36509f3b..1db044f808b7 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -16,23 +16,6 @@ */ #include -#ifdef CONFIG_PRINTK_NMI - -extern atomic_t nmi_message_lost; -static inline int get_nmi_message_lost(void) -{ - return atomic_xchg(&nmi_message_lost, 0); -} - -#else /* CONFIG_PRINTK_NMI */ - -static inline int get_nmi_message_lost(void) -{ - return 0; -} - -#endif /* CONFIG_PRINTK_NMI */ - #ifdef CONFIG_PRINTK #define PRINTK_SAFE_CONTEXT_MASK 0x7fffffff diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f73046f7a6df..6bb77eff502d 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1677,7 +1677,6 @@ asmlinkage int vprintk_emit(int facility, int level, unsigned long flags; int this_cpu; int printed_len = 0; - int nmi_message_lost; bool in_sched = false; /* cpu currently holding logbuf_lock in this function */ static unsigned int logbuf_cpu = UINT_MAX; @@ -1728,15 +1727,6 @@ asmlinkage int vprintk_emit(int facility, int level, strlen(recursion_msg)); } - nmi_message_lost = get_nmi_message_lost(); - if (unlikely(nmi_message_lost)) { - text_len = scnprintf(textbuf, sizeof(textbuf), - "BAD LUCK: lost %d message(s) from NMI context!", - nmi_message_lost); - printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0, - NULL, 0, textbuf, text_len); - } - /* * The printf needs to come first; we need the syslog * prefix which might be passed-in as a parameter. diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 5214d326d3ba..033e50a7d706 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -40,13 +40,15 @@ * were handled or when IRQs are blocked. */ static int printk_safe_irq_ready; -atomic_t nmi_message_lost; #define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ - sizeof(atomic_t) - sizeof(struct irq_work)) + sizeof(atomic_t) - \ + sizeof(atomic_t) - \ + sizeof(struct irq_work)) struct printk_safe_seq_buf { atomic_t len; /* length of written data */ + atomic_t message_lost; struct irq_work work; /* IRQ work that flushes the buffer */ unsigned char buffer[SAFE_LOG_BUF_LEN]; }; @@ -58,6 +60,16 @@ static DEFINE_PER_CPU(int, printk_context); static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); #endif +/* Get flushed in a more safe context. */ +static void queue_flush_work(struct printk_safe_seq_buf *s) +{ + if (printk_safe_irq_ready) { + /* Make sure that IRQ work is really initialized. */ + smp_rmb(); + irq_work_queue(&s->work); + } +} + /* * Add a message to per-CPU context-dependent buffer. NMI and printk-safe * have dedicated buffers, because otherwise printk-safe preempted by @@ -79,7 +91,8 @@ again: /* The trailing '\0' is not counted into len. */ if (len >= sizeof(s->buffer) - 1) { - atomic_inc(&nmi_message_lost); + atomic_inc(&s->message_lost); + queue_flush_work(s); return 0; } @@ -91,6 +104,8 @@ again: smp_rmb(); add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args); + if (!add) + return 0; /* * Do it once again if the buffer has been flushed in the meantime. @@ -100,13 +115,7 @@ again: if (atomic_cmpxchg(&s->len, len, len + add) != len) goto again; - /* Get flushed in a more safe context. */ - if (add && printk_safe_irq_ready) { - /* Make sure that IRQ work is really initialized. */ - smp_rmb(); - irq_work_queue(&s->work); - } - + queue_flush_work(s); return add; } @@ -168,6 +177,14 @@ static int printk_safe_flush_buffer(const char *start, size_t len) return len; } +static void report_message_lost(struct printk_safe_seq_buf *s) +{ + int lost = atomic_xchg(&s->message_lost, 0); + + if (lost) + printk_deferred("Lost %d message(s)!\n", lost); +} + /* * Flush data from the associated per-CPU buffer. The function * can be called either via IRQ work or independently. @@ -225,6 +242,7 @@ more: goto more; out: + report_message_lost(s); raw_spin_unlock_irqrestore(&read_lock, flags); } -- cgit From f975237b76827956fe13ecfe993a319158e2c303 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 27 Dec 2016 23:16:09 +0900 Subject: printk: use printk_safe buffers in printk Use printk_safe per-CPU buffers in printk recursion-prone blocks: -- around logbuf_lock protected sections in vprintk_emit() and console_unlock() -- around down_trylock_console_sem() and up_console_sem() Note that this solution addresses deadlocks caused by printk() recursive calls only. That is vprintk_emit() and console_unlock(). The rest will be converted in a followup patch. Another thing to note is that we now keep lockdep enabled in printk, because we are protected against the printk recursion caused by lockdep in vprintk_emit() by the printk-safe mechanism - we first switch to per-CPU buffers and only then access the deadlock-prone locks. Examples: 1) printk() from logbuf_lock spin_lock section Assume the following code: printk() raw_spin_lock(&logbuf_lock); WARN_ON(1); raw_spin_unlock(&logbuf_lock); which now produces: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 366 at kernel/printk/printk.c:1811 vprintk_emit CPU: 0 PID: 366 Comm: bash Call Trace: warn_slowpath_null+0x1d/0x1f vprintk_emit+0x1cd/0x438 vprintk_default+0x1d/0x1f printk+0x48/0x50 [..] 2) printk() from semaphore sem->lock spin_lock section Assume the following code printk() console_trylock() down_trylock() raw_spin_lock_irqsave(&sem->lock, flags); WARN_ON(1); raw_spin_unlock_irqrestore(&sem->lock, flags); which now produces: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 363 at kernel/locking/semaphore.c:141 down_trylock CPU: 1 PID: 363 Comm: bash Call Trace: warn_slowpath_null+0x1d/0x1f down_trylock+0x3d/0x62 ? vprintk_emit+0x3f9/0x414 console_trylock+0x31/0xeb vprintk_emit+0x3f9/0x414 vprintk_default+0x1d/0x1f printk+0x48/0x50 [..] 3) printk() from console_unlock() Assume the following code: printk() console_unlock() raw_spin_lock(&logbuf_lock); WARN_ON(1); raw_spin_unlock(&logbuf_lock); which now produces: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 329 at kernel/printk/printk.c:2384 console_unlock CPU: 1 PID: 329 Comm: bash Call Trace: warn_slowpath_null+0x18/0x1a console_unlock+0x12d/0x559 ? trace_hardirqs_on_caller+0x16d/0x189 ? trace_hardirqs_on+0xd/0xf vprintk_emit+0x363/0x374 vprintk_default+0x18/0x1a printk+0x43/0x4b [..] 4) printk() from try_to_wake_up() Assume the following code: printk() console_unlock() up() try_to_wake_up() raw_spin_lock_irqsave(&p->pi_lock, flags); WARN_ON(1); raw_spin_unlock_irqrestore(&p->pi_lock, flags); which now produces: ------------[ cut here ]------------ WARNING: CPU: 3 PID: 363 at kernel/sched/core.c:2028 try_to_wake_up CPU: 3 PID: 363 Comm: bash Call Trace: warn_slowpath_null+0x1d/0x1f try_to_wake_up+0x7f/0x4f7 wake_up_process+0x15/0x17 __up.isra.0+0x56/0x63 up+0x32/0x42 __up_console_sem+0x37/0x55 console_unlock+0x21e/0x4c2 vprintk_emit+0x41c/0x462 vprintk_default+0x1d/0x1f printk+0x48/0x50 [..] 5) printk() from call_console_drivers() Assume the following code: printk() console_unlock() call_console_drivers() ... WARN_ON(1); which now produces: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 305 at kernel/printk/printk.c:1604 call_console_drivers CPU: 2 PID: 305 Comm: bash Call Trace: warn_slowpath_null+0x18/0x1a call_console_drivers.isra.6.constprop.16+0x3a/0xb0 console_unlock+0x471/0x48e vprintk_emit+0x1f4/0x206 vprintk_default+0x18/0x1a vprintk_func+0x6e/0x70 printk+0x3e/0x46 [..] 6) unsupported placeholder in printk() format now prints an actual warning from vscnprintf(), instead of 'BUG: recent printk recursion!'. ------------[ cut here ]------------ WARNING: CPU: 5 PID: 337 at lib/vsprintf.c:1900 format_decode Please remove unsupported % in format string CPU: 5 PID: 337 Comm: bash Call Trace: dump_stack+0x4f/0x65 __warn+0xc2/0xdd warn_slowpath_fmt+0x4b/0x53 format_decode+0x22c/0x308 vsnprintf+0x89/0x3b7 vscnprintf+0xd/0x26 vprintk_emit+0xb4/0x238 vprintk_default+0x1d/0x1f vprintk_func+0x6c/0x73 printk+0x43/0x4b [..] Link: http://lkml.kernel.org/r/20161227141611.940-7-sergey.senozhatsky@gmail.com Cc: Andrew Morton Cc: Linus Torvalds Cc: Jan Kara Cc: Tejun Heo Cc: Calvin Owens Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Peter Hurley Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Reviewed-by: Steven Rostedt (VMware) --- kernel/printk/printk.c | 47 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) (limited to 'kernel/printk') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 6bb77eff502d..f426a2b12a9b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -213,17 +213,36 @@ static int nr_ext_console_drivers; static int __down_trylock_console_sem(unsigned long ip) { - if (down_trylock(&console_sem)) + int lock_failed; + unsigned long flags; + + /* + * Here and in __up_console_sem() we need to be in safe mode, + * because spindump/WARN/etc from under console ->lock will + * deadlock in printk()->down_trylock_console_sem() otherwise. + */ + printk_safe_enter_irqsave(flags); + lock_failed = down_trylock(&console_sem); + printk_safe_exit_irqrestore(flags); + + if (lock_failed) return 1; mutex_acquire(&console_lock_dep_map, 0, 1, ip); return 0; } #define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_) -#define up_console_sem() do { \ - mutex_release(&console_lock_dep_map, 1, _RET_IP_);\ - up(&console_sem);\ -} while (0) +static void __up_console_sem(unsigned long ip) +{ + unsigned long flags; + + mutex_release(&console_lock_dep_map, 1, ip); + + printk_safe_enter_irqsave(flags); + up(&console_sem); + printk_safe_exit_irqrestore(flags); +} +#define up_console_sem() __up_console_sem(_RET_IP_) /* * This is used for debugging the mess that is the VT code by @@ -1689,7 +1708,7 @@ asmlinkage int vprintk_emit(int facility, int level, boot_delay_msec(level); printk_delay(); - local_irq_save(flags); + printk_safe_enter_irqsave(flags); this_cpu = smp_processor_id(); /* @@ -1705,13 +1724,12 @@ asmlinkage int vprintk_emit(int facility, int level, */ if (!oops_in_progress && !lockdep_recursing(current)) { recursion_bug = true; - local_irq_restore(flags); + printk_safe_exit_irqrestore(flags); return 0; } zap_locks(); } - lockdep_off(); /* This stops the holder of console_sem just where we want him */ raw_spin_lock(&logbuf_lock); logbuf_cpu = this_cpu; @@ -1771,12 +1789,10 @@ asmlinkage int vprintk_emit(int facility, int level, logbuf_cpu = UINT_MAX; raw_spin_unlock(&logbuf_lock); - lockdep_on(); - local_irq_restore(flags); + printk_safe_exit_irqrestore(flags); /* If called from the scheduler, we can not call up(). */ if (!in_sched) { - lockdep_off(); /* * Try to acquire and then immediately release the console * semaphore. The release will print out buffers and wake up @@ -1784,7 +1800,6 @@ asmlinkage int vprintk_emit(int facility, int level, */ if (console_trylock()) console_unlock(); - lockdep_on(); } return printed_len; @@ -2209,7 +2224,8 @@ again: size_t len; int level; - raw_spin_lock_irqsave(&logbuf_lock, flags); + printk_safe_enter_irqsave(flags); + raw_spin_lock(&logbuf_lock); if (seen_seq != log_next_seq) { wake_klogd = true; seen_seq = log_next_seq; @@ -2259,7 +2275,7 @@ skip: stop_critical_timings(); /* don't trace print latency */ call_console_drivers(level, ext_text, ext_len, text, len); start_critical_timings(); - local_irq_restore(flags); + printk_safe_exit_irqrestore(flags); if (do_cond_resched) cond_resched(); @@ -2282,7 +2298,8 @@ skip: */ raw_spin_lock(&logbuf_lock); retry = console_seq != log_next_seq; - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + raw_spin_unlock(&logbuf_lock); + printk_safe_exit_irqrestore(flags); if (retry && console_trylock()) goto again; -- cgit From 8b1742c9c2071520efacf44e974dc03421360471 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 27 Dec 2016 23:16:10 +0900 Subject: printk: remove zap_locks() function We use printk-safe now which makes printk-recursion detection code in vprintk_emit() unreachable. The tricky thing here is that, apart from detecting and reporting printk recursions, that code also used to zap_locks() in case of panic() from the same CPU. However, zap_locks() does not look to be needed anymore: 1) Since commit 08d78658f393 ("panic: release stale console lock to always get the logbuf printed out") panic flushing of `logbuf' to console ignores the state of `console_sem' by doing panic() console_trylock(); console_unlock(); 2) Since commit cf9b1106c81c ("printk/nmi: flush NMI messages on the system panic") panic attempts to zap the `logbuf_lock' spin_lock to successfully flush nmi messages to `logbuf'. Basically, it seems that we either already do what zap_locks() used to do but in other places or we ignore the state of the lock. The only reaming difference is that we don't re-init the console semaphore in printk_safe_flush_on_panic(), but this is not necessary because we don't call console drivers from printk_safe_flush_on_panic() due to the fact that we are using a deferred printk() version (as was suggested by Petr Mladek). Link: http://lkml.kernel.org/r/20161227141611.940-8-sergey.senozhatsky@gmail.com Cc: Andrew Morton Cc: Linus Torvalds Cc: Steven Rostedt Cc: Jan Kara Cc: Tejun Heo Cc: Calvin Owens Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Peter Hurley Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 61 -------------------------------------------------- 1 file changed, 61 deletions(-) (limited to 'kernel/printk') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f426a2b12a9b..e17d384f8c6b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1557,28 +1557,6 @@ static void call_console_drivers(int level, } } -/* - * Zap console related locks when oopsing. - * To leave time for slow consoles to print a full oops, - * only zap at most once every 30 seconds. - */ -static void zap_locks(void) -{ - static unsigned long oops_timestamp; - - if (time_after_eq(jiffies, oops_timestamp) && - !time_after(jiffies, oops_timestamp + 30 * HZ)) - return; - - oops_timestamp = jiffies; - - debug_locks_off(); - /* If a crash is occurring, make sure we can't deadlock */ - raw_spin_lock_init(&logbuf_lock); - /* And make sure that we print immediately */ - sema_init(&console_sem, 1); -} - int printk_delay_msec __read_mostly; static inline void printk_delay(void) @@ -1688,17 +1666,13 @@ asmlinkage int vprintk_emit(int facility, int level, const char *dict, size_t dictlen, const char *fmt, va_list args) { - static bool recursion_bug; static char textbuf[LOG_LINE_MAX]; char *text = textbuf; size_t text_len = 0; enum log_flags lflags = 0; unsigned long flags; - int this_cpu; int printed_len = 0; bool in_sched = false; - /* cpu currently holding logbuf_lock in this function */ - static unsigned int logbuf_cpu = UINT_MAX; if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; @@ -1709,42 +1683,8 @@ asmlinkage int vprintk_emit(int facility, int level, printk_delay(); printk_safe_enter_irqsave(flags); - this_cpu = smp_processor_id(); - - /* - * Ouch, printk recursed into itself! - */ - if (unlikely(logbuf_cpu == this_cpu)) { - /* - * If a crash is occurring during printk() on this CPU, - * then try to get the crash message out but make sure - * we can't deadlock. Otherwise just return to avoid the - * recursion and return - but flag the recursion so that - * it can be printed at the next appropriate moment: - */ - if (!oops_in_progress && !lockdep_recursing(current)) { - recursion_bug = true; - printk_safe_exit_irqrestore(flags); - return 0; - } - zap_locks(); - } - /* This stops the holder of console_sem just where we want him */ raw_spin_lock(&logbuf_lock); - logbuf_cpu = this_cpu; - - if (unlikely(recursion_bug)) { - static const char recursion_msg[] = - "BUG: recent printk recursion!"; - - recursion_bug = false; - /* emit KERN_CRIT message */ - printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0, - NULL, 0, recursion_msg, - strlen(recursion_msg)); - } - /* * The printf needs to come first; we need the syslog * prefix which might be passed-in as a parameter. @@ -1787,7 +1727,6 @@ asmlinkage int vprintk_emit(int facility, int level, printed_len += log_output(facility, level, lflags, dict, dictlen, text, text_len); - logbuf_cpu = UINT_MAX; raw_spin_unlock(&logbuf_lock); printk_safe_exit_irqrestore(flags); -- cgit From de6fcbdb68b29b475f4e6368cd465fd778ea421f Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 27 Dec 2016 23:16:11 +0900 Subject: printk: convert the rest to printk-safe This patch converts the rest of logbuf users (which are out of printk recursion case, but can deadlock in printk). To make printk-safe usage easier the patch introduces 4 helper macros: - logbuf_lock_irq()/logbuf_unlock_irq() lock/unlock the logbuf lock and disable/enable local IRQ - logbuf_lock_irqsave(flags)/logbuf_unlock_irqrestore(flags) lock/unlock the logbuf lock and saves/restores local IRQ state Link: http://lkml.kernel.org/r/20161227141611.940-9-sergey.senozhatsky@gmail.com Cc: Andrew Morton Cc: Linus Torvalds Cc: Steven Rostedt Cc: Jan Kara Cc: Tejun Heo Cc: Calvin Owens Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Peter Hurley Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 103 +++++++++++++++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 38 deletions(-) (limited to 'kernel/printk') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index e17d384f8c6b..080843148e8d 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -370,6 +370,34 @@ __packed __aligned(4) */ DEFINE_RAW_SPINLOCK(logbuf_lock); +/* + * Helper macros to lock/unlock logbuf_lock and switch between + * printk-safe/unsafe modes. + */ +#define logbuf_lock_irq() \ + do { \ + printk_safe_enter_irq(); \ + raw_spin_lock(&logbuf_lock); \ + } while (0) + +#define logbuf_unlock_irq() \ + do { \ + raw_spin_unlock(&logbuf_lock); \ + printk_safe_exit_irq(); \ + } while (0) + +#define logbuf_lock_irqsave(flags) \ + do { \ + printk_safe_enter_irqsave(flags); \ + raw_spin_lock(&logbuf_lock); \ + } while (0) + +#define logbuf_unlock_irqrestore(flags) \ + do { \ + raw_spin_unlock(&logbuf_lock); \ + printk_safe_exit_irqrestore(flags); \ + } while (0) + #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); /* the next printk record to read by syslog(READ) or /proc/kmsg */ @@ -801,20 +829,21 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, ret = mutex_lock_interruptible(&user->lock); if (ret) return ret; - raw_spin_lock_irq(&logbuf_lock); + + logbuf_lock_irq(); while (user->seq == log_next_seq) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); goto out; } - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); ret = wait_event_interruptible(log_wait, user->seq != log_next_seq); if (ret) goto out; - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); } if (user->seq < log_first_seq) { @@ -822,7 +851,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, user->idx = log_first_idx; user->seq = log_first_seq; ret = -EPIPE; - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); goto out; } @@ -835,7 +864,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, user->idx = log_next(user->idx); user->seq++; - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); if (len > count) { ret = -EINVAL; @@ -862,7 +891,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) if (offset) return -ESPIPE; - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); switch (whence) { case SEEK_SET: /* the first record */ @@ -886,7 +915,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) default: ret = -EINVAL; } - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); return ret; } @@ -900,7 +929,7 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); if (user->seq < log_next_seq) { /* return error when data has vanished underneath us */ if (user->seq < log_first_seq) @@ -908,7 +937,7 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait) else ret = POLLIN|POLLRDNORM; } - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); return ret; } @@ -938,10 +967,10 @@ static int devkmsg_open(struct inode *inode, struct file *file) mutex_init(&user->lock); - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); user->idx = log_first_idx; user->seq = log_first_seq; - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); file->private_data = user; return 0; @@ -1083,13 +1112,13 @@ void __init setup_log_buf(int early) return; } - raw_spin_lock_irqsave(&logbuf_lock, flags); + logbuf_lock_irqsave(flags); log_buf_len = new_log_buf_len; log_buf = new_log_buf; new_log_buf_len = 0; free = __LOG_BUF_LEN - log_next_idx; memcpy(log_buf, __log_buf, __LOG_BUF_LEN); - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + logbuf_unlock_irqrestore(flags); pr_info("log_buf_len: %d bytes\n", log_buf_len); pr_info("early log buf free: %d(%d%%)\n", @@ -1267,7 +1296,7 @@ static int syslog_print(char __user *buf, int size) size_t n; size_t skip; - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); if (syslog_seq < log_first_seq) { /* messages are gone, move to first one */ syslog_seq = log_first_seq; @@ -1275,7 +1304,7 @@ static int syslog_print(char __user *buf, int size) syslog_partial = 0; } if (syslog_seq == log_next_seq) { - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); break; } @@ -1294,7 +1323,7 @@ static int syslog_print(char __user *buf, int size) syslog_partial += n; } else n = 0; - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); if (!n) break; @@ -1323,7 +1352,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) if (!text) return -ENOMEM; - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); if (buf) { u64 next_seq; u64 seq; @@ -1371,12 +1400,12 @@ static int syslog_print_all(char __user *buf, int size, bool clear) idx = log_next(idx); seq++; - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); if (copy_to_user(buf + len, text, textlen)) len = -EFAULT; else len += textlen; - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); if (seq < log_first_seq) { /* messages are gone, move to next one */ @@ -1390,7 +1419,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) clear_seq = log_next_seq; clear_idx = log_next_idx; } - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); kfree(text); return len; @@ -1477,7 +1506,7 @@ int do_syslog(int type, char __user *buf, int len, int source) break; /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); if (syslog_seq < log_first_seq) { /* messages are gone, move to first one */ syslog_seq = log_first_seq; @@ -1505,7 +1534,7 @@ int do_syslog(int type, char __user *buf, int len, int source) } error -= syslog_partial; } - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); break; /* Size of the log buffer */ case SYSLOG_ACTION_SIZE_BUFFER: @@ -1682,9 +1711,8 @@ asmlinkage int vprintk_emit(int facility, int level, boot_delay_msec(level); printk_delay(); - printk_safe_enter_irqsave(flags); /* This stops the holder of console_sem just where we want him */ - raw_spin_lock(&logbuf_lock); + logbuf_lock_irqsave(flags); /* * The printf needs to come first; we need the syslog * prefix which might be passed-in as a parameter. @@ -1727,8 +1755,7 @@ asmlinkage int vprintk_emit(int facility, int level, printed_len += log_output(facility, level, lflags, dict, dictlen, text, text_len); - raw_spin_unlock(&logbuf_lock); - printk_safe_exit_irqrestore(flags); + logbuf_unlock_irqrestore(flags); /* If called from the scheduler, we can not call up(). */ if (!in_sched) { @@ -2501,10 +2528,10 @@ void register_console(struct console *newcon) * console_unlock(); will print out the buffered messages * for us. */ - raw_spin_lock_irqsave(&logbuf_lock, flags); + logbuf_lock_irqsave(flags); console_seq = syslog_seq; console_idx = syslog_idx; - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + logbuf_unlock_irqrestore(flags); /* * We're about to replay the log buffer. Only do this to the * just-registered console to avoid excessive message spam to @@ -2803,12 +2830,12 @@ void kmsg_dump(enum kmsg_dump_reason reason) /* initialize iterator with data about the stored records */ dumper->active = true; - raw_spin_lock_irqsave(&logbuf_lock, flags); + logbuf_lock_irqsave(flags); dumper->cur_seq = clear_seq; dumper->cur_idx = clear_idx; dumper->next_seq = log_next_seq; dumper->next_idx = log_next_idx; - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + logbuf_unlock_irqrestore(flags); /* invoke dumper which will iterate over records */ dumper->dump(dumper, reason); @@ -2893,9 +2920,9 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, unsigned long flags; bool ret; - raw_spin_lock_irqsave(&logbuf_lock, flags); + logbuf_lock_irqsave(flags); ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + logbuf_unlock_irqrestore(flags); return ret; } @@ -2934,7 +2961,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, if (!dumper->active) goto out; - raw_spin_lock_irqsave(&logbuf_lock, flags); + logbuf_lock_irqsave(flags); if (dumper->cur_seq < log_first_seq) { /* messages are gone, move to first available one */ dumper->cur_seq = log_first_seq; @@ -2943,7 +2970,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, /* last entry */ if (dumper->cur_seq >= dumper->next_seq) { - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + logbuf_unlock_irqrestore(flags); goto out; } @@ -2985,7 +3012,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, dumper->next_seq = next_seq; dumper->next_idx = next_idx; ret = true; - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + logbuf_unlock_irqrestore(flags); out: if (len) *len = l; @@ -3023,9 +3050,9 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper) { unsigned long flags; - raw_spin_lock_irqsave(&logbuf_lock, flags); + logbuf_lock_irqsave(flags); kmsg_dump_rewind_nolock(dumper); - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + logbuf_unlock_irqrestore(flags); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); -- cgit From d9c23523ed98a3acaa0bfd8fef143595d6aa631d Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Sat, 24 Dec 2016 23:09:01 +0900 Subject: printk: drop call_console_drivers() unused param We do suppress_message_printing() check before we call call_console_drivers() now, so `level' param is not needed anymore. Link: http://lkml.kernel.org/r/20161224140902.1962-2-sergey.senozhatsky@gmail.com Cc: Andrew Morton Cc: Steven Rostedt Cc: Peter Hurley Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'kernel/printk') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 080843148e8d..7180088cbb23 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1558,8 +1558,7 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) * log_buf[start] to log_buf[end - 1]. * The console_lock must be held. */ -static void call_console_drivers(int level, - const char *ext_text, size_t ext_len, +static void call_console_drivers(const char *ext_text, size_t ext_len, const char *text, size_t len) { struct console *con; @@ -1866,8 +1865,7 @@ static ssize_t msg_print_ext_header(char *buf, size_t size, static ssize_t msg_print_ext_body(char *buf, size_t size, char *dict, size_t dict_len, char *text, size_t text_len) { return 0; } -static void call_console_drivers(int level, - const char *ext_text, size_t ext_len, +static void call_console_drivers(const char *ext_text, size_t ext_len, const char *text, size_t len) {} static size_t msg_print_text(const struct printk_log *msg, bool syslog, char *buf, size_t size) { return 0; } @@ -2188,7 +2186,6 @@ again: struct printk_log *msg; size_t ext_len = 0; size_t len; - int level; printk_safe_enter_irqsave(flags); raw_spin_lock(&logbuf_lock); @@ -2212,8 +2209,7 @@ skip: break; msg = log_from_idx(console_idx); - level = msg->level; - if (suppress_message_printing(level)) { + if (suppress_message_printing(msg->level)) { /* * Skip record we have buffered and already printed * directly to the console when we received it, and @@ -2239,7 +2235,7 @@ skip: raw_spin_unlock(&logbuf_lock); stop_critical_timings(); /* don't trace print latency */ - call_console_drivers(level, ext_text, ext_len, text, len); + call_console_drivers(ext_text, ext_len, text, len); start_critical_timings(); printk_safe_exit_irqrestore(flags); -- cgit