diff options
Diffstat (limited to 'kernel/printk')
| -rw-r--r-- | kernel/printk/.kunitconfig | 3 | ||||
| -rw-r--r-- | kernel/printk/Makefile | 2 | ||||
| -rw-r--r-- | kernel/printk/console_cmdline.h | 1 | ||||
| -rw-r--r-- | kernel/printk/internal.h | 180 | ||||
| -rw-r--r-- | kernel/printk/nbcon.c | 1131 | ||||
| -rw-r--r-- | kernel/printk/printk.c | 1191 | ||||
| -rw-r--r-- | kernel/printk/printk_ringbuffer.c | 126 | ||||
| -rw-r--r-- | kernel/printk/printk_ringbuffer.h | 7 | ||||
| -rw-r--r-- | kernel/printk/printk_ringbuffer_kunit_test.c | 327 | ||||
| -rw-r--r-- | kernel/printk/printk_safe.c | 55 | ||||
| -rw-r--r-- | kernel/printk/sysctl.c | 4 |
11 files changed, 2665 insertions, 362 deletions
diff --git a/kernel/printk/.kunitconfig b/kernel/printk/.kunitconfig new file mode 100644 index 000000000000..f31458fd1a92 --- /dev/null +++ b/kernel/printk/.kunitconfig @@ -0,0 +1,3 @@ +CONFIG_KUNIT=y +CONFIG_PRINTK=y +CONFIG_PRINTK_RINGBUFFER_KUNIT_TEST=y diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index 39a2b61c7232..f8004ac3983d 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -7,3 +7,5 @@ obj-$(CONFIG_PRINTK_INDEX) += index.o obj-$(CONFIG_PRINTK) += printk_support.o printk_support-y := printk_ringbuffer.o printk_support-$(CONFIG_SYSCTL) += sysctl.o + +obj-$(CONFIG_PRINTK_RINGBUFFER_KUNIT_TEST) += printk_ringbuffer_kunit_test.o diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h index 3ca74ad391d6..0ab573b6d4dc 100644 --- a/kernel/printk/console_cmdline.h +++ b/kernel/printk/console_cmdline.h @@ -6,6 +6,7 @@ struct console_cmdline { char name[16]; /* Name of the driver */ int index; /* Minor dev. to use */ + char devname[32]; /* DEVNAME:0.0 style device name */ bool user_specified; /* Specified by command line vs. platform */ char *options; /* Options for the driver */ #ifdef CONFIG_A11Y_BRAILLE_CONSOLE diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 6c2afee5ef62..5f5f626f4279 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -2,13 +2,13 @@ /* * internal.h - printk internal definitions */ -#include <linux/percpu.h> #include <linux/console.h> -#include "printk_ringbuffer.h" +#include <linux/types.h> #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) +struct ctl_table; void __init printk_sysctl_init(void); -int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, +int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); #else #define printk_sysctl_init() do { } while (0) @@ -20,6 +20,19 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, (con->flags & CON_BOOT) ? "boot" : "", \ con->name, con->index, ##__VA_ARGS__) +/* + * Identify if legacy printing is forced in a dedicated kthread. If + * true, all printing via console lock occurs within a dedicated + * legacy printer thread. The only exception is on panic, after the + * nbcon consoles have had their chance to print the panic messages + * first. + */ +#ifdef CONFIG_PREEMPT_RT +# define force_legacy_kthread() (true) +#else +# define force_legacy_kthread() (false) +#endif + #ifdef CONFIG_PRINTK #ifdef CONFIG_PRINTK_CALLER @@ -39,11 +52,19 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, /* Flags for a single printk record. */ enum printk_info_flags { + /* always show on console, ignore console_loglevel */ + LOG_FORCE_CON = 1, LOG_NEWLINE = 2, /* text ended with a newline */ LOG_CONT = 8, /* text is a fragment of a continuation line */ }; +struct printk_ringbuffer; +struct dev_printk_info; + extern struct printk_ringbuffer *prb; +extern bool printk_kthreads_running; +extern bool printk_kthreads_ready; +extern bool debug_non_panic_cpus; __printf(4, 0) int vprintk_store(int facility, int level, @@ -51,7 +72,9 @@ int vprintk_store(int facility, int level, const char *fmt, va_list args); __printf(1, 0) int vprintk_default(const char *fmt, va_list args); -__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); + +void __printk_safe_enter(void); +void __printk_safe_exit(void); bool printk_percpu_data_ready(void); @@ -68,15 +91,45 @@ bool printk_percpu_data_ready(void); } while (0) void defer_console_output(void); +bool is_printk_legacy_deferred(void); +bool is_printk_force_console(void); u16 printk_parse_prefix(const char *text, int *level, enum printk_info_flags *flags); +void console_lock_spinning_enable(void); +int console_lock_spinning_disable_and_check(int cookie); u64 nbcon_seq_read(struct console *con); void nbcon_seq_force(struct console *con, u64 seq); bool nbcon_alloc(struct console *con); -void nbcon_init(struct console *con); void nbcon_free(struct console *con); +enum nbcon_prio nbcon_get_default_prio(void); +void nbcon_atomic_flush_pending(void); +bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, + int cookie, bool use_atomic); +bool nbcon_kthread_create(struct console *con); +void nbcon_kthread_stop(struct console *con); +void nbcon_kthreads_wake(void); + +/** + * nbcon_kthread_wake - Wake up a console printing thread + * @con: Console to operate on + */ +static inline void nbcon_kthread_wake(struct console *con) +{ + /* + * Guarantee any new records can be seen by tasks preparing to wait + * before this context checks if the rcuwait is empty. + * + * The full memory barrier in rcuwait_wake_up() pairs with the full + * memory barrier within set_current_state() of + * ___rcuwait_wait_event(), which is called after prepare_to_rcuwait() + * adds the waiter but before it has checked the wait condition. + * + * This pairs with nbcon_kthread_func:A. + */ + rcuwait_wake_up(&con->rcuwait); /* LMM(nbcon_kthread_wake:A) */ +} #else @@ -84,6 +137,9 @@ void nbcon_free(struct console *con); #define PRINTK_MESSAGE_MAX 0 #define PRINTKRB_RECORD_MAX 0 +#define printk_kthreads_running (false) +#define printk_kthreads_ready (false) + /* * In !PRINTK builds we still export console_sem * semaphore and some of console functions (console_unlock()/etc.), so @@ -93,14 +149,118 @@ void nbcon_free(struct console *con); #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) static inline bool printk_percpu_data_ready(void) { return false; } +static inline void defer_console_output(void) { } +static inline bool is_printk_legacy_deferred(void) { return false; } static inline u64 nbcon_seq_read(struct console *con) { return 0; } static inline void nbcon_seq_force(struct console *con, u64 seq) { } static inline bool nbcon_alloc(struct console *con) { return false; } -static inline void nbcon_init(struct console *con) { } static inline void nbcon_free(struct console *con) { } +static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; } +static inline void nbcon_atomic_flush_pending(void) { } +static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, + int cookie, bool use_atomic) { return false; } +static inline void nbcon_kthread_wake(struct console *con) { } +static inline void nbcon_kthreads_wake(void) { } #endif /* CONFIG_PRINTK */ +extern bool have_boot_console; +extern bool have_nbcon_console; +extern bool have_legacy_console; +extern bool legacy_allow_panic_sync; + +/** + * struct console_flush_type - Define available console flush methods + * @nbcon_atomic: Flush directly using nbcon_atomic() callback + * @nbcon_offload: Offload flush to printer thread + * @legacy_direct: Call the legacy loop in this context + * @legacy_offload: Offload the legacy loop into IRQ or legacy thread + * + * Note that the legacy loop also flushes the nbcon consoles. + */ +struct console_flush_type { + bool nbcon_atomic; + bool nbcon_offload; + bool legacy_direct; + bool legacy_offload; +}; + +extern bool console_irqwork_blocked; + +/* + * Identify which console flushing methods should be used in the context of + * the caller. + */ +static inline void printk_get_console_flush_type(struct console_flush_type *ft) +{ + memset(ft, 0, sizeof(*ft)); + + switch (nbcon_get_default_prio()) { + case NBCON_PRIO_NORMAL: + if (have_nbcon_console && !have_boot_console) { + if (printk_kthreads_running && !console_irqwork_blocked) + ft->nbcon_offload = true; + else + ft->nbcon_atomic = true; + } + + /* Legacy consoles are flushed directly when possible. */ + if (have_legacy_console || have_boot_console) { + if (!is_printk_legacy_deferred()) + ft->legacy_direct = true; + else if (!console_irqwork_blocked) + ft->legacy_offload = true; + } + break; + + case NBCON_PRIO_EMERGENCY: + if (have_nbcon_console && !have_boot_console) + ft->nbcon_atomic = true; + + /* Legacy consoles are flushed directly when possible. */ + if (have_legacy_console || have_boot_console) { + if (!is_printk_legacy_deferred()) + ft->legacy_direct = true; + else if (!console_irqwork_blocked) + ft->legacy_offload = true; + } + break; + + case NBCON_PRIO_PANIC: + /* + * In panic, the nbcon consoles will directly print. But + * only allowed if there are no boot consoles. + */ + if (have_nbcon_console && !have_boot_console) + ft->nbcon_atomic = true; + + if (have_legacy_console || have_boot_console) { + /* + * This is the same decision as NBCON_PRIO_NORMAL + * except that offloading never occurs in panic. + * + * Note that console_flush_on_panic() will flush + * legacy consoles anyway, even if unsafe. + */ + if (!is_printk_legacy_deferred()) + ft->legacy_direct = true; + + /* + * In panic, if nbcon atomic printing occurs, + * the legacy consoles must remain silent until + * explicitly allowed. + */ + if (ft->nbcon_atomic && !legacy_allow_panic_sync) + ft->legacy_direct = false; + } + break; + + default: + WARN_ON_ONCE(1); + break; + } +} + extern struct printk_buffers printk_shared_pbufs; /** @@ -129,10 +289,16 @@ struct printk_message { unsigned long dropped; }; -bool other_cpu_in_panic(void); bool printk_get_next_message(struct printk_message *pmsg, u64 seq, bool is_extended, bool may_supress); #ifdef CONFIG_PRINTK void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped); +void console_prepend_replay(struct printk_message *pmsg); +#endif + +#ifdef CONFIG_SMP +bool is_printk_cpu_sync_owner(void); +#else +static inline bool is_printk_cpu_sync_owner(void) { return false; } #endif diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index c8093bcc01fe..3fa403f9831f 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -2,11 +2,27 @@ // Copyright (C) 2022 Linutronix GmbH, John Ogness // Copyright (C) 2022 Intel, Thomas Gleixner -#include <linux/kernel.h> +#include <linux/atomic.h> +#include <linux/bug.h> #include <linux/console.h> #include <linux/delay.h> +#include <linux/errno.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/irqflags.h> +#include <linux/kdb.h> +#include <linux/kthread.h> +#include <linux/minmax.h> +#include <linux/panic.h> +#include <linux/percpu.h> +#include <linux/preempt.h> #include <linux/slab.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/string.h> +#include <linux/types.h> #include "internal.h" +#include "printk_ringbuffer.h" /* * Printk console printing implementation for consoles which does not depend * on the legacy style console_lock mechanism. @@ -103,6 +119,9 @@ * from scratch. */ +/* Counter of active nbcon emergency contexts. */ +static atomic_t nbcon_cpu_emergency_cnt = ATOMIC_INIT(0); + /** * nbcon_state_set - Helper function to set the console state * @con: Console to update @@ -172,9 +191,6 @@ void nbcon_seq_force(struct console *con, u64 seq) u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb)); atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq)); - - /* Clear con->seq since nbcon consoles use con->nbcon_seq instead. */ - con->seq = 0; } /** @@ -203,8 +219,9 @@ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) /** * nbcon_context_try_acquire_direct - Try to acquire directly - * @ctxt: The context of the caller - * @cur: The current console state + * @ctxt: The context of the caller + * @cur: The current console state + * @is_reacquire: This acquire is a reacquire * * Acquire the console when it is released. Also acquire the console when * the current owner has a lower priority and the console is in a safe state. @@ -214,25 +231,41 @@ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) * * Errors: * - * -EPERM: A panic is in progress and this is not the panic CPU. - * Or the current owner or waiter has the same or higher - * priority. No acquire method can be successful in - * this case. + * -EPERM: A panic is in progress and this is neither the panic + * CPU nor is this a reacquire. Or the current owner or + * waiter has the same or higher priority. No acquire + * method can be successful in these cases. * * -EBUSY: The current owner has a lower priority but the console * in an unsafe state. The caller should try using * the handover acquire method. */ static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt, - struct nbcon_state *cur) + struct nbcon_state *cur, bool is_reacquire) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state new; do { - if (other_cpu_in_panic()) + /* + * Panic does not imply that the console is owned. However, + * since all non-panic CPUs are stopped during panic(), it + * is safer to have them avoid gaining console ownership. + * + * One exception is when kdb has locked for printing on this CPU. + * + * Second exception is a reacquire (and an unsafe takeover + * has not previously occurred) then it is allowed to attempt + * a direct acquire in panic. This gives console drivers an + * opportunity to perform any necessary cleanup if they were + * interrupted by the panic CPU while printing. + */ + if (panic_on_other_cpu() && + !kdb_printf_on_this_cpu() && + (!is_reacquire || cur->unsafe_takeover)) { return -EPERM; + } if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio) return -EPERM; @@ -262,18 +295,30 @@ static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio) /* * The request context is well defined by the @req_prio because: * - * - Only a context with a higher priority can take over the request. + * - Only a context with a priority higher than the owner can become + * a waiter. + * - Only a context with a priority higher than the waiter can + * directly take over the request. * - There are only three priorities. * - Only one CPU is allowed to request PANIC priority. * - Lower priorities are ignored during panic() until reboot. * * As a result, the following scenario is *not* possible: * - * 1. Another context with a higher priority directly takes ownership. - * 2. The higher priority context releases the ownership. - * 3. A lower priority context takes the ownership. - * 4. Another context with the same priority as this context + * 1. This context is currently a waiter. + * 2. Another context with a higher priority than this context + * directly takes ownership. + * 3. The higher priority context releases the ownership. + * 4. Another lower priority context takes the ownership. + * 5. Another context with the same priority as this context * creates a request and starts waiting. + * + * Event #1 implies this context is EMERGENCY. + * Event #2 implies the new context is PANIC. + * Event #3 occurs when panic() has flushed the console. + * Event #4 occurs when a non-panic CPU reacquires. + * Event #5 is not possible due to the panic_on_other_cpu() check + * in nbcon_context_try_acquire_handover(). */ return (cur->req_prio == expected_prio); @@ -311,7 +356,7 @@ static int nbcon_context_try_acquire_requested(struct nbcon_context *ctxt, struct nbcon_state new; /* Note that the caller must still remove the request! */ - if (other_cpu_in_panic()) + if (panic_on_other_cpu()) return -EPERM; /* @@ -402,6 +447,16 @@ static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt, WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); WARN_ON_ONCE(!cur->unsafe); + /* + * Panic does not imply that the console is owned. However, it + * is critical that non-panic CPUs during panic are unable to + * wait for a handover in order to satisfy the assumptions of + * nbcon_waiter_matches(). In particular, the assumption that + * lower priorities are ignored during panic. + */ + if (panic_on_other_cpu()) + return -EPERM; + /* Handover is not possible on the same CPU. */ if (cur->cpu == cpu) return -EBUSY; @@ -529,8 +584,10 @@ static struct printk_buffers panic_nbcon_pbufs; /** * nbcon_context_try_acquire - Try to acquire nbcon console - * @ctxt: The context of the caller + * @ctxt: The context of the caller + * @is_reacquire: This acquire is a reacquire * + * Context: Under @ctxt->con->device_lock() or local_irq_save(). * Return: True if the console was acquired. False otherwise. * * If the caller allowed an unsafe hostile takeover, on success the @@ -538,17 +595,15 @@ static struct printk_buffers panic_nbcon_pbufs; * in an unsafe state. Otherwise, on success the caller may assume * the console is not in an unsafe state. */ -__maybe_unused -static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) +static bool nbcon_context_try_acquire(struct nbcon_context *ctxt, bool is_reacquire) { - unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state cur; int err; nbcon_state_read(con, &cur); try_again: - err = nbcon_context_try_acquire_direct(ctxt, &cur); + err = nbcon_context_try_acquire_direct(ctxt, &cur, is_reacquire); if (err != -EBUSY) goto out; @@ -566,7 +621,7 @@ out: /* Acquire succeeded. */ /* Assign the appropriate buffer for this context. */ - if (atomic_read(&panic_cpu) == cpu) + if (panic_on_this_cpu()) ctxt->pbufs = &panic_nbcon_pbufs; else ctxt->pbufs = con->pbufs; @@ -581,11 +636,29 @@ static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu, int expected_prio) { /* - * Since consoles can only be acquired by higher priorities, - * owning contexts are uniquely identified by @prio. However, - * since contexts can unexpectedly lose ownership, it is - * possible that later another owner appears with the same - * priority. For this reason @cpu is also needed. + * A similar function, nbcon_waiter_matches(), only deals with + * EMERGENCY and PANIC priorities. However, this function must also + * deal with the NORMAL priority, which requires additional checks + * and constraints. + * + * For the case where preemption and interrupts are disabled, it is + * enough to also verify that the owning CPU has not changed. + * + * For the case where preemption or interrupts are enabled, an + * external synchronization method *must* be used. In particular, + * the driver-specific locking mechanism used in device_lock() + * (including disabling migration) should be used. It prevents + * scenarios such as: + * + * 1. [Task A] owns a context with NBCON_PRIO_NORMAL on [CPU X] and + * is scheduled out. + * 2. Another context takes over the lock with NBCON_PRIO_EMERGENCY + * and releases it. + * 3. [Task B] acquires a context with NBCON_PRIO_NORMAL on [CPU X] + * and is scheduled out. + * 4. [Task A] gets running on [CPU X] and sees that the console is + * still owned by a task on [CPU X] with NBON_PRIO_NORMAL. Thus + * [Task A] thinks it is the owner when it is not. */ if (cur->prio != expected_prio) @@ -784,6 +857,19 @@ out: return nbcon_context_can_proceed(ctxt, &cur); } +void nbcon_write_context_set_buf(struct nbcon_write_context *wctxt, + char *buf, unsigned int len) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + struct console *con = ctxt->console; + struct nbcon_state cur; + + wctxt->outbuf = buf; + wctxt->len = len; + nbcon_state_read(con, &cur); + wctxt->unsafe_takeover = cur.unsafe_takeover; +} + /** * nbcon_enter_unsafe - Enter an unsafe region in the driver * @wctxt: The write context that was handed to the write function @@ -799,8 +885,12 @@ out: bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + bool is_owner; - return nbcon_context_enter_unsafe(ctxt); + is_owner = nbcon_context_enter_unsafe(ctxt); + if (!is_owner) + nbcon_write_context_set_buf(wctxt, NULL, 0); + return is_owner; } EXPORT_SYMBOL_GPL(nbcon_enter_unsafe); @@ -819,14 +909,47 @@ EXPORT_SYMBOL_GPL(nbcon_enter_unsafe); bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + bool ret; - return nbcon_context_exit_unsafe(ctxt); + ret = nbcon_context_exit_unsafe(ctxt); + if (!ret) + nbcon_write_context_set_buf(wctxt, NULL, 0); + return ret; } EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); /** + * nbcon_reacquire_nobuf - Reacquire a console after losing ownership + * while printing + * @wctxt: The write context that was handed to the write callback + * + * Since ownership can be lost at any time due to handover or takeover, a + * printing context _must_ be prepared to back out immediately and + * carefully. However, there are scenarios where the printing context must + * reacquire ownership in order to finalize or revert hardware changes. + * + * This function allows a printing context to reacquire ownership using the + * same priority as its previous ownership. + * + * Note that after a successful reacquire the printing context will have no + * output buffer because that has been lost. This function cannot be used to + * resume printing. + */ +void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + + while (!nbcon_context_try_acquire(ctxt, true)) + cpu_relax(); + + nbcon_write_context_set_buf(wctxt, NULL, 0); +} +EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf); + +/** * nbcon_emit_next_record - Emit a record in the acquired context * @wctxt: The write context that will be handed to the write function + * @use_atomic: True if the write_atomic() callback is to be used * * Return: True if this context still owns the console. False if * ownership was handed over or taken. @@ -840,8 +963,7 @@ EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); * When true is returned, @wctxt->ctxt.backlog indicates whether there are * still records pending in the ringbuffer, */ -__maybe_unused -static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) +static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_atomic) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); struct console *con = ctxt->console; @@ -852,7 +974,22 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) unsigned long con_dropped; struct nbcon_state cur; unsigned long dropped; - bool done; + unsigned long ulseq; + + /* + * This function should never be called for consoles that have not + * implemented the necessary callback for writing: i.e. legacy + * consoles and, when atomic, nbcon consoles with no write_atomic(). + * Handle it as if ownership was lost and try to continue. + * + * Note that for nbcon consoles the write_thread() callback is + * mandatory and was already checked in nbcon_alloc(). + */ + if (WARN_ON_ONCE((use_atomic && !con->write_atomic) || + !(console_srcu_read_flags(con) & CON_NBCON))) { + nbcon_context_release(ctxt); + return false; + } /* * The printk buffers are filled within an unsafe section. This @@ -878,6 +1015,29 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) if (dropped && !is_extended) console_prepend_dropped(&pmsg, dropped); + /* + * If the previous owner was assigned the same record, this context + * has taken over ownership and is replaying the record. Prepend a + * message to let the user know the record is replayed. + */ + ulseq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_prev_seq)); + if (__ulseq_to_u64seq(prb, ulseq) == pmsg.seq) { + console_prepend_replay(&pmsg); + } else { + /* + * Ensure this context is still the owner before trying to + * update @nbcon_prev_seq. Otherwise the value in @ulseq may + * not be from the previous owner and instead be some later + * value from the context that took over ownership. + */ + nbcon_state_read(con, &cur); + if (!nbcon_context_can_proceed(ctxt, &cur)) + return false; + + atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_prev_seq), &ulseq, + __u64seq_to_ulseq(pmsg.seq)); + } + if (!nbcon_context_exit_unsafe(ctxt)) return false; @@ -886,22 +1046,27 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) goto update_con; /* Initialize the write context for driver callbacks. */ - wctxt->outbuf = &pmsg.pbufs->outbuf[0]; - wctxt->len = pmsg.outbuf_len; - nbcon_state_read(con, &cur); - wctxt->unsafe_takeover = cur.unsafe_takeover; + nbcon_write_context_set_buf(wctxt, &pmsg.pbufs->outbuf[0], pmsg.outbuf_len); - if (con->write_atomic) { - done = con->write_atomic(con, wctxt); - } else { + if (use_atomic) + con->write_atomic(con, wctxt); + else + con->write_thread(con, wctxt); + + if (!wctxt->outbuf) { + /* + * Ownership was lost and reacquired by the driver. Handle it + * as if ownership was lost. + */ nbcon_context_release(ctxt); - WARN_ON_ONCE(1); - done = false; + return false; } - /* If not done, the emit was aborted. */ - if (!done) - return false; + /* + * Ownership may have been lost but _not_ reacquired by the driver. + * This case is detected and handled when entering unsafe to update + * dropped/seq values. + */ /* * Since any dropped message was successfully output, reset the @@ -928,64 +1093,745 @@ update_con: return nbcon_context_exit_unsafe(ctxt); } +/* + * nbcon_emit_one - Print one record for an nbcon console using the + * specified callback + * @wctxt: An initialized write context struct to use for this context + * @use_atomic: True if the write_atomic() callback is to be used + * + * Return: True, when a record has been printed and there are still + * pending records. The caller might want to continue flushing. + * + * False, when there is no pending record, or when the console + * context cannot be acquired, or the ownership has been lost. + * The caller should give up. Either the job is done, cannot be + * done, or will be handled by the owning context. + * + * This is an internal helper to handle the locking of the console before + * calling nbcon_emit_next_record(). + */ +static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + struct console *con = ctxt->console; + unsigned long flags; + bool ret = false; + + if (!use_atomic) { + con->device_lock(con, &flags); + + /* + * Ensure this stays on the CPU to make handover and + * takeover possible. + */ + cant_migrate(); + } + + if (!nbcon_context_try_acquire(ctxt, false)) + goto out; + + /* + * nbcon_emit_next_record() returns false when the console was + * handed over or taken over. In both cases the context is no + * longer valid. + * + * The higher priority printing context takes over responsibility + * to print the pending records. + */ + if (!nbcon_emit_next_record(wctxt, use_atomic)) + goto out; + + nbcon_context_release(ctxt); + + ret = ctxt->backlog; +out: + if (!use_atomic) + con->device_unlock(con, flags); + return ret; +} + /** - * nbcon_alloc - Allocate buffers needed by the nbcon console - * @con: Console to allocate buffers for + * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup + * @con: Console to operate on + * @ctxt: The nbcon context from nbcon_context_try_acquire() * - * Return: True on success. False otherwise and the console cannot - * be used. + * Return: True if the thread should shutdown or if the console is + * allowed to print and a record is available. False otherwise. * - * This is not part of nbcon_init() because buffer allocation must - * be performed earlier in the console registration process. + * After the thread wakes up, it must first check if it should shutdown before + * attempting any printing. */ -bool nbcon_alloc(struct console *con) +static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_context *ctxt) { - if (con->flags & CON_BOOT) { + bool ret = false; + short flags; + int cookie; + + if (kthread_should_stop()) + return true; + + /* + * Block the kthread when the system is in an emergency or panic mode. + * It increases the chance that these contexts would be able to show + * the messages directly. And it reduces the risk of interrupted writes + * where the context with a higher priority takes over the nbcon console + * ownership in the middle of a message. + */ + if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt)) || + unlikely(panic_in_progress())) + return false; + + cookie = console_srcu_read_lock(); + + flags = console_srcu_read_flags(con); + if (console_is_usable(con, flags, false)) { + /* Bring the sequence in @ctxt up to date */ + ctxt->seq = nbcon_seq_read(con); + + ret = prb_read_valid(prb, ctxt->seq, NULL); + } + + console_srcu_read_unlock(cookie); + return ret; +} + +/** + * nbcon_kthread_func - The printer thread function + * @__console: Console to operate on + * + * Return: 0 + */ +static int nbcon_kthread_func(void *__console) +{ + struct console *con = __console; + struct nbcon_write_context wctxt = { + .ctxt.console = con, + .ctxt.prio = NBCON_PRIO_NORMAL, + }; + struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); + short con_flags; + bool backlog; + int cookie; + +wait_for_event: + /* + * Guarantee this task is visible on the rcuwait before + * checking the wake condition. + * + * The full memory barrier within set_current_state() of + * ___rcuwait_wait_event() pairs with the full memory + * barrier within rcuwait_has_sleeper(). + * + * This pairs with rcuwait_has_sleeper:A and nbcon_kthread_wake:A. + */ + rcuwait_wait_event(&con->rcuwait, + nbcon_kthread_should_wakeup(con, ctxt), + TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */ + + do { + if (kthread_should_stop()) + return 0; + /* - * Boot console printing is synchronized with legacy console - * printing, so boot consoles can share the same global printk - * buffers. + * Block the kthread when the system is in an emergency or panic + * mode. See nbcon_kthread_should_wakeup() for more details. */ - con->pbufs = &printk_shared_pbufs; + if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt)) || + unlikely(panic_in_progress())) + goto wait_for_event; + + backlog = false; + + /* + * Keep the srcu read lock around the entire operation so that + * synchronize_srcu() can guarantee that the kthread stopped + * or suspended printing. + */ + cookie = console_srcu_read_lock(); + + con_flags = console_srcu_read_flags(con); + + if (console_is_usable(con, con_flags, false)) + backlog = nbcon_emit_one(&wctxt, false); + + console_srcu_read_unlock(cookie); + + cond_resched(); + + } while (backlog); + + goto wait_for_event; +} + +/** + * nbcon_irq_work - irq work to wake console printer thread + * @irq_work: The irq work to operate on + */ +static void nbcon_irq_work(struct irq_work *irq_work) +{ + struct console *con = container_of(irq_work, struct console, irq_work); + + nbcon_kthread_wake(con); +} + +static inline bool rcuwait_has_sleeper(struct rcuwait *w) +{ + /* + * Guarantee any new records can be seen by tasks preparing to wait + * before this context checks if the rcuwait is empty. + * + * This full memory barrier pairs with the full memory barrier within + * set_current_state() of ___rcuwait_wait_event(), which is called + * after prepare_to_rcuwait() adds the waiter but before it has + * checked the wait condition. + * + * This pairs with nbcon_kthread_func:A. + */ + smp_mb(); /* LMM(rcuwait_has_sleeper:A) */ + return rcuwait_active(w); +} + +/** + * nbcon_kthreads_wake - Wake up printing threads using irq_work + */ +void nbcon_kthreads_wake(void) +{ + struct console *con; + int cookie; + + if (!printk_kthreads_running) + return; + + /* + * It is not allowed to call this function when console irq_work + * is blocked. + */ + if (WARN_ON_ONCE(console_irqwork_blocked)) + return; + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + if (!(console_srcu_read_flags(con) & CON_NBCON)) + continue; + + /* + * Only schedule irq_work if the printing thread is + * actively waiting. If not waiting, the thread will + * notice by itself that it has work to do. + */ + if (rcuwait_has_sleeper(&con->rcuwait)) + irq_work_queue(&con->irq_work); + } + console_srcu_read_unlock(cookie); +} + +/* + * nbcon_kthread_stop - Stop a console printer thread + * @con: Console to operate on + */ +void nbcon_kthread_stop(struct console *con) +{ + lockdep_assert_console_list_lock_held(); + + if (!con->kthread) + return; + + kthread_stop(con->kthread); + con->kthread = NULL; +} + +/** + * nbcon_kthread_create - Create a console printer thread + * @con: Console to operate on + * + * Return: True if the kthread was started or already exists. + * Otherwise false and @con must not be registered. + * + * This function is called when it will be expected that nbcon consoles are + * flushed using the kthread. The messages printed with NBCON_PRIO_NORMAL + * will be no longer flushed by the legacy loop. This is why failure must + * be fatal for console registration. + * + * If @con was already registered and this function fails, @con must be + * unregistered before the global state variable @printk_kthreads_running + * can be set. + */ +bool nbcon_kthread_create(struct console *con) +{ + struct task_struct *kt; + + lockdep_assert_console_list_lock_held(); + + if (con->kthread) + return true; + + kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index); + if (WARN_ON(IS_ERR(kt))) { + con_printk(KERN_ERR, con, "failed to start printing thread\n"); + return false; + } + + con->kthread = kt; + + /* + * It is important that console printing threads are scheduled + * shortly after a printk call and with generous runtime budgets. + */ + sched_set_normal(con->kthread, -20); + + return true; +} + +/* Track the nbcon emergency nesting per CPU. */ +static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting); +static unsigned int early_nbcon_pcpu_emergency_nesting __initdata; + +/** + * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer + * + * Context: For reading, any context. For writing, any context which could + * not be migrated to another CPU. + * Return: Either a pointer to the per CPU emergency nesting counter of + * the current CPU or to the init data during early boot. + * + * The function is safe for reading per-CPU variables in any context because + * preemption is disabled if the current CPU is in the emergency state. See + * also nbcon_cpu_emergency_enter(). + */ +static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void) +{ + /* + * The value of __printk_percpu_data_ready gets set in normal + * context and before SMP initialization. As a result it could + * never change while inside an nbcon emergency section. + */ + if (!printk_percpu_data_ready()) + return &early_nbcon_pcpu_emergency_nesting; + + return raw_cpu_ptr(&nbcon_pcpu_emergency_nesting); +} + +/** + * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon + * printing on the current CPU + * + * Context: Any context. + * Return: The nbcon_prio to use for acquiring an nbcon console in this + * context for printing. + * + * The function is safe for reading per-CPU data in any context because + * preemption is disabled if the current CPU is in the emergency or panic + * state. + */ +enum nbcon_prio nbcon_get_default_prio(void) +{ + unsigned int *cpu_emergency_nesting; + + if (panic_on_this_cpu()) + return NBCON_PRIO_PANIC; + + cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); + if (*cpu_emergency_nesting) + return NBCON_PRIO_EMERGENCY; + + return NBCON_PRIO_NORMAL; +} + +/* + * Track if it is allowed to perform unsafe hostile takeovers of console + * ownership. When true, console drivers might perform unsafe actions while + * printing. It is externally available via nbcon_allow_unsafe_takeover(). + */ +static bool panic_nbcon_allow_unsafe_takeover; + +/** + * nbcon_allow_unsafe_takeover - Check if unsafe console takeovers are allowed + * + * Return: True, when it is permitted to perform unsafe console printing + * + * This is also used by console_is_usable() to determine if it is allowed to + * call write_atomic() callbacks flagged as unsafe (CON_NBCON_ATOMIC_UNSAFE). + */ +bool nbcon_allow_unsafe_takeover(void) +{ + return panic_on_this_cpu() && panic_nbcon_allow_unsafe_takeover; +} + +/** + * nbcon_legacy_emit_next_record - Print one record for an nbcon console + * in legacy contexts + * @con: The console to print on + * @handover: Will be set to true if a printk waiter has taken over the + * console_lock, in which case the caller is no longer holding + * both the console_lock and the SRCU read lock. Otherwise it + * is set to false. + * @cookie: The cookie from the SRCU read lock. + * @use_atomic: Set true when called in an atomic or unknown context. + * It affects which nbcon callback will be used: write_atomic() + * or write_thread(). + * + * When false, the write_thread() callback is used and would be + * called in a preemtible context unless disabled by the + * device_lock. The legacy handover is not allowed in this mode. + * + * Context: Any context except NMI. + * Return: True, when a record has been printed and there are still + * pending records. The caller might want to continue flushing. + * + * False, when there is no pending record, or when the console + * context cannot be acquired, or the ownership has been lost. + * The caller should give up. Either the job is done, cannot be + * done, or will be handled by the owning context. + * + * This function is meant to be called by console_flush_all() to print records + * on nbcon consoles from legacy context (printing via console unlocking). + * Essentially it is the nbcon version of console_emit_next_record(). + */ +bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, + int cookie, bool use_atomic) +{ + struct nbcon_write_context wctxt = { }; + struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); + unsigned long flags; + bool progress; + + ctxt->console = con; + ctxt->prio = nbcon_get_default_prio(); + + if (use_atomic) { + /* + * In an atomic or unknown context, use the same procedure as + * in console_emit_next_record(). It allows to handover. + */ + printk_safe_enter_irqsave(flags); + console_lock_spinning_enable(); + stop_critical_timings(); + } + + progress = nbcon_emit_one(&wctxt, use_atomic); + + if (use_atomic) { + start_critical_timings(); + *handover = console_lock_spinning_disable_and_check(cookie); + printk_safe_exit_irqrestore(flags); } else { - con->pbufs = kmalloc(sizeof(*con->pbufs), GFP_KERNEL); - if (!con->pbufs) { - con_printk(KERN_ERR, con, "failed to allocate printing buffer\n"); - return false; + /* Non-atomic does not perform legacy spinning handovers. */ + *handover = false; + } + + return progress; +} + +/** + * __nbcon_atomic_flush_pending_con - Flush specified nbcon console using its + * write_atomic() callback + * @con: The nbcon console to flush + * @stop_seq: Flush up until this record + * + * Return: 0 if @con was flushed up to @stop_seq Otherwise, error code on + * failure. + * + * Errors: + * + * -EPERM: Unable to acquire console ownership. + * + * -EAGAIN: Another context took over ownership while printing. + * + * -ENOENT: A record before @stop_seq is not available. + * + * If flushing up to @stop_seq was not successful, it only makes sense for the + * caller to try again when -EAGAIN was returned. When -EPERM is returned, + * this context is not allowed to acquire the console. When -ENOENT is + * returned, it cannot be expected that the unfinalized record will become + * available. + */ +static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq) +{ + struct nbcon_write_context wctxt = { }; + struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); + int err = 0; + + ctxt->console = con; + ctxt->spinwait_max_us = 2000; + ctxt->prio = nbcon_get_default_prio(); + ctxt->allow_unsafe_takeover = nbcon_allow_unsafe_takeover(); + + while (nbcon_seq_read(con) < stop_seq) { + if (!nbcon_context_try_acquire(ctxt, false)) + return -EPERM; + + /* + * nbcon_emit_next_record() returns false when the console was + * handed over or taken over. In both cases the context is no + * longer valid. + */ + if (!nbcon_emit_next_record(&wctxt, true)) + return -EAGAIN; + + nbcon_context_release(ctxt); + + if (!ctxt->backlog) { + /* Are there reserved but not yet finalized records? */ + if (nbcon_seq_read(con) < stop_seq) + err = -ENOENT; + break; } } - return true; + return err; +} + +/** + * nbcon_atomic_flush_pending_con - Flush specified nbcon console using its + * write_atomic() callback + * @con: The nbcon console to flush + * @stop_seq: Flush up until this record + * + * This will stop flushing before @stop_seq if another context has ownership. + * That context is then responsible for the flushing. Likewise, if new records + * are added while this context was flushing and there is no other context + * to handle the printing, this context must also flush those records. + */ +static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq) +{ + struct console_flush_type ft; + unsigned long flags; + int err; + +again: + /* + * Atomic flushing does not use console driver synchronization (i.e. + * it does not hold the port lock for uart consoles). Therefore IRQs + * must be disabled to avoid being interrupted and then calling into + * a driver that will deadlock trying to acquire console ownership. + */ + local_irq_save(flags); + + err = __nbcon_atomic_flush_pending_con(con, stop_seq); + + local_irq_restore(flags); + + /* + * If there was a new owner (-EPERM, -EAGAIN), that context is + * responsible for completing. + * + * Do not wait for records not yet finalized (-ENOENT) to avoid a + * possible deadlock. They will either get flushed by the writer or + * eventually skipped on panic CPU. + */ + if (err) + return; + + /* + * If flushing was successful but more records are available, this + * context must flush those remaining records if the printer thread + * is not available do it. + */ + printk_get_console_flush_type(&ft); + if (!ft.nbcon_offload && + prb_read_valid(prb, nbcon_seq_read(con), NULL)) { + stop_seq = prb_next_reserve_seq(prb); + goto again; + } +} + +/** + * __nbcon_atomic_flush_pending - Flush all nbcon consoles using their + * write_atomic() callback + * @stop_seq: Flush up until this record + */ +static void __nbcon_atomic_flush_pending(u64 stop_seq) +{ + struct console *con; + int cookie; + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + short flags = console_srcu_read_flags(con); + + if (!(flags & CON_NBCON)) + continue; + + if (!console_is_usable(con, flags, true)) + continue; + + if (nbcon_seq_read(con) >= stop_seq) + continue; + + nbcon_atomic_flush_pending_con(con, stop_seq); + } + console_srcu_read_unlock(cookie); +} + +/** + * nbcon_atomic_flush_pending - Flush all nbcon consoles using their + * write_atomic() callback + * + * Flush the backlog up through the currently newest record. Any new + * records added while flushing will not be flushed if there is another + * context available to handle the flushing. This is to avoid one CPU + * printing unbounded because other CPUs continue to add records. + */ +void nbcon_atomic_flush_pending(void) +{ + __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb)); +} + +/** + * nbcon_atomic_flush_unsafe - Flush all nbcon consoles using their + * write_atomic() callback and allowing unsafe hostile takeovers + * + * Flush the backlog up through the currently newest record. Unsafe hostile + * takeovers will be performed, if necessary. + */ +void nbcon_atomic_flush_unsafe(void) +{ + panic_nbcon_allow_unsafe_takeover = true; + __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb)); + panic_nbcon_allow_unsafe_takeover = false; } /** - * nbcon_init - Initialize the nbcon console specific data + * nbcon_cpu_emergency_enter - Enter an emergency section where printk() + * messages for that CPU are flushed directly + * + * Context: Any context. Disables preemption. + * + * When within an emergency section, printk() calls will attempt to flush any + * pending messages in the ringbuffer. + */ +void nbcon_cpu_emergency_enter(void) +{ + unsigned int *cpu_emergency_nesting; + + preempt_disable(); + + atomic_inc(&nbcon_cpu_emergency_cnt); + + cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); + (*cpu_emergency_nesting)++; +} + +/** + * nbcon_cpu_emergency_exit - Exit an emergency section + * + * Context: Within an emergency section. Enables preemption. + */ +void nbcon_cpu_emergency_exit(void) +{ + unsigned int *cpu_emergency_nesting; + + cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); + if (!WARN_ON_ONCE(*cpu_emergency_nesting == 0)) + (*cpu_emergency_nesting)--; + + /* + * Wake up kthreads because there might be some pending messages + * added by other CPUs with normal priority since the last flush + * in the emergency context. + */ + if (!WARN_ON_ONCE(atomic_read(&nbcon_cpu_emergency_cnt) == 0)) { + if (atomic_dec_return(&nbcon_cpu_emergency_cnt) == 0) { + struct console_flush_type ft; + + printk_get_console_flush_type(&ft); + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + } + } + + preempt_enable(); +} + +/** + * nbcon_alloc - Allocate and init the nbcon console specific data * @con: Console to initialize * - * nbcon_alloc() *must* be called and succeed before this function - * is called. + * Return: True if the console was fully allocated and initialized. + * Otherwise @con must not be registered. * - * This function expects that the legacy @con->seq has been set. + * When allocation and init was successful, the console must be properly + * freed using nbcon_free() once it is no longer needed. */ -void nbcon_init(struct console *con) +bool nbcon_alloc(struct console *con) { struct nbcon_state state = { }; - /* nbcon_alloc() must have been called and successful! */ - BUG_ON(!con->pbufs); + /* Synchronize the kthread start. */ + lockdep_assert_console_list_lock_held(); + + /* The write_thread() callback is mandatory. */ + if (WARN_ON(!con->write_thread)) + return false; - nbcon_seq_force(con, con->seq); + rcuwait_init(&con->rcuwait); + init_irq_work(&con->irq_work, nbcon_irq_work); + atomic_long_set(&ACCESS_PRIVATE(con, nbcon_prev_seq), -1UL); nbcon_state_set(con, &state); + + /* + * Initialize @nbcon_seq to the highest possible sequence number so + * that practically speaking it will have nothing to print until a + * desired initial sequence number has been set via nbcon_seq_force(). + */ + atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), ULSEQ_MAX(prb)); + + if (con->flags & CON_BOOT) { + /* + * Boot console printing is synchronized with legacy console + * printing, so boot consoles can share the same global printk + * buffers. + */ + con->pbufs = &printk_shared_pbufs; + } else { + con->pbufs = kmalloc(sizeof(*con->pbufs), GFP_KERNEL); + if (!con->pbufs) { + con_printk(KERN_ERR, con, "failed to allocate printing buffer\n"); + return false; + } + + if (printk_kthreads_ready && !have_boot_console) { + if (!nbcon_kthread_create(con)) { + kfree(con->pbufs); + con->pbufs = NULL; + return false; + } + + /* Might be the first kthread. */ + printk_kthreads_running = true; + } + } + + return true; } /** * nbcon_free - Free and cleanup the nbcon console specific data * @con: Console to free/cleanup nbcon data + * + * Important: @have_nbcon_console must be updated before calling + * this function. In particular, it can be set only when there + * is still another nbcon console registered. */ void nbcon_free(struct console *con) { struct nbcon_state state = { }; + /* Synchronize the kthread stop. */ + lockdep_assert_console_list_lock_held(); + + if (printk_kthreads_running) { + nbcon_kthread_stop(con); + + /* Might be the last nbcon console. + * + * Do not rely on printk_kthreads_check_locked(). It is not + * called in some code paths, see nbcon_free() callers. + */ + if (!have_nbcon_console) + printk_kthreads_running = false; + } + nbcon_state_set(con, &state); /* Boot consoles share global printk buffers. */ @@ -994,3 +1840,146 @@ void nbcon_free(struct console *con) con->pbufs = NULL; } + +/** + * nbcon_device_try_acquire - Try to acquire nbcon console and enter unsafe + * section + * @con: The nbcon console to acquire + * + * Context: Under the locking mechanism implemented in + * @con->device_lock() including disabling migration. + * Return: True if the console was acquired. False otherwise. + * + * Console drivers will usually use their own internal synchronization + * mechasism to synchronize between console printing and non-printing + * activities (such as setting baud rates). However, nbcon console drivers + * supporting atomic consoles may also want to mark unsafe sections when + * performing non-printing activities in order to synchronize against their + * atomic_write() callback. + * + * This function acquires the nbcon console using priority NBCON_PRIO_NORMAL + * and marks it unsafe for handover/takeover. + */ +bool nbcon_device_try_acquire(struct console *con) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt); + + cant_migrate(); + + memset(ctxt, 0, sizeof(*ctxt)); + ctxt->console = con; + ctxt->prio = NBCON_PRIO_NORMAL; + + if (!nbcon_context_try_acquire(ctxt, false)) + return false; + + if (!nbcon_context_enter_unsafe(ctxt)) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(nbcon_device_try_acquire); + +/** + * nbcon_device_release - Exit unsafe section and release the nbcon console + * @con: The nbcon console acquired in nbcon_device_try_acquire() + */ +void nbcon_device_release(struct console *con) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt); + struct console_flush_type ft; + int cookie; + + if (!nbcon_context_exit_unsafe(ctxt)) + return; + + nbcon_context_release(ctxt); + + /* + * This context must flush any new records added while the console + * was locked if the printer thread is not available to do it. The + * console_srcu_read_lock must be taken to ensure the console is + * usable throughout flushing. + */ + cookie = console_srcu_read_lock(); + printk_get_console_flush_type(&ft); + if (console_is_usable(con, console_srcu_read_flags(con), true) && + !ft.nbcon_offload && + prb_read_valid(prb, nbcon_seq_read(con), NULL)) { + /* + * If nbcon_atomic flushing is not available, fallback to + * using the legacy loop. + */ + if (ft.nbcon_atomic) { + __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb)); + } else if (ft.legacy_direct) { + if (console_trylock()) + console_unlock(); + } else if (ft.legacy_offload) { + defer_console_output(); + } + } + console_srcu_read_unlock(cookie); +} +EXPORT_SYMBOL_GPL(nbcon_device_release); + +/** + * nbcon_kdb_try_acquire - Try to acquire nbcon console and enter unsafe + * section + * @con: The nbcon console to acquire + * @wctxt: The nbcon write context to be used on success + * + * Context: Under console_srcu_read_lock() for emitting a single kdb message + * using the given con->write_atomic() callback. Can be called + * only when the console is usable at the moment. + * + * Return: True if the console was acquired. False otherwise. + * + * kdb emits messages on consoles registered for printk() without + * storing them into the ring buffer. It has to acquire the console + * ownerhip so that it could call con->write_atomic() callback a safe way. + * + * This function acquires the nbcon console using priority NBCON_PRIO_EMERGENCY + * and marks it unsafe for handover/takeover. + */ +bool nbcon_kdb_try_acquire(struct console *con, + struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + + memset(ctxt, 0, sizeof(*ctxt)); + ctxt->console = con; + ctxt->prio = NBCON_PRIO_EMERGENCY; + + if (!nbcon_context_try_acquire(ctxt, false)) + return false; + + if (!nbcon_context_enter_unsafe(ctxt)) + return false; + + return true; +} + +/** + * nbcon_kdb_release - Exit unsafe section and release the nbcon console + * + * @wctxt: The nbcon write context initialized by a successful + * nbcon_kdb_try_acquire() + */ +void nbcon_kdb_release(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + + if (!nbcon_context_exit_unsafe(ctxt)) + return; + + nbcon_context_release(ctxt); + + /* + * Flush any new printk() messages added when the console was blocked. + * Only the console used by the given write context was blocked. + * The console was locked only when the write_atomic() callback + * was usable. + */ + __nbcon_atomic_flush_pending_con(ctxt->console, prb_next_reserve_seq(prb)); +} diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index dddb15f48d59..1d765ad242b8 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -34,6 +34,7 @@ #include <linux/security.h> #include <linux/memblock.h> #include <linux/syscalls.h> +#include <linux/syscore_ops.h> #include <linux/vmcore_info.h> #include <linux/ratelimit.h> #include <linux/kmsg_dump.h> @@ -47,6 +48,7 @@ #include <linux/sched/clock.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> +#include <linux/panic.h> #include <linux/uaccess.h> #include <asm/sections.h> @@ -197,7 +199,7 @@ __setup("printk.devkmsg=", control_devkmsg); char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit"; #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) -int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, +int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { char old_str[DEVKMSG_STR_MAX_SIZE]; @@ -282,6 +284,7 @@ EXPORT_SYMBOL(console_list_unlock); * Return: A cookie to pass to console_srcu_read_unlock(). */ int console_srcu_read_lock(void) + __acquires(&console_srcu) { return srcu_read_lock_nmisafe(&console_srcu); } @@ -295,6 +298,7 @@ EXPORT_SYMBOL(console_srcu_read_lock); * Counterpart to console_srcu_read_lock() */ void console_srcu_read_unlock(int cookie) + __releases(&console_srcu) { srcu_read_unlock_nmisafe(&console_srcu, cookie); } @@ -342,34 +346,6 @@ static void __up_console_sem(unsigned long ip) } #define up_console_sem() __up_console_sem(_RET_IP_) -static bool panic_in_progress(void) -{ - return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); -} - -/* Return true if a panic is in progress on the current CPU. */ -bool this_cpu_in_panic(void) -{ - /* - * We can use raw_smp_processor_id() here because it is impossible for - * the task to be migrated to the panic_cpu, or away from it. If - * panic_cpu has already been set, and we're not currently executing on - * that CPU, then we never will be. - */ - return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id()); -} - -/* - * Return true if a panic is in progress on a remote CPU. - * - * On true, the local CPU should immediately release any printing resources - * that may be needed by the panic CPU. - */ -bool other_cpu_in_panic(void) -{ - return (panic_in_progress() && !this_cpu_in_panic()); -} - /* * This is used for debugging the mess that is the VT code by * keeping track if we have the console semaphore held. It's @@ -461,14 +437,46 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; /* syslog_lock protects syslog_* variables and write access to clear_seq. */ static DEFINE_MUTEX(syslog_lock); +/* + * Specifies if a legacy console is registered. If legacy consoles are + * present, it is necessary to perform the console lock/unlock dance + * whenever console flushing should occur. + */ +bool have_legacy_console; + +/* + * Specifies if an nbcon console is registered. If nbcon consoles are present, + * synchronous printing of legacy consoles will not occur during panic until + * the backtrace has been stored to the ringbuffer. + */ +bool have_nbcon_console; + +/* + * Specifies if a boot console is registered. If boot consoles are present, + * nbcon consoles cannot print simultaneously and must be synchronized by + * the console lock. This is because boot consoles and nbcon consoles may + * have mapped the same hardware. + */ +bool have_boot_console; + +/* See printk_legacy_allow_panic_sync() for details. */ +bool legacy_allow_panic_sync; + +/* Avoid using irq_work when suspending. */ +bool console_irqwork_blocked; + #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); +static DECLARE_WAIT_QUEUE_HEAD(legacy_wait); /* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ static u64 syslog_seq; static size_t syslog_partial; static bool syslog_time; +/* True when _all_ printer threads are available for printing. */ +bool printk_kthreads_running; + struct latched_seq { seqcount_latch_t latch; u64 val[2]; @@ -491,7 +499,7 @@ static struct latched_seq clear_seq = { /* record buffer */ #define LOG_ALIGN __alignof__(unsigned long) #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) -#define LOG_BUF_LEN_MAX (u32)(1 << 31) +#define LOG_BUF_LEN_MAX ((u32)1 << 31) static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; @@ -528,10 +536,11 @@ bool printk_percpu_data_ready(void) /* Must be called under syslog_lock. */ static void latched_seq_write(struct latched_seq *ls, u64 val) { - raw_write_seqcount_latch(&ls->latch); + write_seqcount_latch_begin(&ls->latch); ls->val[0] = val; - raw_write_seqcount_latch(&ls->latch); + write_seqcount_latch(&ls->latch); ls->val[1] = val; + write_seqcount_latch_end(&ls->latch); } /* Can be called from any context. */ @@ -542,10 +551,10 @@ static u64 latched_seq_read_nolock(struct latched_seq *ls) u64 val; do { - seq = raw_read_seqcount_latch(&ls->latch); + seq = read_seqcount_latch(&ls->latch); idx = seq & 0x1; val = ls->val[idx]; - } while (raw_read_seqcount_latch_retry(&ls->latch, seq)); + } while (read_seqcount_latch_retry(&ls->latch, seq)); return val; } @@ -1124,6 +1133,17 @@ static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, static char setup_text_buf[PRINTKRB_RECORD_MAX] __initdata; +static void print_log_buf_usage_stats(void) +{ + unsigned int descs_count = log_buf_len >> PRB_AVGBITS; + size_t meta_data_size; + + meta_data_size = descs_count * (sizeof(struct prb_desc) + sizeof(struct printk_info)); + + pr_info("log buffer data + meta data: %u + %zu = %zu bytes\n", + log_buf_len, meta_data_size, log_buf_len + meta_data_size); +} + void __init setup_log_buf(int early) { struct printk_info *new_infos; @@ -1153,20 +1173,25 @@ void __init setup_log_buf(int early) if (!early && !new_log_buf_len) log_buf_add_cpu(); - if (!new_log_buf_len) + if (!new_log_buf_len) { + /* Show the memory stats only once. */ + if (!early) + goto out; + return; + } new_descs_count = new_log_buf_len >> PRB_AVGBITS; if (new_descs_count == 0) { pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len); - return; + goto out; } new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); if (unlikely(!new_log_buf)) { pr_err("log_buf_len: %lu text bytes not available\n", new_log_buf_len); - return; + goto out; } new_descs_size = new_descs_count * sizeof(struct prb_desc); @@ -1229,7 +1254,7 @@ void __init setup_log_buf(int early) prb_next_seq(&printk_rb_static) - seq); } - pr_info("log_buf_len: %u bytes\n", log_buf_len); + print_log_buf_usage_stats(); pr_info("early log buf free: %u(%u%%)\n", free, (free * 100) / __LOG_BUF_LEN); return; @@ -1238,6 +1263,8 @@ err_free_descs: memblock_free(new_descs, new_descs_size); err_free_log_buf: memblock_free(new_log_buf, new_log_buf_len); +out: + print_log_buf_usage_stats(); } static bool __read_mostly ignore_loglevel; @@ -1287,11 +1314,11 @@ static void boot_delay_msec(int level) { unsigned long long k; unsigned long timeout; + bool suppress = !is_printk_force_console() && + suppress_message_printing(level); - if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) - || suppress_message_printing(level)) { + if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) || suppress) return; - } k = (unsigned long long)loops_per_msec * boot_delay; @@ -1850,7 +1877,7 @@ static bool console_waiter; * there may be a waiter spinning (like a spinlock). Also it must be * ready to hand over the lock at the end of the section. */ -static void console_lock_spinning_enable(void) +void console_lock_spinning_enable(void) { /* * Do not use spinning in panic(). The panic CPU wants to keep the lock. @@ -1889,7 +1916,7 @@ lockdep: * * Return: 1 if the lock rights were passed, 0 otherwise. */ -static int console_lock_spinning_disable_and_check(int cookie) +int console_lock_spinning_disable_and_check(int cookie) { int waiter; @@ -2241,6 +2268,9 @@ int vprintk_store(int facility, int level, if (dev_info) flags |= LOG_NEWLINE; + if (is_printk_force_console()) + flags |= LOG_FORCE_CON; + if (flags & LOG_CONT) { prb_rec_init_wr(&r, reserve_size); if (prb_reserve_in_last(&e, prb, &r, caller_id, PRINTKRB_RECORD_MAX)) { @@ -2248,6 +2278,9 @@ int vprintk_store(int facility, int level, facility, &flags, fmt, args); r.info->text_len += text_len; + if (flags & LOG_FORCE_CON) + r.info->flags |= LOG_FORCE_CON; + if (flags & LOG_NEWLINE) { r.info->flags |= LOG_NEWLINE; prb_final_commit(&e); @@ -2300,12 +2333,46 @@ out: return ret; } +/* + * This acts as a one-way switch to allow legacy consoles to print from + * the printk() caller context on a panic CPU. It also attempts to flush + * the legacy consoles in this context. + */ +void printk_legacy_allow_panic_sync(void) +{ + struct console_flush_type ft; + + legacy_allow_panic_sync = true; + + printk_get_console_flush_type(&ft); + if (ft.legacy_direct) { + if (console_trylock()) + console_unlock(); + } +} + +bool __read_mostly debug_non_panic_cpus; + +#ifdef CONFIG_PRINTK_CALLER +static int __init debug_non_panic_cpus_setup(char *str) +{ + debug_non_panic_cpus = true; + pr_info("allow messages from non-panic CPUs in panic()\n"); + + return 0; +} +early_param("debug_non_panic_cpus", debug_non_panic_cpus_setup); +module_param(debug_non_panic_cpus, bool, 0644); +MODULE_PARM_DESC(debug_non_panic_cpus, + "allow messages from non-panic CPUs in panic()"); +#endif + asmlinkage int vprintk_emit(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args) { + struct console_flush_type ft; int printed_len; - bool in_sched = false; /* Suppress unimportant messages after panic happens */ if (unlikely(suppress_printk)) @@ -2316,20 +2383,31 @@ asmlinkage int vprintk_emit(int facility, int level, * non-panic CPUs are generating any messages, they will be * silently dropped. */ - if (other_cpu_in_panic()) + if (panic_on_other_cpu() && + !debug_non_panic_cpus && + !panic_triggering_all_cpu_backtrace) return 0; + printk_get_console_flush_type(&ft); + + /* If called from the scheduler, we can not call up(). */ if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; - in_sched = true; + ft.legacy_offload |= ft.legacy_direct && !console_irqwork_blocked; + ft.legacy_direct = false; } printk_delay(level); printed_len = vprintk_store(facility, level, dev_info, fmt, args); - /* If called from the scheduler, we can not call up(). */ - if (!in_sched) { + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + + if (ft.legacy_direct) { /* * The caller may be holding system-critical or * timing-sensitive locks. Disable preemption during @@ -2349,9 +2427,9 @@ asmlinkage int vprintk_emit(int facility, int level, preempt_enable(); } - if (in_sched) + if (ft.legacy_offload) defer_console_output(); - else + else if (!console_irqwork_blocked) wake_up_klogd(); return printed_len; @@ -2377,7 +2455,6 @@ asmlinkage __visible int _printk(const char *fmt, ...) } EXPORT_SYMBOL(_printk); -static bool pr_flush(int timeout_ms, bool reset_on_progress); static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); #else /* CONFIG_PRINTK */ @@ -2390,7 +2467,6 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre static u64 syslog_seq; -static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; } static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } #endif /* CONFIG_PRINTK */ @@ -2429,18 +2505,23 @@ static void set_user_specified(struct console_cmdline *c, bool user_specified) console_set_on_cmdline = 1; } -static int __add_preferred_console(const char *name, const short idx, char *options, +static int __add_preferred_console(const char *name, const short idx, + const char *devname, char *options, char *brl_options, bool user_specified) { struct console_cmdline *c; int i; + if (!name && !devname) + return -EINVAL; + /* * We use a signed short index for struct console for device drivers to * indicate a not yet assigned index or port. However, a negative index - * value is not valid for preferred console. + * value is not valid when the console name and index are defined on + * the command line. */ - if (idx < 0) + if (name && idx < 0) return -EINVAL; /* @@ -2448,9 +2529,10 @@ static int __add_preferred_console(const char *name, const short idx, char *opti * if we have a slot free. */ for (i = 0, c = console_cmdline; - i < MAX_CMDLINECONSOLES && c->name[0]; + i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]); i++, c++) { - if (strcmp(c->name, name) == 0 && c->index == idx) { + if ((name && strcmp(c->name, name) == 0 && c->index == idx) || + (devname && strcmp(c->devname, devname) == 0)) { if (!brl_options) preferred_console = i; set_user_specified(c, user_specified); @@ -2461,7 +2543,10 @@ static int __add_preferred_console(const char *name, const short idx, char *opti return -E2BIG; if (!brl_options) preferred_console = i; - strscpy(c->name, name, sizeof(c->name)); + if (name) + strscpy(c->name, name); + if (devname) + strscpy(c->devname, devname); c->options = options; set_user_specified(c, user_specified); braille_set_options(c, brl_options); @@ -2486,8 +2571,13 @@ __setup("console_msg_format=", console_msg_format_setup); */ static int __init console_setup(char *str) { - char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */ - char *s, *options, *brl_options = NULL; + static_assert(sizeof(console_cmdline[0].devname) >= sizeof(console_cmdline[0].name) + 4); + char buf[sizeof(console_cmdline[0].devname)]; + char *brl_options = NULL; + char *ttyname = NULL; + char *devname = NULL; + char *options; + char *s; int idx; /* @@ -2496,17 +2586,23 @@ static int __init console_setup(char *str) * for exactly this purpose. */ if (str[0] == 0 || strcmp(str, "null") == 0) { - __add_preferred_console("ttynull", 0, NULL, NULL, true); + __add_preferred_console("ttynull", 0, NULL, NULL, NULL, true); return 1; } if (_braille_console_setup(&str, &brl_options)) return 1; + /* For a DEVNAME:0.0 style console the character device is unknown early */ + if (strchr(str, ':')) + devname = buf; + else + ttyname = buf; + /* * Decode str into name, index, options. */ - if (isdigit(str[0])) + if (ttyname && isdigit(str[0])) scnprintf(buf, sizeof(buf), "ttyS%s", str); else strscpy(buf, str); @@ -2523,12 +2619,18 @@ static int __init console_setup(char *str) #endif for (s = buf; *s; s++) - if (isdigit(*s) || *s == ',') + if ((ttyname && isdigit(*s)) || *s == ',') break; - idx = simple_strtoul(s, NULL, 10); + + /* @idx will get defined when devname matches. */ + if (devname) + idx = -1; + else + idx = simple_strtoul(s, NULL, 10); + *s = 0; - __add_preferred_console(buf, idx, options, brl_options, true); + __add_preferred_console(ttyname, idx, devname, options, brl_options, true); return 1; } __setup("console=", console_setup); @@ -2548,9 +2650,54 @@ __setup("console=", console_setup); */ int add_preferred_console(const char *name, const short idx, char *options) { - return __add_preferred_console(name, idx, options, NULL, false); + return __add_preferred_console(name, idx, NULL, options, NULL, false); } +/** + * match_devname_and_update_preferred_console - Update a preferred console + * when matching devname is found. + * @devname: DEVNAME:0.0 style device name + * @name: Name of the corresponding console driver, e.g. "ttyS" + * @idx: Console index, e.g. port number. + * + * The function checks whether a device with the given @devname is + * preferred via the console=DEVNAME:0.0 command line option. + * It fills the missing console driver name and console index + * so that a later register_console() call could find (match) + * and enable this device. + * + * It might be used when a driver subsystem initializes particular + * devices with already known DEVNAME:0.0 style names. And it + * could predict which console driver name and index this device + * would later get associated with. + * + * Return: 0 on success, negative error code on failure. + */ +int match_devname_and_update_preferred_console(const char *devname, + const char *name, + const short idx) +{ + struct console_cmdline *c = console_cmdline; + int i; + + if (!devname || !strlen(devname) || !name || !strlen(name) || idx < 0) + return -EINVAL; + + for (i = 0; i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]); + i++, c++) { + if (!strcmp(devname, c->devname)) { + pr_info("associate the preferred console \"%s\" with \"%s%d\"\n", + devname, name, idx); + strscpy(c->name, name); + c->index = idx; + return 0; + } + } + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(match_devname_and_update_preferred_console); + bool console_suspend_enabled = true; EXPORT_SYMBOL(console_suspend_enabled); @@ -2578,18 +2725,28 @@ module_param_named(console_no_auto_verbose, printk_console_no_auto_verbose, bool MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to highest on oops/panic/etc"); /** - * suspend_console - suspend the console subsystem + * console_suspend_all - suspend the console subsystem * * This disables printk() while we go into suspend states */ -void suspend_console(void) +void console_suspend_all(void) { struct console *con; + if (console_suspend_enabled) + pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); + + /* + * Flush any console backlog and then avoid queueing irq_work until + * console_resume_all(). Until then deferred printing is no longer + * triggered, NBCON consoles transition to atomic flushing, and + * any klogd waiters are not triggered. + */ + pr_flush(1000, true); + console_irqwork_blocked = true; + if (!console_suspend_enabled) return; - pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); - pr_flush(1000, true); console_list_lock(); for_each_console(con) @@ -2605,24 +2762,39 @@ void suspend_console(void) synchronize_srcu(&console_srcu); } -void resume_console(void) +void console_resume_all(void) { + struct console_flush_type ft; struct console *con; - if (!console_suspend_enabled) - return; - - console_list_lock(); - for_each_console(con) - console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED); - console_list_unlock(); - /* - * Ensure that all SRCU list walks have completed. All printing - * contexts must be able to see they are no longer suspended so - * that they are guaranteed to wake up and resume printing. + * Allow queueing irq_work. After restoring console state, deferred + * printing and any klogd waiters need to be triggered in case there + * is now a console backlog. */ - synchronize_srcu(&console_srcu); + console_irqwork_blocked = false; + + if (console_suspend_enabled) { + console_list_lock(); + for_each_console(con) + console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED); + console_list_unlock(); + + /* + * Ensure that all SRCU list walks have completed. All printing + * contexts must be able to see they are no longer suspended so + * that they are guaranteed to wake up and resume printing. + */ + synchronize_srcu(&console_srcu); + } + + printk_get_console_flush_type(&ft); + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + if (ft.legacy_offload) + defer_console_output(); + else + wake_up_klogd(); pr_flush(1000, true); } @@ -2638,10 +2810,16 @@ void resume_console(void) */ static int console_cpu_notify(unsigned int cpu) { + struct console_flush_type ft; + if (!cpuhp_tasks_frozen) { - /* If trylock fails, someone else is doing the printing */ - if (console_trylock()) - console_unlock(); + printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + if (ft.legacy_direct) { + if (console_trylock()) + console_unlock(); + } } return 0; } @@ -2659,7 +2837,7 @@ void console_lock(void) might_sleep(); /* On panic, the console_lock must be left to the panic cpu. */ - while (other_cpu_in_panic()) + while (panic_on_other_cpu()) msleep(1000); down_console_sem(); @@ -2679,7 +2857,7 @@ EXPORT_SYMBOL(console_lock); int console_trylock(void) { /* On panic, the console_lock must be left to the panic cpu. */ - if (other_cpu_in_panic()) + if (panic_on_other_cpu()) return 0; if (down_trylock_console_sem()) return 0; @@ -2695,36 +2873,6 @@ int is_console_locked(void) } EXPORT_SYMBOL(is_console_locked); -/* - * Check if the given console is currently capable and allowed to print - * records. - * - * Requires the console_srcu_read_lock. - */ -static inline bool console_is_usable(struct console *con) -{ - short flags = console_srcu_read_flags(con); - - if (!(flags & CON_ENABLED)) - return false; - - if ((flags & CON_SUSPENDED)) - return false; - - if (!con->write) - return false; - - /* - * Console drivers may assume that per-cpu resources have been - * allocated. So unless they're explicitly marked as being able to - * cope (CON_ANYTIME) don't call them until this CPU is officially up. - */ - if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) - return false; - - return true; -} - static void __console_unlock(void) { console_locked = 0; @@ -2734,30 +2882,31 @@ static void __console_unlock(void) #ifdef CONFIG_PRINTK /* - * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". This - * is achieved by shifting the existing message over and inserting the dropped - * message. + * Prepend the message in @pmsg->pbufs->outbuf. This is achieved by shifting + * the existing message over and inserting the scratchbuf message. * - * @pmsg is the printk message to prepend. + * @pmsg is the original printk message. + * @fmt is the printf format of the message which will prepend the existing one. * - * @dropped is the dropped count to report in the dropped message. - * - * If the message text in @pmsg->pbufs->outbuf does not have enough space for - * the dropped message, the message text will be sufficiently truncated. + * If there is not enough space in @pmsg->pbufs->outbuf, the existing + * message text will be sufficiently truncated. * * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated. */ -void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) +__printf(2, 3) +static void console_prepend_message(struct printk_message *pmsg, const char *fmt, ...) { struct printk_buffers *pbufs = pmsg->pbufs; const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); const size_t outbuf_sz = sizeof(pbufs->outbuf); char *scratchbuf = &pbufs->scratchbuf[0]; char *outbuf = &pbufs->outbuf[0]; + va_list args; size_t len; - len = scnprintf(scratchbuf, scratchbuf_sz, - "** %lu printk messages dropped **\n", dropped); + va_start(args, fmt); + len = vscnprintf(scratchbuf, scratchbuf_sz, fmt, args); + va_end(args); /* * Make sure outbuf is sufficiently large before prepending. @@ -2780,6 +2929,30 @@ void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) } /* + * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". + * @pmsg->outbuf_len is updated appropriately. + * + * @pmsg is the printk message to prepend. + * + * @dropped is the dropped count to report in the dropped message. + */ +void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) +{ + console_prepend_message(pmsg, "** %lu printk messages dropped **\n", dropped); +} + +/* + * Prepend the message in @pmsg->pbufs->outbuf with a "replay message". + * @pmsg->outbuf_len is updated appropriately. + * + * @pmsg is the printk message to prepend. + */ +void console_prepend_replay(struct printk_message *pmsg) +{ + console_prepend_message(pmsg, "** replaying previous printk message **\n"); +} + +/* * Read and format the specified record (or a later record if the specified * record is not available). * @@ -2809,6 +2982,7 @@ bool printk_get_next_message(struct printk_message *pmsg, u64 seq, struct printk_info info; struct printk_record r; size_t len = 0; + bool force_con; /* * Formatting extended messages requires a separate buffer, so use the @@ -2827,9 +3001,13 @@ bool printk_get_next_message(struct printk_message *pmsg, u64 seq, pmsg->seq = r.info->seq; pmsg->dropped = r.info->seq - seq; + force_con = r.info->flags & LOG_FORCE_CON; - /* Skip record that has level above the console loglevel. */ - if (may_suppress && suppress_message_printing(r.info->level)) + /* + * Skip records that are not forced to be printed on consoles and that + * has level above the console loglevel. + */ + if (!force_con && may_suppress && suppress_message_printing(r.info->level)) goto out; if (is_extended) { @@ -2845,6 +3023,31 @@ out: } /* + * The legacy console always acquires a spinlock_t from its printing + * callback. This violates lock nesting if the caller acquired an always + * spinning lock (raw_spinlock_t) while invoking printk(). This is not a + * problem on PREEMPT_RT because legacy consoles print always from a + * dedicated thread and never from within printk(). Therefore we tell + * lockdep that a sleeping spin lock (spinlock_t) is valid here. + */ +#ifdef CONFIG_PREEMPT_RT +static inline void printk_legacy_allow_spinlock_enter(void) { } +static inline void printk_legacy_allow_spinlock_exit(void) { } +#else +static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_CONFIG); + +static inline void printk_legacy_allow_spinlock_enter(void) +{ + lock_map_acquire_try(&printk_legacy_map); +} + +static inline void printk_legacy_allow_spinlock_exit(void) +{ + lock_map_release(&printk_legacy_map); +} +#endif /* CONFIG_PREEMPT_RT */ + +/* * Used as the printk buffers for non-panic, serialized console printing. * This is for legacy (!CON_NBCON) as well as all boot (CON_BOOT) consoles. * Its usage requires the console_lock held. @@ -2893,31 +3096,46 @@ static bool console_emit_next_record(struct console *con, bool *handover, int co con->dropped = 0; } - /* - * While actively printing out messages, if another printk() - * were to occur on another CPU, it may wait for this one to - * finish. This task can not be preempted if there is a - * waiter waiting to take over. - * - * Interrupts are disabled because the hand over to a waiter - * must not be interrupted until the hand over is completed - * (@console_waiter is cleared). - */ - printk_safe_enter_irqsave(flags); - console_lock_spinning_enable(); + /* Write everything out to the hardware. */ - /* Do not trace print latency. */ - stop_critical_timings(); + if (force_legacy_kthread() && !panic_in_progress()) { + /* + * With forced threading this function is in a task context + * (either legacy kthread or get_init_console_seq()). There + * is no need for concern about printk reentrance, handovers, + * or lockdep complaints. + */ - /* Write everything out to the hardware. */ - con->write(con, outbuf, pmsg.outbuf_len); + con->write(con, outbuf, pmsg.outbuf_len); + con->seq = pmsg.seq + 1; + } else { + /* + * While actively printing out messages, if another printk() + * were to occur on another CPU, it may wait for this one to + * finish. This task can not be preempted if there is a + * waiter waiting to take over. + * + * Interrupts are disabled because the hand over to a waiter + * must not be interrupted until the hand over is completed + * (@console_waiter is cleared). + */ + printk_safe_enter_irqsave(flags); + console_lock_spinning_enable(); - start_critical_timings(); + /* Do not trace print latency. */ + stop_critical_timings(); - con->seq = pmsg.seq + 1; + printk_legacy_allow_spinlock_enter(); + con->write(con, outbuf, pmsg.outbuf_len); + printk_legacy_allow_spinlock_exit(); - *handover = console_lock_spinning_disable_and_check(cookie); - printk_safe_exit_irqrestore(flags); + start_critical_timings(); + + con->seq = pmsg.seq + 1; + + *handover = console_lock_spinning_disable_and_check(cookie); + printk_safe_exit_irqrestore(flags); + } skip: return true; } @@ -2930,99 +3148,152 @@ static bool console_emit_next_record(struct console *con, bool *handover, int co return false; } +static inline void printk_kthreads_check_locked(void) { } + #endif /* CONFIG_PRINTK */ + /* - * Print out all remaining records to all consoles. + * Print out one record for each console. * * @do_cond_resched is set by the caller. It can be true only in schedulable * context. * * @next_seq is set to the sequence number after the last available record. - * The value is valid only when this function returns true. It means that all - * usable consoles are completely flushed. + * The value is valid only when all usable consoles were flushed. It is + * when the function returns true (can do the job) and @try_again parameter + * is set to false, see below. * * @handover will be set to true if a printk waiter has taken over the * console_lock, in which case the caller is no longer holding the * console_lock. Otherwise it is set to false. * - * Returns true when there was at least one usable console and all messages - * were flushed to all usable consoles. A returned false informs the caller - * that everything was not flushed (either there were no usable consoles or - * another context has taken over printing or it is a panic situation and this - * is not the panic CPU). Regardless the reason, the caller should assume it - * is not useful to immediately try again. + * @try_again will be set to true when it still makes sense to call this + * function again. The function could do the job, see the return value. + * And some consoles still make progress. + * + * Returns true when the function could do the job. Some consoles are usable, + * and there was no takeover and no panic_on_other_cpu(). * * Requires the console_lock. */ -static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) +static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *handover, + bool *try_again) { + struct console_flush_type ft; bool any_usable = false; struct console *con; - bool any_progress; int cookie; - *next_seq = 0; - *handover = false; + *try_again = false; - do { - any_progress = false; + printk_get_console_flush_type(&ft); - cookie = console_srcu_read_lock(); - for_each_console_srcu(con) { - bool progress; + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + short flags = console_srcu_read_flags(con); + u64 printk_seq; + bool progress; - if (!console_is_usable(con)) - continue; - any_usable = true; + /* + * console_flush_one_record() is only responsible for + * nbcon consoles when the nbcon consoles cannot print via + * their atomic or threaded flushing. + */ + if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload)) + continue; + if (!console_is_usable(con, flags, !do_cond_resched)) + continue; + any_usable = true; + + if (flags & CON_NBCON) { + progress = nbcon_legacy_emit_next_record(con, handover, cookie, + !do_cond_resched); + printk_seq = nbcon_seq_read(con); + } else { progress = console_emit_next_record(con, handover, cookie); + printk_seq = con->seq; + } - /* - * If a handover has occurred, the SRCU read lock - * is already released. - */ - if (*handover) - return false; + /* + * If a handover has occurred, the SRCU read lock + * is already released. + */ + if (*handover) + goto fail; - /* Track the next of the highest seq flushed. */ - if (con->seq > *next_seq) - *next_seq = con->seq; + /* Track the next of the highest seq flushed. */ + if (printk_seq > *next_seq) + *next_seq = printk_seq; - if (!progress) - continue; - any_progress = true; + if (!progress) + continue; - /* Allow panic_cpu to take over the consoles safely. */ - if (other_cpu_in_panic()) - goto abandon; + /* + * An usable console made a progress. There might still be + * pending messages. + */ + *try_again = true; - if (do_cond_resched) - cond_resched(); - } - console_srcu_read_unlock(cookie); - } while (any_progress); + /* Allow panic_cpu to take over the consoles safely. */ + if (panic_on_other_cpu()) + goto fail_srcu; + + if (do_cond_resched) + cond_resched(); + } + console_srcu_read_unlock(cookie); return any_usable; -abandon: +fail_srcu: console_srcu_read_unlock(cookie); +fail: + *try_again = false; return false; } -/** - * console_unlock - unblock the console subsystem from printing +/* + * Print out all remaining records to all consoles. * - * Releases the console_lock which the caller holds to block printing of - * the console subsystem. + * @do_cond_resched is set by the caller. It can be true only in schedulable + * context. * - * While the console_lock was held, console output may have been buffered - * by printk(). If this is the case, console_unlock(); emits - * the output prior to releasing the lock. + * @next_seq is set to the sequence number after the last available record. + * The value is valid only when this function returns true. It means that all + * usable consoles are completely flushed. * - * console_unlock(); may be called from any context. + * @handover will be set to true if a printk waiter has taken over the + * console_lock, in which case the caller is no longer holding the + * console_lock. Otherwise it is set to false. + * + * Returns true when there was at least one usable console and all messages + * were flushed to all usable consoles. A returned false informs the caller + * that everything was not flushed (either there were no usable consoles or + * another context has taken over printing or it is a panic situation and this + * is not the panic CPU). Regardless the reason, the caller should assume it + * is not useful to immediately try again. + * + * Requires the console_lock. */ -void console_unlock(void) +static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) +{ + bool try_again; + bool ret; + + *next_seq = 0; + *handover = false; + + do { + ret = console_flush_one_record(do_cond_resched, next_seq, + handover, &try_again); + } while (try_again); + + return ret; +} + +static void __console_flush_and_unlock(void) { bool do_cond_resched; bool handover; @@ -3066,6 +3337,29 @@ void console_unlock(void) */ } while (prb_read_valid(prb, next_seq, NULL) && console_trylock()); } + +/** + * console_unlock - unblock the legacy console subsystem from printing + * + * Releases the console_lock which the caller holds to block printing of + * the legacy console subsystem. + * + * While the console_lock was held, console output may have been buffered + * by printk(). If this is the case, console_unlock() emits the output on + * legacy consoles prior to releasing the lock. + * + * console_unlock(); may be called from any context. + */ +void console_unlock(void) +{ + struct console_flush_type ft; + + printk_get_console_flush_type(&ft); + if (ft.legacy_direct) + __console_flush_and_unlock(); + else + __console_unlock(); +} EXPORT_SYMBOL(console_unlock); /** @@ -3098,7 +3392,10 @@ void console_unblank(void) */ cookie = console_srcu_read_lock(); for_each_console_srcu(c) { - if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) { + if (!console_is_usable(c, console_srcu_read_flags(c), true)) + continue; + + if (c->unblank) { found_unblank = true; break; } @@ -3135,7 +3432,10 @@ void console_unblank(void) cookie = console_srcu_read_lock(); for_each_console_srcu(c) { - if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) + if (!console_is_usable(c, console_srcu_read_flags(c), true)) + continue; + + if (c->unblank) c->unblank(); } console_srcu_read_unlock(cookie); @@ -3188,6 +3488,7 @@ static void __console_rewind_all(void) */ void console_flush_on_panic(enum con_flush_mode mode) { + struct console_flush_type ft; bool handover; u64 next_seq; @@ -3211,7 +3512,13 @@ void console_flush_on_panic(enum con_flush_mode mode) if (mode == CONSOLE_REPLAY_ALL) __console_rewind_all(); - console_flush_all(false, &next_seq, &handover); + printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + + /* Flush legacy consoles once allowed, even when dangerous. */ + if (legacy_allow_panic_sync) + console_flush_all(false, &next_seq, &handover); } /* @@ -3246,10 +3553,10 @@ struct tty_driver *console_device(int *index) /* * Prevent further output on the passed console device so that (for example) - * serial drivers can disable console output before suspending a port, and can + * serial drivers can suspend console output before suspending a port, and can * re-enable output afterwards. */ -void console_stop(struct console *console) +void console_suspend(struct console *console) { __pr_flush(console, 1000, true); console_list_lock(); @@ -3264,16 +3571,254 @@ void console_stop(struct console *console) */ synchronize_srcu(&console_srcu); } -EXPORT_SYMBOL(console_stop); +EXPORT_SYMBOL(console_suspend); -void console_start(struct console *console) +void console_resume(struct console *console) { + struct console_flush_type ft; + bool is_nbcon; + console_list_lock(); console_srcu_write_flags(console, console->flags | CON_ENABLED); + is_nbcon = console->flags & CON_NBCON; console_list_unlock(); + + /* + * Ensure that all SRCU list walks have completed. The related + * printing context must be able to see it is enabled so that + * it is guaranteed to wake up and resume printing. + */ + synchronize_srcu(&console_srcu); + + printk_get_console_flush_type(&ft); + if (is_nbcon && ft.nbcon_offload) + nbcon_kthread_wake(console); + else if (ft.legacy_offload) + defer_console_output(); + __pr_flush(console, 1000, true); } -EXPORT_SYMBOL(console_start); +EXPORT_SYMBOL(console_resume); + +#ifdef CONFIG_PRINTK +static int unregister_console_locked(struct console *console); + +/* True when system boot is far enough to create printer threads. */ +bool printk_kthreads_ready __ro_after_init; + +static struct task_struct *printk_legacy_kthread; + +static bool legacy_kthread_should_wakeup(void) +{ + struct console_flush_type ft; + struct console *con; + bool ret = false; + int cookie; + + if (kthread_should_stop()) + return true; + + printk_get_console_flush_type(&ft); + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + short flags = console_srcu_read_flags(con); + u64 printk_seq; + + /* + * The legacy printer thread is only responsible for nbcon + * consoles when the nbcon consoles cannot print via their + * atomic or threaded flushing. + */ + if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload)) + continue; + + if (!console_is_usable(con, flags, false)) + continue; + + if (flags & CON_NBCON) { + printk_seq = nbcon_seq_read(con); + } else { + /* + * It is safe to read @seq because only this + * thread context updates @seq. + */ + printk_seq = con->seq; + } + + if (prb_read_valid(prb, printk_seq, NULL)) { + ret = true; + break; + } + } + console_srcu_read_unlock(cookie); + + return ret; +} + +static int legacy_kthread_func(void *unused) +{ + bool try_again; + +wait_for_event: + wait_event_interruptible(legacy_wait, legacy_kthread_should_wakeup()); + + do { + bool handover = false; + u64 next_seq = 0; + + if (kthread_should_stop()) + return 0; + + console_lock(); + console_flush_one_record(true, &next_seq, &handover, &try_again); + if (!handover) + __console_unlock(); + + } while (try_again); + + goto wait_for_event; +} + +static bool legacy_kthread_create(void) +{ + struct task_struct *kt; + + lockdep_assert_console_list_lock_held(); + + kt = kthread_run(legacy_kthread_func, NULL, "pr/legacy"); + if (WARN_ON(IS_ERR(kt))) { + pr_err("failed to start legacy printing thread\n"); + return false; + } + + printk_legacy_kthread = kt; + + /* + * It is important that console printing threads are scheduled + * shortly after a printk call and with generous runtime budgets. + */ + sched_set_normal(printk_legacy_kthread, -20); + + return true; +} + +/** + * printk_kthreads_shutdown - shutdown all threaded printers + * @data: syscore context + * + * On system shutdown all threaded printers are stopped. This allows printk + * to transition back to atomic printing, thus providing a robust mechanism + * for the final shutdown/reboot messages to be output. + */ +static void printk_kthreads_shutdown(void *data) +{ + struct console *con; + + console_list_lock(); + if (printk_kthreads_running) { + printk_kthreads_running = false; + + for_each_console(con) { + if (con->flags & CON_NBCON) + nbcon_kthread_stop(con); + } + + /* + * The threads may have been stopped while printing a + * backlog. Flush any records left over. + */ + nbcon_atomic_flush_pending(); + } + console_list_unlock(); +} + +static const struct syscore_ops printk_syscore_ops = { + .shutdown = printk_kthreads_shutdown, +}; + +static struct syscore printk_syscore = { + .ops = &printk_syscore_ops, +}; + +/* + * If appropriate, start nbcon kthreads and set @printk_kthreads_running. + * If any kthreads fail to start, those consoles are unregistered. + * + * Must be called under console_list_lock(). + */ +static void printk_kthreads_check_locked(void) +{ + struct hlist_node *tmp; + struct console *con; + + lockdep_assert_console_list_lock_held(); + + if (!printk_kthreads_ready) + return; + + /* Start or stop the legacy kthread when needed. */ + if (have_legacy_console || have_boot_console) { + if (!printk_legacy_kthread && + force_legacy_kthread() && + !legacy_kthread_create()) { + /* + * All legacy consoles must be unregistered. If there + * are any nbcon consoles, they will set up their own + * kthread. + */ + hlist_for_each_entry_safe(con, tmp, &console_list, node) { + if (con->flags & CON_NBCON) + continue; + + unregister_console_locked(con); + } + } + } else if (printk_legacy_kthread) { + kthread_stop(printk_legacy_kthread); + printk_legacy_kthread = NULL; + } + + /* + * Printer threads cannot be started as long as any boot console is + * registered because there is no way to synchronize the hardware + * registers between boot console code and regular console code. + * It can only be known that there will be no new boot consoles when + * an nbcon console is registered. + */ + if (have_boot_console || !have_nbcon_console) { + /* Clear flag in case all nbcon consoles unregistered. */ + printk_kthreads_running = false; + return; + } + + if (printk_kthreads_running) + return; + + hlist_for_each_entry_safe(con, tmp, &console_list, node) { + if (!(con->flags & CON_NBCON)) + continue; + + if (!nbcon_kthread_create(con)) + unregister_console_locked(con); + } + + printk_kthreads_running = true; +} + +static int __init printk_set_kthreads_ready(void) +{ + register_syscore(&printk_syscore); + + console_list_lock(); + printk_kthreads_ready = true; + printk_kthreads_check_locked(); + console_list_unlock(); + + return 0; +} +early_initcall(printk_set_kthreads_ready); +#endif /* CONFIG_PRINTK */ static int __read_mostly keep_bootcon; @@ -3318,8 +3863,11 @@ static int try_enable_preferred_console(struct console *newcon, int i, err; for (i = 0, c = console_cmdline; - i < MAX_CMDLINECONSOLES && c->name[0]; + i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]); i++, c++) { + /* Console not yet initialized? */ + if (!c->name[0]) + continue; if (c->user_specified != user_specified) continue; if (!newcon->match || @@ -3373,19 +3921,21 @@ static void try_enable_default_console(struct console *newcon) newcon->flags |= CON_CONSDEV; } -static void console_init_seq(struct console *newcon, bool bootcon_registered) +/* Return the starting sequence number for a newly registered console. */ +static u64 get_init_console_seq(struct console *newcon, bool bootcon_registered) { struct console *con; bool handover; + u64 init_seq; if (newcon->flags & (CON_PRINTBUFFER | CON_BOOT)) { /* Get a consistent copy of @syslog_seq. */ mutex_lock(&syslog_lock); - newcon->seq = syslog_seq; + init_seq = syslog_seq; mutex_unlock(&syslog_lock); } else { /* Begin with next message added to ringbuffer. */ - newcon->seq = prb_next_seq(prb); + init_seq = prb_next_seq(prb); /* * If any enabled boot consoles are due to be unregistered @@ -3406,7 +3956,7 @@ static void console_init_seq(struct console *newcon, bool bootcon_registered) * Flush all consoles and set the console to start at * the next unprinted sequence number. */ - if (!console_flush_all(true, &newcon->seq, &handover)) { + if (!console_flush_all(true, &init_seq, &handover)) { /* * Flushing failed. Just choose the lowest * sequence of the enabled boot consoles. @@ -3419,19 +3969,30 @@ static void console_init_seq(struct console *newcon, bool bootcon_registered) if (handover) console_lock(); - newcon->seq = prb_next_seq(prb); + init_seq = prb_next_seq(prb); for_each_console(con) { - if ((con->flags & CON_BOOT) && - (con->flags & CON_ENABLED) && - con->seq < newcon->seq) { - newcon->seq = con->seq; + u64 seq; + + if (!(con->flags & CON_BOOT) || + !(con->flags & CON_ENABLED)) { + continue; } + + if (con->flags & CON_NBCON) + seq = nbcon_seq_read(con); + else + seq = con->seq; + + if (seq < init_seq) + init_seq = seq; } } console_unlock(); } } + + return init_seq; } #define console_first() \ @@ -3460,9 +4021,12 @@ static int unregister_console_locked(struct console *console); */ void register_console(struct console *newcon) { - struct console *con; + bool use_device_lock = (newcon->flags & CON_NBCON) && newcon->write_atomic; bool bootcon_registered = false; bool realcon_registered = false; + struct console *con; + unsigned long flags; + u64 init_seq; int err; console_list_lock(); @@ -3540,10 +4104,31 @@ void register_console(struct console *newcon) } newcon->dropped = 0; - console_init_seq(newcon, bootcon_registered); + init_seq = get_init_console_seq(newcon, bootcon_registered); + + if (newcon->flags & CON_NBCON) { + have_nbcon_console = true; + nbcon_seq_force(newcon, init_seq); + } else { + have_legacy_console = true; + newcon->seq = init_seq; + } + + if (newcon->flags & CON_BOOT) + have_boot_console = true; - if (newcon->flags & CON_NBCON) - nbcon_init(newcon); + /* + * If another context is actively using the hardware of this new + * console, it will not be aware of the nbcon synchronization. This + * is a risk that two contexts could access the hardware + * simultaneously if this new console is used for atomic printing + * and the other context is still using the hardware. + * + * Use the driver synchronization to ensure that the hardware is not + * in use while this new console transitions to being registered. + */ + if (use_device_lock) + newcon->device_lock(newcon, &flags); /* * Put this console in the list - keep the @@ -3569,6 +4154,10 @@ void register_console(struct console *newcon) * register_console() completes. */ + /* This new console is now registered. */ + if (use_device_lock) + newcon->device_unlock(newcon, flags); + console_sysfs_notify(); /* @@ -3589,6 +4178,9 @@ void register_console(struct console *newcon) unregister_console_locked(con); } } + + /* Changed console list, may require printer threads to start/stop. */ + printk_kthreads_check_locked(); unlock: console_list_unlock(); } @@ -3597,6 +4189,12 @@ EXPORT_SYMBOL(register_console); /* Must be called under console_list_lock(). */ static int unregister_console_locked(struct console *console) { + bool use_device_lock = (console->flags & CON_NBCON) && console->write_atomic; + bool found_legacy_con = false; + bool found_nbcon_con = false; + bool found_boot_con = false; + unsigned long flags; + struct console *c; int res; lockdep_assert_console_list_lock_held(); @@ -3609,14 +4207,29 @@ static int unregister_console_locked(struct console *console) if (res > 0) return 0; + if (!console_is_registered_locked(console)) + res = -ENODEV; + else if (console_is_usable(console, console->flags, true)) + __pr_flush(console, 1000, true); + /* Disable it unconditionally */ console_srcu_write_flags(console, console->flags & ~CON_ENABLED); - if (!console_is_registered_locked(console)) - return -ENODEV; + if (res < 0) + return res; + + /* + * Use the driver synchronization to ensure that the hardware is not + * in use while this console transitions to being unregistered. + */ + if (use_device_lock) + console->device_lock(console, &flags); hlist_del_init_rcu(&console->node); + if (use_device_lock) + console->device_unlock(console, flags); + /* * <HISTORICAL> * If this isn't the last console and it has CON_CONSDEV set, we @@ -3636,6 +4249,27 @@ static int unregister_console_locked(struct console *console) */ synchronize_srcu(&console_srcu); + /* + * With this console gone, the global flags tracking registered + * console types may have changed. Update them. + */ + for_each_console(c) { + if (c->flags & CON_BOOT) + found_boot_con = true; + + if (c->flags & CON_NBCON) + found_nbcon_con = true; + else + found_legacy_con = true; + } + if (!found_boot_con) + have_boot_console = found_boot_con; + if (!found_legacy_con) + have_legacy_console = found_legacy_con; + if (!found_nbcon_con) + have_nbcon_console = found_nbcon_con; + + /* @have_nbcon_console must be updated before calling nbcon_free(). */ if (console->flags & CON_NBCON) nbcon_free(console); @@ -3644,6 +4278,9 @@ static int unregister_console_locked(struct console *console) if (console->exit) res = console->exit(console); + /* Changed console list, may require printer threads to start/stop. */ + printk_kthreads_check_locked(); + return res; } @@ -3712,6 +4349,11 @@ void __init console_init(void) initcall_t call; initcall_entry_t *ce; +#ifdef CONFIG_NULL_TTY_DEFAULT_CONSOLE + if (!console_set_on_cmdline) + add_preferred_console("ttynull", 0, NULL); +#endif + /* Setup the default TTY line discipline. */ n_tty_init(); @@ -3790,6 +4432,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre { unsigned long timeout_jiffies = msecs_to_jiffies(timeout_ms); unsigned long remaining_jiffies = timeout_jiffies; + struct console_flush_type ft; struct console *c; u64 last_diff = 0; u64 printk_seq; @@ -3798,13 +4441,22 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre u64 diff; u64 seq; + /* Sorry, pr_flush() will not work this early. */ + if (system_state < SYSTEM_SCHEDULING) + return false; + might_sleep(); seq = prb_next_reserve_seq(prb); /* Flush the consoles so that records up to @seq are printed. */ - console_lock(); - console_unlock(); + printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + if (ft.legacy_direct) { + console_lock(); + console_unlock(); + } for (;;) { unsigned long begin_jiffies; @@ -3817,6 +4469,12 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre * console->seq. Releasing console_lock flushes more * records in case @seq is still not printed on all * usable consoles. + * + * Holding the console_lock is not necessary if there + * are no legacy or boot consoles. However, such a + * console could register at any time. Always hold the + * console_lock as a precaution rather than + * synchronizing against register_console(). */ console_lock(); @@ -3832,8 +4490,10 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre * that they make forward progress, so only increment * @diff for usable consoles. */ - if (!console_is_usable(c)) + if (!console_is_usable(c, flags, true) && + !console_is_usable(c, flags, false)) { continue; + } if (flags & CON_NBCON) { printk_seq = nbcon_seq_read(c); @@ -3883,7 +4543,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre * Context: Process context. May sleep while acquiring console lock. * Return: true if all usable printers are caught up. */ -static bool pr_flush(int timeout_ms, bool reset_on_progress) +bool pr_flush(int timeout_ms, bool reset_on_progress) { return __pr_flush(NULL, timeout_ms, reset_on_progress); } @@ -3901,9 +4561,13 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) int pending = this_cpu_xchg(printk_pending, 0); if (pending & PRINTK_PENDING_OUTPUT) { - /* If trylock fails, someone else is doing the printing */ - if (console_trylock()) - console_unlock(); + if (force_legacy_kthread()) { + if (printk_legacy_kthread) + wake_up_interruptible(&legacy_wait); + } else { + if (console_trylock()) + console_unlock(); + } } if (pending & PRINTK_PENDING_WAKEUP) @@ -3918,6 +4582,13 @@ static void __wake_up_klogd(int val) if (!printk_percpu_data_ready()) return; + /* + * It is not allowed to call this function when console irq_work + * is blocked. + */ + if (WARN_ON_ONCE(console_irqwork_blocked)) + return; + preempt_disable(); /* * Guarantee any new records can be seen by tasks preparing to wait @@ -3974,9 +4645,30 @@ void defer_console_output(void) __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); } +/** + * printk_trigger_flush - Attempt to flush printk buffer to consoles. + * + * If possible, flush the printk buffer to all consoles in the caller's + * context. If offloading is available, trigger deferred printing. + * + * This is best effort. Depending on the system state, console states, + * and caller context, no actual flushing may result from this call. + */ void printk_trigger_flush(void) { - defer_console_output(); + struct console_flush_type ft; + + printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + if (ft.legacy_direct) { + if (console_trylock()) + console_unlock(); + } + if (ft.legacy_offload) + defer_console_output(); } int vprintk_deferred(const char *fmt, va_list args) @@ -4111,16 +4803,21 @@ const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); /** - * kmsg_dump - dump kernel log to kernel message dumpers. + * kmsg_dump_desc - dump kernel log to kernel message dumpers. * @reason: the reason (oops, panic etc) for dumping + * @desc: a short string to describe what caused the panic or oops. Can be NULL + * if no additional description is available. * * Call each of the registered dumper's dump() callback, which can * retrieve the kmsg records with kmsg_dump_get_line() or * kmsg_dump_get_buffer(). */ -void kmsg_dump(enum kmsg_dump_reason reason) +void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc) { struct kmsg_dumper *dumper; + struct kmsg_dump_detail detail = { + .reason = reason, + .description = desc}; rcu_read_lock(); list_for_each_entry_rcu(dumper, &dump_list, list) { @@ -4138,7 +4835,7 @@ void kmsg_dump(enum kmsg_dump_reason reason) continue; /* invoke dumper which will iterate over records */ - dumper->dump(dumper, reason); + dumper->dump(dumper, &detail); } rcu_read_unlock(); } @@ -4299,18 +4996,27 @@ void kmsg_dump_rewind(struct kmsg_dump_iter *iter) EXPORT_SYMBOL_GPL(kmsg_dump_rewind); /** - * console_replay_all - replay kernel log on consoles + * console_try_replay_all - try to replay kernel log on consoles * * Try to obtain lock on console subsystem and replay all * available records in printk buffer on the consoles. * Does nothing if lock is not obtained. * - * Context: Any context. + * Context: Any, except for NMI. */ -void console_replay_all(void) +void console_try_replay_all(void) { + struct console_flush_type ft; + + printk_get_console_flush_type(&ft); if (console_trylock()) { __console_rewind_all(); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + if (ft.legacy_offload) + defer_console_output(); /* Consoles are flushed as part of console_unlock(). */ console_unlock(); } @@ -4321,6 +5027,11 @@ void console_replay_all(void) static atomic_t printk_cpu_sync_owner = ATOMIC_INIT(-1); static atomic_t printk_cpu_sync_nested = ATOMIC_INIT(0); +bool is_printk_cpu_sync_owner(void) +{ + return (atomic_read(&printk_cpu_sync_owner) == raw_smp_processor_id()); +} + /** * __printk_cpu_sync_wait() - Busy wait until the printk cpu-reentrant * spinning lock is not owned by any CPU. diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c index 88e8f3a61922..56c8e3d031f4 100644 --- a/kernel/printk/printk_ringbuffer.c +++ b/kernel/printk/printk_ringbuffer.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include <kunit/visibility.h> #include <linux/kernel.h> #include <linux/irqflags.h> #include <linux/string.h> @@ -393,25 +394,38 @@ static unsigned int to_blk_size(unsigned int size) * Sanity checker for reserve size. The ringbuffer code assumes that a data * block does not exceed the maximum possible size that could fit within the * ringbuffer. This function provides that basic size check so that the - * assumption is safe. + * assumption is safe. In particular, it guarantees that data_push_tail() will + * never attempt to push the tail beyond the head. */ static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size) { - struct prb_data_block *db = NULL; - + /* Data-less blocks take no space. */ if (size == 0) return true; /* - * Ensure the alignment padded size could possibly fit in the data - * array. The largest possible data block must still leave room for - * at least the ID of the next block. + * If data blocks were allowed to be larger than half the data ring + * size, a wrapping data block could require more space than the full + * ringbuffer. */ - size = to_blk_size(size); - if (size > DATA_SIZE(data_ring) - sizeof(db->id)) - return false; + return to_blk_size(size) <= DATA_SIZE(data_ring) / 2; +} - return true; +/* + * Compare the current and requested logical position and decide + * whether more space is needed. + * + * Return false when @lpos_current is already at or beyond @lpos_target. + * + * Also return false when the difference between the positions is bigger + * than the size of the data buffer. It might happen only when the caller + * raced with another CPU(s) which already made and used the space. + */ +static bool need_more_space(struct prb_data_ring *data_ring, + unsigned long lpos_current, + unsigned long lpos_target) +{ + return lpos_target - lpos_current - 1 < DATA_SIZE(data_ring); } /* Query the state of a descriptor. */ @@ -580,7 +594,7 @@ static bool data_make_reusable(struct printk_ringbuffer *rb, unsigned long id; /* Loop until @lpos_begin has advanced to or beyond @lpos_end. */ - while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) { + while (need_more_space(data_ring, lpos_begin, lpos_end)) { blk = to_block(data_ring, lpos_begin); /* @@ -671,7 +685,7 @@ static bool data_push_tail(struct printk_ringbuffer *rb, unsigned long lpos) * sees the new tail lpos, any descriptor states that transitioned to * the reusable state must already be visible. */ - while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) { + while (need_more_space(data_ring, tail_lpos, lpos)) { /* * Make all descriptors reusable that are associated with * data blocks before @lpos. @@ -1002,6 +1016,17 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out) return true; } +static bool is_blk_wrapped(struct prb_data_ring *data_ring, + unsigned long begin_lpos, unsigned long next_lpos) +{ + /* + * Subtract one from next_lpos since it's not actually part of this data + * block. This allows perfectly fitting records to not wrap. + */ + return DATA_WRAPS(data_ring, begin_lpos) != + DATA_WRAPS(data_ring, next_lpos - 1); +} + /* Determine the end of a data block. */ static unsigned long get_next_lpos(struct prb_data_ring *data_ring, unsigned long lpos, unsigned int size) @@ -1013,7 +1038,7 @@ static unsigned long get_next_lpos(struct prb_data_ring *data_ring, next_lpos = lpos + size; /* First check if the data block does not wrap. */ - if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos)) + if (!is_blk_wrapped(data_ring, begin_lpos, next_lpos)) return next_lpos; /* Wrapping data blocks store their data at the beginning. */ @@ -1051,8 +1076,17 @@ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size, do { next_lpos = get_next_lpos(data_ring, begin_lpos, size); - if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) { - /* Failed to allocate, specify a data-less block. */ + /* + * data_check_size() prevents data block allocation that could + * cause illegal ringbuffer states. But double check that the + * used space will not be bigger than the ring buffer. Wrapped + * messages need to reserve more space, see get_next_lpos(). + * + * Specify a data-less block when the check or the allocation + * fails. + */ + if (WARN_ON_ONCE(next_lpos - begin_lpos > DATA_SIZE(data_ring)) || + !data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) { blk_lpos->begin = FAILED_LPOS; blk_lpos->next = FAILED_LPOS; return NULL; @@ -1081,7 +1115,7 @@ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size, blk = to_block(data_ring, begin_lpos); blk->id = id; /* LMM(data_alloc:B) */ - if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) { + if (is_blk_wrapped(data_ring, begin_lpos, next_lpos)) { /* Wrapping data blocks store their data at the beginning. */ blk = to_block(data_ring, 0); @@ -1125,14 +1159,21 @@ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size, return NULL; /* Keep track if @blk_lpos was a wrapping data block. */ - wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next)); + wrapped = is_blk_wrapped(data_ring, blk_lpos->begin, blk_lpos->next); size = to_blk_size(size); next_lpos = get_next_lpos(data_ring, blk_lpos->begin, size); - /* If the data block does not increase, there is nothing to do. */ - if (head_lpos - next_lpos < DATA_SIZE(data_ring)) { + /* + * Use the current data block when the size does not increase, i.e. + * when @head_lpos is already able to accommodate the new @next_lpos. + * + * Note that need_more_space() could never return false here because + * the difference between the positions was bigger than the data + * buffer size. The data block is reopened and can't get reused. + */ + if (!need_more_space(data_ring, head_lpos, next_lpos)) { if (wrapped) blk = to_block(data_ring, 0); else @@ -1140,8 +1181,18 @@ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size, return &blk->data[0]; } - if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) + /* + * data_check_size() prevents data block reallocation that could + * cause illegal ringbuffer states. But double check that the + * new used space will not be bigger than the ring buffer. Wrapped + * messages need to reserve more space, see get_next_lpos(). + * + * Specify failure when the check or the allocation fails. + */ + if (WARN_ON_ONCE(next_lpos - blk_lpos->begin > DATA_SIZE(data_ring)) || + !data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) { return NULL; + } /* The memory barrier involvement is the same as data_alloc:A. */ if (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &head_lpos, @@ -1151,7 +1202,7 @@ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size, blk = to_block(data_ring, blk_lpos->begin); - if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) { + if (is_blk_wrapped(data_ring, blk_lpos->begin, next_lpos)) { struct prb_data_block *old_blk = blk; /* Wrapping data blocks store their data at the beginning. */ @@ -1187,7 +1238,7 @@ static unsigned int space_used(struct prb_data_ring *data_ring, if (BLK_DATALESS(blk_lpos)) return 0; - if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) { + if (!is_blk_wrapped(data_ring, blk_lpos->begin, blk_lpos->next)) { /* Data block does not wrap. */ return (DATA_INDEX(data_ring, blk_lpos->next) - DATA_INDEX(data_ring, blk_lpos->begin)); @@ -1233,15 +1284,15 @@ static const char *get_data(struct prb_data_ring *data_ring, return NULL; } - /* Regular data block: @begin less than @next and in same wrap. */ - if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) && - blk_lpos->begin < blk_lpos->next) { + /* Regular data block: @begin and @next in the same wrap. */ + if (!is_blk_wrapped(data_ring, blk_lpos->begin, blk_lpos->next)) { db = to_block(data_ring, blk_lpos->begin); *data_size = blk_lpos->next - blk_lpos->begin; /* Wrapping data block: @begin is one wrap behind @next. */ - } else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) == - DATA_WRAPS(data_ring, blk_lpos->next)) { + } else if (!is_blk_wrapped(data_ring, + blk_lpos->begin + DATA_SIZE(data_ring), + blk_lpos->next)) { db = to_block(data_ring, 0); *data_size = DATA_INDEX(data_ring, blk_lpos->next); @@ -1251,6 +1302,10 @@ static const char *get_data(struct prb_data_ring *data_ring, return NULL; } + /* Sanity check. Data-less blocks were handled earlier. */ + if (WARN_ON_ONCE(!data_check_size(data_ring, *data_size) || !*data_size)) + return NULL; + /* A valid data block will always be aligned to the ID size. */ if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) || WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) { @@ -1685,6 +1740,7 @@ fail: memset(r, 0, sizeof(*r)); return false; } +EXPORT_SYMBOL_IF_KUNIT(prb_reserve); /* Commit the data (possibly finalizing it) and restore interrupts. */ static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val) @@ -1759,6 +1815,7 @@ void prb_commit(struct prb_reserved_entry *e) if (head_id != e->id) desc_make_final(e->rb, e->id); } +EXPORT_SYMBOL_IF_KUNIT(prb_commit); /** * prb_final_commit() - Commit and finalize (previously reserved) data to @@ -2133,9 +2190,9 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, * there may be other finalized records beyond that * need to be printed for a panic situation. If this * is the panic CPU, skip this - * non-existent/non-finalized record unless it is - * at or beyond the head, in which case it is not - * possible to continue. + * non-existent/non-finalized record unless non-panic + * CPUs are still running and their debugging is + * explicitly enabled. * * Note that new messages printed on panic CPU are * finalized when we are here. The only exception @@ -2143,10 +2200,13 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, * But it would have the sequence number returned * by "prb_next_reserve_seq() - 1". */ - if (this_cpu_in_panic() && ((*seq + 1) < prb_next_reserve_seq(rb))) + if (panic_on_this_cpu() && + (!debug_non_panic_cpus || legacy_allow_panic_sync) && + ((*seq + 1) < prb_next_reserve_seq(rb))) { (*seq)++; - else + } else { return false; + } } } @@ -2181,6 +2241,7 @@ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, { return _prb_read_valid(rb, &seq, r, NULL); } +EXPORT_SYMBOL_IF_KUNIT(prb_read_valid); /** * prb_read_valid_info() - Non-blocking read of meta data for a requested @@ -2330,6 +2391,7 @@ void prb_init(struct printk_ringbuffer *rb, infos[0].seq = -(u64)_DESCS_COUNT(descbits); infos[_DESCS_COUNT(descbits) - 1].seq = 0; } +EXPORT_SYMBOL_IF_KUNIT(prb_init); /** * prb_record_text_space() - Query the full actual used ringbuffer space for diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h index 52626d0f1fa3..4ef81349d9fb 100644 --- a/kernel/printk/printk_ringbuffer.h +++ b/kernel/printk/printk_ringbuffer.h @@ -4,7 +4,10 @@ #define _KERNEL_PRINTK_RINGBUFFER_H #include <linux/atomic.h> +#include <linux/bits.h> #include <linux/dev_printk.h> +#include <linux/stddef.h> +#include <linux/types.h> /* * Meta information about each stored message. @@ -120,7 +123,7 @@ enum desc_state { #define _DATA_SIZE(sz_bits) (1UL << (sz_bits)) #define _DESCS_COUNT(ct_bits) (1U << (ct_bits)) -#define DESC_SV_BITS (sizeof(unsigned long) * 8) +#define DESC_SV_BITS BITS_PER_LONG #define DESC_FLAGS_SHIFT (DESC_SV_BITS - 2) #define DESC_FLAGS_MASK (3UL << DESC_FLAGS_SHIFT) #define DESC_STATE(sv) (3UL & (sv >> DESC_FLAGS_SHIFT)) @@ -401,10 +404,12 @@ u64 prb_next_reserve_seq(struct printk_ringbuffer *rb); #define __u64seq_to_ulseq(u64seq) (u64seq) #define __ulseq_to_u64seq(rb, ulseq) (ulseq) +#define ULSEQ_MAX(rb) (-1) #else /* CONFIG_64BIT */ #define __u64seq_to_ulseq(u64seq) ((u32)u64seq) +#define ULSEQ_MAX(rb) __u64seq_to_ulseq(prb_first_seq(rb) + 0x80000000UL) static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq) { diff --git a/kernel/printk/printk_ringbuffer_kunit_test.c b/kernel/printk/printk_ringbuffer_kunit_test.c new file mode 100644 index 000000000000..2282348e869a --- /dev/null +++ b/kernel/printk/printk_ringbuffer_kunit_test.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/cpuhplock.h> +#include <linux/cpumask.h> +#include <linux/init.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/wait.h> + +#include <kunit/resource.h> +#include <kunit/test.h> + +#include "printk_ringbuffer.h" + +/* + * This KUnit tests the data integrity of the lockless printk_ringbuffer. + * From multiple CPUs it writes messages of varying length and content while + * a reader validates the correctness of the messages. + * + * IMPORTANT: The more CPUs you can use for this KUnit, the better! + * + * The test works by starting "num_online_cpus() - 1" writer threads, each + * pinned to their own CPU. Each writer thread loops, writing data of varying + * length into a printk_ringbuffer as fast as possible. The data content is + * an embedded data struct followed by string content repeating the byte: + * + * 'A' + CPUID + * + * The reader is running on the remaining online CPU, or if there is only one + * CPU on the same as the writer. + * It ensures that the embedded struct content is consistent with the string + * and that the string * is terminated and is composed of the same repeating + * byte as its first byte. + * + * Because the threads are running in such tight loops, they will call + * cond_resched() from time to time so the system stays functional. + * + * If the reader encounters an error, the test is aborted and some + * information about the error is reported. + * The runtime of the test can be configured with the runtime_ms module parameter. + * + * Note that the test is performed on a separate printk_ringbuffer instance + * and not the instance used by printk(). + */ + +static unsigned long runtime_ms = 10 * MSEC_PER_SEC; +module_param(runtime_ms, ulong, 0400); + +/* test data structure */ +struct prbtest_rbdata { + unsigned int size; + char text[] __counted_by(size); +}; + +#define MAX_RBDATA_TEXT_SIZE 0x80 +#define MAX_PRB_RECORD_SIZE (sizeof(struct prbtest_rbdata) + MAX_RBDATA_TEXT_SIZE) + +struct prbtest_data { + struct kunit *test; + struct printk_ringbuffer *ringbuffer; + /* used by writers to signal reader of new records */ + wait_queue_head_t new_record_wait; +}; + +struct prbtest_thread_data { + unsigned long num; + struct prbtest_data *test_data; +}; + +static void prbtest_fail_record(struct kunit *test, const struct prbtest_rbdata *dat, u64 seq) +{ + unsigned int len; + + len = dat->size - 1; + + KUNIT_FAIL(test, "BAD RECORD: seq=%llu size=%u text=%.*s\n", + seq, dat->size, + len < MAX_RBDATA_TEXT_SIZE ? len : -1, + len < MAX_RBDATA_TEXT_SIZE ? dat->text : "<invalid>"); +} + +static bool prbtest_check_data(const struct prbtest_rbdata *dat) +{ + unsigned int len; + + /* Sane size? At least one character + trailing '\0' */ + if (dat->size < 2 || dat->size > MAX_RBDATA_TEXT_SIZE) + return false; + + len = dat->size - 1; + if (dat->text[len] != '\0') + return false; + + /* String repeats with the same character? */ + while (len--) { + if (dat->text[len] != dat->text[0]) + return false; + } + + return true; +} + +static int prbtest_writer(void *data) +{ + struct prbtest_thread_data *tr = data; + char text_id = 'A' + tr->num; + struct prb_reserved_entry e; + struct prbtest_rbdata *dat; + u32 record_size, text_size; + unsigned long count = 0; + struct printk_record r; + + kunit_info(tr->test_data->test, "start thread %03lu (writer)\n", tr->num); + + for (;;) { + /* ensure at least 1 character + trailing '\0' */ + text_size = get_random_u32_inclusive(2, MAX_RBDATA_TEXT_SIZE); + if (WARN_ON_ONCE(text_size < 2)) + text_size = 2; + if (WARN_ON_ONCE(text_size > MAX_RBDATA_TEXT_SIZE)) + text_size = MAX_RBDATA_TEXT_SIZE; + + record_size = sizeof(struct prbtest_rbdata) + text_size; + WARN_ON_ONCE(record_size > MAX_PRB_RECORD_SIZE); + + /* specify the text sizes for reservation */ + prb_rec_init_wr(&r, record_size); + + /* + * Reservation can fail if: + * + * - No free descriptor is available. + * - The buffer is full, and the oldest record is reserved + * but not yet committed. + * + * It actually happens in this test because all CPUs are trying + * to write an unbounded number of messages in a tight loop. + * These failures are intentionally ignored because this test + * focuses on races, ringbuffer consistency, and pushing system + * usability limits. + */ + if (prb_reserve(&e, tr->test_data->ringbuffer, &r)) { + r.info->text_len = record_size; + + dat = (struct prbtest_rbdata *)r.text_buf; + dat->size = text_size; + memset(dat->text, text_id, text_size - 1); + dat->text[text_size - 1] = '\0'; + + prb_commit(&e); + + wake_up_interruptible(&tr->test_data->new_record_wait); + } + + if ((count++ & 0x3fff) == 0) + cond_resched(); + + if (kthread_should_stop()) + break; + } + + kunit_info(tr->test_data->test, "end thread %03lu: wrote=%lu\n", tr->num, count); + + return 0; +} + +struct prbtest_wakeup_timer { + struct timer_list timer; + struct task_struct *task; +}; + +static void prbtest_wakeup_callback(struct timer_list *timer) +{ + struct prbtest_wakeup_timer *wakeup = timer_container_of(wakeup, timer, timer); + + set_tsk_thread_flag(wakeup->task, TIF_NOTIFY_SIGNAL); + wake_up_process(wakeup->task); +} + +static int prbtest_reader(struct prbtest_data *test_data, unsigned long timeout_ms) +{ + struct prbtest_wakeup_timer wakeup; + char text_buf[MAX_PRB_RECORD_SIZE]; + unsigned long count = 0; + struct printk_info info; + struct printk_record r; + u64 seq = 0; + + wakeup.task = current; + timer_setup_on_stack(&wakeup.timer, prbtest_wakeup_callback, 0); + mod_timer(&wakeup.timer, jiffies + msecs_to_jiffies(timeout_ms)); + + prb_rec_init_rd(&r, &info, text_buf, sizeof(text_buf)); + + kunit_info(test_data->test, "start reader\n"); + + while (!wait_event_interruptible(test_data->new_record_wait, + prb_read_valid(test_data->ringbuffer, seq, &r))) { + /* check/track the sequence */ + if (info.seq < seq) + KUNIT_FAIL(test_data->test, "BAD SEQ READ: request=%llu read=%llu\n", + seq, info.seq); + + if (!prbtest_check_data((struct prbtest_rbdata *)r.text_buf)) + prbtest_fail_record(test_data->test, + (struct prbtest_rbdata *)r.text_buf, info.seq); + + if ((count++ & 0x3fff) == 0) + cond_resched(); + + seq = info.seq + 1; + } + + timer_delete_sync(&wakeup.timer); + timer_destroy_on_stack(&wakeup.timer); + + kunit_info(test_data->test, "end reader: read=%lu seq=%llu\n", count, info.seq); + + return 0; +} + +KUNIT_DEFINE_ACTION_WRAPPER(prbtest_cpumask_cleanup, free_cpumask_var, struct cpumask *); +KUNIT_DEFINE_ACTION_WRAPPER(prbtest_kthread_cleanup, kthread_stop, struct task_struct *); + +static void prbtest_add_cpumask_cleanup(struct kunit *test, cpumask_var_t mask) +{ + int err; + + err = kunit_add_action_or_reset(test, prbtest_cpumask_cleanup, mask); + KUNIT_ASSERT_EQ(test, err, 0); +} + +static void prbtest_add_kthread_cleanup(struct kunit *test, struct task_struct *kthread) +{ + int err; + + err = kunit_add_action_or_reset(test, prbtest_kthread_cleanup, kthread); + KUNIT_ASSERT_EQ(test, err, 0); +} + +static inline void prbtest_prb_reinit(struct printk_ringbuffer *rb) +{ + prb_init(rb, rb->text_data_ring.data, rb->text_data_ring.size_bits, rb->desc_ring.descs, + rb->desc_ring.count_bits, rb->desc_ring.infos); +} + +static void test_readerwriter(struct kunit *test) +{ + /* Equivalent to CONFIG_LOG_BUF_SHIFT=13 */ + DEFINE_PRINTKRB(test_rb, 8, 5); + + struct prbtest_thread_data *thread_data; + struct prbtest_data *test_data; + struct task_struct *thread; + cpumask_var_t test_cpus; + int cpu, reader_cpu; + + KUNIT_ASSERT_TRUE(test, alloc_cpumask_var(&test_cpus, GFP_KERNEL)); + prbtest_add_cpumask_cleanup(test, test_cpus); + + cpus_read_lock(); + /* + * Failure of KUNIT_ASSERT() kills the current task + * so it can not be called while the CPU hotplug lock is held. + * Instead use a snapshot of the online CPUs. + * If they change during test execution it is unfortunate but not a grave error. + */ + cpumask_copy(test_cpus, cpu_online_mask); + cpus_read_unlock(); + + /* One CPU is for the reader, all others are writers */ + reader_cpu = cpumask_first(test_cpus); + if (cpumask_weight(test_cpus) == 1) + kunit_warn(test, "more than one CPU is recommended"); + else + cpumask_clear_cpu(reader_cpu, test_cpus); + + /* KUnit test can get restarted more times. */ + prbtest_prb_reinit(&test_rb); + + test_data = kunit_kmalloc(test, sizeof(*test_data), GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, test_data); + test_data->test = test; + test_data->ringbuffer = &test_rb; + init_waitqueue_head(&test_data->new_record_wait); + + kunit_info(test, "running for %lu ms\n", runtime_ms); + + for_each_cpu(cpu, test_cpus) { + thread_data = kunit_kmalloc(test, sizeof(*thread_data), GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, thread_data); + thread_data->test_data = test_data; + thread_data->num = cpu; + + thread = kthread_run_on_cpu(prbtest_writer, thread_data, cpu, + "prbtest writer %u"); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, thread); + prbtest_add_kthread_cleanup(test, thread); + } + + kunit_info(test, "starting test\n"); + + set_cpus_allowed_ptr(current, cpumask_of(reader_cpu)); + prbtest_reader(test_data, runtime_ms); + + kunit_info(test, "completed test\n"); +} + +static struct kunit_case prb_test_cases[] = { + KUNIT_CASE_SLOW(test_readerwriter), + {} +}; + +static struct kunit_suite prb_test_suite = { + .name = "printk-ringbuffer", + .test_cases = prb_test_cases, +}; +kunit_test_suite(prb_test_suite); + +MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); +MODULE_AUTHOR("John Ogness <john.ogness@linutronix.de>"); +MODULE_DESCRIPTION("printk_ringbuffer KUnit test"); +MODULE_LICENSE("GPL"); diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 6d10927a07d8..32a28f563b13 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -12,6 +12,24 @@ #include "internal.h" +/* Context where printk messages are never suppressed */ +static atomic_t force_con; + +void printk_force_console_enter(void) +{ + atomic_inc(&force_con); +} + +void printk_force_console_exit(void) +{ + atomic_dec(&force_con); +} + +bool is_printk_force_console(void) +{ + return atomic_read(&force_con); +} + static DEFINE_PER_CPU(int, printk_context); /* Can be preempted by NMI. */ @@ -26,6 +44,34 @@ void __printk_safe_exit(void) this_cpu_dec(printk_context); } +void __printk_deferred_enter(void) +{ + cant_migrate(); + __printk_safe_enter(); +} + +void __printk_deferred_exit(void) +{ + cant_migrate(); + __printk_safe_exit(); +} + +bool is_printk_legacy_deferred(void) +{ + /* + * The per-CPU variable @printk_context can be read safely in any + * context. CPU migration is always disabled when set. + * + * A context holding the printk_cpu_sync must not spin waiting for + * another CPU. For legacy printing, it could be the console_lock + * or the port lock. + */ + return (force_legacy_kthread() || + this_cpu_read(printk_context) || + in_nmi() || + is_printk_cpu_sync_owner()); +} + asmlinkage int vprintk(const char *fmt, va_list args) { #ifdef CONFIG_KGDB_KDB @@ -33,15 +79,6 @@ asmlinkage int vprintk(const char *fmt, va_list args) if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); #endif - - /* - * Use the main logbuf even in NMI. But avoid calling console - * drivers that might have their own locks. - */ - if (this_cpu_read(printk_context) || in_nmi()) - return vprintk_deferred(fmt, args); - - /* No obstacles. */ return vprintk_default(fmt, args); } EXPORT_SYMBOL(vprintk); diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c index 3e47dedce9e5..da77f3f5c1fe 100644 --- a/kernel/printk/sysctl.c +++ b/kernel/printk/sysctl.c @@ -11,7 +11,7 @@ static const int ten_thousand = 10000; -static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, +static int proc_dointvec_minmax_sysadmin(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { if (write && !capable(CAP_SYS_ADMIN)) @@ -20,7 +20,7 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } -static struct ctl_table printk_sysctls[] = { +static const struct ctl_table printk_sysctls[] = { { .procname = "printk", .data = &console_loglevel, |
