diff options
Diffstat (limited to 'kernel/printk')
-rw-r--r-- | kernel/printk/console_cmdline.h | 1 | ||||
-rw-r--r-- | kernel/printk/internal.h | 218 | ||||
-rw-r--r-- | kernel/printk/nbcon.c | 975 | ||||
-rw-r--r-- | kernel/printk/printk.c | 1126 | ||||
-rw-r--r-- | kernel/printk/printk_ringbuffer.c | 335 | ||||
-rw-r--r-- | kernel/printk/printk_ringbuffer.h | 61 | ||||
-rw-r--r-- | kernel/printk/printk_safe.c | 55 | ||||
-rw-r--r-- | kernel/printk/sysctl.c | 5 |
8 files changed, 2376 insertions, 400 deletions
diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h index 3ca74ad391d6..0ab573b6d4dc 100644 --- a/kernel/printk/console_cmdline.h +++ b/kernel/printk/console_cmdline.h @@ -6,6 +6,7 @@ struct console_cmdline { char name[16]; /* Name of the driver */ int index; /* Minor dev. to use */ + char devname[32]; /* DEVNAME:0.0 style device name */ bool user_specified; /* Specified by command line vs. platform */ char *options; /* Options for the driver */ #ifdef CONFIG_A11Y_BRAILLE_CONSOLE diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 6c2afee5ef62..a91bdf802967 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -2,13 +2,14 @@ /* * internal.h - printk internal definitions */ -#include <linux/percpu.h> #include <linux/console.h> -#include "printk_ringbuffer.h" +#include <linux/percpu.h> +#include <linux/types.h> #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) +struct ctl_table; void __init printk_sysctl_init(void); -int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, +int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); #else #define printk_sysctl_init() do { } while (0) @@ -20,6 +21,19 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, (con->flags & CON_BOOT) ? "boot" : "", \ con->name, con->index, ##__VA_ARGS__) +/* + * Identify if legacy printing is forced in a dedicated kthread. If + * true, all printing via console lock occurs within a dedicated + * legacy printer thread. The only exception is on panic, after the + * nbcon consoles have had their chance to print the panic messages + * first. + */ +#ifdef CONFIG_PREEMPT_RT +# define force_legacy_kthread() (true) +#else +# define force_legacy_kthread() (false) +#endif + #ifdef CONFIG_PRINTK #ifdef CONFIG_PRINTK_CALLER @@ -39,11 +53,17 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, /* Flags for a single printk record. */ enum printk_info_flags { + /* always show on console, ignore console_loglevel */ + LOG_FORCE_CON = 1, LOG_NEWLINE = 2, /* text ended with a newline */ LOG_CONT = 8, /* text is a fragment of a continuation line */ }; +struct printk_ringbuffer; +struct dev_printk_info; + extern struct printk_ringbuffer *prb; +extern bool printk_kthreads_running; __printf(4, 0) int vprintk_store(int facility, int level, @@ -53,6 +73,9 @@ int vprintk_store(int facility, int level, __printf(1, 0) int vprintk_default(const char *fmt, va_list args); __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); +void __printk_safe_enter(void); +void __printk_safe_exit(void); + bool printk_percpu_data_ready(void); #define printk_safe_enter_irqsave(flags) \ @@ -68,15 +91,86 @@ bool printk_percpu_data_ready(void); } while (0) void defer_console_output(void); +bool is_printk_legacy_deferred(void); +bool is_printk_force_console(void); u16 printk_parse_prefix(const char *text, int *level, enum printk_info_flags *flags); +void console_lock_spinning_enable(void); +int console_lock_spinning_disable_and_check(int cookie); u64 nbcon_seq_read(struct console *con); void nbcon_seq_force(struct console *con, u64 seq); bool nbcon_alloc(struct console *con); -void nbcon_init(struct console *con); void nbcon_free(struct console *con); +enum nbcon_prio nbcon_get_default_prio(void); +void nbcon_atomic_flush_pending(void); +bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, + int cookie, bool use_atomic); +bool nbcon_kthread_create(struct console *con); +void nbcon_kthread_stop(struct console *con); +void nbcon_kthreads_wake(void); + +/* + * Check if the given console is currently capable and allowed to print + * records. Note that this function does not consider the current context, + * which can also play a role in deciding if @con can be used to print + * records. + */ +static inline bool console_is_usable(struct console *con, short flags, bool use_atomic) +{ + if (!(flags & CON_ENABLED)) + return false; + + if ((flags & CON_SUSPENDED)) + return false; + + if (flags & CON_NBCON) { + /* The write_atomic() callback is optional. */ + if (use_atomic && !con->write_atomic) + return false; + + /* + * For the !use_atomic case, @printk_kthreads_running is not + * checked because the write_thread() callback is also used + * via the legacy loop when the printer threads are not + * available. + */ + } else { + if (!con->write) + return false; + } + + /* + * Console drivers may assume that per-cpu resources have been + * allocated. So unless they're explicitly marked as being able to + * cope (CON_ANYTIME) don't call them until this CPU is officially up. + */ + if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) + return false; + + return true; +} + +/** + * nbcon_kthread_wake - Wake up a console printing thread + * @con: Console to operate on + */ +static inline void nbcon_kthread_wake(struct console *con) +{ + /* + * Guarantee any new records can be seen by tasks preparing to wait + * before this context checks if the rcuwait is empty. + * + * The full memory barrier in rcuwait_wake_up() pairs with the full + * memory barrier within set_current_state() of + * ___rcuwait_wait_event(), which is called after prepare_to_rcuwait() + * adds the waiter but before it has checked the wait condition. + * + * This pairs with nbcon_kthread_func:A. + */ + rcuwait_wake_up(&con->rcuwait); /* LMM(nbcon_kthread_wake:A) */ +} #else @@ -84,6 +178,8 @@ void nbcon_free(struct console *con); #define PRINTK_MESSAGE_MAX 0 #define PRINTKRB_RECORD_MAX 0 +#define printk_kthreads_running (false) + /* * In !PRINTK builds we still export console_sem * semaphore and some of console functions (console_unlock()/etc.), so @@ -93,14 +189,119 @@ void nbcon_free(struct console *con); #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) static inline bool printk_percpu_data_ready(void) { return false; } +static inline void defer_console_output(void) { } +static inline bool is_printk_legacy_deferred(void) { return false; } static inline u64 nbcon_seq_read(struct console *con) { return 0; } static inline void nbcon_seq_force(struct console *con, u64 seq) { } static inline bool nbcon_alloc(struct console *con) { return false; } -static inline void nbcon_init(struct console *con) { } static inline void nbcon_free(struct console *con) { } +static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; } +static inline void nbcon_atomic_flush_pending(void) { } +static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, + int cookie, bool use_atomic) { return false; } +static inline void nbcon_kthread_wake(struct console *con) { } +static inline void nbcon_kthreads_wake(void) { } + +static inline bool console_is_usable(struct console *con, short flags, + bool use_atomic) { return false; } #endif /* CONFIG_PRINTK */ +extern bool have_boot_console; +extern bool have_nbcon_console; +extern bool have_legacy_console; +extern bool legacy_allow_panic_sync; + +/** + * struct console_flush_type - Define available console flush methods + * @nbcon_atomic: Flush directly using nbcon_atomic() callback + * @nbcon_offload: Offload flush to printer thread + * @legacy_direct: Call the legacy loop in this context + * @legacy_offload: Offload the legacy loop into IRQ or legacy thread + * + * Note that the legacy loop also flushes the nbcon consoles. + */ +struct console_flush_type { + bool nbcon_atomic; + bool nbcon_offload; + bool legacy_direct; + bool legacy_offload; +}; + +/* + * Identify which console flushing methods should be used in the context of + * the caller. + */ +static inline void printk_get_console_flush_type(struct console_flush_type *ft) +{ + memset(ft, 0, sizeof(*ft)); + + switch (nbcon_get_default_prio()) { + case NBCON_PRIO_NORMAL: + if (have_nbcon_console && !have_boot_console) { + if (printk_kthreads_running) + ft->nbcon_offload = true; + else + ft->nbcon_atomic = true; + } + + /* Legacy consoles are flushed directly when possible. */ + if (have_legacy_console || have_boot_console) { + if (!is_printk_legacy_deferred()) + ft->legacy_direct = true; + else + ft->legacy_offload = true; + } + break; + + case NBCON_PRIO_EMERGENCY: + if (have_nbcon_console && !have_boot_console) + ft->nbcon_atomic = true; + + /* Legacy consoles are flushed directly when possible. */ + if (have_legacy_console || have_boot_console) { + if (!is_printk_legacy_deferred()) + ft->legacy_direct = true; + else + ft->legacy_offload = true; + } + break; + + case NBCON_PRIO_PANIC: + /* + * In panic, the nbcon consoles will directly print. But + * only allowed if there are no boot consoles. + */ + if (have_nbcon_console && !have_boot_console) + ft->nbcon_atomic = true; + + if (have_legacy_console || have_boot_console) { + /* + * This is the same decision as NBCON_PRIO_NORMAL + * except that offloading never occurs in panic. + * + * Note that console_flush_on_panic() will flush + * legacy consoles anyway, even if unsafe. + */ + if (!is_printk_legacy_deferred()) + ft->legacy_direct = true; + + /* + * In panic, if nbcon atomic printing occurs, + * the legacy consoles must remain silent until + * explicitly allowed. + */ + if (ft->nbcon_atomic && !legacy_allow_panic_sync) + ft->legacy_direct = false; + } + break; + + default: + WARN_ON_ONCE(1); + break; + } +} + extern struct printk_buffers printk_shared_pbufs; /** @@ -135,4 +336,11 @@ bool printk_get_next_message(struct printk_message *pmsg, u64 seq, #ifdef CONFIG_PRINTK void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped); +void console_prepend_replay(struct printk_message *pmsg); +#endif + +#ifdef CONFIG_SMP +bool is_printk_cpu_sync_owner(void); +#else +static inline bool is_printk_cpu_sync_owner(void) { return false; } #endif diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index b96077152f49..fd12efcc4aed 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -2,11 +2,25 @@ // Copyright (C) 2022 Linutronix GmbH, John Ogness // Copyright (C) 2022 Intel, Thomas Gleixner -#include <linux/kernel.h> +#include <linux/atomic.h> +#include <linux/bug.h> #include <linux/console.h> #include <linux/delay.h> +#include <linux/errno.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/irqflags.h> +#include <linux/kthread.h> +#include <linux/minmax.h> +#include <linux/percpu.h> +#include <linux/preempt.h> #include <linux/slab.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/string.h> +#include <linux/types.h> #include "internal.h" +#include "printk_ringbuffer.h" /* * Printk console printing implementation for consoles which does not depend * on the legacy style console_lock mechanism. @@ -140,39 +154,6 @@ static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_sta return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); } -#ifdef CONFIG_64BIT - -#define __seq_to_nbcon_seq(seq) (seq) -#define __nbcon_seq_to_seq(seq) (seq) - -#else /* CONFIG_64BIT */ - -#define __seq_to_nbcon_seq(seq) ((u32)seq) - -static inline u64 __nbcon_seq_to_seq(u32 nbcon_seq) -{ - u64 seq; - u64 rb_next_seq; - - /* - * The provided sequence is only the lower 32 bits of the ringbuffer - * sequence. It needs to be expanded to 64bit. Get the next sequence - * number from the ringbuffer and fold it. - * - * Having a 32bit representation in the console is sufficient. - * If a console ever gets more than 2^31 records behind - * the ringbuffer then this is the least of the problems. - * - * Also the access to the ring buffer is always safe. - */ - rb_next_seq = prb_next_seq(prb); - seq = rb_next_seq - ((u32)rb_next_seq - nbcon_seq); - - return seq; -} - -#endif /* CONFIG_64BIT */ - /** * nbcon_seq_read - Read the current console sequence * @con: Console to read the sequence of @@ -183,7 +164,7 @@ u64 nbcon_seq_read(struct console *con) { unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq)); - return __nbcon_seq_to_seq(nbcon_seq); + return __ulseq_to_u64seq(prb, nbcon_seq); } /** @@ -204,10 +185,7 @@ void nbcon_seq_force(struct console *con, u64 seq) */ u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb)); - atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __seq_to_nbcon_seq(valid_seq)); - - /* Clear con->seq since nbcon consoles use con->nbcon_seq instead. */ - con->seq = 0; + atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq)); } /** @@ -223,11 +201,11 @@ void nbcon_seq_force(struct console *con, u64 seq) */ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) { - unsigned long nbcon_seq = __seq_to_nbcon_seq(ctxt->seq); + unsigned long nbcon_seq = __u64seq_to_ulseq(ctxt->seq); struct console *con = ctxt->console; if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq, - __seq_to_nbcon_seq(new_seq))) { + __u64seq_to_ulseq(new_seq))) { ctxt->seq = new_seq; } else { ctxt->seq = nbcon_seq_read(con); @@ -264,6 +242,13 @@ static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt, struct nbcon_state new; do { + /* + * Panic does not imply that the console is owned. However, it + * is critical that non-panic CPUs during panic are unable to + * acquire ownership in order to satisfy the assumptions of + * nbcon_waiter_matches(). In particular, the assumption that + * lower priorities are ignored during panic. + */ if (other_cpu_in_panic()) return -EPERM; @@ -295,18 +280,29 @@ static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio) /* * The request context is well defined by the @req_prio because: * - * - Only a context with a higher priority can take over the request. + * - Only a context with a priority higher than the owner can become + * a waiter. + * - Only a context with a priority higher than the waiter can + * directly take over the request. * - There are only three priorities. * - Only one CPU is allowed to request PANIC priority. * - Lower priorities are ignored during panic() until reboot. * * As a result, the following scenario is *not* possible: * - * 1. Another context with a higher priority directly takes ownership. - * 2. The higher priority context releases the ownership. - * 3. A lower priority context takes the ownership. - * 4. Another context with the same priority as this context + * 1. This context is currently a waiter. + * 2. Another context with a higher priority than this context + * directly takes ownership. + * 3. The higher priority context releases the ownership. + * 4. Another lower priority context takes the ownership. + * 5. Another context with the same priority as this context * creates a request and starts waiting. + * + * Event #1 implies this context is EMERGENCY. + * Event #2 implies the new context is PANIC. + * Event #3 occurs when panic() has flushed the console. + * Events #4 and #5 are not possible due to the other_cpu_in_panic() + * check in nbcon_context_try_acquire_direct(). */ return (cur->req_prio == expected_prio); @@ -564,6 +560,7 @@ static struct printk_buffers panic_nbcon_pbufs; * nbcon_context_try_acquire - Try to acquire nbcon console * @ctxt: The context of the caller * + * Context: Under @ctxt->con->device_lock() or local_irq_save(). * Return: True if the console was acquired. False otherwise. * * If the caller allowed an unsafe hostile takeover, on success the @@ -571,7 +568,6 @@ static struct printk_buffers panic_nbcon_pbufs; * in an unsafe state. Otherwise, on success the caller may assume * the console is not in an unsafe state. */ -__maybe_unused static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) { unsigned int cpu = smp_processor_id(); @@ -614,11 +610,29 @@ static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu, int expected_prio) { /* - * Since consoles can only be acquired by higher priorities, - * owning contexts are uniquely identified by @prio. However, - * since contexts can unexpectedly lose ownership, it is - * possible that later another owner appears with the same - * priority. For this reason @cpu is also needed. + * A similar function, nbcon_waiter_matches(), only deals with + * EMERGENCY and PANIC priorities. However, this function must also + * deal with the NORMAL priority, which requires additional checks + * and constraints. + * + * For the case where preemption and interrupts are disabled, it is + * enough to also verify that the owning CPU has not changed. + * + * For the case where preemption or interrupts are enabled, an + * external synchronization method *must* be used. In particular, + * the driver-specific locking mechanism used in device_lock() + * (including disabling migration) should be used. It prevents + * scenarios such as: + * + * 1. [Task A] owns a context with NBCON_PRIO_NORMAL on [CPU X] and + * is scheduled out. + * 2. Another context takes over the lock with NBCON_PRIO_EMERGENCY + * and releases it. + * 3. [Task B] acquires a context with NBCON_PRIO_NORMAL on [CPU X] + * and is scheduled out. + * 4. [Task A] gets running on [CPU X] and sees that the console is + * still owned by a task on [CPU X] with NBON_PRIO_NORMAL. Thus + * [Task A] thinks it is the owner when it is not. */ if (cur->prio != expected_prio) @@ -817,6 +831,19 @@ out: return nbcon_context_can_proceed(ctxt, &cur); } +static void nbcon_write_context_set_buf(struct nbcon_write_context *wctxt, + char *buf, unsigned int len) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + struct console *con = ctxt->console; + struct nbcon_state cur; + + wctxt->outbuf = buf; + wctxt->len = len; + nbcon_state_read(con, &cur); + wctxt->unsafe_takeover = cur.unsafe_takeover; +} + /** * nbcon_enter_unsafe - Enter an unsafe region in the driver * @wctxt: The write context that was handed to the write function @@ -832,8 +859,12 @@ out: bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + bool is_owner; - return nbcon_context_enter_unsafe(ctxt); + is_owner = nbcon_context_enter_unsafe(ctxt); + if (!is_owner) + nbcon_write_context_set_buf(wctxt, NULL, 0); + return is_owner; } EXPORT_SYMBOL_GPL(nbcon_enter_unsafe); @@ -852,14 +883,47 @@ EXPORT_SYMBOL_GPL(nbcon_enter_unsafe); bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + bool ret; - return nbcon_context_exit_unsafe(ctxt); + ret = nbcon_context_exit_unsafe(ctxt); + if (!ret) + nbcon_write_context_set_buf(wctxt, NULL, 0); + return ret; } EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); /** + * nbcon_reacquire_nobuf - Reacquire a console after losing ownership + * while printing + * @wctxt: The write context that was handed to the write callback + * + * Since ownership can be lost at any time due to handover or takeover, a + * printing context _must_ be prepared to back out immediately and + * carefully. However, there are scenarios where the printing context must + * reacquire ownership in order to finalize or revert hardware changes. + * + * This function allows a printing context to reacquire ownership using the + * same priority as its previous ownership. + * + * Note that after a successful reacquire the printing context will have no + * output buffer because that has been lost. This function cannot be used to + * resume printing. + */ +void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + + while (!nbcon_context_try_acquire(ctxt)) + cpu_relax(); + + nbcon_write_context_set_buf(wctxt, NULL, 0); +} +EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf); + +/** * nbcon_emit_next_record - Emit a record in the acquired context * @wctxt: The write context that will be handed to the write function + * @use_atomic: True if the write_atomic() callback is to be used * * Return: True if this context still owns the console. False if * ownership was handed over or taken. @@ -873,8 +937,7 @@ EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); * When true is returned, @wctxt->ctxt.backlog indicates whether there are * still records pending in the ringbuffer, */ -__maybe_unused -static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) +static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_atomic) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); struct console *con = ctxt->console; @@ -885,7 +948,22 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) unsigned long con_dropped; struct nbcon_state cur; unsigned long dropped; - bool done; + unsigned long ulseq; + + /* + * This function should never be called for consoles that have not + * implemented the necessary callback for writing: i.e. legacy + * consoles and, when atomic, nbcon consoles with no write_atomic(). + * Handle it as if ownership was lost and try to continue. + * + * Note that for nbcon consoles the write_thread() callback is + * mandatory and was already checked in nbcon_alloc(). + */ + if (WARN_ON_ONCE((use_atomic && !con->write_atomic) || + !(console_srcu_read_flags(con) & CON_NBCON))) { + nbcon_context_release(ctxt); + return false; + } /* * The printk buffers are filled within an unsafe section. This @@ -911,6 +989,29 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) if (dropped && !is_extended) console_prepend_dropped(&pmsg, dropped); + /* + * If the previous owner was assigned the same record, this context + * has taken over ownership and is replaying the record. Prepend a + * message to let the user know the record is replayed. + */ + ulseq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_prev_seq)); + if (__ulseq_to_u64seq(prb, ulseq) == pmsg.seq) { + console_prepend_replay(&pmsg); + } else { + /* + * Ensure this context is still the owner before trying to + * update @nbcon_prev_seq. Otherwise the value in @ulseq may + * not be from the previous owner and instead be some later + * value from the context that took over ownership. + */ + nbcon_state_read(con, &cur); + if (!nbcon_context_can_proceed(ctxt, &cur)) + return false; + + atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_prev_seq), &ulseq, + __u64seq_to_ulseq(pmsg.seq)); + } + if (!nbcon_context_exit_unsafe(ctxt)) return false; @@ -919,22 +1020,27 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) goto update_con; /* Initialize the write context for driver callbacks. */ - wctxt->outbuf = &pmsg.pbufs->outbuf[0]; - wctxt->len = pmsg.outbuf_len; - nbcon_state_read(con, &cur); - wctxt->unsafe_takeover = cur.unsafe_takeover; + nbcon_write_context_set_buf(wctxt, &pmsg.pbufs->outbuf[0], pmsg.outbuf_len); - if (con->write_atomic) { - done = con->write_atomic(con, wctxt); - } else { + if (use_atomic) + con->write_atomic(con, wctxt); + else + con->write_thread(con, wctxt); + + if (!wctxt->outbuf) { + /* + * Ownership was lost and reacquired by the driver. Handle it + * as if ownership was lost. + */ nbcon_context_release(ctxt); - WARN_ON_ONCE(1); - done = false; + return false; } - /* If not done, the emit was aborted. */ - if (!done) - return false; + /* + * Ownership may have been lost but _not_ reacquired by the driver. + * This case is detected and handled when entering unsafe to update + * dropped/seq values. + */ /* * Since any dropped message was successfully output, reset the @@ -961,54 +1067,650 @@ update_con: return nbcon_context_exit_unsafe(ctxt); } +/* + * nbcon_emit_one - Print one record for an nbcon console using the + * specified callback + * @wctxt: An initialized write context struct to use for this context + * @use_atomic: True if the write_atomic() callback is to be used + * + * Return: True, when a record has been printed and there are still + * pending records. The caller might want to continue flushing. + * + * False, when there is no pending record, or when the console + * context cannot be acquired, or the ownership has been lost. + * The caller should give up. Either the job is done, cannot be + * done, or will be handled by the owning context. + * + * This is an internal helper to handle the locking of the console before + * calling nbcon_emit_next_record(). + */ +static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + struct console *con = ctxt->console; + unsigned long flags; + bool ret = false; + + if (!use_atomic) { + con->device_lock(con, &flags); + + /* + * Ensure this stays on the CPU to make handover and + * takeover possible. + */ + cant_migrate(); + } + + if (!nbcon_context_try_acquire(ctxt)) + goto out; + + /* + * nbcon_emit_next_record() returns false when the console was + * handed over or taken over. In both cases the context is no + * longer valid. + * + * The higher priority printing context takes over responsibility + * to print the pending records. + */ + if (!nbcon_emit_next_record(wctxt, use_atomic)) + goto out; + + nbcon_context_release(ctxt); + + ret = ctxt->backlog; +out: + if (!use_atomic) + con->device_unlock(con, flags); + return ret; +} + /** - * nbcon_alloc - Allocate buffers needed by the nbcon console - * @con: Console to allocate buffers for + * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup + * @con: Console to operate on + * @ctxt: The nbcon context from nbcon_context_try_acquire() * - * Return: True on success. False otherwise and the console cannot - * be used. + * Return: True if the thread should shutdown or if the console is + * allowed to print and a record is available. False otherwise. * - * This is not part of nbcon_init() because buffer allocation must - * be performed earlier in the console registration process. + * After the thread wakes up, it must first check if it should shutdown before + * attempting any printing. */ -bool nbcon_alloc(struct console *con) +static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_context *ctxt) { - if (con->flags & CON_BOOT) { + bool ret = false; + short flags; + int cookie; + + if (kthread_should_stop()) + return true; + + cookie = console_srcu_read_lock(); + + flags = console_srcu_read_flags(con); + if (console_is_usable(con, flags, false)) { + /* Bring the sequence in @ctxt up to date */ + ctxt->seq = nbcon_seq_read(con); + + ret = prb_read_valid(prb, ctxt->seq, NULL); + } + + console_srcu_read_unlock(cookie); + return ret; +} + +/** + * nbcon_kthread_func - The printer thread function + * @__console: Console to operate on + * + * Return: 0 + */ +static int nbcon_kthread_func(void *__console) +{ + struct console *con = __console; + struct nbcon_write_context wctxt = { + .ctxt.console = con, + .ctxt.prio = NBCON_PRIO_NORMAL, + }; + struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); + short con_flags; + bool backlog; + int cookie; + +wait_for_event: + /* + * Guarantee this task is visible on the rcuwait before + * checking the wake condition. + * + * The full memory barrier within set_current_state() of + * ___rcuwait_wait_event() pairs with the full memory + * barrier within rcuwait_has_sleeper(). + * + * This pairs with rcuwait_has_sleeper:A and nbcon_kthread_wake:A. + */ + rcuwait_wait_event(&con->rcuwait, + nbcon_kthread_should_wakeup(con, ctxt), + TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */ + + do { + if (kthread_should_stop()) + return 0; + + backlog = false; + /* - * Boot console printing is synchronized with legacy console - * printing, so boot consoles can share the same global printk - * buffers. + * Keep the srcu read lock around the entire operation so that + * synchronize_srcu() can guarantee that the kthread stopped + * or suspended printing. */ - con->pbufs = &printk_shared_pbufs; + cookie = console_srcu_read_lock(); + + con_flags = console_srcu_read_flags(con); + + if (console_is_usable(con, con_flags, false)) + backlog = nbcon_emit_one(&wctxt, false); + + console_srcu_read_unlock(cookie); + + cond_resched(); + + } while (backlog); + + goto wait_for_event; +} + +/** + * nbcon_irq_work - irq work to wake console printer thread + * @irq_work: The irq work to operate on + */ +static void nbcon_irq_work(struct irq_work *irq_work) +{ + struct console *con = container_of(irq_work, struct console, irq_work); + + nbcon_kthread_wake(con); +} + +static inline bool rcuwait_has_sleeper(struct rcuwait *w) +{ + /* + * Guarantee any new records can be seen by tasks preparing to wait + * before this context checks if the rcuwait is empty. + * + * This full memory barrier pairs with the full memory barrier within + * set_current_state() of ___rcuwait_wait_event(), which is called + * after prepare_to_rcuwait() adds the waiter but before it has + * checked the wait condition. + * + * This pairs with nbcon_kthread_func:A. + */ + smp_mb(); /* LMM(rcuwait_has_sleeper:A) */ + return rcuwait_active(w); +} + +/** + * nbcon_kthreads_wake - Wake up printing threads using irq_work + */ +void nbcon_kthreads_wake(void) +{ + struct console *con; + int cookie; + + if (!printk_kthreads_running) + return; + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + if (!(console_srcu_read_flags(con) & CON_NBCON)) + continue; + + /* + * Only schedule irq_work if the printing thread is + * actively waiting. If not waiting, the thread will + * notice by itself that it has work to do. + */ + if (rcuwait_has_sleeper(&con->rcuwait)) + irq_work_queue(&con->irq_work); + } + console_srcu_read_unlock(cookie); +} + +/* + * nbcon_kthread_stop - Stop a console printer thread + * @con: Console to operate on + */ +void nbcon_kthread_stop(struct console *con) +{ + lockdep_assert_console_list_lock_held(); + + if (!con->kthread) + return; + + kthread_stop(con->kthread); + con->kthread = NULL; +} + +/** + * nbcon_kthread_create - Create a console printer thread + * @con: Console to operate on + * + * Return: True if the kthread was started or already exists. + * Otherwise false and @con must not be registered. + * + * This function is called when it will be expected that nbcon consoles are + * flushed using the kthread. The messages printed with NBCON_PRIO_NORMAL + * will be no longer flushed by the legacy loop. This is why failure must + * be fatal for console registration. + * + * If @con was already registered and this function fails, @con must be + * unregistered before the global state variable @printk_kthreads_running + * can be set. + */ +bool nbcon_kthread_create(struct console *con) +{ + struct task_struct *kt; + + lockdep_assert_console_list_lock_held(); + + if (con->kthread) + return true; + + kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index); + if (WARN_ON(IS_ERR(kt))) { + con_printk(KERN_ERR, con, "failed to start printing thread\n"); + return false; + } + + con->kthread = kt; + + /* + * It is important that console printing threads are scheduled + * shortly after a printk call and with generous runtime budgets. + */ + sched_set_normal(con->kthread, -20); + + return true; +} + +/* Track the nbcon emergency nesting per CPU. */ +static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting); +static unsigned int early_nbcon_pcpu_emergency_nesting __initdata; + +/** + * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer + * + * Context: For reading, any context. For writing, any context which could + * not be migrated to another CPU. + * Return: Either a pointer to the per CPU emergency nesting counter of + * the current CPU or to the init data during early boot. + * + * The function is safe for reading per-CPU variables in any context because + * preemption is disabled if the current CPU is in the emergency state. See + * also nbcon_cpu_emergency_enter(). + */ +static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void) +{ + /* + * The value of __printk_percpu_data_ready gets set in normal + * context and before SMP initialization. As a result it could + * never change while inside an nbcon emergency section. + */ + if (!printk_percpu_data_ready()) + return &early_nbcon_pcpu_emergency_nesting; + + return raw_cpu_ptr(&nbcon_pcpu_emergency_nesting); +} + +/** + * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon + * printing on the current CPU + * + * Context: Any context. + * Return: The nbcon_prio to use for acquiring an nbcon console in this + * context for printing. + * + * The function is safe for reading per-CPU data in any context because + * preemption is disabled if the current CPU is in the emergency or panic + * state. + */ +enum nbcon_prio nbcon_get_default_prio(void) +{ + unsigned int *cpu_emergency_nesting; + + if (this_cpu_in_panic()) + return NBCON_PRIO_PANIC; + + cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); + if (*cpu_emergency_nesting) + return NBCON_PRIO_EMERGENCY; + + return NBCON_PRIO_NORMAL; +} + +/** + * nbcon_legacy_emit_next_record - Print one record for an nbcon console + * in legacy contexts + * @con: The console to print on + * @handover: Will be set to true if a printk waiter has taken over the + * console_lock, in which case the caller is no longer holding + * both the console_lock and the SRCU read lock. Otherwise it + * is set to false. + * @cookie: The cookie from the SRCU read lock. + * @use_atomic: Set true when called in an atomic or unknown context. + * It affects which nbcon callback will be used: write_atomic() + * or write_thread(). + * + * When false, the write_thread() callback is used and would be + * called in a preemtible context unless disabled by the + * device_lock. The legacy handover is not allowed in this mode. + * + * Context: Any context except NMI. + * Return: True, when a record has been printed and there are still + * pending records. The caller might want to continue flushing. + * + * False, when there is no pending record, or when the console + * context cannot be acquired, or the ownership has been lost. + * The caller should give up. Either the job is done, cannot be + * done, or will be handled by the owning context. + * + * This function is meant to be called by console_flush_all() to print records + * on nbcon consoles from legacy context (printing via console unlocking). + * Essentially it is the nbcon version of console_emit_next_record(). + */ +bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, + int cookie, bool use_atomic) +{ + struct nbcon_write_context wctxt = { }; + struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); + unsigned long flags; + bool progress; + + ctxt->console = con; + ctxt->prio = nbcon_get_default_prio(); + + if (use_atomic) { + /* + * In an atomic or unknown context, use the same procedure as + * in console_emit_next_record(). It allows to handover. + */ + printk_safe_enter_irqsave(flags); + console_lock_spinning_enable(); + stop_critical_timings(); + } + + progress = nbcon_emit_one(&wctxt, use_atomic); + + if (use_atomic) { + start_critical_timings(); + *handover = console_lock_spinning_disable_and_check(cookie); + printk_safe_exit_irqrestore(flags); } else { - con->pbufs = kmalloc(sizeof(*con->pbufs), GFP_KERNEL); - if (!con->pbufs) { - con_printk(KERN_ERR, con, "failed to allocate printing buffer\n"); - return false; + /* Non-atomic does not perform legacy spinning handovers. */ + *handover = false; + } + + return progress; +} + +/** + * __nbcon_atomic_flush_pending_con - Flush specified nbcon console using its + * write_atomic() callback + * @con: The nbcon console to flush + * @stop_seq: Flush up until this record + * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers + * + * Return: 0 if @con was flushed up to @stop_seq Otherwise, error code on + * failure. + * + * Errors: + * + * -EPERM: Unable to acquire console ownership. + * + * -EAGAIN: Another context took over ownership while printing. + * + * -ENOENT: A record before @stop_seq is not available. + * + * If flushing up to @stop_seq was not successful, it only makes sense for the + * caller to try again when -EAGAIN was returned. When -EPERM is returned, + * this context is not allowed to acquire the console. When -ENOENT is + * returned, it cannot be expected that the unfinalized record will become + * available. + */ +static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, + bool allow_unsafe_takeover) +{ + struct nbcon_write_context wctxt = { }; + struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); + int err = 0; + + ctxt->console = con; + ctxt->spinwait_max_us = 2000; + ctxt->prio = nbcon_get_default_prio(); + ctxt->allow_unsafe_takeover = allow_unsafe_takeover; + + if (!nbcon_context_try_acquire(ctxt)) + return -EPERM; + + while (nbcon_seq_read(con) < stop_seq) { + /* + * nbcon_emit_next_record() returns false when the console was + * handed over or taken over. In both cases the context is no + * longer valid. + */ + if (!nbcon_emit_next_record(&wctxt, true)) + return -EAGAIN; + + if (!ctxt->backlog) { + /* Are there reserved but not yet finalized records? */ + if (nbcon_seq_read(con) < stop_seq) + err = -ENOENT; + break; } } - return true; + nbcon_context_release(ctxt); + return err; +} + +/** + * nbcon_atomic_flush_pending_con - Flush specified nbcon console using its + * write_atomic() callback + * @con: The nbcon console to flush + * @stop_seq: Flush up until this record + * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers + * + * This will stop flushing before @stop_seq if another context has ownership. + * That context is then responsible for the flushing. Likewise, if new records + * are added while this context was flushing and there is no other context + * to handle the printing, this context must also flush those records. + */ +static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, + bool allow_unsafe_takeover) +{ + struct console_flush_type ft; + unsigned long flags; + int err; + +again: + /* + * Atomic flushing does not use console driver synchronization (i.e. + * it does not hold the port lock for uart consoles). Therefore IRQs + * must be disabled to avoid being interrupted and then calling into + * a driver that will deadlock trying to acquire console ownership. + */ + local_irq_save(flags); + + err = __nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover); + + local_irq_restore(flags); + + /* + * If there was a new owner (-EPERM, -EAGAIN), that context is + * responsible for completing. + * + * Do not wait for records not yet finalized (-ENOENT) to avoid a + * possible deadlock. They will either get flushed by the writer or + * eventually skipped on panic CPU. + */ + if (err) + return; + + /* + * If flushing was successful but more records are available, this + * context must flush those remaining records if the printer thread + * is not available do it. + */ + printk_get_console_flush_type(&ft); + if (!ft.nbcon_offload && + prb_read_valid(prb, nbcon_seq_read(con), NULL)) { + stop_seq = prb_next_reserve_seq(prb); + goto again; + } } /** - * nbcon_init - Initialize the nbcon console specific data + * __nbcon_atomic_flush_pending - Flush all nbcon consoles using their + * write_atomic() callback + * @stop_seq: Flush up until this record + * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers + */ +static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeover) +{ + struct console *con; + int cookie; + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + short flags = console_srcu_read_flags(con); + + if (!(flags & CON_NBCON)) + continue; + + if (!console_is_usable(con, flags, true)) + continue; + + if (nbcon_seq_read(con) >= stop_seq) + continue; + + nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover); + } + console_srcu_read_unlock(cookie); +} + +/** + * nbcon_atomic_flush_pending - Flush all nbcon consoles using their + * write_atomic() callback + * + * Flush the backlog up through the currently newest record. Any new + * records added while flushing will not be flushed if there is another + * context available to handle the flushing. This is to avoid one CPU + * printing unbounded because other CPUs continue to add records. + */ +void nbcon_atomic_flush_pending(void) +{ + __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), false); +} + +/** + * nbcon_atomic_flush_unsafe - Flush all nbcon consoles using their + * write_atomic() callback and allowing unsafe hostile takeovers + * + * Flush the backlog up through the currently newest record. Unsafe hostile + * takeovers will be performed, if necessary. + */ +void nbcon_atomic_flush_unsafe(void) +{ + __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), true); +} + +/** + * nbcon_cpu_emergency_enter - Enter an emergency section where printk() + * messages for that CPU are flushed directly + * + * Context: Any context. Disables preemption. + * + * When within an emergency section, printk() calls will attempt to flush any + * pending messages in the ringbuffer. + */ +void nbcon_cpu_emergency_enter(void) +{ + unsigned int *cpu_emergency_nesting; + + preempt_disable(); + + cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); + (*cpu_emergency_nesting)++; +} + +/** + * nbcon_cpu_emergency_exit - Exit an emergency section + * + * Context: Within an emergency section. Enables preemption. + */ +void nbcon_cpu_emergency_exit(void) +{ + unsigned int *cpu_emergency_nesting; + + cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); + + if (!WARN_ON_ONCE(*cpu_emergency_nesting == 0)) + (*cpu_emergency_nesting)--; + + preempt_enable(); +} + +/** + * nbcon_alloc - Allocate and init the nbcon console specific data * @con: Console to initialize * - * nbcon_alloc() *must* be called and succeed before this function - * is called. + * Return: True if the console was fully allocated and initialized. + * Otherwise @con must not be registered. * - * This function expects that the legacy @con->seq has been set. + * When allocation and init was successful, the console must be properly + * freed using nbcon_free() once it is no longer needed. */ -void nbcon_init(struct console *con) +bool nbcon_alloc(struct console *con) { struct nbcon_state state = { }; - /* nbcon_alloc() must have been called and successful! */ - BUG_ON(!con->pbufs); + /* The write_thread() callback is mandatory. */ + if (WARN_ON(!con->write_thread)) + return false; - nbcon_seq_force(con, con->seq); + rcuwait_init(&con->rcuwait); + init_irq_work(&con->irq_work, nbcon_irq_work); + atomic_long_set(&ACCESS_PRIVATE(con, nbcon_prev_seq), -1UL); nbcon_state_set(con, &state); + + /* + * Initialize @nbcon_seq to the highest possible sequence number so + * that practically speaking it will have nothing to print until a + * desired initial sequence number has been set via nbcon_seq_force(). + */ + atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), ULSEQ_MAX(prb)); + + if (con->flags & CON_BOOT) { + /* + * Boot console printing is synchronized with legacy console + * printing, so boot consoles can share the same global printk + * buffers. + */ + con->pbufs = &printk_shared_pbufs; + } else { + con->pbufs = kmalloc(sizeof(*con->pbufs), GFP_KERNEL); + if (!con->pbufs) { + con_printk(KERN_ERR, con, "failed to allocate printing buffer\n"); + return false; + } + + if (printk_kthreads_running) { + if (!nbcon_kthread_create(con)) { + kfree(con->pbufs); + con->pbufs = NULL; + return false; + } + } + } + + return true; } /** @@ -1019,6 +1721,9 @@ void nbcon_free(struct console *con) { struct nbcon_state state = { }; + if (printk_kthreads_running) + nbcon_kthread_stop(con); + nbcon_state_set(con, &state); /* Boot consoles share global printk buffers. */ @@ -1027,3 +1732,85 @@ void nbcon_free(struct console *con) con->pbufs = NULL; } + +/** + * nbcon_device_try_acquire - Try to acquire nbcon console and enter unsafe + * section + * @con: The nbcon console to acquire + * + * Context: Under the locking mechanism implemented in + * @con->device_lock() including disabling migration. + * Return: True if the console was acquired. False otherwise. + * + * Console drivers will usually use their own internal synchronization + * mechasism to synchronize between console printing and non-printing + * activities (such as setting baud rates). However, nbcon console drivers + * supporting atomic consoles may also want to mark unsafe sections when + * performing non-printing activities in order to synchronize against their + * atomic_write() callback. + * + * This function acquires the nbcon console using priority NBCON_PRIO_NORMAL + * and marks it unsafe for handover/takeover. + */ +bool nbcon_device_try_acquire(struct console *con) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt); + + cant_migrate(); + + memset(ctxt, 0, sizeof(*ctxt)); + ctxt->console = con; + ctxt->prio = NBCON_PRIO_NORMAL; + + if (!nbcon_context_try_acquire(ctxt)) + return false; + + if (!nbcon_context_enter_unsafe(ctxt)) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(nbcon_device_try_acquire); + +/** + * nbcon_device_release - Exit unsafe section and release the nbcon console + * @con: The nbcon console acquired in nbcon_device_try_acquire() + */ +void nbcon_device_release(struct console *con) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt); + struct console_flush_type ft; + int cookie; + + if (!nbcon_context_exit_unsafe(ctxt)) + return; + + nbcon_context_release(ctxt); + + /* + * This context must flush any new records added while the console + * was locked if the printer thread is not available to do it. The + * console_srcu_read_lock must be taken to ensure the console is + * usable throughout flushing. + */ + cookie = console_srcu_read_lock(); + printk_get_console_flush_type(&ft); + if (console_is_usable(con, console_srcu_read_flags(con), true) && + !ft.nbcon_offload && + prb_read_valid(prb, nbcon_seq_read(con), NULL)) { + /* + * If nbcon_atomic flushing is not available, fallback to + * using the legacy loop. + */ + if (ft.nbcon_atomic) { + __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false); + } else if (ft.legacy_direct) { + if (console_trylock()) + console_unlock(); + } else if (ft.legacy_offload) { + printk_trigger_flush(); + } + } + console_srcu_read_unlock(cookie); +} +EXPORT_SYMBOL_GPL(nbcon_device_release); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f2444b581e16..07668433644b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -34,7 +34,8 @@ #include <linux/security.h> #include <linux/memblock.h> #include <linux/syscalls.h> -#include <linux/crash_core.h> +#include <linux/syscore_ops.h> +#include <linux/vmcore_info.h> #include <linux/ratelimit.h> #include <linux/kmsg_dump.h> #include <linux/syslog.h> @@ -178,9 +179,9 @@ static int __init control_devkmsg(char *str) * Set sysctl string accordingly: */ if (devkmsg_log == DEVKMSG_LOG_MASK_ON) - strcpy(devkmsg_log_str, "on"); + strscpy(devkmsg_log_str, "on"); else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF) - strcpy(devkmsg_log_str, "off"); + strscpy(devkmsg_log_str, "off"); /* else "ratelimit" which is set by default. */ /* @@ -197,7 +198,7 @@ __setup("printk.devkmsg=", control_devkmsg); char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit"; #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) -int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, +int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { char old_str[DEVKMSG_STR_MAX_SIZE]; @@ -209,7 +210,7 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, return -EINVAL; old = devkmsg_log; - strncpy(old_str, devkmsg_log_str, DEVKMSG_STR_MAX_SIZE); + strscpy(old_str, devkmsg_log_str); } err = proc_dostring(table, write, buffer, lenp, ppos); @@ -227,7 +228,7 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, /* ... and restore old setting. */ devkmsg_log = old; - strncpy(devkmsg_log_str, old_str, DEVKMSG_STR_MAX_SIZE); + strscpy(devkmsg_log_str, old_str); return -EINVAL; } @@ -282,6 +283,7 @@ EXPORT_SYMBOL(console_list_unlock); * Return: A cookie to pass to console_srcu_read_unlock(). */ int console_srcu_read_lock(void) + __acquires(&console_srcu) { return srcu_read_lock_nmisafe(&console_srcu); } @@ -295,6 +297,7 @@ EXPORT_SYMBOL(console_srcu_read_lock); * Counterpart to console_srcu_read_lock() */ void console_srcu_read_unlock(int cookie) + __releases(&console_srcu) { srcu_read_unlock_nmisafe(&console_srcu, cookie); } @@ -347,6 +350,29 @@ static bool panic_in_progress(void) return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); } +/* Return true if a panic is in progress on the current CPU. */ +bool this_cpu_in_panic(void) +{ + /* + * We can use raw_smp_processor_id() here because it is impossible for + * the task to be migrated to the panic_cpu, or away from it. If + * panic_cpu has already been set, and we're not currently executing on + * that CPU, then we never will be. + */ + return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id()); +} + +/* + * Return true if a panic is in progress on a remote CPU. + * + * On true, the local CPU should immediately release any printing resources + * that may be needed by the panic CPU. + */ +bool other_cpu_in_panic(void) +{ + return (panic_in_progress() && !this_cpu_in_panic()); +} + /* * This is used for debugging the mess that is the VT code by * keeping track if we have the console semaphore held. It's @@ -438,20 +464,43 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; /* syslog_lock protects syslog_* variables and write access to clear_seq. */ static DEFINE_MUTEX(syslog_lock); -#ifdef CONFIG_PRINTK /* - * During panic, heavy printk by other CPUs can delay the - * panic and risk deadlock on console resources. + * Specifies if a legacy console is registered. If legacy consoles are + * present, it is necessary to perform the console lock/unlock dance + * whenever console flushing should occur. + */ +bool have_legacy_console; + +/* + * Specifies if an nbcon console is registered. If nbcon consoles are present, + * synchronous printing of legacy consoles will not occur during panic until + * the backtrace has been stored to the ringbuffer. */ -static int __read_mostly suppress_panic_printk; +bool have_nbcon_console; +/* + * Specifies if a boot console is registered. If boot consoles are present, + * nbcon consoles cannot print simultaneously and must be synchronized by + * the console lock. This is because boot consoles and nbcon consoles may + * have mapped the same hardware. + */ +bool have_boot_console; + +/* See printk_legacy_allow_panic_sync() for details. */ +bool legacy_allow_panic_sync; + +#ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); +static DECLARE_WAIT_QUEUE_HEAD(legacy_wait); /* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ static u64 syslog_seq; static size_t syslog_partial; static bool syslog_time; +/* True when _all_ printer threads are available for printing. */ +bool printk_kthreads_running; + struct latched_seq { seqcount_latch_t latch; u64 val[2]; @@ -474,7 +523,7 @@ static struct latched_seq clear_seq = { /* record buffer */ #define LOG_ALIGN __alignof__(unsigned long) #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) -#define LOG_BUF_LEN_MAX (u32)(1 << 31) +#define LOG_BUF_LEN_MAX ((u32)1 << 31) static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; @@ -511,10 +560,11 @@ bool printk_percpu_data_ready(void) /* Must be called under syslog_lock. */ static void latched_seq_write(struct latched_seq *ls, u64 val) { - raw_write_seqcount_latch(&ls->latch); + write_seqcount_latch_begin(&ls->latch); ls->val[0] = val; - raw_write_seqcount_latch(&ls->latch); + write_seqcount_latch(&ls->latch); ls->val[1] = val; + write_seqcount_latch_end(&ls->latch); } /* Can be called from any context. */ @@ -525,10 +575,10 @@ static u64 latched_seq_read_nolock(struct latched_seq *ls) u64 val; do { - seq = raw_read_seqcount_latch(&ls->latch); + seq = read_seqcount_latch(&ls->latch); idx = seq & 0x1; val = ls->val[idx]; - } while (raw_read_seqcount_latch_retry(&ls->latch, seq)); + } while (read_seqcount_latch_retry(&ls->latch, seq)); return val; } @@ -598,17 +648,6 @@ static int check_syslog_permissions(int type, int source) if (syslog_action_restricted(type)) { if (capable(CAP_SYSLOG)) goto ok; - /* - * For historical reasons, accept CAP_SYS_ADMIN too, with - * a warning. - */ - if (capable(CAP_SYS_ADMIN)) { - pr_warn_once("%s (%d): Attempt to access syslog with " - "CAP_SYS_ADMIN but no CAP_SYSLOG " - "(deprecated).\n", - current->comm, task_pid_nr(current)); - goto ok; - } return -EPERM; } ok: @@ -951,7 +990,7 @@ const struct file_operations kmsg_fops = { .release = devkmsg_release, }; -#ifdef CONFIG_CRASH_CORE +#ifdef CONFIG_VMCORE_INFO /* * This appends the listed symbols to /proc/vmcore * @@ -1118,6 +1157,17 @@ static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, static char setup_text_buf[PRINTKRB_RECORD_MAX] __initdata; +static void print_log_buf_usage_stats(void) +{ + unsigned int descs_count = log_buf_len >> PRB_AVGBITS; + size_t meta_data_size; + + meta_data_size = descs_count * (sizeof(struct prb_desc) + sizeof(struct printk_info)); + + pr_info("log buffer data + meta data: %u + %zu = %zu bytes\n", + log_buf_len, meta_data_size, log_buf_len + meta_data_size); +} + void __init setup_log_buf(int early) { struct printk_info *new_infos; @@ -1147,20 +1197,25 @@ void __init setup_log_buf(int early) if (!early && !new_log_buf_len) log_buf_add_cpu(); - if (!new_log_buf_len) + if (!new_log_buf_len) { + /* Show the memory stats only once. */ + if (!early) + goto out; + return; + } new_descs_count = new_log_buf_len >> PRB_AVGBITS; if (new_descs_count == 0) { pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len); - return; + goto out; } new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); if (unlikely(!new_log_buf)) { pr_err("log_buf_len: %lu text bytes not available\n", new_log_buf_len); - return; + goto out; } new_descs_size = new_descs_count * sizeof(struct prb_desc); @@ -1223,7 +1278,7 @@ void __init setup_log_buf(int early) prb_next_seq(&printk_rb_static) - seq); } - pr_info("log_buf_len: %u bytes\n", log_buf_len); + print_log_buf_usage_stats(); pr_info("early log buf free: %u(%u%%)\n", free, (free * 100) / __LOG_BUF_LEN); return; @@ -1232,6 +1287,8 @@ err_free_descs: memblock_free(new_descs, new_descs_size); err_free_log_buf: memblock_free(new_log_buf, new_log_buf_len); +out: + print_log_buf_usage_stats(); } static bool __read_mostly ignore_loglevel; @@ -1281,11 +1338,11 @@ static void boot_delay_msec(int level) { unsigned long long k; unsigned long timeout; + bool suppress = !is_printk_force_console() && + suppress_message_printing(level); - if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) - || suppress_message_printing(level)) { + if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) || suppress) return; - } k = (unsigned long long)loops_per_msec * boot_delay; @@ -1844,12 +1901,25 @@ static bool console_waiter; * there may be a waiter spinning (like a spinlock). Also it must be * ready to hand over the lock at the end of the section. */ -static void console_lock_spinning_enable(void) +void console_lock_spinning_enable(void) { + /* + * Do not use spinning in panic(). The panic CPU wants to keep the lock. + * Non-panic CPUs abandon the flush anyway. + * + * Just keep the lockdep annotation. The panic-CPU should avoid + * taking console_owner_lock because it might cause a deadlock. + * This looks like the easiest way how to prevent false lockdep + * reports without handling races a lockless way. + */ + if (panic_in_progress()) + goto lockdep; + raw_spin_lock(&console_owner_lock); console_owner = current; raw_spin_unlock(&console_owner_lock); +lockdep: /* The waiter may spin on us after setting console_owner */ spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); } @@ -1870,10 +1940,26 @@ static void console_lock_spinning_enable(void) * * Return: 1 if the lock rights were passed, 0 otherwise. */ -static int console_lock_spinning_disable_and_check(int cookie) +int console_lock_spinning_disable_and_check(int cookie) { int waiter; + /* + * Ignore spinning waiters during panic() because they might get stopped + * or blocked at any time, + * + * It is safe because nobody is allowed to start spinning during panic + * in the first place. If there has been a waiter then non panic CPUs + * might stay spinning. They would get stopped anyway. The panic context + * will never start spinning and an interrupted spin on panic CPU will + * never continue. + */ + if (panic_in_progress()) { + /* Keep lockdep happy. */ + spin_release(&console_owner_dep_map, _THIS_IP_); + return 0; + } + raw_spin_lock(&console_owner_lock); waiter = READ_ONCE(console_waiter); console_owner = NULL; @@ -1974,6 +2060,12 @@ static int console_trylock_spinning(void) */ mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); + /* + * Update @console_may_schedule for trylock because the previous + * owner may have been schedulable. + */ + console_may_schedule = 0; + return 1; } @@ -2200,6 +2292,9 @@ int vprintk_store(int facility, int level, if (dev_info) flags |= LOG_NEWLINE; + if (is_printk_force_console()) + flags |= LOG_FORCE_CON; + if (flags & LOG_CONT) { prb_rec_init_wr(&r, reserve_size); if (prb_reserve_in_last(&e, prb, &r, caller_id, PRINTKRB_RECORD_MAX)) { @@ -2207,6 +2302,9 @@ int vprintk_store(int facility, int level, facility, &flags, fmt, args); r.info->text_len += text_len; + if (flags & LOG_FORCE_CON) + r.info->flags |= LOG_FORCE_CON; + if (flags & LOG_NEWLINE) { r.info->flags |= LOG_NEWLINE; prb_final_commit(&e); @@ -2259,32 +2357,63 @@ out: return ret; } +/* + * This acts as a one-way switch to allow legacy consoles to print from + * the printk() caller context on a panic CPU. It also attempts to flush + * the legacy consoles in this context. + */ +void printk_legacy_allow_panic_sync(void) +{ + struct console_flush_type ft; + + legacy_allow_panic_sync = true; + + printk_get_console_flush_type(&ft); + if (ft.legacy_direct) { + if (console_trylock()) + console_unlock(); + } +} + asmlinkage int vprintk_emit(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args) { + struct console_flush_type ft; int printed_len; - bool in_sched = false; /* Suppress unimportant messages after panic happens */ if (unlikely(suppress_printk)) return 0; - if (unlikely(suppress_panic_printk) && - atomic_read(&panic_cpu) != raw_smp_processor_id()) + /* + * The messages on the panic CPU are the most important. If + * non-panic CPUs are generating any messages, they will be + * silently dropped. + */ + if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace) return 0; + printk_get_console_flush_type(&ft); + + /* If called from the scheduler, we can not call up(). */ if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; - in_sched = true; + ft.legacy_offload |= ft.legacy_direct; + ft.legacy_direct = false; } printk_delay(level); printed_len = vprintk_store(facility, level, dev_info, fmt, args); - /* If called from the scheduler, we can not call up(). */ - if (!in_sched) { + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + + if (ft.legacy_direct) { /* * The caller may be holding system-critical or * timing-sensitive locks. Disable preemption during @@ -2304,7 +2433,7 @@ asmlinkage int vprintk_emit(int facility, int level, preempt_enable(); } - if (in_sched) + if (ft.legacy_offload) defer_console_output(); else wake_up_klogd(); @@ -2384,18 +2513,23 @@ static void set_user_specified(struct console_cmdline *c, bool user_specified) console_set_on_cmdline = 1; } -static int __add_preferred_console(const char *name, const short idx, char *options, +static int __add_preferred_console(const char *name, const short idx, + const char *devname, char *options, char *brl_options, bool user_specified) { struct console_cmdline *c; int i; + if (!name && !devname) + return -EINVAL; + /* * We use a signed short index for struct console for device drivers to * indicate a not yet assigned index or port. However, a negative index - * value is not valid for preferred console. + * value is not valid when the console name and index are defined on + * the command line. */ - if (idx < 0) + if (name && idx < 0) return -EINVAL; /* @@ -2403,9 +2537,10 @@ static int __add_preferred_console(const char *name, const short idx, char *opti * if we have a slot free. */ for (i = 0, c = console_cmdline; - i < MAX_CMDLINECONSOLES && c->name[0]; + i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]); i++, c++) { - if (strcmp(c->name, name) == 0 && c->index == idx) { + if ((name && strcmp(c->name, name) == 0 && c->index == idx) || + (devname && strcmp(c->devname, devname) == 0)) { if (!brl_options) preferred_console = i; set_user_specified(c, user_specified); @@ -2416,7 +2551,10 @@ static int __add_preferred_console(const char *name, const short idx, char *opti return -E2BIG; if (!brl_options) preferred_console = i; - strscpy(c->name, name, sizeof(c->name)); + if (name) + strscpy(c->name, name); + if (devname) + strscpy(c->devname, devname); c->options = options; set_user_specified(c, user_specified); braille_set_options(c, brl_options); @@ -2441,8 +2579,13 @@ __setup("console_msg_format=", console_msg_format_setup); */ static int __init console_setup(char *str) { - char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */ - char *s, *options, *brl_options = NULL; + static_assert(sizeof(console_cmdline[0].devname) >= sizeof(console_cmdline[0].name) + 4); + char buf[sizeof(console_cmdline[0].devname)]; + char *brl_options = NULL; + char *ttyname = NULL; + char *devname = NULL; + char *options; + char *s; int idx; /* @@ -2451,39 +2594,51 @@ static int __init console_setup(char *str) * for exactly this purpose. */ if (str[0] == 0 || strcmp(str, "null") == 0) { - __add_preferred_console("ttynull", 0, NULL, NULL, true); + __add_preferred_console("ttynull", 0, NULL, NULL, NULL, true); return 1; } if (_braille_console_setup(&str, &brl_options)) return 1; + /* For a DEVNAME:0.0 style console the character device is unknown early */ + if (strchr(str, ':')) + devname = buf; + else + ttyname = buf; + /* * Decode str into name, index, options. */ - if (str[0] >= '0' && str[0] <= '9') { - strcpy(buf, "ttyS"); - strncpy(buf + 4, str, sizeof(buf) - 5); - } else { - strncpy(buf, str, sizeof(buf) - 1); - } - buf[sizeof(buf) - 1] = 0; + if (ttyname && isdigit(str[0])) + scnprintf(buf, sizeof(buf), "ttyS%s", str); + else + strscpy(buf, str); + options = strchr(str, ','); if (options) *(options++) = 0; + #ifdef __sparc__ if (!strcmp(str, "ttya")) - strcpy(buf, "ttyS0"); + strscpy(buf, "ttyS0"); if (!strcmp(str, "ttyb")) - strcpy(buf, "ttyS1"); + strscpy(buf, "ttyS1"); #endif + for (s = buf; *s; s++) - if (isdigit(*s) || *s == ',') + if ((ttyname && isdigit(*s)) || *s == ',') break; - idx = simple_strtoul(s, NULL, 10); + + /* @idx will get defined when devname matches. */ + if (devname) + idx = -1; + else + idx = simple_strtoul(s, NULL, 10); + *s = 0; - __add_preferred_console(buf, idx, options, brl_options, true); + __add_preferred_console(ttyname, idx, devname, options, brl_options, true); return 1; } __setup("console=", console_setup); @@ -2503,9 +2658,54 @@ __setup("console=", console_setup); */ int add_preferred_console(const char *name, const short idx, char *options) { - return __add_preferred_console(name, idx, options, NULL, false); + return __add_preferred_console(name, idx, NULL, options, NULL, false); } +/** + * match_devname_and_update_preferred_console - Update a preferred console + * when matching devname is found. + * @devname: DEVNAME:0.0 style device name + * @name: Name of the corresponding console driver, e.g. "ttyS" + * @idx: Console index, e.g. port number. + * + * The function checks whether a device with the given @devname is + * preferred via the console=DEVNAME:0.0 command line option. + * It fills the missing console driver name and console index + * so that a later register_console() call could find (match) + * and enable this device. + * + * It might be used when a driver subsystem initializes particular + * devices with already known DEVNAME:0.0 style names. And it + * could predict which console driver name and index this device + * would later get associated with. + * + * Return: 0 on success, negative error code on failure. + */ +int match_devname_and_update_preferred_console(const char *devname, + const char *name, + const short idx) +{ + struct console_cmdline *c = console_cmdline; + int i; + + if (!devname || !strlen(devname) || !name || !strlen(name) || idx < 0) + return -EINVAL; + + for (i = 0; i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]); + i++, c++) { + if (!strcmp(devname, c->devname)) { + pr_info("associate the preferred console \"%s\" with \"%s%d\"\n", + devname, name, idx); + strscpy(c->name, name); + c->index = idx; + return 0; + } + } + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(match_devname_and_update_preferred_console); + bool console_suspend_enabled = true; EXPORT_SYMBOL(console_suspend_enabled); @@ -2562,6 +2762,7 @@ void suspend_console(void) void resume_console(void) { + struct console_flush_type ft; struct console *con; if (!console_suspend_enabled) @@ -2579,6 +2780,12 @@ void resume_console(void) */ synchronize_srcu(&console_srcu); + printk_get_console_flush_type(&ft); + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + if (ft.legacy_offload) + defer_console_output(); + pr_flush(1000, true); } @@ -2593,34 +2800,20 @@ void resume_console(void) */ static int console_cpu_notify(unsigned int cpu) { + struct console_flush_type ft; + if (!cpuhp_tasks_frozen) { - /* If trylock fails, someone else is doing the printing */ - if (console_trylock()) - console_unlock(); + printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + if (ft.legacy_direct) { + if (console_trylock()) + console_unlock(); + } } return 0; } -/* - * Return true if a panic is in progress on a remote CPU. - * - * On true, the local CPU should immediately release any printing resources - * that may be needed by the panic CPU. - */ -bool other_cpu_in_panic(void) -{ - if (!panic_in_progress()) - return false; - - /* - * We can use raw_smp_processor_id() here because it is impossible for - * the task to be migrated to the panic_cpu, or away from it. If - * panic_cpu has already been set, and we're not currently executing on - * that CPU, then we never will be. - */ - return atomic_read(&panic_cpu) != raw_smp_processor_id(); -} - /** * console_lock - block the console subsystem from printing * @@ -2670,36 +2863,6 @@ int is_console_locked(void) } EXPORT_SYMBOL(is_console_locked); -/* - * Check if the given console is currently capable and allowed to print - * records. - * - * Requires the console_srcu_read_lock. - */ -static inline bool console_is_usable(struct console *con) -{ - short flags = console_srcu_read_flags(con); - - if (!(flags & CON_ENABLED)) - return false; - - if ((flags & CON_SUSPENDED)) - return false; - - if (!con->write) - return false; - - /* - * Console drivers may assume that per-cpu resources have been - * allocated. So unless they're explicitly marked as being able to - * cope (CON_ANYTIME) don't call them until this CPU is officially up. - */ - if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) - return false; - - return true; -} - static void __console_unlock(void) { console_locked = 0; @@ -2709,30 +2872,31 @@ static void __console_unlock(void) #ifdef CONFIG_PRINTK /* - * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". This - * is achieved by shifting the existing message over and inserting the dropped - * message. + * Prepend the message in @pmsg->pbufs->outbuf. This is achieved by shifting + * the existing message over and inserting the scratchbuf message. * - * @pmsg is the printk message to prepend. + * @pmsg is the original printk message. + * @fmt is the printf format of the message which will prepend the existing one. * - * @dropped is the dropped count to report in the dropped message. - * - * If the message text in @pmsg->pbufs->outbuf does not have enough space for - * the dropped message, the message text will be sufficiently truncated. + * If there is not enough space in @pmsg->pbufs->outbuf, the existing + * message text will be sufficiently truncated. * * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated. */ -void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) +__printf(2, 3) +static void console_prepend_message(struct printk_message *pmsg, const char *fmt, ...) { struct printk_buffers *pbufs = pmsg->pbufs; const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); const size_t outbuf_sz = sizeof(pbufs->outbuf); char *scratchbuf = &pbufs->scratchbuf[0]; char *outbuf = &pbufs->outbuf[0]; + va_list args; size_t len; - len = scnprintf(scratchbuf, scratchbuf_sz, - "** %lu printk messages dropped **\n", dropped); + va_start(args, fmt); + len = vscnprintf(scratchbuf, scratchbuf_sz, fmt, args); + va_end(args); /* * Make sure outbuf is sufficiently large before prepending. @@ -2755,6 +2919,30 @@ void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) } /* + * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". + * @pmsg->outbuf_len is updated appropriately. + * + * @pmsg is the printk message to prepend. + * + * @dropped is the dropped count to report in the dropped message. + */ +void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) +{ + console_prepend_message(pmsg, "** %lu printk messages dropped **\n", dropped); +} + +/* + * Prepend the message in @pmsg->pbufs->outbuf with a "replay message". + * @pmsg->outbuf_len is updated appropriately. + * + * @pmsg is the printk message to prepend. + */ +void console_prepend_replay(struct printk_message *pmsg) +{ + console_prepend_message(pmsg, "** replaying previous printk message **\n"); +} + +/* * Read and format the specified record (or a later record if the specified * record is not available). * @@ -2776,8 +2964,6 @@ void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) bool printk_get_next_message(struct printk_message *pmsg, u64 seq, bool is_extended, bool may_suppress) { - static int panic_console_dropped; - struct printk_buffers *pbufs = pmsg->pbufs; const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); const size_t outbuf_sz = sizeof(pbufs->outbuf); @@ -2786,6 +2972,7 @@ bool printk_get_next_message(struct printk_message *pmsg, u64 seq, struct printk_info info; struct printk_record r; size_t len = 0; + bool force_con; /* * Formatting extended messages requires a separate buffer, so use the @@ -2804,20 +2991,13 @@ bool printk_get_next_message(struct printk_message *pmsg, u64 seq, pmsg->seq = r.info->seq; pmsg->dropped = r.info->seq - seq; + force_con = r.info->flags & LOG_FORCE_CON; /* - * Check for dropped messages in panic here so that printk - * suppression can occur as early as possible if necessary. + * Skip records that are not forced to be printed on consoles and that + * has level above the console loglevel. */ - if (pmsg->dropped && - panic_in_progress() && - panic_console_dropped++ > 10) { - suppress_panic_printk = 1; - pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n"); - } - - /* Skip record that has level above the console loglevel. */ - if (may_suppress && suppress_message_printing(r.info->level)) + if (!force_con && may_suppress && suppress_message_printing(r.info->level)) goto out; if (is_extended) { @@ -2833,6 +3013,34 @@ out: } /* + * Legacy console printing from printk() caller context does not respect + * raw_spinlock/spinlock nesting. For !PREEMPT_RT the lockdep warning is a + * false positive. For PREEMPT_RT the false positive condition does not + * occur. + * + * This map is used to temporarily establish LD_WAIT_SLEEP context for the + * console write() callback when legacy printing to avoid false positive + * lockdep complaints, thus allowing lockdep to continue to function for + * real issues. + */ +#ifdef CONFIG_PREEMPT_RT +static inline void printk_legacy_allow_spinlock_enter(void) { } +static inline void printk_legacy_allow_spinlock_exit(void) { } +#else +static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_SLEEP); + +static inline void printk_legacy_allow_spinlock_enter(void) +{ + lock_map_acquire_try(&printk_legacy_map); +} + +static inline void printk_legacy_allow_spinlock_exit(void) +{ + lock_map_release(&printk_legacy_map); +} +#endif /* CONFIG_PREEMPT_RT */ + +/* * Used as the printk buffers for non-panic, serialized console printing. * This is for legacy (!CON_NBCON) as well as all boot (CON_BOOT) consoles. * Its usage requires the console_lock held. @@ -2881,31 +3089,46 @@ static bool console_emit_next_record(struct console *con, bool *handover, int co con->dropped = 0; } - /* - * While actively printing out messages, if another printk() - * were to occur on another CPU, it may wait for this one to - * finish. This task can not be preempted if there is a - * waiter waiting to take over. - * - * Interrupts are disabled because the hand over to a waiter - * must not be interrupted until the hand over is completed - * (@console_waiter is cleared). - */ - printk_safe_enter_irqsave(flags); - console_lock_spinning_enable(); + /* Write everything out to the hardware. */ - /* Do not trace print latency. */ - stop_critical_timings(); + if (force_legacy_kthread() && !panic_in_progress()) { + /* + * With forced threading this function is in a task context + * (either legacy kthread or get_init_console_seq()). There + * is no need for concern about printk reentrance, handovers, + * or lockdep complaints. + */ - /* Write everything out to the hardware. */ - con->write(con, outbuf, pmsg.outbuf_len); + con->write(con, outbuf, pmsg.outbuf_len); + con->seq = pmsg.seq + 1; + } else { + /* + * While actively printing out messages, if another printk() + * were to occur on another CPU, it may wait for this one to + * finish. This task can not be preempted if there is a + * waiter waiting to take over. + * + * Interrupts are disabled because the hand over to a waiter + * must not be interrupted until the hand over is completed + * (@console_waiter is cleared). + */ + printk_safe_enter_irqsave(flags); + console_lock_spinning_enable(); - start_critical_timings(); + /* Do not trace print latency. */ + stop_critical_timings(); - con->seq = pmsg.seq + 1; + printk_legacy_allow_spinlock_enter(); + con->write(con, outbuf, pmsg.outbuf_len); + printk_legacy_allow_spinlock_exit(); - *handover = console_lock_spinning_disable_and_check(cookie); - printk_safe_exit_irqrestore(flags); + start_critical_timings(); + + con->seq = pmsg.seq + 1; + + *handover = console_lock_spinning_disable_and_check(cookie); + printk_safe_exit_irqrestore(flags); + } skip: return true; } @@ -2918,6 +3141,8 @@ static bool console_emit_next_record(struct console *con, bool *handover, int co return false; } +static inline void printk_kthreads_check_locked(void) { } + #endif /* CONFIG_PRINTK */ /* @@ -2945,6 +3170,7 @@ static bool console_emit_next_record(struct console *con, bool *handover, int co */ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) { + struct console_flush_type ft; bool any_usable = false; struct console *con; bool any_progress; @@ -2956,15 +3182,34 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove do { any_progress = false; + printk_get_console_flush_type(&ft); + cookie = console_srcu_read_lock(); for_each_console_srcu(con) { + short flags = console_srcu_read_flags(con); + u64 printk_seq; bool progress; - if (!console_is_usable(con)) + /* + * console_flush_all() is only responsible for nbcon + * consoles when the nbcon consoles cannot print via + * their atomic or threaded flushing. + */ + if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload)) + continue; + + if (!console_is_usable(con, flags, !do_cond_resched)) continue; any_usable = true; - progress = console_emit_next_record(con, handover, cookie); + if (flags & CON_NBCON) { + progress = nbcon_legacy_emit_next_record(con, handover, cookie, + !do_cond_resched); + printk_seq = nbcon_seq_read(con); + } else { + progress = console_emit_next_record(con, handover, cookie); + printk_seq = con->seq; + } /* * If a handover has occurred, the SRCU read lock @@ -2974,8 +3219,8 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove return false; /* Track the next of the highest seq flushed. */ - if (con->seq > *next_seq) - *next_seq = con->seq; + if (printk_seq > *next_seq) + *next_seq = printk_seq; if (!progress) continue; @@ -2998,19 +3243,7 @@ abandon: return false; } -/** - * console_unlock - unblock the console subsystem from printing - * - * Releases the console_lock which the caller holds to block printing of - * the console subsystem. - * - * While the console_lock was held, console output may have been buffered - * by printk(). If this is the case, console_unlock(); emits - * the output prior to releasing the lock. - * - * console_unlock(); may be called from any context. - */ -void console_unlock(void) +static void __console_flush_and_unlock(void) { bool do_cond_resched; bool handover; @@ -3054,6 +3287,29 @@ void console_unlock(void) */ } while (prb_read_valid(prb, next_seq, NULL) && console_trylock()); } + +/** + * console_unlock - unblock the legacy console subsystem from printing + * + * Releases the console_lock which the caller holds to block printing of + * the legacy console subsystem. + * + * While the console_lock was held, console output may have been buffered + * by printk(). If this is the case, console_unlock() emits the output on + * legacy consoles prior to releasing the lock. + * + * console_unlock(); may be called from any context. + */ +void console_unlock(void) +{ + struct console_flush_type ft; + + printk_get_console_flush_type(&ft); + if (ft.legacy_direct) + __console_flush_and_unlock(); + else + __console_unlock(); +} EXPORT_SYMBOL(console_unlock); /** @@ -3134,6 +3390,40 @@ void console_unblank(void) pr_flush(1000, true); } +/* + * Rewind all consoles to the oldest available record. + * + * IMPORTANT: The function is safe only when called under + * console_lock(). It is not enforced because + * it is used as a best effort in panic(). + */ +static void __console_rewind_all(void) +{ + struct console *c; + short flags; + int cookie; + u64 seq; + + seq = prb_first_valid_seq(prb); + + cookie = console_srcu_read_lock(); + for_each_console_srcu(c) { + flags = console_srcu_read_flags(c); + + if (flags & CON_NBCON) { + nbcon_seq_force(c, seq); + } else { + /* + * This assignment is safe only when called under + * console_lock(). On panic, legacy consoles are + * only best effort. + */ + c->seq = seq; + } + } + console_srcu_read_unlock(cookie); +} + /** * console_flush_on_panic - flush console content on panic * @mode: flush all messages in buffer or just the pending ones @@ -3142,6 +3432,7 @@ void console_unblank(void) */ void console_flush_on_panic(enum con_flush_mode mode) { + struct console_flush_type ft; bool handover; u64 next_seq; @@ -3162,32 +3453,16 @@ void console_flush_on_panic(enum con_flush_mode mode) */ console_may_schedule = 0; - if (mode == CONSOLE_REPLAY_ALL) { - struct console *c; - short flags; - int cookie; - u64 seq; - - seq = prb_first_valid_seq(prb); - - cookie = console_srcu_read_lock(); - for_each_console_srcu(c) { - flags = console_srcu_read_flags(c); + if (mode == CONSOLE_REPLAY_ALL) + __console_rewind_all(); - if (flags & CON_NBCON) { - nbcon_seq_force(c, seq); - } else { - /* - * This is an unsynchronized assignment. On - * panic legacy consoles are only best effort. - */ - c->seq = seq; - } - } - console_srcu_read_unlock(cookie); - } + printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); - console_flush_all(false, &next_seq, &handover); + /* Flush legacy consoles once allowed, even when dangerous. */ + if (legacy_allow_panic_sync) + console_flush_all(false, &next_seq, &handover); } /* @@ -3244,13 +3519,236 @@ EXPORT_SYMBOL(console_stop); void console_start(struct console *console) { + struct console_flush_type ft; + bool is_nbcon; + console_list_lock(); console_srcu_write_flags(console, console->flags | CON_ENABLED); + is_nbcon = console->flags & CON_NBCON; console_list_unlock(); + + /* + * Ensure that all SRCU list walks have completed. The related + * printing context must be able to see it is enabled so that + * it is guaranteed to wake up and resume printing. + */ + synchronize_srcu(&console_srcu); + + printk_get_console_flush_type(&ft); + if (is_nbcon && ft.nbcon_offload) + nbcon_kthread_wake(console); + else if (ft.legacy_offload) + defer_console_output(); + __pr_flush(console, 1000, true); } EXPORT_SYMBOL(console_start); +#ifdef CONFIG_PRINTK +static int unregister_console_locked(struct console *console); + +/* True when system boot is far enough to create printer threads. */ +static bool printk_kthreads_ready __ro_after_init; + +static struct task_struct *printk_legacy_kthread; + +static bool legacy_kthread_should_wakeup(void) +{ + struct console_flush_type ft; + struct console *con; + bool ret = false; + int cookie; + + if (kthread_should_stop()) + return true; + + printk_get_console_flush_type(&ft); + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + short flags = console_srcu_read_flags(con); + u64 printk_seq; + + /* + * The legacy printer thread is only responsible for nbcon + * consoles when the nbcon consoles cannot print via their + * atomic or threaded flushing. + */ + if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload)) + continue; + + if (!console_is_usable(con, flags, false)) + continue; + + if (flags & CON_NBCON) { + printk_seq = nbcon_seq_read(con); + } else { + /* + * It is safe to read @seq because only this + * thread context updates @seq. + */ + printk_seq = con->seq; + } + + if (prb_read_valid(prb, printk_seq, NULL)) { + ret = true; + break; + } + } + console_srcu_read_unlock(cookie); + + return ret; +} + +static int legacy_kthread_func(void *unused) +{ + for (;;) { + wait_event_interruptible(legacy_wait, legacy_kthread_should_wakeup()); + + if (kthread_should_stop()) + break; + + console_lock(); + __console_flush_and_unlock(); + } + + return 0; +} + +static bool legacy_kthread_create(void) +{ + struct task_struct *kt; + + lockdep_assert_console_list_lock_held(); + + kt = kthread_run(legacy_kthread_func, NULL, "pr/legacy"); + if (WARN_ON(IS_ERR(kt))) { + pr_err("failed to start legacy printing thread\n"); + return false; + } + + printk_legacy_kthread = kt; + + /* + * It is important that console printing threads are scheduled + * shortly after a printk call and with generous runtime budgets. + */ + sched_set_normal(printk_legacy_kthread, -20); + + return true; +} + +/** + * printk_kthreads_shutdown - shutdown all threaded printers + * + * On system shutdown all threaded printers are stopped. This allows printk + * to transition back to atomic printing, thus providing a robust mechanism + * for the final shutdown/reboot messages to be output. + */ +static void printk_kthreads_shutdown(void) +{ + struct console *con; + + console_list_lock(); + if (printk_kthreads_running) { + printk_kthreads_running = false; + + for_each_console(con) { + if (con->flags & CON_NBCON) + nbcon_kthread_stop(con); + } + + /* + * The threads may have been stopped while printing a + * backlog. Flush any records left over. + */ + nbcon_atomic_flush_pending(); + } + console_list_unlock(); +} + +static struct syscore_ops printk_syscore_ops = { + .shutdown = printk_kthreads_shutdown, +}; + +/* + * If appropriate, start nbcon kthreads and set @printk_kthreads_running. + * If any kthreads fail to start, those consoles are unregistered. + * + * Must be called under console_list_lock(). + */ +static void printk_kthreads_check_locked(void) +{ + struct hlist_node *tmp; + struct console *con; + + lockdep_assert_console_list_lock_held(); + + if (!printk_kthreads_ready) + return; + + if (have_legacy_console || have_boot_console) { + if (!printk_legacy_kthread && + force_legacy_kthread() && + !legacy_kthread_create()) { + /* + * All legacy consoles must be unregistered. If there + * are any nbcon consoles, they will set up their own + * kthread. + */ + hlist_for_each_entry_safe(con, tmp, &console_list, node) { + if (con->flags & CON_NBCON) + continue; + + unregister_console_locked(con); + } + } + } else if (printk_legacy_kthread) { + kthread_stop(printk_legacy_kthread); + printk_legacy_kthread = NULL; + } + + /* + * Printer threads cannot be started as long as any boot console is + * registered because there is no way to synchronize the hardware + * registers between boot console code and regular console code. + * It can only be known that there will be no new boot consoles when + * an nbcon console is registered. + */ + if (have_boot_console || !have_nbcon_console) { + /* Clear flag in case all nbcon consoles unregistered. */ + printk_kthreads_running = false; + return; + } + + if (printk_kthreads_running) + return; + + hlist_for_each_entry_safe(con, tmp, &console_list, node) { + if (!(con->flags & CON_NBCON)) + continue; + + if (!nbcon_kthread_create(con)) + unregister_console_locked(con); + } + + printk_kthreads_running = true; +} + +static int __init printk_set_kthreads_ready(void) +{ + register_syscore_ops(&printk_syscore_ops); + + console_list_lock(); + printk_kthreads_ready = true; + printk_kthreads_check_locked(); + console_list_unlock(); + + return 0; +} +early_initcall(printk_set_kthreads_ready); +#endif /* CONFIG_PRINTK */ + static int __read_mostly keep_bootcon; static int __init keep_bootcon_setup(char *str) @@ -3263,6 +3761,21 @@ static int __init keep_bootcon_setup(char *str) early_param("keep_bootcon", keep_bootcon_setup); +static int console_call_setup(struct console *newcon, char *options) +{ + int err; + + if (!newcon->setup) + return 0; + + /* Synchronize with possible boot console. */ + console_lock(); + err = newcon->setup(newcon, options); + console_unlock(); + + return err; +} + /* * This is called by register_console() to try to match * the newly registered console with any of the ones selected @@ -3279,8 +3792,11 @@ static int try_enable_preferred_console(struct console *newcon, int i, err; for (i = 0, c = console_cmdline; - i < MAX_CMDLINECONSOLES && c->name[0]; + i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]); i++, c++) { + /* Console not yet initialized? */ + if (!c->name[0]) + continue; if (c->user_specified != user_specified) continue; if (!newcon->match || @@ -3298,8 +3814,8 @@ static int try_enable_preferred_console(struct console *newcon, if (_braille_register_console(newcon, c)) return 0; - if (newcon->setup && - (err = newcon->setup(newcon, c->options)) != 0) + err = console_call_setup(newcon, c->options); + if (err) return err; } newcon->flags |= CON_ENABLED; @@ -3325,7 +3841,7 @@ static void try_enable_default_console(struct console *newcon) if (newcon->index < 0) newcon->index = 0; - if (newcon->setup && newcon->setup(newcon, NULL) != 0) + if (console_call_setup(newcon, NULL) != 0) return; newcon->flags |= CON_ENABLED; @@ -3334,19 +3850,21 @@ static void try_enable_default_console(struct console *newcon) newcon->flags |= CON_CONSDEV; } -static void console_init_seq(struct console *newcon, bool bootcon_registered) +/* Return the starting sequence number for a newly registered console. */ +static u64 get_init_console_seq(struct console *newcon, bool bootcon_registered) { struct console *con; bool handover; + u64 init_seq; if (newcon->flags & (CON_PRINTBUFFER | CON_BOOT)) { /* Get a consistent copy of @syslog_seq. */ mutex_lock(&syslog_lock); - newcon->seq = syslog_seq; + init_seq = syslog_seq; mutex_unlock(&syslog_lock); } else { /* Begin with next message added to ringbuffer. */ - newcon->seq = prb_next_seq(prb); + init_seq = prb_next_seq(prb); /* * If any enabled boot consoles are due to be unregistered @@ -3367,7 +3885,7 @@ static void console_init_seq(struct console *newcon, bool bootcon_registered) * Flush all consoles and set the console to start at * the next unprinted sequence number. */ - if (!console_flush_all(true, &newcon->seq, &handover)) { + if (!console_flush_all(true, &init_seq, &handover)) { /* * Flushing failed. Just choose the lowest * sequence of the enabled boot consoles. @@ -3380,19 +3898,30 @@ static void console_init_seq(struct console *newcon, bool bootcon_registered) if (handover) console_lock(); - newcon->seq = prb_next_seq(prb); + init_seq = prb_next_seq(prb); for_each_console(con) { - if ((con->flags & CON_BOOT) && - (con->flags & CON_ENABLED) && - con->seq < newcon->seq) { - newcon->seq = con->seq; + u64 seq; + + if (!(con->flags & CON_BOOT) || + !(con->flags & CON_ENABLED)) { + continue; } + + if (con->flags & CON_NBCON) + seq = nbcon_seq_read(con); + else + seq = con->seq; + + if (seq < init_seq) + init_seq = seq; } } console_unlock(); } } + + return init_seq; } #define console_first() \ @@ -3421,9 +3950,12 @@ static int unregister_console_locked(struct console *console); */ void register_console(struct console *newcon) { - struct console *con; + bool use_device_lock = (newcon->flags & CON_NBCON) && newcon->write_atomic; bool bootcon_registered = false; bool realcon_registered = false; + struct console *con; + unsigned long flags; + u64 init_seq; int err; console_list_lock(); @@ -3501,10 +4033,31 @@ void register_console(struct console *newcon) } newcon->dropped = 0; - console_init_seq(newcon, bootcon_registered); + init_seq = get_init_console_seq(newcon, bootcon_registered); - if (newcon->flags & CON_NBCON) - nbcon_init(newcon); + if (newcon->flags & CON_NBCON) { + have_nbcon_console = true; + nbcon_seq_force(newcon, init_seq); + } else { + have_legacy_console = true; + newcon->seq = init_seq; + } + + if (newcon->flags & CON_BOOT) + have_boot_console = true; + + /* + * If another context is actively using the hardware of this new + * console, it will not be aware of the nbcon synchronization. This + * is a risk that two contexts could access the hardware + * simultaneously if this new console is used for atomic printing + * and the other context is still using the hardware. + * + * Use the driver synchronization to ensure that the hardware is not + * in use while this new console transitions to being registered. + */ + if (use_device_lock) + newcon->device_lock(newcon, &flags); /* * Put this console in the list - keep the @@ -3530,6 +4083,10 @@ void register_console(struct console *newcon) * register_console() completes. */ + /* This new console is now registered. */ + if (use_device_lock) + newcon->device_unlock(newcon, flags); + console_sysfs_notify(); /* @@ -3550,6 +4107,9 @@ void register_console(struct console *newcon) unregister_console_locked(con); } } + + /* Changed console list, may require printer threads to start/stop. */ + printk_kthreads_check_locked(); unlock: console_list_unlock(); } @@ -3558,6 +4118,12 @@ EXPORT_SYMBOL(register_console); /* Must be called under console_list_lock(). */ static int unregister_console_locked(struct console *console) { + bool use_device_lock = (console->flags & CON_NBCON) && console->write_atomic; + bool found_legacy_con = false; + bool found_nbcon_con = false; + bool found_boot_con = false; + unsigned long flags; + struct console *c; int res; lockdep_assert_console_list_lock_held(); @@ -3570,14 +4136,29 @@ static int unregister_console_locked(struct console *console) if (res > 0) return 0; + if (!console_is_registered_locked(console)) + res = -ENODEV; + else if (console_is_usable(console, console->flags, true)) + __pr_flush(console, 1000, true); + /* Disable it unconditionally */ console_srcu_write_flags(console, console->flags & ~CON_ENABLED); - if (!console_is_registered_locked(console)) - return -ENODEV; + if (res < 0) + return res; + + /* + * Use the driver synchronization to ensure that the hardware is not + * in use while this console transitions to being unregistered. + */ + if (use_device_lock) + console->device_lock(console, &flags); hlist_del_init_rcu(&console->node); + if (use_device_lock) + console->device_unlock(console, flags); + /* * <HISTORICAL> * If this isn't the last console and it has CON_CONSDEV set, we @@ -3605,6 +4186,29 @@ static int unregister_console_locked(struct console *console) if (console->exit) res = console->exit(console); + /* + * With this console gone, the global flags tracking registered + * console types may have changed. Update them. + */ + for_each_console(c) { + if (c->flags & CON_BOOT) + found_boot_con = true; + + if (c->flags & CON_NBCON) + found_nbcon_con = true; + else + found_legacy_con = true; + } + if (!found_boot_con) + have_boot_console = found_boot_con; + if (!found_legacy_con) + have_legacy_console = found_legacy_con; + if (!found_nbcon_con) + have_nbcon_console = found_nbcon_con; + + /* Changed console list, may require printer threads to start/stop. */ + printk_kthreads_check_locked(); + return res; } @@ -3751,6 +4355,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre { unsigned long timeout_jiffies = msecs_to_jiffies(timeout_ms); unsigned long remaining_jiffies = timeout_jiffies; + struct console_flush_type ft; struct console *c; u64 last_diff = 0; u64 printk_seq; @@ -3759,13 +4364,22 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre u64 diff; u64 seq; + /* Sorry, pr_flush() will not work this early. */ + if (system_state < SYSTEM_SCHEDULING) + return false; + might_sleep(); - seq = prb_next_seq(prb); + seq = prb_next_reserve_seq(prb); /* Flush the consoles so that records up to @seq are printed. */ - console_lock(); - console_unlock(); + printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + if (ft.legacy_direct) { + console_lock(); + console_unlock(); + } for (;;) { unsigned long begin_jiffies; @@ -3778,6 +4392,12 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre * console->seq. Releasing console_lock flushes more * records in case @seq is still not printed on all * usable consoles. + * + * Holding the console_lock is not necessary if there + * are no legacy or boot consoles. However, such a + * console could register at any time. Always hold the + * console_lock as a precaution rather than + * synchronizing against register_console(). */ console_lock(); @@ -3793,8 +4413,10 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre * that they make forward progress, so only increment * @diff for usable consoles. */ - if (!console_is_usable(c)) + if (!console_is_usable(c, flags, true) && + !console_is_usable(c, flags, false)) { continue; + } if (flags & CON_NBCON) { printk_seq = nbcon_seq_read(c); @@ -3862,9 +4484,13 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) int pending = this_cpu_xchg(printk_pending, 0); if (pending & PRINTK_PENDING_OUTPUT) { - /* If trylock fails, someone else is doing the printing */ - if (console_trylock()) - console_unlock(); + if (force_legacy_kthread()) { + if (printk_legacy_kthread) + wake_up_interruptible(&legacy_wait); + } else { + if (console_trylock()) + console_unlock(); + } } if (pending & PRINTK_PENDING_WAKEUP) @@ -4072,16 +4698,21 @@ const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); /** - * kmsg_dump - dump kernel log to kernel message dumpers. + * kmsg_dump_desc - dump kernel log to kernel message dumpers. * @reason: the reason (oops, panic etc) for dumping + * @desc: a short string to describe what caused the panic or oops. Can be NULL + * if no additional description is available. * * Call each of the registered dumper's dump() callback, which can * retrieve the kmsg records with kmsg_dump_get_line() or * kmsg_dump_get_buffer(). */ -void kmsg_dump(enum kmsg_dump_reason reason) +void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc) { struct kmsg_dumper *dumper; + struct kmsg_dump_detail detail = { + .reason = reason, + .description = desc}; rcu_read_lock(); list_for_each_entry_rcu(dumper, &dump_list, list) { @@ -4099,7 +4730,7 @@ void kmsg_dump(enum kmsg_dump_reason reason) continue; /* invoke dumper which will iterate over records */ - dumper->dump(dumper, reason); + dumper->dump(dumper, &detail); } rcu_read_unlock(); } @@ -4259,12 +4890,43 @@ void kmsg_dump_rewind(struct kmsg_dump_iter *iter) } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); +/** + * console_try_replay_all - try to replay kernel log on consoles + * + * Try to obtain lock on console subsystem and replay all + * available records in printk buffer on the consoles. + * Does nothing if lock is not obtained. + * + * Context: Any, except for NMI. + */ +void console_try_replay_all(void) +{ + struct console_flush_type ft; + + printk_get_console_flush_type(&ft); + if (console_trylock()) { + __console_rewind_all(); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + if (ft.legacy_offload) + defer_console_output(); + /* Consoles are flushed as part of console_unlock(). */ + console_unlock(); + } +} #endif #ifdef CONFIG_SMP static atomic_t printk_cpu_sync_owner = ATOMIC_INIT(-1); static atomic_t printk_cpu_sync_nested = ATOMIC_INIT(0); +bool is_printk_cpu_sync_owner(void) +{ + return (atomic_read(&printk_cpu_sync_owner) == raw_smp_processor_id()); +} + /** * __printk_cpu_sync_wait() - Busy wait until the printk cpu-reentrant * spinning lock is not owned by any CPU. diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c index fde338606ce8..88e8f3a61922 100644 --- a/kernel/printk/printk_ringbuffer.c +++ b/kernel/printk/printk_ringbuffer.c @@ -6,6 +6,7 @@ #include <linux/errno.h> #include <linux/bug.h> #include "printk_ringbuffer.h" +#include "internal.h" /** * DOC: printk_ringbuffer overview @@ -303,6 +304,9 @@ * * desc_push_tail:B / desc_reserve:D * set descriptor reusable (state), then push descriptor tail (id) + * + * desc_update_last_finalized:A / desc_last_finalized_seq:A + * store finalized record, then set new highest finalized sequence number */ #define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits) @@ -1030,9 +1034,13 @@ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size, unsigned long next_lpos; if (size == 0) { - /* Specify a data-less block. */ - blk_lpos->begin = NO_LPOS; - blk_lpos->next = NO_LPOS; + /* + * Data blocks are not created for empty lines. Instead, the + * reader will recognize these special lpos values and handle + * it appropriately. + */ + blk_lpos->begin = EMPTY_LINE_LPOS; + blk_lpos->next = EMPTY_LINE_LPOS; return NULL; } @@ -1210,10 +1218,18 @@ static const char *get_data(struct prb_data_ring *data_ring, /* Data-less data block description. */ if (BLK_DATALESS(blk_lpos)) { - if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) { + /* + * Records that are just empty lines are also valid, even + * though they do not have a data block. For such records + * explicitly return empty string data to signify success. + */ + if (blk_lpos->begin == EMPTY_LINE_LPOS && + blk_lpos->next == EMPTY_LINE_LPOS) { *data_size = 0; return ""; } + + /* Data lost, invalid, or otherwise unavailable. */ return NULL; } @@ -1442,19 +1458,117 @@ fail_reopen: } /* + * @last_finalized_seq value guarantees that all records up to and including + * this sequence number are finalized and can be read. The only exception are + * too old records which have already been overwritten. + * + * It is also guaranteed that @last_finalized_seq only increases. + * + * Be aware that finalized records following non-finalized records are not + * reported because they are not yet available to the reader. For example, + * a new record stored via printk() will not be available to a printer if + * it follows a record that has not been finalized yet. However, once that + * non-finalized record becomes finalized, @last_finalized_seq will be + * appropriately updated and the full set of finalized records will be + * available to the printer. And since each printk() caller will either + * directly print or trigger deferred printing of all available unprinted + * records, all printk() messages will get printed. + */ +static u64 desc_last_finalized_seq(struct printk_ringbuffer *rb) +{ + struct prb_desc_ring *desc_ring = &rb->desc_ring; + unsigned long ulseq; + + /* + * Guarantee the sequence number is loaded before loading the + * associated record in order to guarantee that the record can be + * seen by this CPU. This pairs with desc_update_last_finalized:A. + */ + ulseq = atomic_long_read_acquire(&desc_ring->last_finalized_seq + ); /* LMM(desc_last_finalized_seq:A) */ + + return __ulseq_to_u64seq(rb, ulseq); +} + +static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, + struct printk_record *r, unsigned int *line_count); + +/* + * Check if there are records directly following @last_finalized_seq that are + * finalized. If so, update @last_finalized_seq to the latest of these + * records. It is not allowed to skip over records that are not yet finalized. + */ +static void desc_update_last_finalized(struct printk_ringbuffer *rb) +{ + struct prb_desc_ring *desc_ring = &rb->desc_ring; + u64 old_seq = desc_last_finalized_seq(rb); + unsigned long oldval; + unsigned long newval; + u64 finalized_seq; + u64 try_seq; + +try_again: + finalized_seq = old_seq; + try_seq = finalized_seq + 1; + + /* Try to find later finalized records. */ + while (_prb_read_valid(rb, &try_seq, NULL, NULL)) { + finalized_seq = try_seq; + try_seq++; + } + + /* No update needed if no later finalized record was found. */ + if (finalized_seq == old_seq) + return; + + oldval = __u64seq_to_ulseq(old_seq); + newval = __u64seq_to_ulseq(finalized_seq); + + /* + * Set the sequence number of a later finalized record that has been + * seen. + * + * Guarantee the record data is visible to other CPUs before storing + * its sequence number. This pairs with desc_last_finalized_seq:A. + * + * Memory barrier involvement: + * + * If desc_last_finalized_seq:A reads from + * desc_update_last_finalized:A, then desc_read:A reads from + * _prb_commit:B. + * + * Relies on: + * + * RELEASE from _prb_commit:B to desc_update_last_finalized:A + * matching + * ACQUIRE from desc_last_finalized_seq:A to desc_read:A + * + * Note: _prb_commit:B and desc_update_last_finalized:A can be + * different CPUs. However, the desc_update_last_finalized:A + * CPU (which performs the release) must have previously seen + * _prb_commit:B. + */ + if (!atomic_long_try_cmpxchg_release(&desc_ring->last_finalized_seq, + &oldval, newval)) { /* LMM(desc_update_last_finalized:A) */ + old_seq = __ulseq_to_u64seq(rb, oldval); + goto try_again; + } +} + +/* * Attempt to finalize a specified descriptor. If this fails, the descriptor * is either already final or it will finalize itself when the writer commits. */ -static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id) +static void desc_make_final(struct printk_ringbuffer *rb, unsigned long id) { + struct prb_desc_ring *desc_ring = &rb->desc_ring; unsigned long prev_state_val = DESC_SV(id, desc_committed); struct prb_desc *d = to_desc(desc_ring, id); - atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val, - DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */ - - /* Best effort to remember the last finalized @id. */ - atomic_long_set(&desc_ring->last_finalized_id, id); + if (atomic_long_try_cmpxchg_relaxed(&d->state_var, &prev_state_val, + DESC_SV(id, desc_finalized))) { /* LMM(desc_make_final:A) */ + desc_update_last_finalized(rb); + } } /** @@ -1550,7 +1664,7 @@ bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, * readers. (For seq==0 there is no previous descriptor.) */ if (info->seq > 0) - desc_make_final(desc_ring, DESC_ID(id - 1)); + desc_make_final(rb, DESC_ID(id - 1)); r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id); /* If text data allocation fails, a data-less record is committed. */ @@ -1643,7 +1757,7 @@ void prb_commit(struct prb_reserved_entry *e) */ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */ if (head_id != e->id) - desc_make_final(desc_ring, e->id); + desc_make_final(e->rb, e->id); } /** @@ -1663,12 +1777,9 @@ void prb_commit(struct prb_reserved_entry *e) */ void prb_final_commit(struct prb_reserved_entry *e) { - struct prb_desc_ring *desc_ring = &e->rb->desc_ring; - _prb_commit(e, desc_finalized); - /* Best effort to remember the last finalized @id. */ - atomic_long_set(&desc_ring->last_finalized_id, e->id); + desc_update_last_finalized(e->rb); } /* @@ -1832,7 +1943,7 @@ static int prb_read(struct printk_ringbuffer *rb, u64 seq, } /* Get the sequence number of the tail descriptor. */ -static u64 prb_first_seq(struct printk_ringbuffer *rb) +u64 prb_first_seq(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; enum desc_state d_state; @@ -1875,12 +1986,123 @@ static u64 prb_first_seq(struct printk_ringbuffer *rb) return seq; } +/** + * prb_next_reserve_seq() - Get the sequence number after the most recently + * reserved record. + * + * @rb: The ringbuffer to get the sequence number from. + * + * This is the public function available to readers to see what sequence + * number will be assigned to the next reserved record. + * + * Note that depending on the situation, this value can be equal to or + * higher than the sequence number returned by prb_next_seq(). + * + * Context: Any context. + * Return: The sequence number that will be assigned to the next record + * reserved. + */ +u64 prb_next_reserve_seq(struct printk_ringbuffer *rb) +{ + struct prb_desc_ring *desc_ring = &rb->desc_ring; + unsigned long last_finalized_id; + atomic_long_t *state_var; + u64 last_finalized_seq; + unsigned long head_id; + struct prb_desc desc; + unsigned long diff; + struct prb_desc *d; + int err; + + /* + * It may not be possible to read a sequence number for @head_id. + * So the ID of @last_finailzed_seq is used to calculate what the + * sequence number of @head_id will be. + */ + +try_again: + last_finalized_seq = desc_last_finalized_seq(rb); + + /* + * @head_id is loaded after @last_finalized_seq to ensure that + * it points to the record with @last_finalized_seq or newer. + * + * Memory barrier involvement: + * + * If desc_last_finalized_seq:A reads from + * desc_update_last_finalized:A, then + * prb_next_reserve_seq:A reads from desc_reserve:D. + * + * Relies on: + * + * RELEASE from desc_reserve:D to desc_update_last_finalized:A + * matching + * ACQUIRE from desc_last_finalized_seq:A to prb_next_reserve_seq:A + * + * Note: desc_reserve:D and desc_update_last_finalized:A can be + * different CPUs. However, the desc_update_last_finalized:A CPU + * (which performs the release) must have previously seen + * desc_read:C, which implies desc_reserve:D can be seen. + */ + head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_next_reserve_seq:A) */ + + d = to_desc(desc_ring, last_finalized_seq); + state_var = &d->state_var; + + /* Extract the ID, used to specify the descriptor to read. */ + last_finalized_id = DESC_ID(atomic_long_read(state_var)); + + /* Ensure @last_finalized_id is correct. */ + err = desc_read_finalized_seq(desc_ring, last_finalized_id, last_finalized_seq, &desc); + + if (err == -EINVAL) { + if (last_finalized_seq == 0) { + /* + * No record has been finalized or even reserved yet. + * + * The @head_id is initialized such that the first + * increment will yield the first record (seq=0). + * Handle it separately to avoid a negative @diff + * below. + */ + if (head_id == DESC0_ID(desc_ring->count_bits)) + return 0; + + /* + * One or more descriptors are already reserved. Use + * the descriptor ID of the first one (@seq=0) for + * the @diff below. + */ + last_finalized_id = DESC0_ID(desc_ring->count_bits) + 1; + } else { + /* Record must have been overwritten. Try again. */ + goto try_again; + } + } + + /* Diff of known descriptor IDs to compute related sequence numbers. */ + diff = head_id - last_finalized_id; + + /* + * @head_id points to the most recently reserved record, but this + * function returns the sequence number that will be assigned to the + * next (not yet reserved) record. Thus +1 is needed. + */ + return (last_finalized_seq + diff + 1); +} + /* - * Non-blocking read of a record. Updates @seq to the last finalized record - * (which may have no data available). + * Non-blocking read of a record. + * + * On success @seq is updated to the record that was read and (if provided) + * @r and @line_count will contain the read/calculated data. + * + * On failure @seq is updated to a record that is not yet available to the + * reader, but it will be the next record available to the reader. * - * See the description of prb_read_valid() and prb_read_valid_info() - * for details. + * Note: When the current CPU is in panic, this function will skip over any + * non-existent/non-finalized records in order to allow the panic CPU + * to print any and all records that have been finalized. */ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, struct printk_record *r, unsigned int *line_count) @@ -1899,12 +2121,32 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, *seq = tail_seq; } else if (err == -ENOENT) { - /* Record exists, but no data available. Skip. */ + /* Record exists, but the data was lost. Skip. */ (*seq)++; } else { - /* Non-existent/non-finalized record. Must stop. */ - return false; + /* + * Non-existent/non-finalized record. Must stop. + * + * For panic situations it cannot be expected that + * non-finalized records will become finalized. But + * there may be other finalized records beyond that + * need to be printed for a panic situation. If this + * is the panic CPU, skip this + * non-existent/non-finalized record unless it is + * at or beyond the head, in which case it is not + * possible to continue. + * + * Note that new messages printed on panic CPU are + * finalized when we are here. The only exception + * might be the last message without trailing newline. + * But it would have the sequence number returned + * by "prb_next_reserve_seq() - 1". + */ + if (this_cpu_in_panic() && ((*seq + 1) < prb_next_reserve_seq(rb))) + (*seq)++; + else + return false; } } @@ -1932,7 +2174,7 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, * On success, the reader must check r->info.seq to see which record was * actually read. This allows the reader to detect dropped records. * - * Failure means @seq refers to a not yet written record. + * Failure means @seq refers to a record not yet available to the reader. */ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, struct printk_record *r) @@ -1962,7 +2204,7 @@ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, * On success, the reader must check info->seq to see which record meta data * was actually read. This allows the reader to detect dropped records. * - * Failure means @seq refers to a not yet written record. + * Failure means @seq refers to a record not yet available to the reader. */ bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, struct printk_info *info, unsigned int *line_count) @@ -2008,7 +2250,9 @@ u64 prb_first_valid_seq(struct printk_ringbuffer *rb) * newest sequence number available to readers will be. * * This provides readers a sequence number to jump to if all currently - * available records should be skipped. + * available records should be skipped. It is guaranteed that all records + * previous to the returned value have been finalized and are (or were) + * available to the reader. * * Context: Any context. * Return: The sequence number of the next newest (not yet available) record @@ -2016,34 +2260,19 @@ u64 prb_first_valid_seq(struct printk_ringbuffer *rb) */ u64 prb_next_seq(struct printk_ringbuffer *rb) { - struct prb_desc_ring *desc_ring = &rb->desc_ring; - enum desc_state d_state; - unsigned long id; u64 seq; - /* Check if the cached @id still points to a valid @seq. */ - id = atomic_long_read(&desc_ring->last_finalized_id); - d_state = desc_read(desc_ring, id, NULL, &seq, NULL); + seq = desc_last_finalized_seq(rb); - if (d_state == desc_finalized || d_state == desc_reusable) { - /* - * Begin searching after the last finalized record. - * - * On 0, the search must begin at 0 because of hack#2 - * of the bootstrapping phase it is not known if a - * record at index 0 exists. - */ - if (seq != 0) - seq++; - } else { - /* - * The information about the last finalized sequence number - * has gone. It should happen only when there is a flood of - * new messages and the ringbuffer is rapidly recycled. - * Give up and start from the beginning. - */ - seq = 0; - } + /* + * Begin searching after the last finalized record. + * + * On 0, the search must begin at 0 because of hack#2 + * of the bootstrapping phase it is not known if a + * record at index 0 exists. + */ + if (seq != 0) + seq++; /* * The information about the last finalized @seq might be inaccurate. @@ -2085,7 +2314,7 @@ void prb_init(struct printk_ringbuffer *rb, rb->desc_ring.infos = infos; atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits)); atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits)); - atomic_long_set(&rb->desc_ring.last_finalized_id, DESC0_ID(descbits)); + atomic_long_set(&rb->desc_ring.last_finalized_seq, 0); rb->text_data_ring.size_bits = textbits; rb->text_data_ring.data = text_buf; diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h index 18cd25e489b8..4ef81349d9fb 100644 --- a/kernel/printk/printk_ringbuffer.h +++ b/kernel/printk/printk_ringbuffer.h @@ -4,7 +4,10 @@ #define _KERNEL_PRINTK_RINGBUFFER_H #include <linux/atomic.h> +#include <linux/bits.h> #include <linux/dev_printk.h> +#include <linux/stddef.h> +#include <linux/types.h> /* * Meta information about each stored message. @@ -75,7 +78,7 @@ struct prb_desc_ring { struct printk_info *infos; atomic_long_t head_id; atomic_long_t tail_id; - atomic_long_t last_finalized_id; + atomic_long_t last_finalized_seq; }; /* @@ -120,15 +123,29 @@ enum desc_state { #define _DATA_SIZE(sz_bits) (1UL << (sz_bits)) #define _DESCS_COUNT(ct_bits) (1U << (ct_bits)) -#define DESC_SV_BITS (sizeof(unsigned long) * 8) +#define DESC_SV_BITS BITS_PER_LONG #define DESC_FLAGS_SHIFT (DESC_SV_BITS - 2) #define DESC_FLAGS_MASK (3UL << DESC_FLAGS_SHIFT) #define DESC_STATE(sv) (3UL & (sv >> DESC_FLAGS_SHIFT)) #define DESC_SV(id, state) (((unsigned long)state << DESC_FLAGS_SHIFT) | id) #define DESC_ID_MASK (~DESC_FLAGS_MASK) #define DESC_ID(sv) ((sv) & DESC_ID_MASK) + +/* + * Special data block logical position values (for fields of + * @prb_desc.text_blk_lpos). + * + * - Bit0 is used to identify if the record has no data block. (Implemented in + * the LPOS_DATALESS() macro.) + * + * - Bit1 specifies the reason for not having a data block. + * + * These special values could never be real lpos values because of the + * meta data and alignment padding of data blocks. (See to_blk_size() for + * details.) + */ #define FAILED_LPOS 0x1 -#define NO_LPOS 0x3 +#define EMPTY_LINE_LPOS 0x3 #define FAILED_BLK_LPOS \ { \ @@ -259,7 +276,7 @@ static struct printk_ringbuffer name = { \ .infos = &_##name##_infos[0], \ .head_id = ATOMIC_INIT(DESC0_ID(descbits)), \ .tail_id = ATOMIC_INIT(DESC0_ID(descbits)), \ - .last_finalized_id = ATOMIC_INIT(DESC0_ID(descbits)), \ + .last_finalized_seq = ATOMIC_INIT(0), \ }, \ .text_data_ring = { \ .size_bits = (avgtextbits) + (descbits), \ @@ -378,7 +395,43 @@ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, struct printk_info *info, unsigned int *line_count); +u64 prb_first_seq(struct printk_ringbuffer *rb); u64 prb_first_valid_seq(struct printk_ringbuffer *rb); u64 prb_next_seq(struct printk_ringbuffer *rb); +u64 prb_next_reserve_seq(struct printk_ringbuffer *rb); + +#ifdef CONFIG_64BIT + +#define __u64seq_to_ulseq(u64seq) (u64seq) +#define __ulseq_to_u64seq(rb, ulseq) (ulseq) +#define ULSEQ_MAX(rb) (-1) + +#else /* CONFIG_64BIT */ + +#define __u64seq_to_ulseq(u64seq) ((u32)u64seq) +#define ULSEQ_MAX(rb) __u64seq_to_ulseq(prb_first_seq(rb) + 0x80000000UL) + +static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq) +{ + u64 rb_first_seq = prb_first_seq(rb); + u64 seq; + + /* + * The provided sequence is only the lower 32 bits of the ringbuffer + * sequence. It needs to be expanded to 64bit. Get the first sequence + * number from the ringbuffer and fold it. + * + * Having a 32bit representation in the console is sufficient. + * If a console ever gets more than 2^31 records behind + * the ringbuffer then this is the least of the problems. + * + * Also the access to the ring buffer is always safe. + */ + seq = rb_first_seq - (s32)((u32)rb_first_seq - ulseq); + + return seq; +} + +#endif /* CONFIG_64BIT */ #endif /* _KERNEL_PRINTK_RINGBUFFER_H */ diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 6d10927a07d8..32a28f563b13 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -12,6 +12,24 @@ #include "internal.h" +/* Context where printk messages are never suppressed */ +static atomic_t force_con; + +void printk_force_console_enter(void) +{ + atomic_inc(&force_con); +} + +void printk_force_console_exit(void) +{ + atomic_dec(&force_con); +} + +bool is_printk_force_console(void) +{ + return atomic_read(&force_con); +} + static DEFINE_PER_CPU(int, printk_context); /* Can be preempted by NMI. */ @@ -26,6 +44,34 @@ void __printk_safe_exit(void) this_cpu_dec(printk_context); } +void __printk_deferred_enter(void) +{ + cant_migrate(); + __printk_safe_enter(); +} + +void __printk_deferred_exit(void) +{ + cant_migrate(); + __printk_safe_exit(); +} + +bool is_printk_legacy_deferred(void) +{ + /* + * The per-CPU variable @printk_context can be read safely in any + * context. CPU migration is always disabled when set. + * + * A context holding the printk_cpu_sync must not spin waiting for + * another CPU. For legacy printing, it could be the console_lock + * or the port lock. + */ + return (force_legacy_kthread() || + this_cpu_read(printk_context) || + in_nmi() || + is_printk_cpu_sync_owner()); +} + asmlinkage int vprintk(const char *fmt, va_list args) { #ifdef CONFIG_KGDB_KDB @@ -33,15 +79,6 @@ asmlinkage int vprintk(const char *fmt, va_list args) if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); #endif - - /* - * Use the main logbuf even in NMI. But avoid calling console - * drivers that might have their own locks. - */ - if (this_cpu_read(printk_context) || in_nmi()) - return vprintk_deferred(fmt, args); - - /* No obstacles. */ return vprintk_default(fmt, args); } EXPORT_SYMBOL(vprintk); diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c index c228343eeb97..da77f3f5c1fe 100644 --- a/kernel/printk/sysctl.c +++ b/kernel/printk/sysctl.c @@ -11,7 +11,7 @@ static const int ten_thousand = 10000; -static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, +static int proc_dointvec_minmax_sysadmin(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { if (write && !capable(CAP_SYS_ADMIN)) @@ -20,7 +20,7 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } -static struct ctl_table printk_sysctls[] = { +static const struct ctl_table printk_sysctls[] = { { .procname = "printk", .data = &console_loglevel, @@ -76,7 +76,6 @@ static struct ctl_table printk_sysctls[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, - {} }; void __init printk_sysctl_init(void) |