Diffstat (limited to 'kernel/printk/printk.c')
-rw-r--r-- | kernel/printk/printk.c | 3126 |
1 file changed, 2322 insertions, 804 deletions
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5a95c688621f..07668433644b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -34,7 +34,8 @@ #include <linux/security.h> #include <linux/memblock.h> #include <linux/syscalls.h> -#include <linux/crash_core.h> +#include <linux/syscore_ops.h> +#include <linux/vmcore_info.h> #include <linux/ratelimit.h> #include <linux/kmsg_dump.h> #include <linux/syslog.h> @@ -71,6 +72,8 @@ EXPORT_SYMBOL_GPL(console_printk); atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0); EXPORT_SYMBOL(ignore_console_lock_warning); +EXPORT_TRACEPOINT_SYMBOL_GPL(console); + /* * Low level drivers may need that to know if they can schedule in * their unblank() callback or not. So let's export it. @@ -79,13 +82,20 @@ int oops_in_progress; EXPORT_SYMBOL(oops_in_progress); /* - * console_sem protects the console_drivers list, and also - * provides serialisation for access to the entire console - * driver system. + * console_mutex protects console_list updates and console->flags updates. + * The flags are synchronized only for consoles that are registered, i.e. + * accessible via the console list. */ -static DEFINE_SEMAPHORE(console_sem); -struct console *console_drivers; -EXPORT_SYMBOL_GPL(console_drivers); +static DEFINE_MUTEX(console_mutex); + +/* + * console_sem protects updates to console->seq + * and also provides serialization for console printing. + */ +static DEFINE_SEMAPHORE(console_sem, 1); +HLIST_HEAD(console_list); +EXPORT_SYMBOL_GPL(console_list); +DEFINE_STATIC_SRCU(console_srcu); /* * System may need to suppress printk message under certain @@ -97,6 +107,20 @@ int __read_mostly suppress_printk; static struct lockdep_map console_lock_dep_map = { .name = "console_lock" }; + +void lockdep_assert_console_list_lock_held(void) +{ + lockdep_assert_held(&console_mutex); +} +EXPORT_SYMBOL(lockdep_assert_console_list_lock_held); +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +bool console_srcu_read_lock_is_held(void) +{ + return srcu_read_lock_held(&console_srcu); +} +EXPORT_SYMBOL(console_srcu_read_lock_is_held); #endif enum devkmsg_log_bits { @@ -146,16 +170,18 @@ static int __control_devkmsg(char *str) static int __init control_devkmsg(char *str) { - if (__control_devkmsg(str) < 0) + if (__control_devkmsg(str) < 0) { + pr_warn("printk.devkmsg: bad option string '%s'\n", str); return 1; + } /* * Set sysctl string accordingly: */ if (devkmsg_log == DEVKMSG_LOG_MASK_ON) - strcpy(devkmsg_log_str, "on"); + strscpy(devkmsg_log_str, "on"); else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF) - strcpy(devkmsg_log_str, "off"); + strscpy(devkmsg_log_str, "off"); /* else "ratelimit" which is set by default. 
*/ /* @@ -166,13 +192,13 @@ static int __init control_devkmsg(char *str) */ devkmsg_log |= DEVKMSG_LOG_MASK_LOCK; - return 0; + return 1; } __setup("printk.devkmsg=", control_devkmsg); char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit"; - -int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, +#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) +int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { char old_str[DEVKMSG_STR_MAX_SIZE]; @@ -184,7 +210,7 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, return -EINVAL; old = devkmsg_log; - strncpy(old_str, devkmsg_log_str, DEVKMSG_STR_MAX_SIZE); + strscpy(old_str, devkmsg_log_str); } err = proc_dostring(table, write, buffer, lenp, ppos); @@ -202,7 +228,7 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, /* ... and restore old setting. */ devkmsg_log = old; - strncpy(devkmsg_log_str, old_str, DEVKMSG_STR_MAX_SIZE); + strscpy(devkmsg_log_str, old_str); return -EINVAL; } @@ -210,9 +236,72 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, return 0; } +#endif /* CONFIG_PRINTK && CONFIG_SYSCTL */ + +/** + * console_list_lock - Lock the console list + * + * For console list or console->flags updates + */ +void console_list_lock(void) +{ + /* + * In unregister_console() and console_force_preferred_locked(), + * synchronize_srcu() is called with the console_list_lock held. + * Therefore it is not allowed that the console_list_lock is taken + * with the srcu_lock held. + * + * Detecting if this context is really in the read-side critical + * section is only possible if the appropriate debug options are + * enabled. + */ + WARN_ON_ONCE(debug_lockdep_rcu_enabled() && + srcu_read_lock_held(&console_srcu)); + + mutex_lock(&console_mutex); +} +EXPORT_SYMBOL(console_list_lock); + +/** + * console_list_unlock - Unlock the console list + * + * Counterpart to console_list_lock() + */ +void console_list_unlock(void) +{ + mutex_unlock(&console_mutex); +} +EXPORT_SYMBOL(console_list_unlock); + +/** + * console_srcu_read_lock - Register a new reader for the + * SRCU-protected console list + * + * Use for_each_console_srcu() to iterate the console list + * + * Context: Any context. + * Return: A cookie to pass to console_srcu_read_unlock(). + */ +int console_srcu_read_lock(void) + __acquires(&console_srcu) +{ + return srcu_read_lock_nmisafe(&console_srcu); +} +EXPORT_SYMBOL(console_srcu_read_lock); -/* Number of registered extended console drivers. */ -static int nr_ext_console_drivers; +/** + * console_srcu_read_unlock - Unregister an old reader from + * the SRCU-protected console list + * @cookie: cookie returned from console_srcu_read_lock() + * + * Counterpart to console_srcu_read_lock() + */ +void console_srcu_read_unlock(int cookie) + __releases(&console_srcu) +{ + srcu_read_unlock_nmisafe(&console_srcu, cookie); +} +EXPORT_SYMBOL(console_srcu_read_unlock); /* * Helper macros to handle lockdep when locking/unlocking console_sem. We use @@ -256,20 +345,43 @@ static void __up_console_sem(unsigned long ip) } #define up_console_sem() __up_console_sem(_RET_IP_) +static bool panic_in_progress(void) +{ + return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); +} + +/* Return true if a panic is in progress on the current CPU. */ +bool this_cpu_in_panic(void) +{ + /* + * We can use raw_smp_processor_id() here because it is impossible for + * the task to be migrated to the panic_cpu, or away from it. 
If + * panic_cpu has already been set, and we're not currently executing on + * that CPU, then we never will be. + */ + return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id()); +} + +/* + * Return true if a panic is in progress on a remote CPU. + * + * On true, the local CPU should immediately release any printing resources + * that may be needed by the panic CPU. + */ +bool other_cpu_in_panic(void) +{ + return (panic_in_progress() && !this_cpu_in_panic()); +} + /* * This is used for debugging the mess that is the VT code by * keeping track if we have the console semaphore held. It's * definitely not the perfect debug tool (we don't know if _WE_ * hold it and are racing, but it helps tracking those weird code * paths in the console code where we end up in places I want - * locked without the console sempahore held). - */ -static int console_locked, console_suspended; - -/* - * If exclusive_console is non-NULL then only this console is to be printed to. + * locked without the console semaphore held). */ -static struct console *exclusive_console; +static int console_locked; /* * Array of consoles built from command line options (console=) @@ -280,7 +392,6 @@ static struct console *exclusive_console; static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; static int preferred_console = -1; -static bool has_preferred_console; int console_set_on_cmdline; EXPORT_SYMBOL(console_set_on_cmdline); @@ -350,67 +461,61 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; * non-prinatable characters are escaped in the "\xff" notation. */ -enum log_flags { - LOG_NEWLINE = 2, /* text ended with a newline */ - LOG_CONT = 8, /* text is a fragment of a continuation line */ -}; +/* syslog_lock protects syslog_* variables and write access to clear_seq. */ +static DEFINE_MUTEX(syslog_lock); /* - * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken - * within the scheduler's rq lock. It must be released before calling - * console_unlock() or anything else that might wake up a process. + * Specifies if a legacy console is registered. If legacy consoles are + * present, it is necessary to perform the console lock/unlock dance + * whenever console flushing should occur. */ -DEFINE_RAW_SPINLOCK(logbuf_lock); +bool have_legacy_console; /* - * Helper macros to lock/unlock logbuf_lock and switch between - * printk-safe/unsafe modes. + * Specifies if an nbcon console is registered. If nbcon consoles are present, + * synchronous printing of legacy consoles will not occur during panic until + * the backtrace has been stored to the ringbuffer. */ -#define logbuf_lock_irq() \ - do { \ - printk_safe_enter_irq(); \ - raw_spin_lock(&logbuf_lock); \ - } while (0) - -#define logbuf_unlock_irq() \ - do { \ - raw_spin_unlock(&logbuf_lock); \ - printk_safe_exit_irq(); \ - } while (0) +bool have_nbcon_console; -#define logbuf_lock_irqsave(flags) \ - do { \ - printk_safe_enter_irqsave(flags); \ - raw_spin_lock(&logbuf_lock); \ - } while (0) +/* + * Specifies if a boot console is registered. If boot consoles are present, + * nbcon consoles cannot print simultaneously and must be synchronized by + * the console lock. This is because boot consoles and nbcon consoles may + * have mapped the same hardware. + */ +bool have_boot_console; -#define logbuf_unlock_irqrestore(flags) \ - do { \ - raw_spin_unlock(&logbuf_lock); \ - printk_safe_exit_irqrestore(flags); \ - } while (0) +/* See printk_legacy_allow_panic_sync() for details. 
*/ +bool legacy_allow_panic_sync; #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); +static DECLARE_WAIT_QUEUE_HEAD(legacy_wait); +/* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ static u64 syslog_seq; static size_t syslog_partial; static bool syslog_time; -/* the next printk record to write to the console */ -static u64 console_seq; -static u64 exclusive_console_stop_seq; -static unsigned long console_dropped; +/* True when _all_ printer threads are available for printing. */ +bool printk_kthreads_running; -/* the next printk record to read after the last 'clear' command */ -static u64 clear_seq; +struct latched_seq { + seqcount_latch_t latch; + u64 val[2]; +}; -#ifdef CONFIG_PRINTK_CALLER -#define PREFIX_MAX 48 -#else -#define PREFIX_MAX 32 -#endif -#define LOG_LINE_MAX (1024 - PREFIX_MAX) +/* + * The next printk record to read after the last 'clear' command. There are + * two copies (updated with seqcount_latch) so that reads can locklessly + * access a valid value. Writers are synchronized by @syslog_lock. + */ +static struct latched_seq clear_seq = { + .latch = SEQCNT_LATCH_ZERO(clear_seq.latch), + .val[0] = 0, + .val[1] = 0, +}; #define LOG_LEVEL(v) ((v) & 0x07) #define LOG_FACILITY(v) ((v) >> 3 & 0xff) @@ -418,7 +523,7 @@ static u64 clear_seq; /* record buffer */ #define LOG_ALIGN __alignof__(unsigned long) #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) -#define LOG_BUF_LEN_MAX (u32)(1 << 31) +#define LOG_BUF_LEN_MAX ((u32)1 << 31) static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; @@ -438,20 +543,46 @@ _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, static struct printk_ringbuffer printk_rb_dynamic; -static struct printk_ringbuffer *prb = &printk_rb_static; +struct printk_ringbuffer *prb = &printk_rb_static; /* * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before * per_cpu_areas are initialised. This variable is set to true when * it's safe to access per-CPU data. */ -static bool __printk_percpu_data_ready __read_mostly; +static bool __printk_percpu_data_ready __ro_after_init; bool printk_percpu_data_ready(void) { return __printk_percpu_data_ready; } +/* Must be called under syslog_lock. */ +static void latched_seq_write(struct latched_seq *ls, u64 val) +{ + write_seqcount_latch_begin(&ls->latch); + ls->val[0] = val; + write_seqcount_latch(&ls->latch); + ls->val[1] = val; + write_seqcount_latch_end(&ls->latch); +} + +/* Can be called from any context. */ +static u64 latched_seq_read_nolock(struct latched_seq *ls) +{ + unsigned int seq; + unsigned int idx; + u64 val; + + do { + seq = read_seqcount_latch(&ls->latch); + idx = seq & 0x1; + val = ls->val[idx]; + } while (read_seqcount_latch_retry(&ls->latch, seq)); + + return val; +} + /* Return log buffer address */ char *log_buf_addr_get(void) { @@ -517,17 +648,6 @@ static int check_syslog_permissions(int type, int source) if (syslog_action_restricted(type)) { if (capable(CAP_SYSLOG)) goto ok; - /* - * For historical reasons, accept CAP_SYS_ADMIN too, with - * a warning. 
- */ - if (capable(CAP_SYS_ADMIN)) { - pr_warn_once("%s (%d): Attempt to access syslog with " - "CAP_SYS_ADMIN but no CAP_SYSLOG " - "(deprecated).\n", - current->comm, task_pid_nr(current)); - goto ok; - } return -EPERM; } ok: @@ -619,14 +739,10 @@ out: /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { - u64 seq; + atomic64_t seq; struct ratelimit_state rs; struct mutex lock; - char buf[CONSOLE_EXT_LOG_MAX]; - - struct printk_info info; - char text_buf[CONSOLE_EXT_LOG_MAX]; - struct printk_record record; + struct printk_buffers pbufs; }; static __printf(3, 4) __cold @@ -652,7 +768,7 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) size_t len = iov_iter_count(from); ssize_t ret = len; - if (!user || len > LOG_LINE_MAX) + if (len > PRINTKRB_RECORD_MAX) return -EINVAL; /* Ignore when user logging is disabled. */ @@ -708,59 +824,58 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct devkmsg_user *user = file->private_data; - struct printk_record *r = &user->record; - size_t len; + char *outbuf = &user->pbufs.outbuf[0]; + struct printk_message pmsg = { + .pbufs = &user->pbufs, + }; ssize_t ret; - if (!user) - return -EBADF; - ret = mutex_lock_interruptible(&user->lock); if (ret) return ret; - logbuf_lock_irq(); - if (!prb_read_valid(prb, user->seq, r)) { + if (!printk_get_next_message(&pmsg, atomic64_read(&user->seq), true, false)) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; - logbuf_unlock_irq(); goto out; } - logbuf_unlock_irq(); + /* + * Guarantee this task is visible on the waitqueue before + * checking the wake condition. + * + * The full memory barrier within set_current_state() of + * prepare_to_wait_event() pairs with the full memory barrier + * within wq_has_sleeper(). + * + * This pairs with __wake_up_klogd:A. + */ ret = wait_event_interruptible(log_wait, - prb_read_valid(prb, user->seq, r)); + printk_get_next_message(&pmsg, atomic64_read(&user->seq), true, + false)); /* LMM(devkmsg_read:A) */ if (ret) goto out; - logbuf_lock_irq(); } - if (user->seq < prb_first_valid_seq(prb)) { + if (pmsg.dropped) { /* our last seen message is gone, return error and reset */ - user->seq = prb_first_valid_seq(prb); + atomic64_set(&user->seq, pmsg.seq); ret = -EPIPE; - logbuf_unlock_irq(); goto out; } - len = info_print_ext_header(user->buf, sizeof(user->buf), r->info); - len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len, - &r->text_buf[0], r->info->text_len, - &r->info->dev_info); - - user->seq = r->info->seq + 1; - logbuf_unlock_irq(); + atomic64_set(&user->seq, pmsg.seq + 1); - if (len > count) { + if (pmsg.outbuf_len > count) { ret = -EINVAL; goto out; } - if (copy_to_user(buf, user->buf, len)) { + if (copy_to_user(buf, outbuf, pmsg.outbuf_len)) { ret = -EFAULT; goto out; } - ret = len; + ret = pmsg.outbuf_len; out: mutex_unlock(&user->lock); return ret; @@ -779,16 +894,13 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) struct devkmsg_user *user = file->private_data; loff_t ret = 0; - if (!user) - return -EBADF; if (offset) return -ESPIPE; - logbuf_lock_irq(); switch (whence) { case SEEK_SET: /* the first record */ - user->seq = prb_first_valid_seq(prb); + atomic64_set(&user->seq, prb_first_valid_seq(prb)); break; case SEEK_DATA: /* @@ -796,38 +908,33 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) * like issued by 'dmesg -c'. 
Reading /dev/kmsg itself * changes no global state, and does not clear anything. */ - user->seq = clear_seq; + atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq)); break; case SEEK_END: /* after the last record */ - user->seq = prb_next_seq(prb); + atomic64_set(&user->seq, prb_next_seq(prb)); break; default: ret = -EINVAL; } - logbuf_unlock_irq(); return ret; } static __poll_t devkmsg_poll(struct file *file, poll_table *wait) { struct devkmsg_user *user = file->private_data; + struct printk_info info; __poll_t ret = 0; - if (!user) - return EPOLLERR|EPOLLNVAL; - poll_wait(file, &log_wait, wait); - logbuf_lock_irq(); - if (prb_read_valid(prb, user->seq, NULL)) { + if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { /* return error when data has vanished underneath us */ - if (user->seq < prb_first_valid_seq(prb)) + if (info.seq != atomic64_read(&user->seq)) ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; else ret = EPOLLIN|EPOLLRDNORM; } - logbuf_unlock_irq(); return ret; } @@ -848,7 +955,7 @@ static int devkmsg_open(struct inode *inode, struct file *file) return err; } - user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); + user = kvmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); if (!user) return -ENOMEM; @@ -857,12 +964,7 @@ static int devkmsg_open(struct inode *inode, struct file *file) mutex_init(&user->lock); - prb_rec_init_rd(&user->record, &user->info, - &user->text_buf[0], sizeof(user->text_buf)); - - logbuf_lock_irq(); - user->seq = prb_first_valid_seq(prb); - logbuf_unlock_irq(); + atomic64_set(&user->seq, prb_first_valid_seq(prb)); file->private_data = user; return 0; @@ -872,13 +974,10 @@ static int devkmsg_release(struct inode *inode, struct file *file) { struct devkmsg_user *user = file->private_data; - if (!user) - return 0; - ratelimit_state_exit(&user->rs); mutex_destroy(&user->lock); - kfree(user); + kvfree(user); return 0; } @@ -891,7 +990,7 @@ const struct file_operations kmsg_fops = { .release = devkmsg_release, }; -#ifdef CONFIG_CRASH_CORE +#ifdef CONFIG_VMCORE_INFO /* * This appends the listed symbols to /proc/vmcore * @@ -954,6 +1053,9 @@ void log_buf_vmcoreinfo_setup(void) VMCOREINFO_SIZE(atomic_long_t); VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter); + + VMCOREINFO_STRUCT_SIZE(latched_seq); + VMCOREINFO_OFFSET(latched_seq, val); } #endif @@ -1025,9 +1127,6 @@ static inline void log_buf_add_cpu(void) {} static void __init set_percpu_data_ready(void) { - printk_safe_init(); - /* Make sure we set this flag only after printk_safe() init is done */ - barrier(); __printk_percpu_data_ready = true; } @@ -1056,7 +1155,18 @@ static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, return prb_record_text_space(&e); } -static char setup_text_buf[LOG_LINE_MAX] __initdata; +static char setup_text_buf[PRINTKRB_RECORD_MAX] __initdata; + +static void print_log_buf_usage_stats(void) +{ + unsigned int descs_count = log_buf_len >> PRB_AVGBITS; + size_t meta_data_size; + + meta_data_size = descs_count * (sizeof(struct prb_desc) + sizeof(struct printk_info)); + + pr_info("log buffer data + meta data: %u + %zu = %zu bytes\n", + log_buf_len, meta_data_size, log_buf_len + meta_data_size); +} void __init setup_log_buf(int early) { @@ -1065,6 +1175,7 @@ void __init setup_log_buf(int early) struct prb_desc *new_descs; struct printk_info info; struct printk_record r; + unsigned int text_size; size_t new_descs_size; size_t new_infos_size; unsigned long flags; @@ -1086,20 +1197,25 @@ void __init setup_log_buf(int early) if (!early && !new_log_buf_len) 
log_buf_add_cpu(); - if (!new_log_buf_len) + if (!new_log_buf_len) { + /* Show the memory stats only once. */ + if (!early) + goto out; + return; + } new_descs_count = new_log_buf_len >> PRB_AVGBITS; if (new_descs_count == 0) { pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len); - return; + goto out; } new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); if (unlikely(!new_log_buf)) { pr_err("log_buf_len: %lu text bytes not available\n", new_log_buf_len); - return; + goto out; } new_descs_size = new_descs_count * sizeof(struct prb_desc); @@ -1125,39 +1241,54 @@ void __init setup_log_buf(int early) new_descs, ilog2(new_descs_count), new_infos); - printk_safe_enter_irqsave(flags); + local_irq_save(flags); log_buf_len = new_log_buf_len; log_buf = new_log_buf; new_log_buf_len = 0; free = __LOG_BUF_LEN; - prb_for_each_record(0, &printk_rb_static, seq, &r) - free -= add_to_rb(&printk_rb_dynamic, &r); + prb_for_each_record(0, &printk_rb_static, seq, &r) { + text_size = add_to_rb(&printk_rb_dynamic, &r); + if (text_size > free) + free = 0; + else + free -= text_size; + } - /* - * This is early enough that everything is still running on the - * boot CPU and interrupts are disabled. So no new messages will - * appear during the transition to the dynamic buffer. - */ prb = &printk_rb_dynamic; - printk_safe_exit_irqrestore(flags); + local_irq_restore(flags); + + /* + * Copy any remaining messages that might have appeared from + * NMI context after copying but before switching to the + * dynamic buffer. + */ + prb_for_each_record(seq, &printk_rb_static, seq, &r) { + text_size = add_to_rb(&printk_rb_dynamic, &r); + if (text_size > free) + free = 0; + else + free -= text_size; + } if (seq != prb_next_seq(&printk_rb_static)) { pr_err("dropped %llu messages\n", prb_next_seq(&printk_rb_static) - seq); } - pr_info("log_buf_len: %u bytes\n", log_buf_len); + print_log_buf_usage_stats(); pr_info("early log buf free: %u(%u%%)\n", free, (free * 100) / __LOG_BUF_LEN); return; err_free_descs: - memblock_free(__pa(new_descs), new_descs_size); + memblock_free(new_descs, new_descs_size); err_free_log_buf: - memblock_free(__pa(new_log_buf), new_log_buf_len); + memblock_free(new_log_buf, new_log_buf_len); +out: + print_log_buf_usage_stats(); } static bool __read_mostly ignore_loglevel; @@ -1207,11 +1338,11 @@ static void boot_delay_msec(int level) { unsigned long long k; unsigned long timeout; + bool suppress = !is_printk_force_console() && + suppress_message_printing(level); - if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) - || suppress_message_printing(level)) { + if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) || suppress) return; - } k = (unsigned long long)loops_per_msec * boot_delay; @@ -1308,7 +1439,7 @@ static size_t record_print_text(struct printk_record *r, bool syslog, size_t text_len = r->info->text_len; size_t buf_size = r->text_buf_size; char *text = r->text_buf; - char prefix[PREFIX_MAX]; + char prefix[PRINTK_PREFIX_MAX]; bool truncated = false; size_t prefix_len; size_t line_len; @@ -1407,7 +1538,7 @@ static size_t get_record_print_text_size(struct printk_info *info, unsigned int line_count, bool syslog, bool time) { - char prefix[PREFIX_MAX]; + char prefix[PRINTK_PREFIX_MAX]; size_t prefix_len; prefix_len = info_print_prefix(info, syslog, time, prefix); @@ -1420,28 +1551,105 @@ static size_t get_record_print_text_size(struct printk_info *info, return ((prefix_len * line_count) + info->text_len + 1); } +/* + * Beginning with @start_seq, find the first record where it and all 
following + * records up to (but not including) @max_seq fit into @size. + * + * @max_seq is simply an upper bound and does not need to exist. If the caller + * does not require an upper bound, -1 can be used for @max_seq. + */ +static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size, + bool syslog, bool time) +{ + struct printk_info info; + unsigned int line_count; + size_t len = 0; + u64 seq; + + /* Determine the size of the records up to @max_seq. */ + prb_for_each_info(start_seq, prb, seq, &info, &line_count) { + if (info.seq >= max_seq) + break; + len += get_record_print_text_size(&info, line_count, syslog, time); + } + + /* + * Adjust the upper bound for the next loop to avoid subtracting + * lengths that were never added. + */ + if (seq < max_seq) + max_seq = seq; + + /* + * Move first record forward until length fits into the buffer. Ignore + * newest messages that were not counted in the above cycle. Messages + * might appear and get lost in the meantime. This is a best effort + * that prevents an infinite loop that could occur with a retry. + */ + prb_for_each_info(start_seq, prb, seq, &info, &line_count) { + if (len <= size || info.seq >= max_seq) + break; + len -= get_record_print_text_size(&info, line_count, syslog, time); + } + + return seq; +} + +/* The caller is responsible for making sure @size is greater than 0. */ static int syslog_print(char __user *buf, int size) { struct printk_info info; struct printk_record r; char *text; int len = 0; + u64 seq; - text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); + text = kmalloc(PRINTK_MESSAGE_MAX, GFP_KERNEL); if (!text) return -ENOMEM; - prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); + prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX); + + mutex_lock(&syslog_lock); + + /* + * Wait for the @syslog_seq record to be available. @syslog_seq may + * change while waiting. + */ + do { + seq = syslog_seq; + + mutex_unlock(&syslog_lock); + /* + * Guarantee this task is visible on the waitqueue before + * checking the wake condition. + * + * The full memory barrier within set_current_state() of + * prepare_to_wait_event() pairs with the full memory barrier + * within wq_has_sleeper(). + * + * This pairs with __wake_up_klogd:A. + */ + len = wait_event_interruptible(log_wait, + prb_read_valid(prb, seq, NULL)); /* LMM(syslog_print:A) */ + mutex_lock(&syslog_lock); + + if (len) + goto out; + } while (syslog_seq != seq); - while (size > 0) { + /* + * Copy records that fit into the buffer. The above cycle makes sure + * that the first record is always available. 
+ */ + do { size_t n; size_t skip; + int err; - logbuf_lock_irq(); - if (!prb_read_valid(prb, syslog_seq, &r)) { - logbuf_unlock_irq(); + if (!prb_read_valid(prb, syslog_seq, &r)) break; - } + if (r.info->seq != syslog_seq) { /* message is gone, move to next valid one */ syslog_seq = r.info->seq; @@ -1468,12 +1676,15 @@ static int syslog_print(char __user *buf, int size) syslog_partial += n; } else n = 0; - logbuf_unlock_irq(); if (!n) break; - if (copy_to_user(buf, text + skip, n)) { + mutex_unlock(&syslog_lock); + err = copy_to_user(buf, text + skip, n); + mutex_lock(&syslog_lock); + + if (err) { if (!len) len = -EFAULT; break; @@ -1482,8 +1693,9 @@ static int syslog_print(char __user *buf, int size) len += n; size -= n; buf += n; - } - + } while (size); +out: + mutex_unlock(&syslog_lock); kfree(text); return len; } @@ -1491,36 +1703,26 @@ static int syslog_print(char __user *buf, int size) static int syslog_print_all(char __user *buf, int size, bool clear) { struct printk_info info; - unsigned int line_count; struct printk_record r; char *text; int len = 0; u64 seq; bool time; - text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); + text = kmalloc(PRINTK_MESSAGE_MAX, GFP_KERNEL); if (!text) return -ENOMEM; time = printk_time; - logbuf_lock_irq(); /* * Find first record that fits, including all following records, * into the user-provided buffer for this dump. */ - prb_for_each_info(clear_seq, prb, seq, &info, &line_count) - len += get_record_print_text_size(&info, line_count, true, time); - - /* move first record forward until length fits into the buffer */ - prb_for_each_info(clear_seq, prb, seq, &info, &line_count) { - if (len <= size) - break; - len -= get_record_print_text_size(&info, line_count, true, time); - } + seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1, + size, true, time); - prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); + prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX); - len = 0; prb_for_each_record(seq, prb, seq, &r) { int textlen; @@ -1531,20 +1733,20 @@ static int syslog_print_all(char __user *buf, int size, bool clear) break; } - logbuf_unlock_irq(); if (copy_to_user(buf + len, text, textlen)) len = -EFAULT; else len += textlen; - logbuf_lock_irq(); if (len < 0) break; } - if (clear) - clear_seq = seq; - logbuf_unlock_irq(); + if (clear) { + mutex_lock(&syslog_lock); + latched_seq_write(&clear_seq, seq); + mutex_unlock(&syslog_lock); + } kfree(text); return len; @@ -1552,13 +1754,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) static void syslog_clear(void) { - logbuf_lock_irq(); - clear_seq = prb_next_seq(prb); - logbuf_unlock_irq(); + mutex_lock(&syslog_lock); + latched_seq_write(&clear_seq, prb_next_seq(prb)); + mutex_unlock(&syslog_lock); } int do_syslog(int type, char __user *buf, int len, int source) { + struct printk_info info; bool clear = false; static int saved_console_loglevel = LOGLEVEL_DEFAULT; int error; @@ -1579,10 +1782,6 @@ int do_syslog(int type, char __user *buf, int len, int source) return 0; if (!access_ok(buf, len)) return -EFAULT; - error = wait_event_interruptible(log_wait, - prb_read_valid(prb, syslog_seq, NULL)); - if (error) - return error; error = syslog_print(buf, len); break; /* Read/clear last kernel messages */ @@ -1628,10 +1827,15 @@ int do_syslog(int type, char __user *buf, int len, int source) break; /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: - logbuf_lock_irq(); - if (syslog_seq < prb_first_valid_seq(prb)) { + mutex_lock(&syslog_lock); 
+ if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { + /* No unread messages. */ + mutex_unlock(&syslog_lock); + return 0; + } + if (info.seq != syslog_seq) { /* messages are gone, move to first one */ - syslog_seq = prb_first_valid_seq(prb); + syslog_seq = info.seq; syslog_partial = 0; } if (source == SYSLOG_FROM_PROC) { @@ -1643,7 +1847,6 @@ int do_syslog(int type, char __user *buf, int len, int source) error = prb_next_seq(prb) - syslog_seq; } else { bool time = syslog_partial ? syslog_time : printk_time; - struct printk_info info; unsigned int line_count; u64 seq; @@ -1655,7 +1858,7 @@ int do_syslog(int type, char __user *buf, int len, int source) } error -= syslog_partial; } - logbuf_unlock_irq(); + mutex_unlock(&syslog_lock); break; /* Size of the log buffer */ case SYSLOG_ACTION_SIZE_BUFFER: @@ -1698,12 +1901,25 @@ static bool console_waiter; * there may be a waiter spinning (like a spinlock). Also it must be * ready to hand over the lock at the end of the section. */ -static void console_lock_spinning_enable(void) +void console_lock_spinning_enable(void) { + /* + * Do not use spinning in panic(). The panic CPU wants to keep the lock. + * Non-panic CPUs abandon the flush anyway. + * + * Just keep the lockdep annotation. The panic-CPU should avoid + * taking console_owner_lock because it might cause a deadlock. + * This looks like the easiest way how to prevent false lockdep + * reports without handling races a lockless way. + */ + if (panic_in_progress()) + goto lockdep; + raw_spin_lock(&console_owner_lock); console_owner = current; raw_spin_unlock(&console_owner_lock); +lockdep: /* The waiter may spin on us after setting console_owner */ spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); } @@ -1711,22 +1927,39 @@ static void console_lock_spinning_enable(void) /** * console_lock_spinning_disable_and_check - mark end of code where another * thread was able to busy wait and check if there is a waiter + * @cookie: cookie returned from console_srcu_read_lock() * * This is called at the end of the section where spinning is allowed. * It has two functions. First, it is a signal that it is no longer * safe to start busy waiting for the lock. Second, it checks if * there is a busy waiter and passes the lock rights to her. * - * Important: Callers lose the lock if there was a busy waiter. - * They must not touch items synchronized by console_lock - * in this case. + * Important: Callers lose both the console_lock and the SRCU read lock if + * there was a busy waiter. They must not touch items synchronized by + * console_lock or SRCU read lock in this case. * * Return: 1 if the lock rights were passed, 0 otherwise. */ -static int console_lock_spinning_disable_and_check(void) +int console_lock_spinning_disable_and_check(int cookie) { int waiter; + /* + * Ignore spinning waiters during panic() because they might get stopped + * or blocked at any time, + * + * It is safe because nobody is allowed to start spinning during panic + * in the first place. If there has been a waiter then non panic CPUs + * might stay spinning. They would get stopped anyway. The panic context + * will never start spinning and an interrupted spin on panic CPU will + * never continue. + */ + if (panic_in_progress()) { + /* Keep lockdep happy. 
*/ + spin_release(&console_owner_dep_map, _THIS_IP_); + return 0; + } + raw_spin_lock(&console_owner_lock); waiter = READ_ONCE(console_waiter); console_owner = NULL; @@ -1743,6 +1976,12 @@ static int console_lock_spinning_disable_and_check(void) spin_release(&console_owner_dep_map, _THIS_IP_); /* + * Preserve lockdep lock ordering. Release the SRCU read lock before + * releasing the console_lock. + */ + console_srcu_read_unlock(cookie); + + /* * Hand off console_lock to waiter. The waiter will perform * the up(). After this, the waiter is the console_lock owner. */ @@ -1770,6 +2009,16 @@ static int console_trylock_spinning(void) if (console_trylock()) return 1; + /* + * It's unsafe to spin once a panic has begun. If we are the + * panic CPU, we may have already halted the owner of the + * console_sem. If we are not the panic CPU, then we should + * avoid taking console_sem, so the panic CPU has a better + * chance of cleanly acquiring it later. + */ + if (panic_in_progress()) + return 0; + printk_safe_enter_irqsave(flags); raw_spin_lock(&console_owner_lock); @@ -1811,57 +2060,91 @@ static int console_trylock_spinning(void) */ mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); + /* + * Update @console_may_schedule for trylock because the previous + * owner may have been schedulable. + */ + console_may_schedule = 0; + return 1; } /* - * Call the console drivers, asking them to write out - * log_buf[start] to log_buf[end - 1]. - * The console_lock must be held. + * Recursion is tracked separately on each CPU. If NMIs are supported, an + * additional NMI context per CPU is also separately tracked. Until per-CPU + * is available, a separate "early tracking" is performed. */ -static void call_console_drivers(const char *ext_text, size_t ext_len, - const char *text, size_t len) -{ - static char dropped_text[64]; - size_t dropped_len = 0; - struct console *con; - - trace_console_rcuidle(text, len); - - if (!console_drivers) - return; +static DEFINE_PER_CPU(u8, printk_count); +static u8 printk_count_early; +#ifdef CONFIG_HAVE_NMI +static DEFINE_PER_CPU(u8, printk_count_nmi); +static u8 printk_count_nmi_early; +#endif - if (console_dropped) { - dropped_len = snprintf(dropped_text, sizeof(dropped_text), - "** %lu printk messages dropped **\n", - console_dropped); - console_dropped = 0; - } +/* + * Recursion is limited to keep the output sane. printk() should not require + * more than 1 level of recursion (allowing, for example, printk() to trigger + * a WARN), but a higher value is used in case some printk-internal errors + * exist, such as the ringbuffer validation checks failing. + */ +#define PRINTK_MAX_RECURSION 3 - for_each_console(con) { - if (exclusive_console && con != exclusive_console) - continue; - if (!(con->flags & CON_ENABLED)) - continue; - if (!con->write) - continue; - if (!cpu_online(smp_processor_id()) && - !(con->flags & CON_ANYTIME)) - continue; - if (con->flags & CON_EXTENDED) - con->write(con, ext_text, ext_len); - else { - if (dropped_len) - con->write(con, dropped_text, dropped_len); - con->write(con, text, len); - } +/* + * Return a pointer to the dedicated counter for the CPU+context of the + * caller. + */ +static u8 *__printk_recursion_counter(void) +{ +#ifdef CONFIG_HAVE_NMI + if (in_nmi()) { + if (printk_percpu_data_ready()) + return this_cpu_ptr(&printk_count_nmi); + return &printk_count_nmi_early; } +#endif + if (printk_percpu_data_ready()) + return this_cpu_ptr(&printk_count); + return &printk_count_early; } +/* + * Enter recursion tracking. 
Interrupts are disabled to simplify tracking. + * The caller must check the boolean return value to see if the recursion is + * allowed. On failure, interrupts are not disabled. + * + * @recursion_ptr must be a variable of type (u8 *) and is the same variable + * that is passed to printk_exit_irqrestore(). + */ +#define printk_enter_irqsave(recursion_ptr, flags) \ +({ \ + bool success = true; \ + \ + typecheck(u8 *, recursion_ptr); \ + local_irq_save(flags); \ + (recursion_ptr) = __printk_recursion_counter(); \ + if (*(recursion_ptr) > PRINTK_MAX_RECURSION) { \ + local_irq_restore(flags); \ + success = false; \ + } else { \ + (*(recursion_ptr))++; \ + } \ + success; \ +}) + +/* Exit recursion tracking, restoring interrupts. */ +#define printk_exit_irqrestore(recursion_ptr, flags) \ + do { \ + typecheck(u8 *, recursion_ptr); \ + (*(recursion_ptr))--; \ + local_irq_restore(flags); \ + } while (0) + int printk_delay_msec __read_mostly; -static inline void printk_delay(void) +static inline void printk_delay(int level) { + boot_delay_msec(level); + if (unlikely(printk_delay_msec)) { int m = printk_delay_msec; @@ -1875,27 +2158,28 @@ static inline void printk_delay(void) static inline u32 printk_caller_id(void) { return in_task() ? task_pid_nr(current) : - 0x80000000 + raw_smp_processor_id(); + 0x80000000 + smp_processor_id(); } /** - * parse_prefix - Parse level and control flags. + * printk_parse_prefix - Parse level and control flags. * * @text: The terminated text message. * @level: A pointer to the current level value, will be updated. - * @lflags: A pointer to the current log flags, will be updated. + * @flags: A pointer to the current printk_info flags, will be updated. * * @level may be NULL if the caller is not interested in the parsed value. * Otherwise the variable pointed to by @level must be set to * LOGLEVEL_DEFAULT in order to be updated with the parsed value. * - * @lflags may be NULL if the caller is not interested in the parsed value. - * Otherwise the variable pointed to by @lflags will be OR'd with the parsed + * @flags may be NULL if the caller is not interested in the parsed value. + * Otherwise the variable pointed to by @flags will be OR'd with the parsed * value. * * Return: The length of the parsed level and control flags. */ -static u16 parse_prefix(char *text, int *level, enum log_flags *lflags) +u16 printk_parse_prefix(const char *text, int *level, + enum printk_info_flags *flags) { u16 prefix_len = 0; int kern_level; @@ -1911,8 +2195,8 @@ static u16 parse_prefix(char *text, int *level, enum log_flags *lflags) *level = kern_level - '0'; break; case 'c': /* KERN_CONT */ - if (lflags) - *lflags |= LOG_CONT; + if (flags) + *flags |= LOG_CONT; } prefix_len += 2; @@ -1922,8 +2206,10 @@ static u16 parse_prefix(char *text, int *level, enum log_flags *lflags) return prefix_len; } -static u16 printk_sprint(char *text, u16 size, int facility, enum log_flags *lflags, - const char *fmt, va_list args) +__printf(5, 0) +static u16 printk_sprint(char *text, u16 size, int facility, + enum printk_info_flags *flags, const char *fmt, + va_list args) { u16 text_len; @@ -1932,20 +2218,22 @@ static u16 printk_sprint(char *text, u16 size, int facility, enum log_flags *lfl /* Mark and strip a trailing newline. */ if (text_len && text[text_len - 1] == '\n') { text_len--; - *lflags |= LOG_NEWLINE; + *flags |= LOG_NEWLINE; } /* Strip log level and control flags. 
*/ if (facility == 0) { u16 prefix_len; - prefix_len = parse_prefix(text, NULL, NULL); + prefix_len = printk_parse_prefix(text, NULL, NULL); if (prefix_len) { text_len -= prefix_len; memmove(text, text + prefix_len, text_len); } } + trace_console(text, text_len); + return text_len; } @@ -1954,17 +2242,23 @@ int vprintk_store(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args) { - const u32 caller_id = printk_caller_id(); struct prb_reserved_entry e; - enum log_flags lflags = 0; + enum printk_info_flags flags = 0; struct printk_record r; + unsigned long irqflags; u16 trunc_msg_len = 0; char prefix_buf[8]; + u8 *recursion_ptr; u16 reserve_size; va_list args2; + u32 caller_id; u16 text_len; + int ret = 0; u64 ts_nsec; + if (!printk_enter_irqsave(recursion_ptr, irqflags)) + return 0; + /* * Since the duration of printk() can vary depending on the message * and state of the ringbuffer, grab the timestamp now so that it is @@ -1973,6 +2267,8 @@ int vprintk_store(int facility, int level, */ ts_nsec = local_clock(); + caller_id = printk_caller_id(); + /* * The sprintf needs to come first since the syslog prefix might be * passed in as a parameter. An extra byte must be reserved so that @@ -1983,34 +2279,41 @@ int vprintk_store(int facility, int level, reserve_size = vsnprintf(&prefix_buf[0], sizeof(prefix_buf), fmt, args2) + 1; va_end(args2); - if (reserve_size > LOG_LINE_MAX) - reserve_size = LOG_LINE_MAX; + if (reserve_size > PRINTKRB_RECORD_MAX) + reserve_size = PRINTKRB_RECORD_MAX; /* Extract log level or control flags. */ if (facility == 0) - parse_prefix(&prefix_buf[0], &level, &lflags); + printk_parse_prefix(&prefix_buf[0], &level, &flags); if (level == LOGLEVEL_DEFAULT) level = default_message_loglevel; if (dev_info) - lflags |= LOG_NEWLINE; + flags |= LOG_NEWLINE; + + if (is_printk_force_console()) + flags |= LOG_FORCE_CON; - if (lflags & LOG_CONT) { + if (flags & LOG_CONT) { prb_rec_init_wr(&r, reserve_size); - if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { + if (prb_reserve_in_last(&e, prb, &r, caller_id, PRINTKRB_RECORD_MAX)) { text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size, - facility, &lflags, fmt, args); + facility, &flags, fmt, args); r.info->text_len += text_len; - if (lflags & LOG_NEWLINE) { + if (flags & LOG_FORCE_CON) + r.info->flags |= LOG_FORCE_CON; + + if (flags & LOG_NEWLINE) { r.info->flags |= LOG_NEWLINE; prb_final_commit(&e); } else { prb_commit(&e); } - return text_len; + ret = text_len; + goto out; } } @@ -2026,156 +2329,153 @@ int vprintk_store(int facility, int level, prb_rec_init_wr(&r, reserve_size + trunc_msg_len); if (!prb_reserve(&e, prb, &r)) - return 0; + goto out; } /* fill message */ - text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &lflags, fmt, args); + text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args); if (trunc_msg_len) memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len); r.info->text_len = text_len + trunc_msg_len; r.info->facility = facility; r.info->level = level & 7; - r.info->flags = lflags & 0x1f; + r.info->flags = flags & 0x1f; r.info->ts_nsec = ts_nsec; r.info->caller_id = caller_id; if (dev_info) memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); /* A message without a trailing newline can be continued. 
*/ - if (!(lflags & LOG_NEWLINE)) + if (!(flags & LOG_NEWLINE)) prb_commit(&e); else prb_final_commit(&e); - return (text_len + trunc_msg_len); + ret = text_len + trunc_msg_len; +out: + printk_exit_irqrestore(recursion_ptr, irqflags); + return ret; +} + +/* + * This acts as a one-way switch to allow legacy consoles to print from + * the printk() caller context on a panic CPU. It also attempts to flush + * the legacy consoles in this context. + */ +void printk_legacy_allow_panic_sync(void) +{ + struct console_flush_type ft; + + legacy_allow_panic_sync = true; + + printk_get_console_flush_type(&ft); + if (ft.legacy_direct) { + if (console_trylock()) + console_unlock(); + } } asmlinkage int vprintk_emit(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args) { + struct console_flush_type ft; int printed_len; - bool in_sched = false; - unsigned long flags; /* Suppress unimportant messages after panic happens */ if (unlikely(suppress_printk)) return 0; + /* + * The messages on the panic CPU are the most important. If + * non-panic CPUs are generating any messages, they will be + * silently dropped. + */ + if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace) + return 0; + + printk_get_console_flush_type(&ft); + + /* If called from the scheduler, we can not call up(). */ if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; - in_sched = true; + ft.legacy_offload |= ft.legacy_direct; + ft.legacy_direct = false; } - boot_delay_msec(level); - printk_delay(); + printk_delay(level); - printk_safe_enter_irqsave(flags); printed_len = vprintk_store(facility, level, dev_info, fmt, args); - printk_safe_exit_irqrestore(flags); - /* If called from the scheduler, we can not call up(). */ - if (!in_sched) { + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + + if (ft.legacy_direct) { /* - * Disable preemption to avoid being preempted while holding - * console_sem which would prevent anyone from printing to - * console + * The caller may be holding system-critical or + * timing-sensitive locks. Disable preemption during + * printing of all remaining records to all consoles so that + * this context can return as soon as possible. Hopefully + * another printk() caller will take over the printing. */ preempt_disable(); /* * Try to acquire and then immediately release the console - * semaphore. The release will print out buffers and wake up - * /dev/kmsg and syslog() users. + * semaphore. The release will print out buffers. With the + * spinning variant, this context tries to take over the + * printing from another printing context. */ if (console_trylock_spinning()) console_unlock(); preempt_enable(); } - wake_up_klogd(); + if (ft.legacy_offload) + defer_console_output(); + else + wake_up_klogd(); + return printed_len; } EXPORT_SYMBOL(vprintk_emit); -asmlinkage int vprintk(const char *fmt, va_list args) -{ - return vprintk_func(fmt, args); -} -EXPORT_SYMBOL(vprintk); - int vprintk_default(const char *fmt, va_list args) { return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); } EXPORT_SYMBOL_GPL(vprintk_default); -/** - * printk - print a kernel message - * @fmt: format string - * - * This is printk(). It can be called from any context. We want it to work. - * - * We try to grab the console_lock. If we succeed, it's easy - we log the - * output and call the console drivers. If we fail to get the semaphore, we - * place the output into the log buffer and return. 
The current holder of - * the console_sem will notice the new output in console_unlock(); and will - * send it to the consoles before releasing the lock. - * - * One effect of this deferred printing is that code which calls printk() and - * then changes console_loglevel may break. This is because console_loglevel - * is inspected when the actual printing occurs. - * - * See also: - * printf(3) - * - * See the vsnprintf() documentation for format string extensions over C99. - */ -asmlinkage __visible int printk(const char *fmt, ...) +asmlinkage __visible int _printk(const char *fmt, ...) { va_list args; int r; va_start(args, fmt); - r = vprintk_func(fmt, args); + r = vprintk(fmt, args); va_end(args); return r; } -EXPORT_SYMBOL(printk); +EXPORT_SYMBOL(_printk); + +static bool pr_flush(int timeout_ms, bool reset_on_progress); +static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); #else /* CONFIG_PRINTK */ -#define LOG_LINE_MAX 0 -#define PREFIX_MAX 0 #define printk_time false #define prb_read_valid(rb, seq, r) false #define prb_first_valid_seq(rb) 0 +#define prb_next_seq(rb) 0 static u64 syslog_seq; -static u64 console_seq; -static u64 exclusive_console_stop_seq; -static unsigned long console_dropped; -static size_t record_print_text(const struct printk_record *r, - bool syslog, bool time) -{ - return 0; -} -static ssize_t info_print_ext_header(char *buf, size_t size, - struct printk_info *info) -{ - return 0; -} -static ssize_t msg_print_ext_body(char *buf, size_t size, - char *text, size_t text_len, - struct dev_printk_info *dev_info) { return 0; } -static void console_lock_spinning_enable(void) { } -static int console_lock_spinning_disable_and_check(void) { return 0; } -static void call_console_drivers(const char *ext_text, size_t ext_len, - const char *text, size_t len) {} -static bool suppress_message_printing(int level) { return false; } +static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; } +static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } #endif /* CONFIG_PRINTK */ @@ -2199,24 +2499,51 @@ asmlinkage __visible void early_printk(const char *fmt, ...) } #endif -static int __add_preferred_console(char *name, int idx, char *options, +static void set_user_specified(struct console_cmdline *c, bool user_specified) +{ + if (!user_specified) + return; + + /* + * @c console was defined by the user on the command line. + * Do not clear when added twice also by SPCR or the device tree. + */ + c->user_specified = true; + /* At least one console defined by the user on the command line. */ + console_set_on_cmdline = 1; +} + +static int __add_preferred_console(const char *name, const short idx, + const char *devname, char *options, char *brl_options, bool user_specified) { struct console_cmdline *c; int i; + if (!name && !devname) + return -EINVAL; + + /* + * We use a signed short index for struct console for device drivers to + * indicate a not yet assigned index or port. However, a negative index + * value is not valid when the console name and index are defined on + * the command line. + */ + if (name && idx < 0) + return -EINVAL; + /* * See if this tty is not yet registered, and * if we have a slot free. 
*/ for (i = 0, c = console_cmdline; - i < MAX_CMDLINECONSOLES && c->name[0]; + i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]); i++, c++) { - if (strcmp(c->name, name) == 0 && c->index == idx) { + if ((name && strcmp(c->name, name) == 0 && c->index == idx) || + (devname && strcmp(c->devname, devname) == 0)) { if (!brl_options) preferred_console = i; - if (user_specified) - c->user_specified = true; + set_user_specified(c, user_specified); return 0; } } @@ -2224,9 +2551,12 @@ static int __add_preferred_console(char *name, int idx, char *options, return -E2BIG; if (!brl_options) preferred_console = i; - strlcpy(c->name, name, sizeof(c->name)); + if (name) + strscpy(c->name, name); + if (devname) + strscpy(c->devname, devname); c->options = options; - c->user_specified = user_specified; + set_user_specified(c, user_specified); braille_set_options(c, brl_options); c->index = idx; @@ -2249,50 +2579,66 @@ __setup("console_msg_format=", console_msg_format_setup); */ static int __init console_setup(char *str) { - char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */ - char *s, *options, *brl_options = NULL; + static_assert(sizeof(console_cmdline[0].devname) >= sizeof(console_cmdline[0].name) + 4); + char buf[sizeof(console_cmdline[0].devname)]; + char *brl_options = NULL; + char *ttyname = NULL; + char *devname = NULL; + char *options; + char *s; int idx; /* * console="" or console=null have been suggested as a way to * disable console output. Use ttynull that has been created - * for exacly this purpose. + * for exactly this purpose. */ if (str[0] == 0 || strcmp(str, "null") == 0) { - __add_preferred_console("ttynull", 0, NULL, NULL, true); + __add_preferred_console("ttynull", 0, NULL, NULL, NULL, true); return 1; } if (_braille_console_setup(&str, &brl_options)) return 1; + /* For a DEVNAME:0.0 style console the character device is unknown early */ + if (strchr(str, ':')) + devname = buf; + else + ttyname = buf; + /* * Decode str into name, index, options. */ - if (str[0] >= '0' && str[0] <= '9') { - strcpy(buf, "ttyS"); - strncpy(buf + 4, str, sizeof(buf) - 5); - } else { - strncpy(buf, str, sizeof(buf) - 1); - } - buf[sizeof(buf) - 1] = 0; + if (ttyname && isdigit(str[0])) + scnprintf(buf, sizeof(buf), "ttyS%s", str); + else + strscpy(buf, str); + options = strchr(str, ','); if (options) *(options++) = 0; + #ifdef __sparc__ if (!strcmp(str, "ttya")) - strcpy(buf, "ttyS0"); + strscpy(buf, "ttyS0"); if (!strcmp(str, "ttyb")) - strcpy(buf, "ttyS1"); + strscpy(buf, "ttyS1"); #endif + for (s = buf; *s; s++) - if (isdigit(*s) || *s == ',') + if ((ttyname && isdigit(*s)) || *s == ',') break; - idx = simple_strtoul(s, NULL, 10); + + /* @idx will get defined when devname matches. */ + if (devname) + idx = -1; + else + idx = simple_strtoul(s, NULL, 10); + *s = 0; - __add_preferred_console(buf, idx, options, brl_options, true); - console_set_on_cmdline = 1; + __add_preferred_console(ttyname, idx, devname, options, brl_options, true); return 1; } __setup("console=", console_setup); @@ -2310,10 +2656,55 @@ __setup("console=", console_setup); * commonly to provide a default console (ie from PROM variables) when * the user has not supplied one. */ -int add_preferred_console(char *name, int idx, char *options) +int add_preferred_console(const char *name, const short idx, char *options) +{ + return __add_preferred_console(name, idx, NULL, options, NULL, false); +} + +/** + * match_devname_and_update_preferred_console - Update a preferred console + * when matching devname is found. 
+ * @devname: DEVNAME:0.0 style device name + * @name: Name of the corresponding console driver, e.g. "ttyS" + * @idx: Console index, e.g. port number. + * + * The function checks whether a device with the given @devname is + * preferred via the console=DEVNAME:0.0 command line option. + * It fills the missing console driver name and console index + * so that a later register_console() call could find (match) + * and enable this device. + * + * It might be used when a driver subsystem initializes particular + * devices with already known DEVNAME:0.0 style names. And it + * could predict which console driver name and index this device + * would later get associated with. + * + * Return: 0 on success, negative error code on failure. + */ +int match_devname_and_update_preferred_console(const char *devname, + const char *name, + const short idx) { - return __add_preferred_console(name, idx, options, NULL, false); + struct console_cmdline *c = console_cmdline; + int i; + + if (!devname || !strlen(devname) || !name || !strlen(name) || idx < 0) + return -EINVAL; + + for (i = 0; i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]); + i++, c++) { + if (!strcmp(devname, c->devname)) { + pr_info("associate the preferred console \"%s\" with \"%s%d\"\n", + devname, name, idx); + strscpy(c->name, name); + c->index = idx; + return 0; + } + } + + return -ENOENT; } +EXPORT_SYMBOL_GPL(match_devname_and_update_preferred_console); bool console_suspend_enabled = true; EXPORT_SYMBOL(console_suspend_enabled); @@ -2329,6 +2720,18 @@ module_param_named(console_suspend, console_suspend_enabled, MODULE_PARM_DESC(console_suspend, "suspend console during suspend" " and hibernate operations"); +static bool printk_console_no_auto_verbose; + +void console_verbose(void) +{ + if (console_loglevel && !printk_console_no_auto_verbose) + console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; +} +EXPORT_SYMBOL_GPL(console_verbose); + +module_param_named(console_no_auto_verbose, printk_console_no_auto_verbose, bool, 0644); +MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to highest on oops/panic/etc"); + /** * suspend_console - suspend the console subsystem * @@ -2336,21 +2739,54 @@ MODULE_PARM_DESC(console_suspend, "suspend console during suspend" */ void suspend_console(void) { + struct console *con; + if (!console_suspend_enabled) return; pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); - console_lock(); - console_suspended = 1; - up_console_sem(); + pr_flush(1000, true); + + console_list_lock(); + for_each_console(con) + console_srcu_write_flags(con, con->flags | CON_SUSPENDED); + console_list_unlock(); + + /* + * Ensure that all SRCU list walks have completed. All printing + * contexts must be able to see that they are suspended so that it + * is guaranteed that all printing has stopped when this function + * completes. + */ + synchronize_srcu(&console_srcu); } void resume_console(void) { + struct console_flush_type ft; + struct console *con; + if (!console_suspend_enabled) return; - down_console_sem(); - console_suspended = 0; - console_unlock(); + + console_list_lock(); + for_each_console(con) + console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED); + console_list_unlock(); + + /* + * Ensure that all SRCU list walks have completed. All printing + * contexts must be able to see they are no longer suspended so + * that they are guaranteed to wake up and resume printing. 
+ */ + synchronize_srcu(&console_srcu); + + printk_get_console_flush_type(&ft); + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + if (ft.legacy_offload) + defer_console_output(); + + pr_flush(1000, true); } /** @@ -2364,19 +2800,25 @@ void resume_console(void) */ static int console_cpu_notify(unsigned int cpu) { + struct console_flush_type ft; + if (!cpuhp_tasks_frozen) { - /* If trylock fails, someone else is doing the printing */ - if (console_trylock()) - console_unlock(); + printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + if (ft.legacy_direct) { + if (console_trylock()) + console_unlock(); + } } return 0; } /** - * console_lock - lock the console system for exclusive use. + * console_lock - block the console subsystem from printing * - * Acquires a lock which guarantees that the caller has - * exclusive access to the console system and the console_drivers list. + * Acquires a lock which guarantees that no consoles will + * be in or enter their write() callback. * * Can sleep, returns nothing. */ @@ -2384,30 +2826,31 @@ void console_lock(void) { might_sleep(); + /* On panic, the console_lock must be left to the panic cpu. */ + while (other_cpu_in_panic()) + msleep(1000); + down_console_sem(); - if (console_suspended) - return; console_locked = 1; console_may_schedule = 1; } EXPORT_SYMBOL(console_lock); /** - * console_trylock - try to lock the console system for exclusive use. + * console_trylock - try to block the console subsystem from printing * - * Try to acquire a lock which guarantees that the caller has exclusive - * access to the console system and the console_drivers list. + * Try to acquire a lock which guarantees that no consoles will + * be in or enter their write() callback. * * returns 1 on success, and 0 on failure to acquire the lock. */ int console_trylock(void) { - if (down_trylock_console_sem()) + /* On panic, the console_lock must be left to the panic cpu. */ + if (other_cpu_in_panic()) return 0; - if (console_suspended) { - up_console_sem(); + if (down_trylock_console_sem()) return 0; - } console_locked = 1; console_may_schedule = 0; return 1; @@ -2420,186 +2863,452 @@ int is_console_locked(void) } EXPORT_SYMBOL(is_console_locked); +static void __console_unlock(void) +{ + console_locked = 0; + up_console_sem(); +} + +#ifdef CONFIG_PRINTK + /* - * Check if we have any console that is capable of printing while cpu is - * booting or shutting down. Requires console_sem. + * Prepend the message in @pmsg->pbufs->outbuf. This is achieved by shifting + * the existing message over and inserting the scratchbuf message. + * + * @pmsg is the original printk message. + * @fmt is the printf format of the message which will prepend the existing one. + * + * If there is not enough space in @pmsg->pbufs->outbuf, the existing + * message text will be sufficiently truncated. + * + * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated. */ -static int have_callable_console(void) +__printf(2, 3) +static void console_prepend_message(struct printk_message *pmsg, const char *fmt, ...) 
{ - struct console *con; + struct printk_buffers *pbufs = pmsg->pbufs; + const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); + const size_t outbuf_sz = sizeof(pbufs->outbuf); + char *scratchbuf = &pbufs->scratchbuf[0]; + char *outbuf = &pbufs->outbuf[0]; + va_list args; + size_t len; - for_each_console(con) - if ((con->flags & CON_ENABLED) && - (con->flags & CON_ANYTIME)) - return 1; + va_start(args, fmt); + len = vscnprintf(scratchbuf, scratchbuf_sz, fmt, args); + va_end(args); - return 0; + /* + * Make sure outbuf is sufficiently large before prepending. + * Keep at least the prefix when the message must be truncated. + * It is a rather theoretical problem when someone tries to + * use a minimalist buffer. + */ + if (WARN_ON_ONCE(len + PRINTK_PREFIX_MAX >= outbuf_sz)) + return; + + if (pmsg->outbuf_len + len >= outbuf_sz) { + /* Truncate the message, but keep it terminated. */ + pmsg->outbuf_len = outbuf_sz - (len + 1); + outbuf[pmsg->outbuf_len] = 0; + } + + memmove(outbuf + len, outbuf, pmsg->outbuf_len + 1); + memcpy(outbuf, scratchbuf, len); + pmsg->outbuf_len += len; } /* - * Can we actually use the console at this time on this cpu? + * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". + * @pmsg->outbuf_len is updated appropriately. * - * Console drivers may assume that per-cpu resources have been allocated. So - * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't - * call them until this CPU is officially up. + * @pmsg is the printk message to prepend. + * + * @dropped is the dropped count to report in the dropped message. */ -static inline int can_use_console(void) +void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) { - return cpu_online(raw_smp_processor_id()) || have_callable_console(); + console_prepend_message(pmsg, "** %lu printk messages dropped **\n", dropped); } -/** - * console_unlock - unlock the console system +/* + * Prepend the message in @pmsg->pbufs->outbuf with a "replay message". + * @pmsg->outbuf_len is updated appropriately. + * + * @pmsg is the printk message to prepend. + */ +void console_prepend_replay(struct printk_message *pmsg) +{ + console_prepend_message(pmsg, "** replaying previous printk message **\n"); +} + +/* + * Read and format the specified record (or a later record if the specified + * record is not available). * - * Releases the console_lock which the caller holds on the console system - * and the console driver list. + * @pmsg will contain the formatted result. @pmsg->pbufs must point to a + * struct printk_buffers. * - * While the console_lock was held, console output may have been buffered - * by printk(). If this is the case, console_unlock(); emits - * the output prior to releasing the lock. + * @seq is the record to read and format. If it is not available, the next + * valid record is read. * - * If there is output waiting, we wake /dev/kmsg and syslog() users. + * @is_extended specifies if the message should be formatted for extended + * console output. * - * console_unlock(); may be called from any context. + * @may_supress specifies if records may be skipped based on loglevel. + * + * Returns false if no record is available. Otherwise true and all fields + * of @pmsg are valid. (See the documentation of struct printk_message + * for information about the @pmsg fields.) 
*/ -void console_unlock(void) +bool printk_get_next_message(struct printk_message *pmsg, u64 seq, + bool is_extended, bool may_suppress) { - static char ext_text[CONSOLE_EXT_LOG_MAX]; - static char text[LOG_LINE_MAX + PREFIX_MAX]; - unsigned long flags; - bool do_cond_resched, retry; + struct printk_buffers *pbufs = pmsg->pbufs; + const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); + const size_t outbuf_sz = sizeof(pbufs->outbuf); + char *scratchbuf = &pbufs->scratchbuf[0]; + char *outbuf = &pbufs->outbuf[0]; struct printk_info info; struct printk_record r; - - if (console_suspended) { - up_console_sem(); - return; - } - - prb_rec_init_rd(&r, &info, text, sizeof(text)); + size_t len = 0; + bool force_con; /* - * Console drivers are called with interrupts disabled, so - * @console_may_schedule should be cleared before; however, we may - * end up dumping a lot of lines, for example, if called from - * console registration path, and should invoke cond_resched() - * between lines if allowable. Not doing so can cause a very long - * scheduling stall on a slow console leading to RCU stall and - * softlockup warnings which exacerbate the issue with more - * messages practically incapacitating the system. + * Formatting extended messages requires a separate buffer, so use the + * scratch buffer to read in the ringbuffer text. * - * console_trylock() is not able to detect the preemptive - * context reliably. Therefore the value must be stored before - * and cleared after the "again" goto label. + * Formatting normal messages is done in-place, so read the ringbuffer + * text directly into the output buffer. */ - do_cond_resched = console_may_schedule; -again: - console_may_schedule = 0; + if (is_extended) + prb_rec_init_rd(&r, &info, scratchbuf, scratchbuf_sz); + else + prb_rec_init_rd(&r, &info, outbuf, outbuf_sz); + + if (!prb_read_valid(prb, seq, &r)) + return false; + + pmsg->seq = r.info->seq; + pmsg->dropped = r.info->seq - seq; + force_con = r.info->flags & LOG_FORCE_CON; /* - * We released the console_sem lock, so we need to recheck if - * cpu is online and (if not) is there at least one CON_ANYTIME - * console. + * Skip records that are not forced to be printed on consoles and that + * has level above the console loglevel. */ - if (!can_use_console()) { - console_locked = 0; - up_console_sem(); - return; + if (!force_con && may_suppress && suppress_message_printing(r.info->level)) + goto out; + + if (is_extended) { + len = info_print_ext_header(outbuf, outbuf_sz, r.info); + len += msg_print_ext_body(outbuf + len, outbuf_sz - len, + &r.text_buf[0], r.info->text_len, &r.info->dev_info); + } else { + len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); } +out: + pmsg->outbuf_len = len; + return true; +} - for (;;) { - size_t ext_len = 0; - size_t len; +/* + * Legacy console printing from printk() caller context does not respect + * raw_spinlock/spinlock nesting. For !PREEMPT_RT the lockdep warning is a + * false positive. For PREEMPT_RT the false positive condition does not + * occur. + * + * This map is used to temporarily establish LD_WAIT_SLEEP context for the + * console write() callback when legacy printing to avoid false positive + * lockdep complaints, thus allowing lockdep to continue to function for + * real issues. 
+ */ +#ifdef CONFIG_PREEMPT_RT +static inline void printk_legacy_allow_spinlock_enter(void) { } +static inline void printk_legacy_allow_spinlock_exit(void) { } +#else +static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_SLEEP); - printk_safe_enter_irqsave(flags); - raw_spin_lock(&logbuf_lock); -skip: - if (!prb_read_valid(prb, console_seq, &r)) - break; +static inline void printk_legacy_allow_spinlock_enter(void) +{ + lock_map_acquire_try(&printk_legacy_map); +} - if (console_seq != r.info->seq) { - console_dropped += r.info->seq - console_seq; - console_seq = r.info->seq; - } +static inline void printk_legacy_allow_spinlock_exit(void) +{ + lock_map_release(&printk_legacy_map); +} +#endif /* CONFIG_PREEMPT_RT */ - if (suppress_message_printing(r.info->level)) { - /* - * Skip record we have buffered and already printed - * directly to the console when we received it, and - * record that has level above the console loglevel. - */ - console_seq++; - goto skip; - } +/* + * Used as the printk buffers for non-panic, serialized console printing. + * This is for legacy (!CON_NBCON) as well as all boot (CON_BOOT) consoles. + * Its usage requires the console_lock held. + */ +struct printk_buffers printk_shared_pbufs; - /* Output to all consoles once old messages replayed. */ - if (unlikely(exclusive_console && - console_seq >= exclusive_console_stop_seq)) { - exclusive_console = NULL; - } +/* + * Print one record for the given console. The record printed is whatever + * record is the next available record for the given console. + * + * @handover will be set to true if a printk waiter has taken over the + * console_lock, in which case the caller is no longer holding both the + * console_lock and the SRCU read lock. Otherwise it is set to false. + * + * @cookie is the cookie from the SRCU read lock. + * + * Returns false if the given console has no next record to print, otherwise + * true. + * + * Requires the console_lock and the SRCU read lock. + */ +static bool console_emit_next_record(struct console *con, bool *handover, int cookie) +{ + bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; + char *outbuf = &printk_shared_pbufs.outbuf[0]; + struct printk_message pmsg = { + .pbufs = &printk_shared_pbufs, + }; + unsigned long flags; + + *handover = false; + + if (!printk_get_next_message(&pmsg, con->seq, is_extended, true)) + return false; + + con->dropped += pmsg.dropped; + + /* Skip messages of formatted length 0. */ + if (pmsg.outbuf_len == 0) { + con->seq = pmsg.seq + 1; + goto skip; + } + + if (con->dropped && !is_extended) { + console_prepend_dropped(&pmsg, con->dropped); + con->dropped = 0; + } + + /* Write everything out to the hardware. */ + if (force_legacy_kthread() && !panic_in_progress()) { /* - * Handle extended console text first because later - * record_print_text() will modify the record buffer in-place. + * With forced threading this function is in a task context + * (either legacy kthread or get_init_console_seq()). There + * is no need for concern about printk reentrance, handovers, + * or lockdep complaints. 
*/ - if (nr_ext_console_drivers) { - ext_len = info_print_ext_header(ext_text, - sizeof(ext_text), - r.info); - ext_len += msg_print_ext_body(ext_text + ext_len, - sizeof(ext_text) - ext_len, - &r.text_buf[0], - r.info->text_len, - &r.info->dev_info); - } - len = record_print_text(&r, - console_msg_format & MSG_FORMAT_SYSLOG, - printk_time); - console_seq++; - raw_spin_unlock(&logbuf_lock); + con->write(con, outbuf, pmsg.outbuf_len); + con->seq = pmsg.seq + 1; + } else { /* * While actively printing out messages, if another printk() * were to occur on another CPU, it may wait for this one to * finish. This task can not be preempted if there is a * waiter waiting to take over. + * + * Interrupts are disabled because the hand over to a waiter + * must not be interrupted until the hand over is completed + * (@console_waiter is cleared). */ + printk_safe_enter_irqsave(flags); console_lock_spinning_enable(); - stop_critical_timings(); /* don't trace print latency */ - call_console_drivers(ext_text, ext_len, text, len); + /* Do not trace print latency. */ + stop_critical_timings(); + + printk_legacy_allow_spinlock_enter(); + con->write(con, outbuf, pmsg.outbuf_len); + printk_legacy_allow_spinlock_exit(); + start_critical_timings(); - if (console_lock_spinning_disable_and_check()) { - printk_safe_exit_irqrestore(flags); - return; - } + con->seq = pmsg.seq + 1; + *handover = console_lock_spinning_disable_and_check(cookie); printk_safe_exit_irqrestore(flags); - - if (do_cond_resched) - cond_resched(); } +skip: + return true; +} - console_locked = 0; +#else - raw_spin_unlock(&logbuf_lock); +static bool console_emit_next_record(struct console *con, bool *handover, int cookie) +{ + *handover = false; + return false; +} - up_console_sem(); +static inline void printk_kthreads_check_locked(void) { } + +#endif /* CONFIG_PRINTK */ + +/* + * Print out all remaining records to all consoles. + * + * @do_cond_resched is set by the caller. It can be true only in schedulable + * context. + * + * @next_seq is set to the sequence number after the last available record. + * The value is valid only when this function returns true. It means that all + * usable consoles are completely flushed. + * + * @handover will be set to true if a printk waiter has taken over the + * console_lock, in which case the caller is no longer holding the + * console_lock. Otherwise it is set to false. + * + * Returns true when there was at least one usable console and all messages + * were flushed to all usable consoles. A returned false informs the caller + * that everything was not flushed (either there were no usable consoles or + * another context has taken over printing or it is a panic situation and this + * is not the panic CPU). Regardless the reason, the caller should assume it + * is not useful to immediately try again. + * + * Requires the console_lock. + */ +static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) +{ + struct console_flush_type ft; + bool any_usable = false; + struct console *con; + bool any_progress; + int cookie; + + *next_seq = 0; + *handover = false; + + do { + any_progress = false; + + printk_get_console_flush_type(&ft); + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + short flags = console_srcu_read_flags(con); + u64 printk_seq; + bool progress; + + /* + * console_flush_all() is only responsible for nbcon + * consoles when the nbcon consoles cannot print via + * their atomic or threaded flushing. 
+ */ + if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload)) + continue; + + if (!console_is_usable(con, flags, !do_cond_resched)) + continue; + any_usable = true; + + if (flags & CON_NBCON) { + progress = nbcon_legacy_emit_next_record(con, handover, cookie, + !do_cond_resched); + printk_seq = nbcon_seq_read(con); + } else { + progress = console_emit_next_record(con, handover, cookie); + printk_seq = con->seq; + } + + /* + * If a handover has occurred, the SRCU read lock + * is already released. + */ + if (*handover) + return false; + + /* Track the next of the highest seq flushed. */ + if (printk_seq > *next_seq) + *next_seq = printk_seq; + + if (!progress) + continue; + any_progress = true; + + /* Allow panic_cpu to take over the consoles safely. */ + if (other_cpu_in_panic()) + goto abandon; + + if (do_cond_resched) + cond_resched(); + } + console_srcu_read_unlock(cookie); + } while (any_progress); + + return any_usable; + +abandon: + console_srcu_read_unlock(cookie); + return false; +} + +static void __console_flush_and_unlock(void) +{ + bool do_cond_resched; + bool handover; + bool flushed; + u64 next_seq; /* - * Someone could have filled up the buffer again, so re-check if there's - * something to flush. In case we cannot trylock the console_sem again, - * there's a new owner and the console_unlock() from them will do the - * flush, no worries. + * Console drivers are called with interrupts disabled, so + * @console_may_schedule should be cleared before; however, we may + * end up dumping a lot of lines, for example, if called from + * console registration path, and should invoke cond_resched() + * between lines if allowable. Not doing so can cause a very long + * scheduling stall on a slow console leading to RCU stall and + * softlockup warnings which exacerbate the issue with more + * messages practically incapacitating the system. Therefore, create + * a local to use for the printing loop. */ - raw_spin_lock(&logbuf_lock); - retry = prb_read_valid(prb, console_seq, NULL); - raw_spin_unlock(&logbuf_lock); - printk_safe_exit_irqrestore(flags); + do_cond_resched = console_may_schedule; + + do { + console_may_schedule = 0; + + flushed = console_flush_all(do_cond_resched, &next_seq, &handover); + if (!handover) + __console_unlock(); + + /* + * Abort if there was a failure to flush all messages to all + * usable consoles. Either it is not possible to flush (in + * which case it would be an infinite loop of retrying) or + * another context has taken over printing. + */ + if (!flushed) + break; - if (retry && console_trylock()) - goto again; + /* + * Some context may have added new records after + * console_flush_all() but before unlocking the console. + * Re-check if there is a new record to flush. If the trylock + * fails, another context is already handling the printing. + */ + } while (prb_read_valid(prb, next_seq, NULL) && console_trylock()); +} + +/** + * console_unlock - unblock the legacy console subsystem from printing + * + * Releases the console_lock which the caller holds to block printing of + * the legacy console subsystem. + * + * While the console_lock was held, console output may have been buffered + * by printk(). If this is the case, console_unlock() emits the output on + * legacy consoles prior to releasing the lock. + * + * console_unlock(); may be called from any context. 
+ */ +void console_unlock(void) +{ + struct console_flush_type ft; + + printk_get_console_flush_type(&ft); + if (ft.legacy_direct) + __console_flush_and_unlock(); + else + __console_unlock(); } EXPORT_SYMBOL(console_unlock); @@ -2621,13 +3330,45 @@ EXPORT_SYMBOL(console_conditional_schedule); void console_unblank(void) { + bool found_unblank = false; struct console *c; + int cookie; + + /* + * First check if there are any consoles implementing the unblank() + * callback. If not, there is no reason to continue and take the + * console lock, which in particular can be dangerous if + * @oops_in_progress is set. + */ + cookie = console_srcu_read_lock(); + for_each_console_srcu(c) { + if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) { + found_unblank = true; + break; + } + } + console_srcu_read_unlock(cookie); + if (!found_unblank) + return; /* - * console_unblank can no longer be called in interrupt context unless - * oops_in_progress is set to 1.. + * Stop console printing because the unblank() callback may + * assume the console is not within its write() callback. + * + * If @oops_in_progress is set, this may be an atomic context. + * In that case, attempt a trylock as best-effort. */ if (oops_in_progress) { + /* Semaphores are not NMI-safe. */ + if (in_nmi()) + return; + + /* + * Attempting to trylock the console lock can deadlock + * if another CPU was stopped while modifying the + * semaphore. "Hope and pray" that this is not the + * current situation. + */ if (down_trylock_console_sem() != 0) return; } else @@ -2635,10 +3376,52 @@ void console_unblank(void) console_locked = 1; console_may_schedule = 0; - for_each_console(c) - if ((c->flags & CON_ENABLED) && c->unblank) + + cookie = console_srcu_read_lock(); + for_each_console_srcu(c) { + if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) c->unblank(); + } + console_srcu_read_unlock(cookie); + console_unlock(); + + if (!oops_in_progress) + pr_flush(1000, true); +} + +/* + * Rewind all consoles to the oldest available record. + * + * IMPORTANT: The function is safe only when called under + * console_lock(). It is not enforced because + * it is used as a best effort in panic(). + */ +static void __console_rewind_all(void) +{ + struct console *c; + short flags; + int cookie; + u64 seq; + + seq = prb_first_valid_seq(prb); + + cookie = console_srcu_read_lock(); + for_each_console_srcu(c) { + flags = console_srcu_read_flags(c); + + if (flags & CON_NBCON) { + nbcon_seq_force(c, seq); + } else { + /* + * This assignment is safe only when called under + * console_lock(). On panic, legacy consoles are + * only best effort. + */ + c->seq = seq; + } + } + console_srcu_read_unlock(cookie); } /** @@ -2649,24 +3432,37 @@ void console_unblank(void) */ void console_flush_on_panic(enum con_flush_mode mode) { + struct console_flush_type ft; + bool handover; + u64 next_seq; + + /* + * Ignore the console lock and flush out the messages. Attempting a + * trylock would not be useful because: + * + * - if it is contended, it must be ignored anyway + * - console_lock() and console_trylock() block and fail + * respectively in panic for non-panic CPUs + * - semaphores are not NMI-safe + */ + /* - * If someone else is holding the console lock, trylock will fail - * and may_schedule may be set. Ignore and proceed to unlock so - * that messages are flushed out. As this can be called from any - * context and we don't want to get preempted while flushing, - * ensure may_schedule is cleared. 
+ * If another context is holding the console lock, + * @console_may_schedule might be set. Clear it so that + * this context does not call cond_resched() while flushing. */ - console_trylock(); console_may_schedule = 0; - if (mode == CONSOLE_REPLAY_ALL) { - unsigned long flags; + if (mode == CONSOLE_REPLAY_ALL) + __console_rewind_all(); - logbuf_lock_irqsave(flags); - console_seq = prb_first_valid_seq(prb); - logbuf_unlock_irqrestore(flags); - } - console_unlock(); + printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + + /* Flush legacy consoles once allowed, even when dangerous. */ + if (legacy_allow_panic_sync) + console_flush_all(false, &next_seq, &handover); } /* @@ -2676,15 +3472,25 @@ struct tty_driver *console_device(int *index) { struct console *c; struct tty_driver *driver = NULL; + int cookie; + /* + * Take console_lock to serialize device() callback with + * other console operations. For example, fg_console is + * modified under console_lock when switching vt. + */ console_lock(); - for_each_console(c) { + + cookie = console_srcu_read_lock(); + for_each_console_srcu(c) { if (!c->device) continue; driver = c->device(c, index); if (driver) break; } + console_srcu_read_unlock(cookie); + console_unlock(); return driver; } @@ -2696,20 +3502,253 @@ struct tty_driver *console_device(int *index) */ void console_stop(struct console *console) { - console_lock(); - console->flags &= ~CON_ENABLED; - console_unlock(); + __pr_flush(console, 1000, true); + console_list_lock(); + console_srcu_write_flags(console, console->flags & ~CON_ENABLED); + console_list_unlock(); + + /* + * Ensure that all SRCU list walks have completed. All contexts must + * be able to see that this console is disabled so that (for example) + * the caller can suspend the port without risk of another context + * using the port. + */ + synchronize_srcu(&console_srcu); } EXPORT_SYMBOL(console_stop); void console_start(struct console *console) { - console_lock(); - console->flags |= CON_ENABLED; - console_unlock(); + struct console_flush_type ft; + bool is_nbcon; + + console_list_lock(); + console_srcu_write_flags(console, console->flags | CON_ENABLED); + is_nbcon = console->flags & CON_NBCON; + console_list_unlock(); + + /* + * Ensure that all SRCU list walks have completed. The related + * printing context must be able to see it is enabled so that + * it is guaranteed to wake up and resume printing. + */ + synchronize_srcu(&console_srcu); + + printk_get_console_flush_type(&ft); + if (is_nbcon && ft.nbcon_offload) + nbcon_kthread_wake(console); + else if (ft.legacy_offload) + defer_console_output(); + + __pr_flush(console, 1000, true); } EXPORT_SYMBOL(console_start); +#ifdef CONFIG_PRINTK +static int unregister_console_locked(struct console *console); + +/* True when system boot is far enough to create printer threads. */ +static bool printk_kthreads_ready __ro_after_init; + +static struct task_struct *printk_legacy_kthread; + +static bool legacy_kthread_should_wakeup(void) +{ + struct console_flush_type ft; + struct console *con; + bool ret = false; + int cookie; + + if (kthread_should_stop()) + return true; + + printk_get_console_flush_type(&ft); + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + short flags = console_srcu_read_flags(con); + u64 printk_seq; + + /* + * The legacy printer thread is only responsible for nbcon + * consoles when the nbcon consoles cannot print via their + * atomic or threaded flushing. 
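The console_stop()/console_start() pair above is the sort of thing a serial driver wraps around powering a console port down and back up; this is only a sketch under that assumption, with the power-management steps left as invented placeholders:

    #include <linux/console.h>

    static void example_port_suspend(struct console *cons)
    {
            console_stop(cons);     /* flush pending output, then disable the console */
            /* ... power down the underlying port (invented step) ... */
    }

    static void example_port_resume(struct console *cons)
    {
            /* ... power the port back up (invented step) ... */
            console_start(cons);    /* re-enable and kick the printing context */
    }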
+ */ + if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload)) + continue; + + if (!console_is_usable(con, flags, false)) + continue; + + if (flags & CON_NBCON) { + printk_seq = nbcon_seq_read(con); + } else { + /* + * It is safe to read @seq because only this + * thread context updates @seq. + */ + printk_seq = con->seq; + } + + if (prb_read_valid(prb, printk_seq, NULL)) { + ret = true; + break; + } + } + console_srcu_read_unlock(cookie); + + return ret; +} + +static int legacy_kthread_func(void *unused) +{ + for (;;) { + wait_event_interruptible(legacy_wait, legacy_kthread_should_wakeup()); + + if (kthread_should_stop()) + break; + + console_lock(); + __console_flush_and_unlock(); + } + + return 0; +} + +static bool legacy_kthread_create(void) +{ + struct task_struct *kt; + + lockdep_assert_console_list_lock_held(); + + kt = kthread_run(legacy_kthread_func, NULL, "pr/legacy"); + if (WARN_ON(IS_ERR(kt))) { + pr_err("failed to start legacy printing thread\n"); + return false; + } + + printk_legacy_kthread = kt; + + /* + * It is important that console printing threads are scheduled + * shortly after a printk call and with generous runtime budgets. + */ + sched_set_normal(printk_legacy_kthread, -20); + + return true; +} + +/** + * printk_kthreads_shutdown - shutdown all threaded printers + * + * On system shutdown all threaded printers are stopped. This allows printk + * to transition back to atomic printing, thus providing a robust mechanism + * for the final shutdown/reboot messages to be output. + */ +static void printk_kthreads_shutdown(void) +{ + struct console *con; + + console_list_lock(); + if (printk_kthreads_running) { + printk_kthreads_running = false; + + for_each_console(con) { + if (con->flags & CON_NBCON) + nbcon_kthread_stop(con); + } + + /* + * The threads may have been stopped while printing a + * backlog. Flush any records left over. + */ + nbcon_atomic_flush_pending(); + } + console_list_unlock(); +} + +static struct syscore_ops printk_syscore_ops = { + .shutdown = printk_kthreads_shutdown, +}; + +/* + * If appropriate, start nbcon kthreads and set @printk_kthreads_running. + * If any kthreads fail to start, those consoles are unregistered. + * + * Must be called under console_list_lock(). + */ +static void printk_kthreads_check_locked(void) +{ + struct hlist_node *tmp; + struct console *con; + + lockdep_assert_console_list_lock_held(); + + if (!printk_kthreads_ready) + return; + + if (have_legacy_console || have_boot_console) { + if (!printk_legacy_kthread && + force_legacy_kthread() && + !legacy_kthread_create()) { + /* + * All legacy consoles must be unregistered. If there + * are any nbcon consoles, they will set up their own + * kthread. + */ + hlist_for_each_entry_safe(con, tmp, &console_list, node) { + if (con->flags & CON_NBCON) + continue; + + unregister_console_locked(con); + } + } + } else if (printk_legacy_kthread) { + kthread_stop(printk_legacy_kthread); + printk_legacy_kthread = NULL; + } + + /* + * Printer threads cannot be started as long as any boot console is + * registered because there is no way to synchronize the hardware + * registers between boot console code and regular console code. + * It can only be known that there will be no new boot consoles when + * an nbcon console is registered. + */ + if (have_boot_console || !have_nbcon_console) { + /* Clear flag in case all nbcon consoles unregistered. 
*/ + printk_kthreads_running = false; + return; + } + + if (printk_kthreads_running) + return; + + hlist_for_each_entry_safe(con, tmp, &console_list, node) { + if (!(con->flags & CON_NBCON)) + continue; + + if (!nbcon_kthread_create(con)) + unregister_console_locked(con); + } + + printk_kthreads_running = true; +} + +static int __init printk_set_kthreads_ready(void) +{ + register_syscore_ops(&printk_syscore_ops); + + console_list_lock(); + printk_kthreads_ready = true; + printk_kthreads_check_locked(); + console_list_unlock(); + + return 0; +} +early_initcall(printk_set_kthreads_ready); +#endif /* CONFIG_PRINTK */ + static int __read_mostly keep_bootcon; static int __init keep_bootcon_setup(char *str) @@ -2722,6 +3761,21 @@ static int __init keep_bootcon_setup(char *str) early_param("keep_bootcon", keep_bootcon_setup); +static int console_call_setup(struct console *newcon, char *options) +{ + int err; + + if (!newcon->setup) + return 0; + + /* Synchronize with possible boot console. */ + console_lock(); + err = newcon->setup(newcon, options); + console_unlock(); + + return err; +} + /* * This is called by register_console() to try to match * the newly registered console with any of the ones selected @@ -2731,14 +3785,18 @@ early_param("keep_bootcon", keep_bootcon_setup); * Care need to be taken with consoles that are statically * enabled such as netconsole */ -static int try_enable_new_console(struct console *newcon, bool user_specified) +static int try_enable_preferred_console(struct console *newcon, + bool user_specified) { struct console_cmdline *c; int i, err; for (i = 0, c = console_cmdline; - i < MAX_CMDLINECONSOLES && c->name[0]; + i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]); i++, c++) { + /* Console not yet initialized? */ + if (!c->name[0]) + continue; if (c->user_specified != user_specified) continue; if (!newcon->match || @@ -2756,15 +3814,13 @@ static int try_enable_new_console(struct console *newcon, bool user_specified) if (_braille_register_console(newcon, c)) return 0; - if (newcon->setup && - (err = newcon->setup(newcon, c->options)) != 0) + err = console_call_setup(newcon, c->options); + if (err) return err; } newcon->flags |= CON_ENABLED; - if (i == preferred_console) { + if (i == preferred_console) newcon->flags |= CON_CONSDEV; - has_preferred_console = true; - } return 0; } @@ -2779,6 +3835,100 @@ static int try_enable_new_console(struct console *newcon, bool user_specified) return -ENOENT; } +/* Try to enable the console unconditionally */ +static void try_enable_default_console(struct console *newcon) +{ + if (newcon->index < 0) + newcon->index = 0; + + if (console_call_setup(newcon, NULL) != 0) + return; + + newcon->flags |= CON_ENABLED; + + if (newcon->device) + newcon->flags |= CON_CONSDEV; +} + +/* Return the starting sequence number for a newly registered console. */ +static u64 get_init_console_seq(struct console *newcon, bool bootcon_registered) +{ + struct console *con; + bool handover; + u64 init_seq; + + if (newcon->flags & (CON_PRINTBUFFER | CON_BOOT)) { + /* Get a consistent copy of @syslog_seq. */ + mutex_lock(&syslog_lock); + init_seq = syslog_seq; + mutex_unlock(&syslog_lock); + } else { + /* Begin with next message added to ringbuffer. */ + init_seq = prb_next_seq(prb); + + /* + * If any enabled boot consoles are due to be unregistered + * shortly, some may not be caught up and may be the same + * device as @newcon. 
Since it is not known which boot console + * is the same device, flush all consoles and, if necessary, + * start with the message of the enabled boot console that is + * the furthest behind. + */ + if (bootcon_registered && !keep_bootcon) { + /* + * Hold the console_lock to stop console printing and + * guarantee safe access to console->seq. + */ + console_lock(); + + /* + * Flush all consoles and set the console to start at + * the next unprinted sequence number. + */ + if (!console_flush_all(true, &init_seq, &handover)) { + /* + * Flushing failed. Just choose the lowest + * sequence of the enabled boot consoles. + */ + + /* + * If there was a handover, this context no + * longer holds the console_lock. + */ + if (handover) + console_lock(); + + init_seq = prb_next_seq(prb); + for_each_console(con) { + u64 seq; + + if (!(con->flags & CON_BOOT) || + !(con->flags & CON_ENABLED)) { + continue; + } + + if (con->flags & CON_NBCON) + seq = nbcon_seq_read(con); + else + seq = con->seq; + + if (seq < init_seq) + init_seq = seq; + } + } + + console_unlock(); + } + } + + return init_seq; +} + +#define console_first() \ + hlist_entry(console_list.first, struct console, node) + +static int unregister_console_locked(struct console *console); + /* * The console driver calls this routine during kernel initialization * to register the console printing procedure with printk() and to @@ -2800,64 +3950,76 @@ static int try_enable_new_console(struct console *newcon, bool user_specified) */ void register_console(struct console *newcon) { + bool use_device_lock = (newcon->flags & CON_NBCON) && newcon->write_atomic; + bool bootcon_registered = false; + bool realcon_registered = false; + struct console *con; unsigned long flags; - struct console *bcon = NULL; + u64 init_seq; int err; - for_each_console(bcon) { - if (WARN(bcon == newcon, "console '%s%d' already registered\n", - bcon->name, bcon->index)) - return; - } + console_list_lock(); - /* - * before we register a new CON_BOOT console, make sure we don't - * already have a valid console - */ - if (newcon->flags & CON_BOOT) { - for_each_console(bcon) { - if (!(bcon->flags & CON_BOOT)) { - pr_info("Too late to register bootconsole %s%d\n", - newcon->name, newcon->index); - return; - } + for_each_console(con) { + if (WARN(con == newcon, "console '%s%d' already registered\n", + con->name, con->index)) { + goto unlock; } + + if (con->flags & CON_BOOT) + bootcon_registered = true; + else + realcon_registered = true; } - if (console_drivers && console_drivers->flags & CON_BOOT) - bcon = console_drivers; + /* Do not register boot consoles when there already is a real one. */ + if ((newcon->flags & CON_BOOT) && realcon_registered) { + pr_info("Too late to register bootconsole %s%d\n", + newcon->name, newcon->index); + goto unlock; + } - if (!has_preferred_console || bcon || !console_drivers) - has_preferred_console = preferred_console >= 0; + if (newcon->flags & CON_NBCON) { + /* + * Ensure the nbcon console buffers can be allocated + * before modifying any global data. + */ + if (!nbcon_alloc(newcon)) + goto unlock; + } /* - * See if we want to use this console driver. If we - * didn't select a console we take the first one - * that registers here. + * See if we want to enable this console driver by default. + * + * Nope when a console is preferred by the command line, device + * tree, or SPCR. + * + * The first real console with tty binding (driver) wins. More + * consoles might get enabled before the right one is found. 
+ * + * Note that a console with tty binding will have CON_CONSDEV + * flag set and will be first in the list. */ - if (!has_preferred_console) { - if (newcon->index < 0) - newcon->index = 0; - if (newcon->setup == NULL || - newcon->setup(newcon, NULL) == 0) { - newcon->flags |= CON_ENABLED; - if (newcon->device) { - newcon->flags |= CON_CONSDEV; - has_preferred_console = true; - } + if (preferred_console < 0) { + if (hlist_empty(&console_list) || !console_first()->device || + console_first()->flags & CON_BOOT) { + try_enable_default_console(newcon); } } /* See if this console matches one we selected on the command line */ - err = try_enable_new_console(newcon, true); + err = try_enable_preferred_console(newcon, true); /* If not, try to match against the platform default(s) */ if (err == -ENOENT) - err = try_enable_new_console(newcon, false); + err = try_enable_preferred_console(newcon, false); /* printk() messages are not printed to the Braille console. */ - if (err || newcon->flags & CON_BRL) - return; + if (err || newcon->flags & CON_BRL) { + if (newcon->flags & CON_NBCON) + nbcon_free(newcon); + goto unlock; + } /* * If we have a bootconsole, and are switching to a real console, @@ -2865,50 +4027,66 @@ void register_console(struct console *newcon) * the real console are the same physical device, it's annoying to * see the beginning boot messages twice */ - if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) + if (bootcon_registered && + ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) { newcon->flags &= ~CON_PRINTBUFFER; + } + + newcon->dropped = 0; + init_seq = get_init_console_seq(newcon, bootcon_registered); + + if (newcon->flags & CON_NBCON) { + have_nbcon_console = true; + nbcon_seq_force(newcon, init_seq); + } else { + have_legacy_console = true; + newcon->seq = init_seq; + } + + if (newcon->flags & CON_BOOT) + have_boot_console = true; /* - * Put this console in the list - keep the - * preferred driver at the head of the list. + * If another context is actively using the hardware of this new + * console, it will not be aware of the nbcon synchronization. This + * is a risk that two contexts could access the hardware + * simultaneously if this new console is used for atomic printing + * and the other context is still using the hardware. + * + * Use the driver synchronization to ensure that the hardware is not + * in use while this new console transitions to being registered. */ - console_lock(); - if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) { - newcon->next = console_drivers; - console_drivers = newcon; - if (newcon->next) - newcon->next->flags &= ~CON_CONSDEV; - /* Ensure this flag is always set for the head of the list */ + if (use_device_lock) + newcon->device_lock(newcon, &flags); + + /* + * Put this console in the list - keep the + * preferred driver at the head of the list. + */ + if (hlist_empty(&console_list)) { + /* Ensure CON_CONSDEV is always set for the head. */ newcon->flags |= CON_CONSDEV; + hlist_add_head_rcu(&newcon->node, &console_list); + + } else if (newcon->flags & CON_CONSDEV) { + /* Only the new head can have CON_CONSDEV set. 
*/ + console_srcu_write_flags(console_first(), console_first()->flags & ~CON_CONSDEV); + hlist_add_head_rcu(&newcon->node, &console_list); + } else { - newcon->next = console_drivers->next; - console_drivers->next = newcon; + hlist_add_behind_rcu(&newcon->node, console_list.first); } - if (newcon->flags & CON_EXTENDED) - nr_ext_console_drivers++; + /* + * No need to synchronize SRCU here! The caller does not rely + * on all contexts being able to see the new console before + * register_console() completes. + */ + + /* This new console is now registered. */ + if (use_device_lock) + newcon->device_unlock(newcon, flags); - if (newcon->flags & CON_PRINTBUFFER) { - /* - * console_unlock(); will print out the buffered messages - * for us. - */ - logbuf_lock_irqsave(flags); - /* - * We're about to replay the log buffer. Only do this to the - * just-registered console to avoid excessive message spam to - * the already-registered consoles. - * - * Set exclusive_console with disabled interrupts to reduce - * race window with eventual console_flush_on_panic() that - * ignores console_lock. - */ - exclusive_console = newcon; - exclusive_console_stop_seq = console_seq; - console_seq = syslog_seq; - logbuf_unlock_irqrestore(flags); - } - console_unlock(); console_sysfs_notify(); /* @@ -2918,30 +4096,39 @@ void register_console(struct console *newcon) * users know there might be something in the kernel's log buffer that * went to the bootconsole (that they do not see on the real console) */ - pr_info("%sconsole [%s%d] enabled\n", - (newcon->flags & CON_BOOT) ? "boot" : "" , - newcon->name, newcon->index); - if (bcon && + con_printk(KERN_INFO, newcon, "enabled\n"); + if (bootcon_registered && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) && !keep_bootcon) { - /* We need to iterate through all boot consoles, to make - * sure we print everything out, before we unregister them. - */ - for_each_console(bcon) - if (bcon->flags & CON_BOOT) - unregister_console(bcon); + struct hlist_node *tmp; + + hlist_for_each_entry_safe(con, tmp, &console_list, node) { + if (con->flags & CON_BOOT) + unregister_console_locked(con); + } } + + /* Changed console list, may require printer threads to start/stop. */ + printk_kthreads_check_locked(); +unlock: + console_list_unlock(); } EXPORT_SYMBOL(register_console); -int unregister_console(struct console *console) +/* Must be called under console_list_lock(). */ +static int unregister_console_locked(struct console *console) { - struct console *con; + bool use_device_lock = (console->flags & CON_NBCON) && console->write_atomic; + bool found_legacy_con = false; + bool found_nbcon_con = false; + bool found_boot_con = false; + unsigned long flags; + struct console *c; int res; - pr_info("%sconsole [%s%d] disabled\n", - (console->flags & CON_BOOT) ? 
"boot" : "" , - console->name, console->index); + lockdep_assert_console_list_lock_held(); + + con_printk(KERN_INFO, console, "disabled\n"); res = _braille_unregister_console(console); if (res < 0) @@ -2949,51 +4136,135 @@ int unregister_console(struct console *console) if (res > 0) return 0; - res = -ENODEV; - console_lock(); - if (console_drivers == console) { - console_drivers=console->next; - res = 0; - } else { - for_each_console(con) { - if (con->next == console) { - con->next = console->next; - res = 0; - break; - } - } - } + if (!console_is_registered_locked(console)) + res = -ENODEV; + else if (console_is_usable(console, console->flags, true)) + __pr_flush(console, 1000, true); + + /* Disable it unconditionally */ + console_srcu_write_flags(console, console->flags & ~CON_ENABLED); + + if (res < 0) + return res; + + /* + * Use the driver synchronization to ensure that the hardware is not + * in use while this console transitions to being unregistered. + */ + if (use_device_lock) + console->device_lock(console, &flags); - if (res) - goto out_disable_unlock; + hlist_del_init_rcu(&console->node); - if (console->flags & CON_EXTENDED) - nr_ext_console_drivers--; + if (use_device_lock) + console->device_unlock(console, flags); /* + * <HISTORICAL> * If this isn't the last console and it has CON_CONSDEV set, we * need to set it on the next preferred console. + * </HISTORICAL> + * + * The above makes no sense as there is no guarantee that the next + * console has any device attached. Oh well.... */ - if (console_drivers != NULL && console->flags & CON_CONSDEV) - console_drivers->flags |= CON_CONSDEV; + if (!hlist_empty(&console_list) && console->flags & CON_CONSDEV) + console_srcu_write_flags(console_first(), console_first()->flags | CON_CONSDEV); + + /* + * Ensure that all SRCU list walks have completed. All contexts + * must not be able to see this console in the list so that any + * exit/cleanup routines can be performed safely. + */ + synchronize_srcu(&console_srcu); + + if (console->flags & CON_NBCON) + nbcon_free(console); - console->flags &= ~CON_ENABLED; - console_unlock(); console_sysfs_notify(); if (console->exit) res = console->exit(console); + /* + * With this console gone, the global flags tracking registered + * console types may have changed. Update them. + */ + for_each_console(c) { + if (c->flags & CON_BOOT) + found_boot_con = true; + + if (c->flags & CON_NBCON) + found_nbcon_con = true; + else + found_legacy_con = true; + } + if (!found_boot_con) + have_boot_console = found_boot_con; + if (!found_legacy_con) + have_legacy_console = found_legacy_con; + if (!found_nbcon_con) + have_nbcon_console = found_nbcon_con; + + /* Changed console list, may require printer threads to start/stop. */ + printk_kthreads_check_locked(); + return res; +} -out_disable_unlock: - console->flags &= ~CON_ENABLED; - console_unlock(); +int unregister_console(struct console *console) +{ + int res; + console_list_lock(); + res = unregister_console_locked(console); + console_list_unlock(); return res; } EXPORT_SYMBOL(unregister_console); +/** + * console_force_preferred_locked - force a registered console preferred + * @con: The registered console to force preferred. + * + * Must be called under console_list_lock(). + */ +void console_force_preferred_locked(struct console *con) +{ + struct console *cur_pref_con; + + if (!console_is_registered_locked(con)) + return; + + cur_pref_con = console_first(); + + /* Already preferred? 
*/ + if (cur_pref_con == con) + return; + + /* + * Delete, but do not re-initialize the entry. This allows the console + * to continue to appear registered (via any hlist_unhashed_lockless() + * checks), even though it was briefly removed from the console list. + */ + hlist_del_rcu(&con->node); + + /* + * Ensure that all SRCU list walks have completed so that the console + * can be added to the beginning of the console list and its forward + * list pointer can be re-initialized. + */ + synchronize_srcu(&console_srcu); + + con->flags |= CON_CONSDEV; + WARN_ON(!con->device); + + /* Only the new head can have CON_CONSDEV set. */ + console_srcu_write_flags(cur_pref_con, cur_pref_con->flags & ~CON_CONSDEV); + hlist_add_head_rcu(&con->node, &console_list); +} +EXPORT_SYMBOL(console_force_preferred_locked); + /* * Initialize the console device. This is called *early*, so * we can't necessarily depend on lots of kernel help here. @@ -3036,14 +4307,16 @@ void __init console_init(void) * * To mitigate this problem somewhat, only unregister consoles whose memory * intersects with the init section. Note that all other boot consoles will - * get unregistred when the real preferred console is registered. + * get unregistered when the real preferred console is registered. */ static int __init printk_late_init(void) { + struct hlist_node *tmp; struct console *con; int ret; - for_each_console(con) { + console_list_lock(); + hlist_for_each_entry_safe(con, tmp, &console_list, node) { if (!(con->flags & CON_BOOT)) continue; @@ -3060,20 +4333,144 @@ static int __init printk_late_init(void) */ pr_warn("bootconsole [%s%d] uses init memory and must be disabled even before the real one is ready\n", con->name, con->index); - unregister_console(con); + unregister_console_locked(con); } } + console_list_unlock(); + ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, console_cpu_notify); WARN_ON(ret < 0); ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online", console_cpu_notify, NULL); WARN_ON(ret < 0); + printk_sysctl_init(); return 0; } late_initcall(printk_late_init); #if defined CONFIG_PRINTK +/* If @con is specified, only wait for that console. Otherwise wait for all. */ +static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) +{ + unsigned long timeout_jiffies = msecs_to_jiffies(timeout_ms); + unsigned long remaining_jiffies = timeout_jiffies; + struct console_flush_type ft; + struct console *c; + u64 last_diff = 0; + u64 printk_seq; + short flags; + int cookie; + u64 diff; + u64 seq; + + /* Sorry, pr_flush() will not work this early. */ + if (system_state < SYSTEM_SCHEDULING) + return false; + + might_sleep(); + + seq = prb_next_reserve_seq(prb); + + /* Flush the consoles so that records up to @seq are printed. */ + printk_get_console_flush_type(&ft); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + if (ft.legacy_direct) { + console_lock(); + console_unlock(); + } + + for (;;) { + unsigned long begin_jiffies; + unsigned long slept_jiffies; + + diff = 0; + + /* + * Hold the console_lock to guarantee safe access to + * console->seq. Releasing console_lock flushes more + * records in case @seq is still not printed on all + * usable consoles. + * + * Holding the console_lock is not necessary if there + * are no legacy or boot consoles. However, such a + * console could register at any time. Always hold the + * console_lock as a precaution rather than + * synchronizing against register_console(). 
+ */ + console_lock(); + + cookie = console_srcu_read_lock(); + for_each_console_srcu(c) { + if (con && con != c) + continue; + + flags = console_srcu_read_flags(c); + + /* + * If consoles are not usable, it cannot be expected + * that they make forward progress, so only increment + * @diff for usable consoles. + */ + if (!console_is_usable(c, flags, true) && + !console_is_usable(c, flags, false)) { + continue; + } + + if (flags & CON_NBCON) { + printk_seq = nbcon_seq_read(c); + } else { + printk_seq = c->seq; + } + + if (printk_seq < seq) + diff += seq - printk_seq; + } + console_srcu_read_unlock(cookie); + + if (diff != last_diff && reset_on_progress) + remaining_jiffies = timeout_jiffies; + + console_unlock(); + + /* Note: @diff is 0 if there are no usable consoles. */ + if (diff == 0 || remaining_jiffies == 0) + break; + + /* msleep(1) might sleep much longer. Check time by jiffies. */ + begin_jiffies = jiffies; + msleep(1); + slept_jiffies = jiffies - begin_jiffies; + + remaining_jiffies -= min(slept_jiffies, remaining_jiffies); + + last_diff = diff; + } + + return (diff == 0); +} + +/** + * pr_flush() - Wait for printing threads to catch up. + * + * @timeout_ms: The maximum time (in ms) to wait. + * @reset_on_progress: Reset the timeout if forward progress is seen. + * + * A value of 0 for @timeout_ms means no waiting will occur. A value of -1 + * represents infinite waiting. + * + * If @reset_on_progress is true, the timeout will be reset whenever any + * printer has been seen to make some forward progress. + * + * Context: Process context. May sleep while acquiring console lock. + * Return: true if all usable printers are caught up. + */ +static bool pr_flush(int timeout_ms, bool reset_on_progress) +{ + return __pr_flush(NULL, timeout_ms, reset_on_progress); +} + /* * Delayed printk version, for scheduler-internal messages: */ @@ -3084,12 +4481,16 @@ static DEFINE_PER_CPU(int, printk_pending); static void wake_up_klogd_work_func(struct irq_work *irq_work) { - int pending = __this_cpu_xchg(printk_pending, 0); + int pending = this_cpu_xchg(printk_pending, 0); if (pending & PRINTK_PENDING_OUTPUT) { - /* If trylock fails, someone else is doing the printing */ - if (console_trylock()) - console_unlock(); + if (force_legacy_kthread()) { + if (printk_legacy_kthread) + wake_up_interruptible(&legacy_wait); + } else { + if (console_trylock()) + console_unlock(); + } } if (pending & PRINTK_PENDING_WAKEUP) @@ -3099,41 +4500,78 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = IRQ_WORK_INIT_LAZY(wake_up_klogd_work_func); -void wake_up_klogd(void) +static void __wake_up_klogd(int val) { if (!printk_percpu_data_ready()) return; preempt_disable(); - if (waitqueue_active(&log_wait)) { - this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); + /* + * Guarantee any new records can be seen by tasks preparing to wait + * before this context checks if the wait queue is empty. + * + * The full memory barrier within wq_has_sleeper() pairs with the full + * memory barrier within set_current_state() of + * prepare_to_wait_event(), which is called after ___wait_event() adds + * the waiter but before it has checked the wait condition. + * + * This pairs with devkmsg_read:A and syslog_print:A. 
+ */ + if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */ + (val & PRINTK_PENDING_OUTPUT)) { + this_cpu_or(printk_pending, val); irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); } preempt_enable(); } -void defer_console_output(void) +/** + * wake_up_klogd - Wake kernel logging daemon + * + * Use this function when new records have been added to the ringbuffer + * and the console printing of those records has already occurred or is + * known to be handled by some other context. This function will only + * wake the logging daemon. + * + * Context: Any context. + */ +void wake_up_klogd(void) { - if (!printk_percpu_data_ready()) - return; - - preempt_disable(); - __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); - irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); - preempt_enable(); + __wake_up_klogd(PRINTK_PENDING_WAKEUP); } -int vprintk_deferred(const char *fmt, va_list args) +/** + * defer_console_output - Wake kernel logging daemon and trigger + * console printing in a deferred context + * + * Use this function when new records have been added to the ringbuffer, + * this context is responsible for console printing those records, but + * the current context is not allowed to perform the console printing. + * Trigger an irq_work context to perform the console printing. This + * function also wakes the logging daemon. + * + * Context: Any context. + */ +void defer_console_output(void) { - int r; + /* + * New messages may have been added directly to the ringbuffer + * using vprintk_store(), so wake any waiters as well. + */ + __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); +} - r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); +void printk_trigger_flush(void) +{ defer_console_output(); +} - return r; +int vprintk_deferred(const char *fmt, va_list args) +{ + return vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); } -int printk_deferred(const char *fmt, ...) +int _printk_deferred(const char *fmt, ...) { va_list args; int r; @@ -3260,17 +4698,21 @@ const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); /** - * kmsg_dump - dump kernel log to kernel message dumpers. + * kmsg_dump_desc - dump kernel log to kernel message dumpers. * @reason: the reason (oops, panic etc) for dumping + * @desc: a short string to describe what caused the panic or oops. Can be NULL + * if no additional description is available. * * Call each of the registered dumper's dump() callback, which can * retrieve the kmsg records with kmsg_dump_get_line() or * kmsg_dump_get_buffer(). 
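The deferred path above is what the printk_deferred() wrapper around _printk_deferred() relies on; a hedged sketch of the kind of scheduler-internal call site it exists for, with an invented message and condition:

    #include <linux/printk.h>

    /* Invented example: report a problem while a runqueue lock is held. */
    static void example_report_from_scheduler(int cpu)
    {
            /*
             * Printing directly to the consoles here could recurse into
             * scheduler code, so only store the record and let irq_work
             * trigger the actual console output later.
             */
            printk_deferred(KERN_WARNING "example: cpu %d missed its slice\n", cpu);
    }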
*/ -void kmsg_dump(enum kmsg_dump_reason reason) +void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc) { struct kmsg_dumper *dumper; - unsigned long flags; + struct kmsg_dump_detail detail = { + .reason = reason, + .description = desc}; rcu_read_lock(); list_for_each_entry_rcu(dumper, &dump_list, list) { @@ -3287,26 +4729,15 @@ void kmsg_dump(enum kmsg_dump_reason reason) if (reason > max_reason) continue; - /* initialize iterator with data about the stored records */ - dumper->active = true; - - logbuf_lock_irqsave(flags); - dumper->cur_seq = clear_seq; - dumper->next_seq = prb_next_seq(prb); - logbuf_unlock_irqrestore(flags); - /* invoke dumper which will iterate over records */ - dumper->dump(dumper, reason); - - /* reset iterator */ - dumper->active = false; + dumper->dump(dumper, &detail); } rcu_read_unlock(); } /** - * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) - * @dumper: registered kmsg dumper + * kmsg_dump_get_line - retrieve one kmsg log line + * @iter: kmsg dump iterator * @syslog: include the "<4>" prefixes * @line: buffer to copy the line to * @size: maximum size of the buffer @@ -3320,30 +4751,29 @@ void kmsg_dump(enum kmsg_dump_reason reason) * * A return value of FALSE indicates that there are no more records to * read. - * - * The function is similar to kmsg_dump_get_line(), but grabs no locks. */ -bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, - char *line, size_t size, size_t *len) +bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, + char *line, size_t size, size_t *len) { + u64 min_seq = latched_seq_read_nolock(&clear_seq); struct printk_info info; unsigned int line_count; struct printk_record r; size_t l = 0; bool ret = false; - prb_rec_init_rd(&r, &info, line, size); + if (iter->cur_seq < min_seq) + iter->cur_seq = min_seq; - if (!dumper->active) - goto out; + prb_rec_init_rd(&r, &info, line, size); /* Read text or count text lines? */ if (line) { - if (!prb_read_valid(prb, dumper->cur_seq, &r)) + if (!prb_read_valid(prb, iter->cur_seq, &r)) goto out; l = record_print_text(&r, syslog, printk_time); } else { - if (!prb_read_valid_info(prb, dumper->cur_seq, + if (!prb_read_valid_info(prb, iter->cur_seq, &info, &line_count)) { goto out; } @@ -3352,52 +4782,22 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, } - dumper->cur_seq = r.info->seq + 1; + iter->cur_seq = r.info->seq + 1; ret = true; out: if (len) *len = l; return ret; } - -/** - * kmsg_dump_get_line - retrieve one kmsg log line - * @dumper: registered kmsg dumper - * @syslog: include the "<4>" prefixes - * @line: buffer to copy the line to - * @size: maximum size of the buffer - * @len: length of line placed into buffer - * - * Start at the beginning of the kmsg buffer, with the oldest kmsg - * record, and copy one record into the provided buffer. - * - * Consecutive calls will return the next available record moving - * towards the end of the buffer with the youngest messages. - * - * A return value of FALSE indicates that there are no more records to - * read. 
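The dumper side that kmsg_dump_desc() above calls into typically looks like the following sketch: a dump() callback that rewinds its own iterator and copies the newest records somewhere persistent. The buffer, the reason filter and the storage step are invented; the kmsg_dump_* calls follow the iterator API as reworked in this patch:

    #include <linux/kmsg_dump.h>

    static char example_dump_buf[4096];     /* invented destination buffer */

    static void example_dump(struct kmsg_dumper *dumper,
                             struct kmsg_dump_detail *detail)
    {
            struct kmsg_dump_iter iter;
            size_t len;

            if (detail->reason != KMSG_DUMP_PANIC)
                    return;

            /* Local iterator; rewind starts it at the oldest reportable record. */
            kmsg_dump_rewind(&iter);

            /* Fill the buffer with as many of the newest lines as fit. */
            if (kmsg_dump_get_buffer(&iter, true, example_dump_buf,
                                     sizeof(example_dump_buf), &len)) {
                    /* ... hand example_dump_buf[0..len) to persistent storage ... */
            }
    }

    static struct kmsg_dumper example_dumper = {
            .dump       = example_dump,
            .max_reason = KMSG_DUMP_PANIC,
    };

    /* Registered from some init path: kmsg_dump_register(&example_dumper); */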
- */ -bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, - char *line, size_t size, size_t *len) -{ - unsigned long flags; - bool ret; - - logbuf_lock_irqsave(flags); - ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); - logbuf_unlock_irqrestore(flags); - - return ret; -} EXPORT_SYMBOL_GPL(kmsg_dump_get_line); /** * kmsg_dump_get_buffer - copy kmsg log lines - * @dumper: registered kmsg dumper + * @iter: kmsg dump iterator * @syslog: include the "<4>" prefixes * @buf: buffer to copy the line to * @size: maximum size of the buffer - * @len: length of line placed into buffer + * @len_out: length of line placed into buffer * * Start at the end of the kmsg buffer and fill the provided buffer * with as many of the *youngest* kmsg records that fit into it. @@ -3410,114 +4810,232 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); * A return value of FALSE indicates that there are no more records to * read. */ -bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, - char *buf, size_t size, size_t *len) +bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, + char *buf, size_t size, size_t *len_out) { + u64 min_seq = latched_seq_read_nolock(&clear_seq); struct printk_info info; - unsigned int line_count; struct printk_record r; - unsigned long flags; u64 seq; u64 next_seq; - size_t l = 0; + size_t len = 0; bool ret = false; bool time = printk_time; - prb_rec_init_rd(&r, &info, buf, size); - - if (!dumper->active || !buf || !size) + if (!buf || !size) goto out; - logbuf_lock_irqsave(flags); - if (dumper->cur_seq < prb_first_valid_seq(prb)) { - /* messages are gone, move to first available one */ - dumper->cur_seq = prb_first_valid_seq(prb); + if (iter->cur_seq < min_seq) + iter->cur_seq = min_seq; + + if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { + if (info.seq != iter->cur_seq) { + /* messages are gone, move to first available one */ + iter->cur_seq = info.seq; + } } /* last entry */ - if (dumper->cur_seq >= dumper->next_seq) { - logbuf_unlock_irqrestore(flags); + if (iter->cur_seq >= iter->next_seq) goto out; - } - - /* calculate length of entire buffer */ - seq = dumper->cur_seq; - while (prb_read_valid_info(prb, seq, &info, &line_count)) { - if (r.info->seq >= dumper->next_seq) - break; - l += get_record_print_text_size(&info, line_count, syslog, time); - seq = r.info->seq + 1; - } - /* move first record forward until length fits into the buffer */ - seq = dumper->cur_seq; - while (l >= size && prb_read_valid_info(prb, seq, - &info, &line_count)) { - if (r.info->seq >= dumper->next_seq) - break; - l -= get_record_print_text_size(&info, line_count, syslog, time); - seq = r.info->seq + 1; - } + /* + * Find first record that fits, including all following records, + * into the user-provided buffer for this dump. Pass in size-1 + * because this function (by way of record_print_text()) will + * not write more than size-1 bytes of text into @buf. + */ + seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq, + size - 1, syslog, time); - /* last message in next interation */ + /* + * Next kmsg_dump_get_buffer() invocation will dump block of + * older records stored right before this one. 
+ */ next_seq = seq; - /* actually read text into the buffer now */ - l = 0; - while (prb_read_valid(prb, seq, &r)) { - if (r.info->seq >= dumper->next_seq) - break; + prb_rec_init_rd(&r, &info, buf, size); - l += record_print_text(&r, syslog, time); + prb_for_each_record(seq, prb, seq, &r) { + if (r.info->seq >= iter->next_seq) + break; - /* adjust record to store to remaining buffer space */ - prb_rec_init_rd(&r, &info, buf + l, size - l); + len += record_print_text(&r, syslog, time); - seq = r.info->seq + 1; + /* Adjust record to store to remaining buffer space. */ + prb_rec_init_rd(&r, &info, buf + len, size - len); } - dumper->next_seq = next_seq; + iter->next_seq = next_seq; ret = true; - logbuf_unlock_irqrestore(flags); out: - if (len) - *len = l; + if (len_out) + *len_out = len; return ret; } EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); /** - * kmsg_dump_rewind_nolock - reset the iterator (unlocked version) - * @dumper: registered kmsg dumper + * kmsg_dump_rewind - reset the iterator + * @iter: kmsg dump iterator * * Reset the dumper's iterator so that kmsg_dump_get_line() and * kmsg_dump_get_buffer() can be called again and used multiple * times within the same dumper.dump() callback. + */ +void kmsg_dump_rewind(struct kmsg_dump_iter *iter) +{ + iter->cur_seq = latched_seq_read_nolock(&clear_seq); + iter->next_seq = prb_next_seq(prb); +} +EXPORT_SYMBOL_GPL(kmsg_dump_rewind); + +/** + * console_try_replay_all - try to replay kernel log on consoles + * + * Try to obtain lock on console subsystem and replay all + * available records in printk buffer on the consoles. + * Does nothing if lock is not obtained. + * + * Context: Any, except for NMI. + */ +void console_try_replay_all(void) +{ + struct console_flush_type ft; + + printk_get_console_flush_type(&ft); + if (console_trylock()) { + __console_rewind_all(); + if (ft.nbcon_atomic) + nbcon_atomic_flush_pending(); + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + if (ft.legacy_offload) + defer_console_output(); + /* Consoles are flushed as part of console_unlock(). */ + console_unlock(); + } +} +#endif + +#ifdef CONFIG_SMP +static atomic_t printk_cpu_sync_owner = ATOMIC_INIT(-1); +static atomic_t printk_cpu_sync_nested = ATOMIC_INIT(0); + +bool is_printk_cpu_sync_owner(void) +{ + return (atomic_read(&printk_cpu_sync_owner) == raw_smp_processor_id()); +} + +/** + * __printk_cpu_sync_wait() - Busy wait until the printk cpu-reentrant + * spinning lock is not owned by any CPU. * - * The function is similar to kmsg_dump_rewind(), but grabs no locks. + * Context: Any context. */ -void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +void __printk_cpu_sync_wait(void) { - dumper->cur_seq = clear_seq; - dumper->next_seq = prb_next_seq(prb); + do { + cpu_relax(); + } while (atomic_read(&printk_cpu_sync_owner) != -1); } +EXPORT_SYMBOL(__printk_cpu_sync_wait); /** - * kmsg_dump_rewind - reset the iterator - * @dumper: registered kmsg dumper + * __printk_cpu_sync_try_get() - Try to acquire the printk cpu-reentrant + * spinning lock. * - * Reset the dumper's iterator so that kmsg_dump_get_line() and - * kmsg_dump_get_buffer() can be called again and used multiple - * times within the same dumper.dump() callback. + * If no processor has the lock, the calling processor takes the lock and + * becomes the owner. If the calling processor is already the owner of the + * lock, this function succeeds immediately. + * + * Context: Any context. Expects interrupts to be disabled. + * Return: 1 on success, otherwise 0. 
*/ -void kmsg_dump_rewind(struct kmsg_dumper *dumper) +int __printk_cpu_sync_try_get(void) { - unsigned long flags; + int cpu; + int old; + + cpu = smp_processor_id(); - logbuf_lock_irqsave(flags); - kmsg_dump_rewind_nolock(dumper); - logbuf_unlock_irqrestore(flags); + /* + * Guarantee loads and stores from this CPU when it is the lock owner + * are _not_ visible to the previous lock owner. This pairs with + * __printk_cpu_sync_put:B. + * + * Memory barrier involvement: + * + * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B, + * then __printk_cpu_sync_put:A can never read from + * __printk_cpu_sync_try_get:B. + * + * Relies on: + * + * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B + * of the previous CPU + * matching + * ACQUIRE from __printk_cpu_sync_try_get:A to + * __printk_cpu_sync_try_get:B of this CPU + */ + old = atomic_cmpxchg_acquire(&printk_cpu_sync_owner, -1, + cpu); /* LMM(__printk_cpu_sync_try_get:A) */ + if (old == -1) { + /* + * This CPU is now the owner and begins loading/storing + * data: LMM(__printk_cpu_sync_try_get:B) + */ + return 1; + + } else if (old == cpu) { + /* This CPU is already the owner. */ + atomic_inc(&printk_cpu_sync_nested); + return 1; + } + + return 0; } -EXPORT_SYMBOL_GPL(kmsg_dump_rewind); +EXPORT_SYMBOL(__printk_cpu_sync_try_get); -#endif +/** + * __printk_cpu_sync_put() - Release the printk cpu-reentrant spinning lock. + * + * The calling processor must be the owner of the lock. + * + * Context: Any context. Expects interrupts to be disabled. + */ +void __printk_cpu_sync_put(void) +{ + if (atomic_read(&printk_cpu_sync_nested)) { + atomic_dec(&printk_cpu_sync_nested); + return; + } + + /* + * This CPU is finished loading/storing data: + * LMM(__printk_cpu_sync_put:A) + */ + + /* + * Guarantee loads and stores from this CPU when it was the + * lock owner are visible to the next lock owner. This pairs + * with __printk_cpu_sync_try_get:A. + * + * Memory barrier involvement: + * + * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B, + * then __printk_cpu_sync_try_get:B reads from __printk_cpu_sync_put:A. + * + * Relies on: + * + * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B + * of this CPU + * matching + * ACQUIRE from __printk_cpu_sync_try_get:A to + * __printk_cpu_sync_try_get:B of the next CPU + */ + atomic_set_release(&printk_cpu_sync_owner, + -1); /* LMM(__printk_cpu_sync_put:B) */ +} +EXPORT_SYMBOL(__printk_cpu_sync_put); +#endif /* CONFIG_SMP */ |
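
For reference, the two sketches below show how a consumer might be written against the interfaces this patch touches. They are illustrative examples only, not part of the patch: every example_* identifier, the 4096-byte buffer and the module boilerplate are assumptions, and the code is a minimal sketch rather than a definitive implementation.

The first sketch is a dumper built on the iterator-based API (kmsg_dump_rewind()/kmsg_dump_get_buffer() operating on an on-stack struct kmsg_dump_iter) and on the struct kmsg_dump_detail callback argument used above:

/*
 * Hypothetical example module (not part of this patch): snapshot the
 * youngest kmsg records into a static buffer when an oops/panic occurs.
 */
#include <linux/kmsg_dump.h>
#include <linux/module.h>

static char example_buf[4096];

static void example_dump(struct kmsg_dumper *dumper,
			 struct kmsg_dump_detail *detail)
{
	struct kmsg_dump_iter iter;
	size_t len = 0;

	/* The cursor now lives on the callback's stack, not in the dumper. */
	kmsg_dump_rewind(&iter);

	/* Copies at most sizeof(example_buf) - 1 bytes of formatted text. */
	if (kmsg_dump_get_buffer(&iter, true, example_buf,
				 sizeof(example_buf), &len))
		example_buf[len] = '\0';

	/*
	 * detail->reason and detail->description replace the old bare
	 * 'reason' argument of the dump() callback.
	 */
}

static struct kmsg_dumper example_dumper = {
	.dump		= example_dump,
	.max_reason	= KMSG_DUMP_PANIC,
};

static int __init example_init(void)
{
	return kmsg_dump_register(&example_dumper);
}
module_init(example_init);

static void __exit example_exit(void)
{
	kmsg_dump_unregister(&example_dumper);
}
module_exit(example_exit);

MODULE_DESCRIPTION("kmsg_dump iterator API example (illustrative only)");
MODULE_LICENSE("GPL");

Because the iterator is local to each dump() invocation, concurrent dumpers no longer share per-dumper cur_seq/next_seq state, which is why the *_nolock() variants and the logbuf lock disappear in the hunks above.

The second sketch uses the __printk_cpu_sync_*() primitives through the printk_cpu_sync_get_irqsave()/printk_cpu_sync_put_irqrestore() wrappers from <linux/printk.h> to keep a multi-line message from being interleaved with output from other CPUs (the function itself is hypothetical):

static void example_emit_block(void)
{
	unsigned long flags;

	/*
	 * Spin with interrupts disabled until this CPU owns the lock.
	 * The owner may re-acquire it; nesting only bumps
	 * printk_cpu_sync_nested.
	 */
	printk_cpu_sync_get_irqsave(flags);

	pr_emerg("begin of non-interleaved block\n");
	pr_emerg("end of non-interleaved block\n");

	printk_cpu_sync_put_irqrestore(flags);
}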