summaryrefslogtreecommitdiff
path: root/kernel/printk/printk.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/printk/printk.c')
-rw-r--r--kernel/printk/printk.c3126
1 files changed, 2322 insertions, 804 deletions
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 5a95c688621f..07668433644b 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -34,7 +34,8 @@
#include <linux/security.h>
#include <linux/memblock.h>
#include <linux/syscalls.h>
-#include <linux/crash_core.h>
+#include <linux/syscore_ops.h>
+#include <linux/vmcore_info.h>
#include <linux/ratelimit.h>
#include <linux/kmsg_dump.h>
#include <linux/syslog.h>
@@ -71,6 +72,8 @@ EXPORT_SYMBOL_GPL(console_printk);
atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0);
EXPORT_SYMBOL(ignore_console_lock_warning);
+EXPORT_TRACEPOINT_SYMBOL_GPL(console);
+
/*
* Low level drivers may need that to know if they can schedule in
* their unblank() callback or not. So let's export it.
@@ -79,13 +82,20 @@ int oops_in_progress;
EXPORT_SYMBOL(oops_in_progress);
/*
- * console_sem protects the console_drivers list, and also
- * provides serialisation for access to the entire console
- * driver system.
+ * console_mutex protects console_list updates and console->flags updates.
+ * The flags are synchronized only for consoles that are registered, i.e.
+ * accessible via the console list.
*/
-static DEFINE_SEMAPHORE(console_sem);
-struct console *console_drivers;
-EXPORT_SYMBOL_GPL(console_drivers);
+static DEFINE_MUTEX(console_mutex);
+
+/*
+ * console_sem protects updates to console->seq
+ * and also provides serialization for console printing.
+ */
+static DEFINE_SEMAPHORE(console_sem, 1);
+HLIST_HEAD(console_list);
+EXPORT_SYMBOL_GPL(console_list);
+DEFINE_STATIC_SRCU(console_srcu);
/*
* System may need to suppress printk message under certain
@@ -97,6 +107,20 @@ int __read_mostly suppress_printk;
static struct lockdep_map console_lock_dep_map = {
.name = "console_lock"
};
+
+void lockdep_assert_console_list_lock_held(void)
+{
+ lockdep_assert_held(&console_mutex);
+}
+EXPORT_SYMBOL(lockdep_assert_console_list_lock_held);
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+bool console_srcu_read_lock_is_held(void)
+{
+ return srcu_read_lock_held(&console_srcu);
+}
+EXPORT_SYMBOL(console_srcu_read_lock_is_held);
#endif
enum devkmsg_log_bits {
@@ -146,16 +170,18 @@ static int __control_devkmsg(char *str)
static int __init control_devkmsg(char *str)
{
- if (__control_devkmsg(str) < 0)
+ if (__control_devkmsg(str) < 0) {
+ pr_warn("printk.devkmsg: bad option string '%s'\n", str);
return 1;
+ }
/*
* Set sysctl string accordingly:
*/
if (devkmsg_log == DEVKMSG_LOG_MASK_ON)
- strcpy(devkmsg_log_str, "on");
+ strscpy(devkmsg_log_str, "on");
else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF)
- strcpy(devkmsg_log_str, "off");
+ strscpy(devkmsg_log_str, "off");
/* else "ratelimit" which is set by default. */
/*
@@ -166,13 +192,13 @@ static int __init control_devkmsg(char *str)
*/
devkmsg_log |= DEVKMSG_LOG_MASK_LOCK;
- return 0;
+ return 1;
}
__setup("printk.devkmsg=", control_devkmsg);
char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit";
-
-int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
+#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
+int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
char old_str[DEVKMSG_STR_MAX_SIZE];
@@ -184,7 +210,7 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
return -EINVAL;
old = devkmsg_log;
- strncpy(old_str, devkmsg_log_str, DEVKMSG_STR_MAX_SIZE);
+ strscpy(old_str, devkmsg_log_str);
}
err = proc_dostring(table, write, buffer, lenp, ppos);
@@ -202,7 +228,7 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
/* ... and restore old setting. */
devkmsg_log = old;
- strncpy(devkmsg_log_str, old_str, DEVKMSG_STR_MAX_SIZE);
+ strscpy(devkmsg_log_str, old_str);
return -EINVAL;
}
@@ -210,9 +236,72 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
return 0;
}
+#endif /* CONFIG_PRINTK && CONFIG_SYSCTL */
+
+/**
+ * console_list_lock - Lock the console list
+ *
+ * For console list or console->flags updates
+ */
+void console_list_lock(void)
+{
+ /*
+ * In unregister_console() and console_force_preferred_locked(),
+ * synchronize_srcu() is called with the console_list_lock held.
+ * Therefore it is not allowed that the console_list_lock is taken
+ * with the srcu_lock held.
+ *
+ * Detecting if this context is really in the read-side critical
+ * section is only possible if the appropriate debug options are
+ * enabled.
+ */
+ WARN_ON_ONCE(debug_lockdep_rcu_enabled() &&
+ srcu_read_lock_held(&console_srcu));
+
+ mutex_lock(&console_mutex);
+}
+EXPORT_SYMBOL(console_list_lock);
+
+/**
+ * console_list_unlock - Unlock the console list
+ *
+ * Counterpart to console_list_lock()
+ */
+void console_list_unlock(void)
+{
+ mutex_unlock(&console_mutex);
+}
+EXPORT_SYMBOL(console_list_unlock);
+
+/**
+ * console_srcu_read_lock - Register a new reader for the
+ * SRCU-protected console list
+ *
+ * Use for_each_console_srcu() to iterate the console list
+ *
+ * Context: Any context.
+ * Return: A cookie to pass to console_srcu_read_unlock().
+ */
+int console_srcu_read_lock(void)
+ __acquires(&console_srcu)
+{
+ return srcu_read_lock_nmisafe(&console_srcu);
+}
+EXPORT_SYMBOL(console_srcu_read_lock);
-/* Number of registered extended console drivers. */
-static int nr_ext_console_drivers;
+/**
+ * console_srcu_read_unlock - Unregister an old reader from
+ * the SRCU-protected console list
+ * @cookie: cookie returned from console_srcu_read_lock()
+ *
+ * Counterpart to console_srcu_read_lock()
+ */
+void console_srcu_read_unlock(int cookie)
+ __releases(&console_srcu)
+{
+ srcu_read_unlock_nmisafe(&console_srcu, cookie);
+}
+EXPORT_SYMBOL(console_srcu_read_unlock);
/*
* Helper macros to handle lockdep when locking/unlocking console_sem. We use
@@ -256,20 +345,43 @@ static void __up_console_sem(unsigned long ip)
}
#define up_console_sem() __up_console_sem(_RET_IP_)
+static bool panic_in_progress(void)
+{
+ return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
+}
+
+/* Return true if a panic is in progress on the current CPU. */
+bool this_cpu_in_panic(void)
+{
+ /*
+ * We can use raw_smp_processor_id() here because it is impossible for
+ * the task to be migrated to the panic_cpu, or away from it. If
+ * panic_cpu has already been set, and we're not currently executing on
+ * that CPU, then we never will be.
+ */
+ return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id());
+}
+
+/*
+ * Return true if a panic is in progress on a remote CPU.
+ *
+ * On true, the local CPU should immediately release any printing resources
+ * that may be needed by the panic CPU.
+ */
+bool other_cpu_in_panic(void)
+{
+ return (panic_in_progress() && !this_cpu_in_panic());
+}
+
/*
* This is used for debugging the mess that is the VT code by
* keeping track if we have the console semaphore held. It's
* definitely not the perfect debug tool (we don't know if _WE_
* hold it and are racing, but it helps tracking those weird code
* paths in the console code where we end up in places I want
- * locked without the console sempahore held).
- */
-static int console_locked, console_suspended;
-
-/*
- * If exclusive_console is non-NULL then only this console is to be printed to.
+ * locked without the console semaphore held).
*/
-static struct console *exclusive_console;
+static int console_locked;
/*
* Array of consoles built from command line options (console=)
@@ -280,7 +392,6 @@ static struct console *exclusive_console;
static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
static int preferred_console = -1;
-static bool has_preferred_console;
int console_set_on_cmdline;
EXPORT_SYMBOL(console_set_on_cmdline);
@@ -350,67 +461,61 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
* non-prinatable characters are escaped in the "\xff" notation.
*/
-enum log_flags {
- LOG_NEWLINE = 2, /* text ended with a newline */
- LOG_CONT = 8, /* text is a fragment of a continuation line */
-};
+/* syslog_lock protects syslog_* variables and write access to clear_seq. */
+static DEFINE_MUTEX(syslog_lock);
/*
- * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken
- * within the scheduler's rq lock. It must be released before calling
- * console_unlock() or anything else that might wake up a process.
+ * Specifies if a legacy console is registered. If legacy consoles are
+ * present, it is necessary to perform the console lock/unlock dance
+ * whenever console flushing should occur.
*/
-DEFINE_RAW_SPINLOCK(logbuf_lock);
+bool have_legacy_console;
/*
- * Helper macros to lock/unlock logbuf_lock and switch between
- * printk-safe/unsafe modes.
+ * Specifies if an nbcon console is registered. If nbcon consoles are present,
+ * synchronous printing of legacy consoles will not occur during panic until
+ * the backtrace has been stored to the ringbuffer.
*/
-#define logbuf_lock_irq() \
- do { \
- printk_safe_enter_irq(); \
- raw_spin_lock(&logbuf_lock); \
- } while (0)
-
-#define logbuf_unlock_irq() \
- do { \
- raw_spin_unlock(&logbuf_lock); \
- printk_safe_exit_irq(); \
- } while (0)
+bool have_nbcon_console;
-#define logbuf_lock_irqsave(flags) \
- do { \
- printk_safe_enter_irqsave(flags); \
- raw_spin_lock(&logbuf_lock); \
- } while (0)
+/*
+ * Specifies if a boot console is registered. If boot consoles are present,
+ * nbcon consoles cannot print simultaneously and must be synchronized by
+ * the console lock. This is because boot consoles and nbcon consoles may
+ * have mapped the same hardware.
+ */
+bool have_boot_console;
-#define logbuf_unlock_irqrestore(flags) \
- do { \
- raw_spin_unlock(&logbuf_lock); \
- printk_safe_exit_irqrestore(flags); \
- } while (0)
+/* See printk_legacy_allow_panic_sync() for details. */
+bool legacy_allow_panic_sync;
#ifdef CONFIG_PRINTK
DECLARE_WAIT_QUEUE_HEAD(log_wait);
+static DECLARE_WAIT_QUEUE_HEAD(legacy_wait);
+/* All 3 protected by @syslog_lock. */
/* the next printk record to read by syslog(READ) or /proc/kmsg */
static u64 syslog_seq;
static size_t syslog_partial;
static bool syslog_time;
-/* the next printk record to write to the console */
-static u64 console_seq;
-static u64 exclusive_console_stop_seq;
-static unsigned long console_dropped;
+/* True when _all_ printer threads are available for printing. */
+bool printk_kthreads_running;
-/* the next printk record to read after the last 'clear' command */
-static u64 clear_seq;
+struct latched_seq {
+ seqcount_latch_t latch;
+ u64 val[2];
+};
-#ifdef CONFIG_PRINTK_CALLER
-#define PREFIX_MAX 48
-#else
-#define PREFIX_MAX 32
-#endif
-#define LOG_LINE_MAX (1024 - PREFIX_MAX)
+/*
+ * The next printk record to read after the last 'clear' command. There are
+ * two copies (updated with seqcount_latch) so that reads can locklessly
+ * access a valid value. Writers are synchronized by @syslog_lock.
+ */
+static struct latched_seq clear_seq = {
+ .latch = SEQCNT_LATCH_ZERO(clear_seq.latch),
+ .val[0] = 0,
+ .val[1] = 0,
+};
#define LOG_LEVEL(v) ((v) & 0x07)
#define LOG_FACILITY(v) ((v) >> 3 & 0xff)
@@ -418,7 +523,7 @@ static u64 clear_seq;
/* record buffer */
#define LOG_ALIGN __alignof__(unsigned long)
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
-#define LOG_BUF_LEN_MAX (u32)(1 << 31)
+#define LOG_BUF_LEN_MAX ((u32)1 << 31)
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;
@@ -438,20 +543,46 @@ _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS,
static struct printk_ringbuffer printk_rb_dynamic;
-static struct printk_ringbuffer *prb = &printk_rb_static;
+struct printk_ringbuffer *prb = &printk_rb_static;
/*
* We cannot access per-CPU data (e.g. per-CPU flush irq_work) before
* per_cpu_areas are initialised. This variable is set to true when
* it's safe to access per-CPU data.
*/
-static bool __printk_percpu_data_ready __read_mostly;
+static bool __printk_percpu_data_ready __ro_after_init;
bool printk_percpu_data_ready(void)
{
return __printk_percpu_data_ready;
}
+/* Must be called under syslog_lock. */
+static void latched_seq_write(struct latched_seq *ls, u64 val)
+{
+ write_seqcount_latch_begin(&ls->latch);
+ ls->val[0] = val;
+ write_seqcount_latch(&ls->latch);
+ ls->val[1] = val;
+ write_seqcount_latch_end(&ls->latch);
+}
+
+/* Can be called from any context. */
+static u64 latched_seq_read_nolock(struct latched_seq *ls)
+{
+ unsigned int seq;
+ unsigned int idx;
+ u64 val;
+
+ do {
+ seq = read_seqcount_latch(&ls->latch);
+ idx = seq & 0x1;
+ val = ls->val[idx];
+ } while (read_seqcount_latch_retry(&ls->latch, seq));
+
+ return val;
+}
+
/* Return log buffer address */
char *log_buf_addr_get(void)
{
@@ -517,17 +648,6 @@ static int check_syslog_permissions(int type, int source)
if (syslog_action_restricted(type)) {
if (capable(CAP_SYSLOG))
goto ok;
- /*
- * For historical reasons, accept CAP_SYS_ADMIN too, with
- * a warning.
- */
- if (capable(CAP_SYS_ADMIN)) {
- pr_warn_once("%s (%d): Attempt to access syslog with "
- "CAP_SYS_ADMIN but no CAP_SYSLOG "
- "(deprecated).\n",
- current->comm, task_pid_nr(current));
- goto ok;
- }
return -EPERM;
}
ok:
@@ -619,14 +739,10 @@ out:
/* /dev/kmsg - userspace message inject/listen interface */
struct devkmsg_user {
- u64 seq;
+ atomic64_t seq;
struct ratelimit_state rs;
struct mutex lock;
- char buf[CONSOLE_EXT_LOG_MAX];
-
- struct printk_info info;
- char text_buf[CONSOLE_EXT_LOG_MAX];
- struct printk_record record;
+ struct printk_buffers pbufs;
};
static __printf(3, 4) __cold
@@ -652,7 +768,7 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
size_t len = iov_iter_count(from);
ssize_t ret = len;
- if (!user || len > LOG_LINE_MAX)
+ if (len > PRINTKRB_RECORD_MAX)
return -EINVAL;
/* Ignore when user logging is disabled. */
@@ -708,59 +824,58 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
struct devkmsg_user *user = file->private_data;
- struct printk_record *r = &user->record;
- size_t len;
+ char *outbuf = &user->pbufs.outbuf[0];
+ struct printk_message pmsg = {
+ .pbufs = &user->pbufs,
+ };
ssize_t ret;
- if (!user)
- return -EBADF;
-
ret = mutex_lock_interruptible(&user->lock);
if (ret)
return ret;
- logbuf_lock_irq();
- if (!prb_read_valid(prb, user->seq, r)) {
+ if (!printk_get_next_message(&pmsg, atomic64_read(&user->seq), true, false)) {
if (file->f_flags & O_NONBLOCK) {
ret = -EAGAIN;
- logbuf_unlock_irq();
goto out;
}
- logbuf_unlock_irq();
+ /*
+ * Guarantee this task is visible on the waitqueue before
+ * checking the wake condition.
+ *
+ * The full memory barrier within set_current_state() of
+ * prepare_to_wait_event() pairs with the full memory barrier
+ * within wq_has_sleeper().
+ *
+ * This pairs with __wake_up_klogd:A.
+ */
ret = wait_event_interruptible(log_wait,
- prb_read_valid(prb, user->seq, r));
+ printk_get_next_message(&pmsg, atomic64_read(&user->seq), true,
+ false)); /* LMM(devkmsg_read:A) */
if (ret)
goto out;
- logbuf_lock_irq();
}
- if (user->seq < prb_first_valid_seq(prb)) {
+ if (pmsg.dropped) {
/* our last seen message is gone, return error and reset */
- user->seq = prb_first_valid_seq(prb);
+ atomic64_set(&user->seq, pmsg.seq);
ret = -EPIPE;
- logbuf_unlock_irq();
goto out;
}
- len = info_print_ext_header(user->buf, sizeof(user->buf), r->info);
- len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len,
- &r->text_buf[0], r->info->text_len,
- &r->info->dev_info);
-
- user->seq = r->info->seq + 1;
- logbuf_unlock_irq();
+ atomic64_set(&user->seq, pmsg.seq + 1);
- if (len > count) {
+ if (pmsg.outbuf_len > count) {
ret = -EINVAL;
goto out;
}
- if (copy_to_user(buf, user->buf, len)) {
+ if (copy_to_user(buf, outbuf, pmsg.outbuf_len)) {
ret = -EFAULT;
goto out;
}
- ret = len;
+ ret = pmsg.outbuf_len;
out:
mutex_unlock(&user->lock);
return ret;
@@ -779,16 +894,13 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
struct devkmsg_user *user = file->private_data;
loff_t ret = 0;
- if (!user)
- return -EBADF;
if (offset)
return -ESPIPE;
- logbuf_lock_irq();
switch (whence) {
case SEEK_SET:
/* the first record */
- user->seq = prb_first_valid_seq(prb);
+ atomic64_set(&user->seq, prb_first_valid_seq(prb));
break;
case SEEK_DATA:
/*
@@ -796,38 +908,33 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
* like issued by 'dmesg -c'. Reading /dev/kmsg itself
* changes no global state, and does not clear anything.
*/
- user->seq = clear_seq;
+ atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq));
break;
case SEEK_END:
/* after the last record */
- user->seq = prb_next_seq(prb);
+ atomic64_set(&user->seq, prb_next_seq(prb));
break;
default:
ret = -EINVAL;
}
- logbuf_unlock_irq();
return ret;
}
static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
{
struct devkmsg_user *user = file->private_data;
+ struct printk_info info;
__poll_t ret = 0;
- if (!user)
- return EPOLLERR|EPOLLNVAL;
-
poll_wait(file, &log_wait, wait);
- logbuf_lock_irq();
- if (prb_read_valid(prb, user->seq, NULL)) {
+ if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) {
/* return error when data has vanished underneath us */
- if (user->seq < prb_first_valid_seq(prb))
+ if (info.seq != atomic64_read(&user->seq))
ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
else
ret = EPOLLIN|EPOLLRDNORM;
}
- logbuf_unlock_irq();
return ret;
}
@@ -848,7 +955,7 @@ static int devkmsg_open(struct inode *inode, struct file *file)
return err;
}
- user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL);
+ user = kvmalloc(sizeof(struct devkmsg_user), GFP_KERNEL);
if (!user)
return -ENOMEM;
@@ -857,12 +964,7 @@ static int devkmsg_open(struct inode *inode, struct file *file)
mutex_init(&user->lock);
- prb_rec_init_rd(&user->record, &user->info,
- &user->text_buf[0], sizeof(user->text_buf));
-
- logbuf_lock_irq();
- user->seq = prb_first_valid_seq(prb);
- logbuf_unlock_irq();
+ atomic64_set(&user->seq, prb_first_valid_seq(prb));
file->private_data = user;
return 0;
@@ -872,13 +974,10 @@ static int devkmsg_release(struct inode *inode, struct file *file)
{
struct devkmsg_user *user = file->private_data;
- if (!user)
- return 0;
-
ratelimit_state_exit(&user->rs);
mutex_destroy(&user->lock);
- kfree(user);
+ kvfree(user);
return 0;
}
@@ -891,7 +990,7 @@ const struct file_operations kmsg_fops = {
.release = devkmsg_release,
};
-#ifdef CONFIG_CRASH_CORE
+#ifdef CONFIG_VMCORE_INFO
/*
* This appends the listed symbols to /proc/vmcore
*
@@ -954,6 +1053,9 @@ void log_buf_vmcoreinfo_setup(void)
VMCOREINFO_SIZE(atomic_long_t);
VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter);
+
+ VMCOREINFO_STRUCT_SIZE(latched_seq);
+ VMCOREINFO_OFFSET(latched_seq, val);
}
#endif
@@ -1025,9 +1127,6 @@ static inline void log_buf_add_cpu(void) {}
static void __init set_percpu_data_ready(void)
{
- printk_safe_init();
- /* Make sure we set this flag only after printk_safe() init is done */
- barrier();
__printk_percpu_data_ready = true;
}
@@ -1056,7 +1155,18 @@ static unsigned int __init add_to_rb(struct printk_ringbuffer *rb,
return prb_record_text_space(&e);
}
-static char setup_text_buf[LOG_LINE_MAX] __initdata;
+static char setup_text_buf[PRINTKRB_RECORD_MAX] __initdata;
+
+static void print_log_buf_usage_stats(void)
+{
+ unsigned int descs_count = log_buf_len >> PRB_AVGBITS;
+ size_t meta_data_size;
+
+ meta_data_size = descs_count * (sizeof(struct prb_desc) + sizeof(struct printk_info));
+
+ pr_info("log buffer data + meta data: %u + %zu = %zu bytes\n",
+ log_buf_len, meta_data_size, log_buf_len + meta_data_size);
+}
void __init setup_log_buf(int early)
{
@@ -1065,6 +1175,7 @@ void __init setup_log_buf(int early)
struct prb_desc *new_descs;
struct printk_info info;
struct printk_record r;
+ unsigned int text_size;
size_t new_descs_size;
size_t new_infos_size;
unsigned long flags;
@@ -1086,20 +1197,25 @@ void __init setup_log_buf(int early)
if (!early && !new_log_buf_len)
log_buf_add_cpu();
- if (!new_log_buf_len)
+ if (!new_log_buf_len) {
+ /* Show the memory stats only once. */
+ if (!early)
+ goto out;
+
return;
+ }
new_descs_count = new_log_buf_len >> PRB_AVGBITS;
if (new_descs_count == 0) {
pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len);
- return;
+ goto out;
}
new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN);
if (unlikely(!new_log_buf)) {
pr_err("log_buf_len: %lu text bytes not available\n",
new_log_buf_len);
- return;
+ goto out;
}
new_descs_size = new_descs_count * sizeof(struct prb_desc);
@@ -1125,39 +1241,54 @@ void __init setup_log_buf(int early)
new_descs, ilog2(new_descs_count),
new_infos);
- printk_safe_enter_irqsave(flags);
+ local_irq_save(flags);
log_buf_len = new_log_buf_len;
log_buf = new_log_buf;
new_log_buf_len = 0;
free = __LOG_BUF_LEN;
- prb_for_each_record(0, &printk_rb_static, seq, &r)
- free -= add_to_rb(&printk_rb_dynamic, &r);
+ prb_for_each_record(0, &printk_rb_static, seq, &r) {
+ text_size = add_to_rb(&printk_rb_dynamic, &r);
+ if (text_size > free)
+ free = 0;
+ else
+ free -= text_size;
+ }
- /*
- * This is early enough that everything is still running on the
- * boot CPU and interrupts are disabled. So no new messages will
- * appear during the transition to the dynamic buffer.
- */
prb = &printk_rb_dynamic;
- printk_safe_exit_irqrestore(flags);
+ local_irq_restore(flags);
+
+ /*
+ * Copy any remaining messages that might have appeared from
+ * NMI context after copying but before switching to the
+ * dynamic buffer.
+ */
+ prb_for_each_record(seq, &printk_rb_static, seq, &r) {
+ text_size = add_to_rb(&printk_rb_dynamic, &r);
+ if (text_size > free)
+ free = 0;
+ else
+ free -= text_size;
+ }
if (seq != prb_next_seq(&printk_rb_static)) {
pr_err("dropped %llu messages\n",
prb_next_seq(&printk_rb_static) - seq);
}
- pr_info("log_buf_len: %u bytes\n", log_buf_len);
+ print_log_buf_usage_stats();
pr_info("early log buf free: %u(%u%%)\n",
free, (free * 100) / __LOG_BUF_LEN);
return;
err_free_descs:
- memblock_free(__pa(new_descs), new_descs_size);
+ memblock_free(new_descs, new_descs_size);
err_free_log_buf:
- memblock_free(__pa(new_log_buf), new_log_buf_len);
+ memblock_free(new_log_buf, new_log_buf_len);
+out:
+ print_log_buf_usage_stats();
}
static bool __read_mostly ignore_loglevel;
@@ -1207,11 +1338,11 @@ static void boot_delay_msec(int level)
{
unsigned long long k;
unsigned long timeout;
+ bool suppress = !is_printk_force_console() &&
+ suppress_message_printing(level);
- if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING)
- || suppress_message_printing(level)) {
+ if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) || suppress)
return;
- }
k = (unsigned long long)loops_per_msec * boot_delay;
@@ -1308,7 +1439,7 @@ static size_t record_print_text(struct printk_record *r, bool syslog,
size_t text_len = r->info->text_len;
size_t buf_size = r->text_buf_size;
char *text = r->text_buf;
- char prefix[PREFIX_MAX];
+ char prefix[PRINTK_PREFIX_MAX];
bool truncated = false;
size_t prefix_len;
size_t line_len;
@@ -1407,7 +1538,7 @@ static size_t get_record_print_text_size(struct printk_info *info,
unsigned int line_count,
bool syslog, bool time)
{
- char prefix[PREFIX_MAX];
+ char prefix[PRINTK_PREFIX_MAX];
size_t prefix_len;
prefix_len = info_print_prefix(info, syslog, time, prefix);
@@ -1420,28 +1551,105 @@ static size_t get_record_print_text_size(struct printk_info *info,
return ((prefix_len * line_count) + info->text_len + 1);
}
+/*
+ * Beginning with @start_seq, find the first record where it and all following
+ * records up to (but not including) @max_seq fit into @size.
+ *
+ * @max_seq is simply an upper bound and does not need to exist. If the caller
+ * does not require an upper bound, -1 can be used for @max_seq.
+ */
+static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
+ bool syslog, bool time)
+{
+ struct printk_info info;
+ unsigned int line_count;
+ size_t len = 0;
+ u64 seq;
+
+ /* Determine the size of the records up to @max_seq. */
+ prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
+ if (info.seq >= max_seq)
+ break;
+ len += get_record_print_text_size(&info, line_count, syslog, time);
+ }
+
+ /*
+ * Adjust the upper bound for the next loop to avoid subtracting
+ * lengths that were never added.
+ */
+ if (seq < max_seq)
+ max_seq = seq;
+
+ /*
+ * Move first record forward until length fits into the buffer. Ignore
+ * newest messages that were not counted in the above cycle. Messages
+ * might appear and get lost in the meantime. This is a best effort
+ * that prevents an infinite loop that could occur with a retry.
+ */
+ prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
+ if (len <= size || info.seq >= max_seq)
+ break;
+ len -= get_record_print_text_size(&info, line_count, syslog, time);
+ }
+
+ return seq;
+}
+
+/* The caller is responsible for making sure @size is greater than 0. */
static int syslog_print(char __user *buf, int size)
{
struct printk_info info;
struct printk_record r;
char *text;
int len = 0;
+ u64 seq;
- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
+ text = kmalloc(PRINTK_MESSAGE_MAX, GFP_KERNEL);
if (!text)
return -ENOMEM;
- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
+ prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX);
+
+ mutex_lock(&syslog_lock);
+
+ /*
+ * Wait for the @syslog_seq record to be available. @syslog_seq may
+ * change while waiting.
+ */
+ do {
+ seq = syslog_seq;
+
+ mutex_unlock(&syslog_lock);
+ /*
+ * Guarantee this task is visible on the waitqueue before
+ * checking the wake condition.
+ *
+ * The full memory barrier within set_current_state() of
+ * prepare_to_wait_event() pairs with the full memory barrier
+ * within wq_has_sleeper().
+ *
+ * This pairs with __wake_up_klogd:A.
+ */
+ len = wait_event_interruptible(log_wait,
+ prb_read_valid(prb, seq, NULL)); /* LMM(syslog_print:A) */
+ mutex_lock(&syslog_lock);
+
+ if (len)
+ goto out;
+ } while (syslog_seq != seq);
- while (size > 0) {
+ /*
+ * Copy records that fit into the buffer. The above cycle makes sure
+ * that the first record is always available.
+ */
+ do {
size_t n;
size_t skip;
+ int err;
- logbuf_lock_irq();
- if (!prb_read_valid(prb, syslog_seq, &r)) {
- logbuf_unlock_irq();
+ if (!prb_read_valid(prb, syslog_seq, &r))
break;
- }
+
if (r.info->seq != syslog_seq) {
/* message is gone, move to next valid one */
syslog_seq = r.info->seq;
@@ -1468,12 +1676,15 @@ static int syslog_print(char __user *buf, int size)
syslog_partial += n;
} else
n = 0;
- logbuf_unlock_irq();
if (!n)
break;
- if (copy_to_user(buf, text + skip, n)) {
+ mutex_unlock(&syslog_lock);
+ err = copy_to_user(buf, text + skip, n);
+ mutex_lock(&syslog_lock);
+
+ if (err) {
if (!len)
len = -EFAULT;
break;
@@ -1482,8 +1693,9 @@ static int syslog_print(char __user *buf, int size)
len += n;
size -= n;
buf += n;
- }
-
+ } while (size);
+out:
+ mutex_unlock(&syslog_lock);
kfree(text);
return len;
}
@@ -1491,36 +1703,26 @@ static int syslog_print(char __user *buf, int size)
static int syslog_print_all(char __user *buf, int size, bool clear)
{
struct printk_info info;
- unsigned int line_count;
struct printk_record r;
char *text;
int len = 0;
u64 seq;
bool time;
- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
+ text = kmalloc(PRINTK_MESSAGE_MAX, GFP_KERNEL);
if (!text)
return -ENOMEM;
time = printk_time;
- logbuf_lock_irq();
/*
* Find first record that fits, including all following records,
* into the user-provided buffer for this dump.
*/
- prb_for_each_info(clear_seq, prb, seq, &info, &line_count)
- len += get_record_print_text_size(&info, line_count, true, time);
-
- /* move first record forward until length fits into the buffer */
- prb_for_each_info(clear_seq, prb, seq, &info, &line_count) {
- if (len <= size)
- break;
- len -= get_record_print_text_size(&info, line_count, true, time);
- }
+ seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1,
+ size, true, time);
- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
+ prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX);
- len = 0;
prb_for_each_record(seq, prb, seq, &r) {
int textlen;
@@ -1531,20 +1733,20 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
break;
}
- logbuf_unlock_irq();
if (copy_to_user(buf + len, text, textlen))
len = -EFAULT;
else
len += textlen;
- logbuf_lock_irq();
if (len < 0)
break;
}
- if (clear)
- clear_seq = seq;
- logbuf_unlock_irq();
+ if (clear) {
+ mutex_lock(&syslog_lock);
+ latched_seq_write(&clear_seq, seq);
+ mutex_unlock(&syslog_lock);
+ }
kfree(text);
return len;
@@ -1552,13 +1754,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
static void syslog_clear(void)
{
- logbuf_lock_irq();
- clear_seq = prb_next_seq(prb);
- logbuf_unlock_irq();
+ mutex_lock(&syslog_lock);
+ latched_seq_write(&clear_seq, prb_next_seq(prb));
+ mutex_unlock(&syslog_lock);
}
int do_syslog(int type, char __user *buf, int len, int source)
{
+ struct printk_info info;
bool clear = false;
static int saved_console_loglevel = LOGLEVEL_DEFAULT;
int error;
@@ -1579,10 +1782,6 @@ int do_syslog(int type, char __user *buf, int len, int source)
return 0;
if (!access_ok(buf, len))
return -EFAULT;
- error = wait_event_interruptible(log_wait,
- prb_read_valid(prb, syslog_seq, NULL));
- if (error)
- return error;
error = syslog_print(buf, len);
break;
/* Read/clear last kernel messages */
@@ -1628,10 +1827,15 @@ int do_syslog(int type, char __user *buf, int len, int source)
break;
/* Number of chars in the log buffer */
case SYSLOG_ACTION_SIZE_UNREAD:
- logbuf_lock_irq();
- if (syslog_seq < prb_first_valid_seq(prb)) {
+ mutex_lock(&syslog_lock);
+ if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) {
+ /* No unread messages. */
+ mutex_unlock(&syslog_lock);
+ return 0;
+ }
+ if (info.seq != syslog_seq) {
/* messages are gone, move to first one */
- syslog_seq = prb_first_valid_seq(prb);
+ syslog_seq = info.seq;
syslog_partial = 0;
}
if (source == SYSLOG_FROM_PROC) {
@@ -1643,7 +1847,6 @@ int do_syslog(int type, char __user *buf, int len, int source)
error = prb_next_seq(prb) - syslog_seq;
} else {
bool time = syslog_partial ? syslog_time : printk_time;
- struct printk_info info;
unsigned int line_count;
u64 seq;
@@ -1655,7 +1858,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
}
error -= syslog_partial;
}
- logbuf_unlock_irq();
+ mutex_unlock(&syslog_lock);
break;
/* Size of the log buffer */
case SYSLOG_ACTION_SIZE_BUFFER:
@@ -1698,12 +1901,25 @@ static bool console_waiter;
* there may be a waiter spinning (like a spinlock). Also it must be
* ready to hand over the lock at the end of the section.
*/
-static void console_lock_spinning_enable(void)
+void console_lock_spinning_enable(void)
{
+ /*
+ * Do not use spinning in panic(). The panic CPU wants to keep the lock.
+ * Non-panic CPUs abandon the flush anyway.
+ *
+ * Just keep the lockdep annotation. The panic-CPU should avoid
+ * taking console_owner_lock because it might cause a deadlock.
+ * This looks like the easiest way how to prevent false lockdep
+ * reports without handling races a lockless way.
+ */
+ if (panic_in_progress())
+ goto lockdep;
+
raw_spin_lock(&console_owner_lock);
console_owner = current;
raw_spin_unlock(&console_owner_lock);
+lockdep:
/* The waiter may spin on us after setting console_owner */
spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
}
@@ -1711,22 +1927,39 @@ static void console_lock_spinning_enable(void)
/**
* console_lock_spinning_disable_and_check - mark end of code where another
* thread was able to busy wait and check if there is a waiter
+ * @cookie: cookie returned from console_srcu_read_lock()
*
* This is called at the end of the section where spinning is allowed.
* It has two functions. First, it is a signal that it is no longer
* safe to start busy waiting for the lock. Second, it checks if
* there is a busy waiter and passes the lock rights to her.
*
- * Important: Callers lose the lock if there was a busy waiter.
- * They must not touch items synchronized by console_lock
- * in this case.
+ * Important: Callers lose both the console_lock and the SRCU read lock if
+ * there was a busy waiter. They must not touch items synchronized by
+ * console_lock or SRCU read lock in this case.
*
* Return: 1 if the lock rights were passed, 0 otherwise.
*/
-static int console_lock_spinning_disable_and_check(void)
+int console_lock_spinning_disable_and_check(int cookie)
{
int waiter;
+ /*
+ * Ignore spinning waiters during panic() because they might get stopped
+ * or blocked at any time,
+ *
+ * It is safe because nobody is allowed to start spinning during panic
+ * in the first place. If there has been a waiter then non panic CPUs
+ * might stay spinning. They would get stopped anyway. The panic context
+ * will never start spinning and an interrupted spin on panic CPU will
+ * never continue.
+ */
+ if (panic_in_progress()) {
+ /* Keep lockdep happy. */
+ spin_release(&console_owner_dep_map, _THIS_IP_);
+ return 0;
+ }
+
raw_spin_lock(&console_owner_lock);
waiter = READ_ONCE(console_waiter);
console_owner = NULL;
@@ -1743,6 +1976,12 @@ static int console_lock_spinning_disable_and_check(void)
spin_release(&console_owner_dep_map, _THIS_IP_);
/*
+ * Preserve lockdep lock ordering. Release the SRCU read lock before
+ * releasing the console_lock.
+ */
+ console_srcu_read_unlock(cookie);
+
+ /*
* Hand off console_lock to waiter. The waiter will perform
* the up(). After this, the waiter is the console_lock owner.
*/
@@ -1770,6 +2009,16 @@ static int console_trylock_spinning(void)
if (console_trylock())
return 1;
+ /*
+ * It's unsafe to spin once a panic has begun. If we are the
+ * panic CPU, we may have already halted the owner of the
+ * console_sem. If we are not the panic CPU, then we should
+ * avoid taking console_sem, so the panic CPU has a better
+ * chance of cleanly acquiring it later.
+ */
+ if (panic_in_progress())
+ return 0;
+
printk_safe_enter_irqsave(flags);
raw_spin_lock(&console_owner_lock);
@@ -1811,57 +2060,91 @@ static int console_trylock_spinning(void)
*/
mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
+ /*
+ * Update @console_may_schedule for trylock because the previous
+ * owner may have been schedulable.
+ */
+ console_may_schedule = 0;
+
return 1;
}
/*
- * Call the console drivers, asking them to write out
- * log_buf[start] to log_buf[end - 1].
- * The console_lock must be held.
+ * Recursion is tracked separately on each CPU. If NMIs are supported, an
+ * additional NMI context per CPU is also separately tracked. Until per-CPU
+ * is available, a separate "early tracking" is performed.
*/
-static void call_console_drivers(const char *ext_text, size_t ext_len,
- const char *text, size_t len)
-{
- static char dropped_text[64];
- size_t dropped_len = 0;
- struct console *con;
-
- trace_console_rcuidle(text, len);
-
- if (!console_drivers)
- return;
+static DEFINE_PER_CPU(u8, printk_count);
+static u8 printk_count_early;
+#ifdef CONFIG_HAVE_NMI
+static DEFINE_PER_CPU(u8, printk_count_nmi);
+static u8 printk_count_nmi_early;
+#endif
- if (console_dropped) {
- dropped_len = snprintf(dropped_text, sizeof(dropped_text),
- "** %lu printk messages dropped **\n",
- console_dropped);
- console_dropped = 0;
- }
+/*
+ * Recursion is limited to keep the output sane. printk() should not require
+ * more than 1 level of recursion (allowing, for example, printk() to trigger
+ * a WARN), but a higher value is used in case some printk-internal errors
+ * exist, such as the ringbuffer validation checks failing.
+ */
+#define PRINTK_MAX_RECURSION 3
- for_each_console(con) {
- if (exclusive_console && con != exclusive_console)
- continue;
- if (!(con->flags & CON_ENABLED))
- continue;
- if (!con->write)
- continue;
- if (!cpu_online(smp_processor_id()) &&
- !(con->flags & CON_ANYTIME))
- continue;
- if (con->flags & CON_EXTENDED)
- con->write(con, ext_text, ext_len);
- else {
- if (dropped_len)
- con->write(con, dropped_text, dropped_len);
- con->write(con, text, len);
- }
+/*
+ * Return a pointer to the dedicated counter for the CPU+context of the
+ * caller.
+ */
+static u8 *__printk_recursion_counter(void)
+{
+#ifdef CONFIG_HAVE_NMI
+ if (in_nmi()) {
+ if (printk_percpu_data_ready())
+ return this_cpu_ptr(&printk_count_nmi);
+ return &printk_count_nmi_early;
}
+#endif
+ if (printk_percpu_data_ready())
+ return this_cpu_ptr(&printk_count);
+ return &printk_count_early;
}
+/*
+ * Enter recursion tracking. Interrupts are disabled to simplify tracking.
+ * The caller must check the boolean return value to see if the recursion is
+ * allowed. On failure, interrupts are not disabled.
+ *
+ * @recursion_ptr must be a variable of type (u8 *) and is the same variable
+ * that is passed to printk_exit_irqrestore().
+ */
+#define printk_enter_irqsave(recursion_ptr, flags) \
+({ \
+ bool success = true; \
+ \
+ typecheck(u8 *, recursion_ptr); \
+ local_irq_save(flags); \
+ (recursion_ptr) = __printk_recursion_counter(); \
+ if (*(recursion_ptr) > PRINTK_MAX_RECURSION) { \
+ local_irq_restore(flags); \
+ success = false; \
+ } else { \
+ (*(recursion_ptr))++; \
+ } \
+ success; \
+})
+
+/* Exit recursion tracking, restoring interrupts. */
+#define printk_exit_irqrestore(recursion_ptr, flags) \
+ do { \
+ typecheck(u8 *, recursion_ptr); \
+ (*(recursion_ptr))--; \
+ local_irq_restore(flags); \
+ } while (0)
+
int printk_delay_msec __read_mostly;
-static inline void printk_delay(void)
+static inline void printk_delay(int level)
{
+ boot_delay_msec(level);
+
if (unlikely(printk_delay_msec)) {
int m = printk_delay_msec;
@@ -1875,27 +2158,28 @@ static inline void printk_delay(void)
static inline u32 printk_caller_id(void)
{
return in_task() ? task_pid_nr(current) :
- 0x80000000 + raw_smp_processor_id();
+ 0x80000000 + smp_processor_id();
}
/**
- * parse_prefix - Parse level and control flags.
+ * printk_parse_prefix - Parse level and control flags.
*
* @text: The terminated text message.
* @level: A pointer to the current level value, will be updated.
- * @lflags: A pointer to the current log flags, will be updated.
+ * @flags: A pointer to the current printk_info flags, will be updated.
*
* @level may be NULL if the caller is not interested in the parsed value.
* Otherwise the variable pointed to by @level must be set to
* LOGLEVEL_DEFAULT in order to be updated with the parsed value.
*
- * @lflags may be NULL if the caller is not interested in the parsed value.
- * Otherwise the variable pointed to by @lflags will be OR'd with the parsed
+ * @flags may be NULL if the caller is not interested in the parsed value.
+ * Otherwise the variable pointed to by @flags will be OR'd with the parsed
* value.
*
* Return: The length of the parsed level and control flags.
*/
-static u16 parse_prefix(char *text, int *level, enum log_flags *lflags)
+u16 printk_parse_prefix(const char *text, int *level,
+ enum printk_info_flags *flags)
{
u16 prefix_len = 0;
int kern_level;
@@ -1911,8 +2195,8 @@ static u16 parse_prefix(char *text, int *level, enum log_flags *lflags)
*level = kern_level - '0';
break;
case 'c': /* KERN_CONT */
- if (lflags)
- *lflags |= LOG_CONT;
+ if (flags)
+ *flags |= LOG_CONT;
}
prefix_len += 2;
@@ -1922,8 +2206,10 @@ static u16 parse_prefix(char *text, int *level, enum log_flags *lflags)
return prefix_len;
}
-static u16 printk_sprint(char *text, u16 size, int facility, enum log_flags *lflags,
- const char *fmt, va_list args)
+__printf(5, 0)
+static u16 printk_sprint(char *text, u16 size, int facility,
+ enum printk_info_flags *flags, const char *fmt,
+ va_list args)
{
u16 text_len;
@@ -1932,20 +2218,22 @@ static u16 printk_sprint(char *text, u16 size, int facility, enum log_flags *lfl
/* Mark and strip a trailing newline. */
if (text_len && text[text_len - 1] == '\n') {
text_len--;
- *lflags |= LOG_NEWLINE;
+ *flags |= LOG_NEWLINE;
}
/* Strip log level and control flags. */
if (facility == 0) {
u16 prefix_len;
- prefix_len = parse_prefix(text, NULL, NULL);
+ prefix_len = printk_parse_prefix(text, NULL, NULL);
if (prefix_len) {
text_len -= prefix_len;
memmove(text, text + prefix_len, text_len);
}
}
+ trace_console(text, text_len);
+
return text_len;
}
@@ -1954,17 +2242,23 @@ int vprintk_store(int facility, int level,
const struct dev_printk_info *dev_info,
const char *fmt, va_list args)
{
- const u32 caller_id = printk_caller_id();
struct prb_reserved_entry e;
- enum log_flags lflags = 0;
+ enum printk_info_flags flags = 0;
struct printk_record r;
+ unsigned long irqflags;
u16 trunc_msg_len = 0;
char prefix_buf[8];
+ u8 *recursion_ptr;
u16 reserve_size;
va_list args2;
+ u32 caller_id;
u16 text_len;
+ int ret = 0;
u64 ts_nsec;
+ if (!printk_enter_irqsave(recursion_ptr, irqflags))
+ return 0;
+
/*
* Since the duration of printk() can vary depending on the message
* and state of the ringbuffer, grab the timestamp now so that it is
@@ -1973,6 +2267,8 @@ int vprintk_store(int facility, int level,
*/
ts_nsec = local_clock();
+ caller_id = printk_caller_id();
+
/*
* The sprintf needs to come first since the syslog prefix might be
* passed in as a parameter. An extra byte must be reserved so that
@@ -1983,34 +2279,41 @@ int vprintk_store(int facility, int level,
reserve_size = vsnprintf(&prefix_buf[0], sizeof(prefix_buf), fmt, args2) + 1;
va_end(args2);
- if (reserve_size > LOG_LINE_MAX)
- reserve_size = LOG_LINE_MAX;
+ if (reserve_size > PRINTKRB_RECORD_MAX)
+ reserve_size = PRINTKRB_RECORD_MAX;
/* Extract log level or control flags. */
if (facility == 0)
- parse_prefix(&prefix_buf[0], &level, &lflags);
+ printk_parse_prefix(&prefix_buf[0], &level, &flags);
if (level == LOGLEVEL_DEFAULT)
level = default_message_loglevel;
if (dev_info)
- lflags |= LOG_NEWLINE;
+ flags |= LOG_NEWLINE;
+
+ if (is_printk_force_console())
+ flags |= LOG_FORCE_CON;
- if (lflags & LOG_CONT) {
+ if (flags & LOG_CONT) {
prb_rec_init_wr(&r, reserve_size);
- if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) {
+ if (prb_reserve_in_last(&e, prb, &r, caller_id, PRINTKRB_RECORD_MAX)) {
text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
- facility, &lflags, fmt, args);
+ facility, &flags, fmt, args);
r.info->text_len += text_len;
- if (lflags & LOG_NEWLINE) {
+ if (flags & LOG_FORCE_CON)
+ r.info->flags |= LOG_FORCE_CON;
+
+ if (flags & LOG_NEWLINE) {
r.info->flags |= LOG_NEWLINE;
prb_final_commit(&e);
} else {
prb_commit(&e);
}
- return text_len;
+ ret = text_len;
+ goto out;
}
}
@@ -2026,156 +2329,153 @@ int vprintk_store(int facility, int level,
prb_rec_init_wr(&r, reserve_size + trunc_msg_len);
if (!prb_reserve(&e, prb, &r))
- return 0;
+ goto out;
}
/* fill message */
- text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &lflags, fmt, args);
+ text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args);
if (trunc_msg_len)
memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len);
r.info->text_len = text_len + trunc_msg_len;
r.info->facility = facility;
r.info->level = level & 7;
- r.info->flags = lflags & 0x1f;
+ r.info->flags = flags & 0x1f;
r.info->ts_nsec = ts_nsec;
r.info->caller_id = caller_id;
if (dev_info)
memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));
/* A message without a trailing newline can be continued. */
- if (!(lflags & LOG_NEWLINE))
+ if (!(flags & LOG_NEWLINE))
prb_commit(&e);
else
prb_final_commit(&e);
- return (text_len + trunc_msg_len);
+ ret = text_len + trunc_msg_len;
+out:
+ printk_exit_irqrestore(recursion_ptr, irqflags);
+ return ret;
+}
+
+/*
+ * This acts as a one-way switch to allow legacy consoles to print from
+ * the printk() caller context on a panic CPU. It also attempts to flush
+ * the legacy consoles in this context.
+ */
+void printk_legacy_allow_panic_sync(void)
+{
+ struct console_flush_type ft;
+
+ legacy_allow_panic_sync = true;
+
+ printk_get_console_flush_type(&ft);
+ if (ft.legacy_direct) {
+ if (console_trylock())
+ console_unlock();
+ }
}
asmlinkage int vprintk_emit(int facility, int level,
const struct dev_printk_info *dev_info,
const char *fmt, va_list args)
{
+ struct console_flush_type ft;
int printed_len;
- bool in_sched = false;
- unsigned long flags;
/* Suppress unimportant messages after panic happens */
if (unlikely(suppress_printk))
return 0;
+ /*
+ * The messages on the panic CPU are the most important. If
+ * non-panic CPUs are generating any messages, they will be
+ * silently dropped.
+ */
+ if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace)
+ return 0;
+
+ printk_get_console_flush_type(&ft);
+
+ /* If called from the scheduler, we can not call up(). */
if (level == LOGLEVEL_SCHED) {
level = LOGLEVEL_DEFAULT;
- in_sched = true;
+ ft.legacy_offload |= ft.legacy_direct;
+ ft.legacy_direct = false;
}
- boot_delay_msec(level);
- printk_delay();
+ printk_delay(level);
- printk_safe_enter_irqsave(flags);
printed_len = vprintk_store(facility, level, dev_info, fmt, args);
- printk_safe_exit_irqrestore(flags);
- /* If called from the scheduler, we can not call up(). */
- if (!in_sched) {
+ if (ft.nbcon_atomic)
+ nbcon_atomic_flush_pending();
+
+ if (ft.nbcon_offload)
+ nbcon_kthreads_wake();
+
+ if (ft.legacy_direct) {
/*
- * Disable preemption to avoid being preempted while holding
- * console_sem which would prevent anyone from printing to
- * console
+ * The caller may be holding system-critical or
+ * timing-sensitive locks. Disable preemption during
+ * printing of all remaining records to all consoles so that
+ * this context can return as soon as possible. Hopefully
+ * another printk() caller will take over the printing.
*/
preempt_disable();
/*
* Try to acquire and then immediately release the console
- * semaphore. The release will print out buffers and wake up
- * /dev/kmsg and syslog() users.
+ * semaphore. The release will print out buffers. With the
+ * spinning variant, this context tries to take over the
+ * printing from another printing context.
*/
if (console_trylock_spinning())
console_unlock();
preempt_enable();
}
- wake_up_klogd();
+ if (ft.legacy_offload)
+ defer_console_output();
+ else
+ wake_up_klogd();
+
return printed_len;
}
EXPORT_SYMBOL(vprintk_emit);
-asmlinkage int vprintk(const char *fmt, va_list args)
-{
- return vprintk_func(fmt, args);
-}
-EXPORT_SYMBOL(vprintk);
-
int vprintk_default(const char *fmt, va_list args)
{
return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
}
EXPORT_SYMBOL_GPL(vprintk_default);
-/**
- * printk - print a kernel message
- * @fmt: format string
- *
- * This is printk(). It can be called from any context. We want it to work.
- *
- * We try to grab the console_lock. If we succeed, it's easy - we log the
- * output and call the console drivers. If we fail to get the semaphore, we
- * place the output into the log buffer and return. The current holder of
- * the console_sem will notice the new output in console_unlock(); and will
- * send it to the consoles before releasing the lock.
- *
- * One effect of this deferred printing is that code which calls printk() and
- * then changes console_loglevel may break. This is because console_loglevel
- * is inspected when the actual printing occurs.
- *
- * See also:
- * printf(3)
- *
- * See the vsnprintf() documentation for format string extensions over C99.
- */
-asmlinkage __visible int printk(const char *fmt, ...)
+asmlinkage __visible int _printk(const char *fmt, ...)
{
va_list args;
int r;
va_start(args, fmt);
- r = vprintk_func(fmt, args);
+ r = vprintk(fmt, args);
va_end(args);
return r;
}
-EXPORT_SYMBOL(printk);
+EXPORT_SYMBOL(_printk);
+
+static bool pr_flush(int timeout_ms, bool reset_on_progress);
+static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress);
#else /* CONFIG_PRINTK */
-#define LOG_LINE_MAX 0
-#define PREFIX_MAX 0
#define printk_time false
#define prb_read_valid(rb, seq, r) false
#define prb_first_valid_seq(rb) 0
+#define prb_next_seq(rb) 0
static u64 syslog_seq;
-static u64 console_seq;
-static u64 exclusive_console_stop_seq;
-static unsigned long console_dropped;
-static size_t record_print_text(const struct printk_record *r,
- bool syslog, bool time)
-{
- return 0;
-}
-static ssize_t info_print_ext_header(char *buf, size_t size,
- struct printk_info *info)
-{
- return 0;
-}
-static ssize_t msg_print_ext_body(char *buf, size_t size,
- char *text, size_t text_len,
- struct dev_printk_info *dev_info) { return 0; }
-static void console_lock_spinning_enable(void) { }
-static int console_lock_spinning_disable_and_check(void) { return 0; }
-static void call_console_drivers(const char *ext_text, size_t ext_len,
- const char *text, size_t len) {}
-static bool suppress_message_printing(int level) { return false; }
+static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; }
+static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; }
#endif /* CONFIG_PRINTK */
@@ -2199,24 +2499,51 @@ asmlinkage __visible void early_printk(const char *fmt, ...)
}
#endif
-static int __add_preferred_console(char *name, int idx, char *options,
+static void set_user_specified(struct console_cmdline *c, bool user_specified)
+{
+ if (!user_specified)
+ return;
+
+ /*
+ * @c console was defined by the user on the command line.
+ * Do not clear when added twice also by SPCR or the device tree.
+ */
+ c->user_specified = true;
+ /* At least one console defined by the user on the command line. */
+ console_set_on_cmdline = 1;
+}
+
+static int __add_preferred_console(const char *name, const short idx,
+ const char *devname, char *options,
char *brl_options, bool user_specified)
{
struct console_cmdline *c;
int i;
+ if (!name && !devname)
+ return -EINVAL;
+
+ /*
+ * We use a signed short index for struct console for device drivers to
+ * indicate a not yet assigned index or port. However, a negative index
+ * value is not valid when the console name and index are defined on
+ * the command line.
+ */
+ if (name && idx < 0)
+ return -EINVAL;
+
/*
* See if this tty is not yet registered, and
* if we have a slot free.
*/
for (i = 0, c = console_cmdline;
- i < MAX_CMDLINECONSOLES && c->name[0];
+ i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]);
i++, c++) {
- if (strcmp(c->name, name) == 0 && c->index == idx) {
+ if ((name && strcmp(c->name, name) == 0 && c->index == idx) ||
+ (devname && strcmp(c->devname, devname) == 0)) {
if (!brl_options)
preferred_console = i;
- if (user_specified)
- c->user_specified = true;
+ set_user_specified(c, user_specified);
return 0;
}
}
@@ -2224,9 +2551,12 @@ static int __add_preferred_console(char *name, int idx, char *options,
return -E2BIG;
if (!brl_options)
preferred_console = i;
- strlcpy(c->name, name, sizeof(c->name));
+ if (name)
+ strscpy(c->name, name);
+ if (devname)
+ strscpy(c->devname, devname);
c->options = options;
- c->user_specified = user_specified;
+ set_user_specified(c, user_specified);
braille_set_options(c, brl_options);
c->index = idx;
@@ -2249,50 +2579,66 @@ __setup("console_msg_format=", console_msg_format_setup);
*/
static int __init console_setup(char *str)
{
- char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */
- char *s, *options, *brl_options = NULL;
+ static_assert(sizeof(console_cmdline[0].devname) >= sizeof(console_cmdline[0].name) + 4);
+ char buf[sizeof(console_cmdline[0].devname)];
+ char *brl_options = NULL;
+ char *ttyname = NULL;
+ char *devname = NULL;
+ char *options;
+ char *s;
int idx;
/*
* console="" or console=null have been suggested as a way to
* disable console output. Use ttynull that has been created
- * for exacly this purpose.
+ * for exactly this purpose.
*/
if (str[0] == 0 || strcmp(str, "null") == 0) {
- __add_preferred_console("ttynull", 0, NULL, NULL, true);
+ __add_preferred_console("ttynull", 0, NULL, NULL, NULL, true);
return 1;
}
if (_braille_console_setup(&str, &brl_options))
return 1;
+ /* For a DEVNAME:0.0 style console the character device is unknown early */
+ if (strchr(str, ':'))
+ devname = buf;
+ else
+ ttyname = buf;
+
/*
* Decode str into name, index, options.
*/
- if (str[0] >= '0' && str[0] <= '9') {
- strcpy(buf, "ttyS");
- strncpy(buf + 4, str, sizeof(buf) - 5);
- } else {
- strncpy(buf, str, sizeof(buf) - 1);
- }
- buf[sizeof(buf) - 1] = 0;
+ if (ttyname && isdigit(str[0]))
+ scnprintf(buf, sizeof(buf), "ttyS%s", str);
+ else
+ strscpy(buf, str);
+
options = strchr(str, ',');
if (options)
*(options++) = 0;
+
#ifdef __sparc__
if (!strcmp(str, "ttya"))
- strcpy(buf, "ttyS0");
+ strscpy(buf, "ttyS0");
if (!strcmp(str, "ttyb"))
- strcpy(buf, "ttyS1");
+ strscpy(buf, "ttyS1");
#endif
+
for (s = buf; *s; s++)
- if (isdigit(*s) || *s == ',')
+ if ((ttyname && isdigit(*s)) || *s == ',')
break;
- idx = simple_strtoul(s, NULL, 10);
+
+ /* @idx will get defined when devname matches. */
+ if (devname)
+ idx = -1;
+ else
+ idx = simple_strtoul(s, NULL, 10);
+
*s = 0;
- __add_preferred_console(buf, idx, options, brl_options, true);
- console_set_on_cmdline = 1;
+ __add_preferred_console(ttyname, idx, devname, options, brl_options, true);
return 1;
}
__setup("console=", console_setup);
@@ -2310,10 +2656,55 @@ __setup("console=", console_setup);
* commonly to provide a default console (ie from PROM variables) when
* the user has not supplied one.
*/
-int add_preferred_console(char *name, int idx, char *options)
+int add_preferred_console(const char *name, const short idx, char *options)
+{
+ return __add_preferred_console(name, idx, NULL, options, NULL, false);
+}
+
+/**
+ * match_devname_and_update_preferred_console - Update a preferred console
+ * when matching devname is found.
+ * @devname: DEVNAME:0.0 style device name
+ * @name: Name of the corresponding console driver, e.g. "ttyS"
+ * @idx: Console index, e.g. port number.
+ *
+ * The function checks whether a device with the given @devname is
+ * preferred via the console=DEVNAME:0.0 command line option.
+ * It fills the missing console driver name and console index
+ * so that a later register_console() call could find (match)
+ * and enable this device.
+ *
+ * It might be used when a driver subsystem initializes particular
+ * devices with already known DEVNAME:0.0 style names. And it
+ * could predict which console driver name and index this device
+ * would later get associated with.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int match_devname_and_update_preferred_console(const char *devname,
+ const char *name,
+ const short idx)
{
- return __add_preferred_console(name, idx, options, NULL, false);
+ struct console_cmdline *c = console_cmdline;
+ int i;
+
+ if (!devname || !strlen(devname) || !name || !strlen(name) || idx < 0)
+ return -EINVAL;
+
+ for (i = 0; i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]);
+ i++, c++) {
+ if (!strcmp(devname, c->devname)) {
+ pr_info("associate the preferred console \"%s\" with \"%s%d\"\n",
+ devname, name, idx);
+ strscpy(c->name, name);
+ c->index = idx;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
}
+EXPORT_SYMBOL_GPL(match_devname_and_update_preferred_console);
bool console_suspend_enabled = true;
EXPORT_SYMBOL(console_suspend_enabled);
@@ -2329,6 +2720,18 @@ module_param_named(console_suspend, console_suspend_enabled,
MODULE_PARM_DESC(console_suspend, "suspend console during suspend"
" and hibernate operations");
+static bool printk_console_no_auto_verbose;
+
+void console_verbose(void)
+{
+ if (console_loglevel && !printk_console_no_auto_verbose)
+ console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
+}
+EXPORT_SYMBOL_GPL(console_verbose);
+
+module_param_named(console_no_auto_verbose, printk_console_no_auto_verbose, bool, 0644);
+MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to highest on oops/panic/etc");
+
/**
* suspend_console - suspend the console subsystem
*
@@ -2336,21 +2739,54 @@ MODULE_PARM_DESC(console_suspend, "suspend console during suspend"
*/
void suspend_console(void)
{
+ struct console *con;
+
if (!console_suspend_enabled)
return;
pr_info("Suspending console(s) (use no_console_suspend to debug)\n");
- console_lock();
- console_suspended = 1;
- up_console_sem();
+ pr_flush(1000, true);
+
+ console_list_lock();
+ for_each_console(con)
+ console_srcu_write_flags(con, con->flags | CON_SUSPENDED);
+ console_list_unlock();
+
+ /*
+ * Ensure that all SRCU list walks have completed. All printing
+ * contexts must be able to see that they are suspended so that it
+ * is guaranteed that all printing has stopped when this function
+ * completes.
+ */
+ synchronize_srcu(&console_srcu);
}
void resume_console(void)
{
+ struct console_flush_type ft;
+ struct console *con;
+
if (!console_suspend_enabled)
return;
- down_console_sem();
- console_suspended = 0;
- console_unlock();
+
+ console_list_lock();
+ for_each_console(con)
+ console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED);
+ console_list_unlock();
+
+ /*
+ * Ensure that all SRCU list walks have completed. All printing
+ * contexts must be able to see they are no longer suspended so
+ * that they are guaranteed to wake up and resume printing.
+ */
+ synchronize_srcu(&console_srcu);
+
+ printk_get_console_flush_type(&ft);
+ if (ft.nbcon_offload)
+ nbcon_kthreads_wake();
+ if (ft.legacy_offload)
+ defer_console_output();
+
+ pr_flush(1000, true);
}
/**
@@ -2364,19 +2800,25 @@ void resume_console(void)
*/
static int console_cpu_notify(unsigned int cpu)
{
+ struct console_flush_type ft;
+
if (!cpuhp_tasks_frozen) {
- /* If trylock fails, someone else is doing the printing */
- if (console_trylock())
- console_unlock();
+ printk_get_console_flush_type(&ft);
+ if (ft.nbcon_atomic)
+ nbcon_atomic_flush_pending();
+ if (ft.legacy_direct) {
+ if (console_trylock())
+ console_unlock();
+ }
}
return 0;
}
/**
- * console_lock - lock the console system for exclusive use.
+ * console_lock - block the console subsystem from printing
*
- * Acquires a lock which guarantees that the caller has
- * exclusive access to the console system and the console_drivers list.
+ * Acquires a lock which guarantees that no consoles will
+ * be in or enter their write() callback.
*
* Can sleep, returns nothing.
*/
@@ -2384,30 +2826,31 @@ void console_lock(void)
{
might_sleep();
+ /* On panic, the console_lock must be left to the panic cpu. */
+ while (other_cpu_in_panic())
+ msleep(1000);
+
down_console_sem();
- if (console_suspended)
- return;
console_locked = 1;
console_may_schedule = 1;
}
EXPORT_SYMBOL(console_lock);
/**
- * console_trylock - try to lock the console system for exclusive use.
+ * console_trylock - try to block the console subsystem from printing
*
- * Try to acquire a lock which guarantees that the caller has exclusive
- * access to the console system and the console_drivers list.
+ * Try to acquire a lock which guarantees that no consoles will
+ * be in or enter their write() callback.
*
* returns 1 on success, and 0 on failure to acquire the lock.
*/
int console_trylock(void)
{
- if (down_trylock_console_sem())
+ /* On panic, the console_lock must be left to the panic cpu. */
+ if (other_cpu_in_panic())
return 0;
- if (console_suspended) {
- up_console_sem();
+ if (down_trylock_console_sem())
return 0;
- }
console_locked = 1;
console_may_schedule = 0;
return 1;
@@ -2420,186 +2863,452 @@ int is_console_locked(void)
}
EXPORT_SYMBOL(is_console_locked);
+static void __console_unlock(void)
+{
+ console_locked = 0;
+ up_console_sem();
+}
+
+#ifdef CONFIG_PRINTK
+
/*
- * Check if we have any console that is capable of printing while cpu is
- * booting or shutting down. Requires console_sem.
+ * Prepend the message in @pmsg->pbufs->outbuf. This is achieved by shifting
+ * the existing message over and inserting the scratchbuf message.
+ *
+ * @pmsg is the original printk message.
+ * @fmt is the printf format of the message which will prepend the existing one.
+ *
+ * If there is not enough space in @pmsg->pbufs->outbuf, the existing
+ * message text will be sufficiently truncated.
+ *
+ * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated.
*/
-static int have_callable_console(void)
+__printf(2, 3)
+static void console_prepend_message(struct printk_message *pmsg, const char *fmt, ...)
{
- struct console *con;
+ struct printk_buffers *pbufs = pmsg->pbufs;
+ const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
+ const size_t outbuf_sz = sizeof(pbufs->outbuf);
+ char *scratchbuf = &pbufs->scratchbuf[0];
+ char *outbuf = &pbufs->outbuf[0];
+ va_list args;
+ size_t len;
- for_each_console(con)
- if ((con->flags & CON_ENABLED) &&
- (con->flags & CON_ANYTIME))
- return 1;
+ va_start(args, fmt);
+ len = vscnprintf(scratchbuf, scratchbuf_sz, fmt, args);
+ va_end(args);
- return 0;
+ /*
+ * Make sure outbuf is sufficiently large before prepending.
+ * Keep at least the prefix when the message must be truncated.
+ * It is a rather theoretical problem when someone tries to
+ * use a minimalist buffer.
+ */
+ if (WARN_ON_ONCE(len + PRINTK_PREFIX_MAX >= outbuf_sz))
+ return;
+
+ if (pmsg->outbuf_len + len >= outbuf_sz) {
+ /* Truncate the message, but keep it terminated. */
+ pmsg->outbuf_len = outbuf_sz - (len + 1);
+ outbuf[pmsg->outbuf_len] = 0;
+ }
+
+ memmove(outbuf + len, outbuf, pmsg->outbuf_len + 1);
+ memcpy(outbuf, scratchbuf, len);
+ pmsg->outbuf_len += len;
}
/*
- * Can we actually use the console at this time on this cpu?
+ * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message".
+ * @pmsg->outbuf_len is updated appropriately.
*
- * Console drivers may assume that per-cpu resources have been allocated. So
- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
- * call them until this CPU is officially up.
+ * @pmsg is the printk message to prepend.
+ *
+ * @dropped is the dropped count to report in the dropped message.
*/
-static inline int can_use_console(void)
+void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
{
- return cpu_online(raw_smp_processor_id()) || have_callable_console();
+ console_prepend_message(pmsg, "** %lu printk messages dropped **\n", dropped);
}
-/**
- * console_unlock - unlock the console system
+/*
+ * Prepend the message in @pmsg->pbufs->outbuf with a "replay message".
+ * @pmsg->outbuf_len is updated appropriately.
+ *
+ * @pmsg is the printk message to prepend.
+ */
+void console_prepend_replay(struct printk_message *pmsg)
+{
+ console_prepend_message(pmsg, "** replaying previous printk message **\n");
+}
+
+/*
+ * Read and format the specified record (or a later record if the specified
+ * record is not available).
*
- * Releases the console_lock which the caller holds on the console system
- * and the console driver list.
+ * @pmsg will contain the formatted result. @pmsg->pbufs must point to a
+ * struct printk_buffers.
*
- * While the console_lock was held, console output may have been buffered
- * by printk(). If this is the case, console_unlock(); emits
- * the output prior to releasing the lock.
+ * @seq is the record to read and format. If it is not available, the next
+ * valid record is read.
*
- * If there is output waiting, we wake /dev/kmsg and syslog() users.
+ * @is_extended specifies if the message should be formatted for extended
+ * console output.
*
- * console_unlock(); may be called from any context.
+ * @may_supress specifies if records may be skipped based on loglevel.
+ *
+ * Returns false if no record is available. Otherwise true and all fields
+ * of @pmsg are valid. (See the documentation of struct printk_message
+ * for information about the @pmsg fields.)
*/
-void console_unlock(void)
+bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
+ bool is_extended, bool may_suppress)
{
- static char ext_text[CONSOLE_EXT_LOG_MAX];
- static char text[LOG_LINE_MAX + PREFIX_MAX];
- unsigned long flags;
- bool do_cond_resched, retry;
+ struct printk_buffers *pbufs = pmsg->pbufs;
+ const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
+ const size_t outbuf_sz = sizeof(pbufs->outbuf);
+ char *scratchbuf = &pbufs->scratchbuf[0];
+ char *outbuf = &pbufs->outbuf[0];
struct printk_info info;
struct printk_record r;
-
- if (console_suspended) {
- up_console_sem();
- return;
- }
-
- prb_rec_init_rd(&r, &info, text, sizeof(text));
+ size_t len = 0;
+ bool force_con;
/*
- * Console drivers are called with interrupts disabled, so
- * @console_may_schedule should be cleared before; however, we may
- * end up dumping a lot of lines, for example, if called from
- * console registration path, and should invoke cond_resched()
- * between lines if allowable. Not doing so can cause a very long
- * scheduling stall on a slow console leading to RCU stall and
- * softlockup warnings which exacerbate the issue with more
- * messages practically incapacitating the system.
+ * Formatting extended messages requires a separate buffer, so use the
+ * scratch buffer to read in the ringbuffer text.
*
- * console_trylock() is not able to detect the preemptive
- * context reliably. Therefore the value must be stored before
- * and cleared after the "again" goto label.
+ * Formatting normal messages is done in-place, so read the ringbuffer
+ * text directly into the output buffer.
*/
- do_cond_resched = console_may_schedule;
-again:
- console_may_schedule = 0;
+ if (is_extended)
+ prb_rec_init_rd(&r, &info, scratchbuf, scratchbuf_sz);
+ else
+ prb_rec_init_rd(&r, &info, outbuf, outbuf_sz);
+
+ if (!prb_read_valid(prb, seq, &r))
+ return false;
+
+ pmsg->seq = r.info->seq;
+ pmsg->dropped = r.info->seq - seq;
+ force_con = r.info->flags & LOG_FORCE_CON;
/*
- * We released the console_sem lock, so we need to recheck if
- * cpu is online and (if not) is there at least one CON_ANYTIME
- * console.
+ * Skip records that are not forced to be printed on consoles and that
+ * has level above the console loglevel.
*/
- if (!can_use_console()) {
- console_locked = 0;
- up_console_sem();
- return;
+ if (!force_con && may_suppress && suppress_message_printing(r.info->level))
+ goto out;
+
+ if (is_extended) {
+ len = info_print_ext_header(outbuf, outbuf_sz, r.info);
+ len += msg_print_ext_body(outbuf + len, outbuf_sz - len,
+ &r.text_buf[0], r.info->text_len, &r.info->dev_info);
+ } else {
+ len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
}
+out:
+ pmsg->outbuf_len = len;
+ return true;
+}
- for (;;) {
- size_t ext_len = 0;
- size_t len;
+/*
+ * Legacy console printing from printk() caller context does not respect
+ * raw_spinlock/spinlock nesting. For !PREEMPT_RT the lockdep warning is a
+ * false positive. For PREEMPT_RT the false positive condition does not
+ * occur.
+ *
+ * This map is used to temporarily establish LD_WAIT_SLEEP context for the
+ * console write() callback when legacy printing to avoid false positive
+ * lockdep complaints, thus allowing lockdep to continue to function for
+ * real issues.
+ */
+#ifdef CONFIG_PREEMPT_RT
+static inline void printk_legacy_allow_spinlock_enter(void) { }
+static inline void printk_legacy_allow_spinlock_exit(void) { }
+#else
+static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_SLEEP);
- printk_safe_enter_irqsave(flags);
- raw_spin_lock(&logbuf_lock);
-skip:
- if (!prb_read_valid(prb, console_seq, &r))
- break;
+static inline void printk_legacy_allow_spinlock_enter(void)
+{
+ lock_map_acquire_try(&printk_legacy_map);
+}
- if (console_seq != r.info->seq) {
- console_dropped += r.info->seq - console_seq;
- console_seq = r.info->seq;
- }
+static inline void printk_legacy_allow_spinlock_exit(void)
+{
+ lock_map_release(&printk_legacy_map);
+}
+#endif /* CONFIG_PREEMPT_RT */
- if (suppress_message_printing(r.info->level)) {
- /*
- * Skip record we have buffered and already printed
- * directly to the console when we received it, and
- * record that has level above the console loglevel.
- */
- console_seq++;
- goto skip;
- }
+/*
+ * Used as the printk buffers for non-panic, serialized console printing.
+ * This is for legacy (!CON_NBCON) as well as all boot (CON_BOOT) consoles.
+ * Its usage requires the console_lock held.
+ */
+struct printk_buffers printk_shared_pbufs;
- /* Output to all consoles once old messages replayed. */
- if (unlikely(exclusive_console &&
- console_seq >= exclusive_console_stop_seq)) {
- exclusive_console = NULL;
- }
+/*
+ * Print one record for the given console. The record printed is whatever
+ * record is the next available record for the given console.
+ *
+ * @handover will be set to true if a printk waiter has taken over the
+ * console_lock, in which case the caller is no longer holding both the
+ * console_lock and the SRCU read lock. Otherwise it is set to false.
+ *
+ * @cookie is the cookie from the SRCU read lock.
+ *
+ * Returns false if the given console has no next record to print, otherwise
+ * true.
+ *
+ * Requires the console_lock and the SRCU read lock.
+ */
+static bool console_emit_next_record(struct console *con, bool *handover, int cookie)
+{
+ bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED;
+ char *outbuf = &printk_shared_pbufs.outbuf[0];
+ struct printk_message pmsg = {
+ .pbufs = &printk_shared_pbufs,
+ };
+ unsigned long flags;
+
+ *handover = false;
+
+ if (!printk_get_next_message(&pmsg, con->seq, is_extended, true))
+ return false;
+
+ con->dropped += pmsg.dropped;
+
+ /* Skip messages of formatted length 0. */
+ if (pmsg.outbuf_len == 0) {
+ con->seq = pmsg.seq + 1;
+ goto skip;
+ }
+
+ if (con->dropped && !is_extended) {
+ console_prepend_dropped(&pmsg, con->dropped);
+ con->dropped = 0;
+ }
+
+ /* Write everything out to the hardware. */
+ if (force_legacy_kthread() && !panic_in_progress()) {
/*
- * Handle extended console text first because later
- * record_print_text() will modify the record buffer in-place.
+ * With forced threading this function is in a task context
+ * (either legacy kthread or get_init_console_seq()). There
+ * is no need for concern about printk reentrance, handovers,
+ * or lockdep complaints.
*/
- if (nr_ext_console_drivers) {
- ext_len = info_print_ext_header(ext_text,
- sizeof(ext_text),
- r.info);
- ext_len += msg_print_ext_body(ext_text + ext_len,
- sizeof(ext_text) - ext_len,
- &r.text_buf[0],
- r.info->text_len,
- &r.info->dev_info);
- }
- len = record_print_text(&r,
- console_msg_format & MSG_FORMAT_SYSLOG,
- printk_time);
- console_seq++;
- raw_spin_unlock(&logbuf_lock);
+ con->write(con, outbuf, pmsg.outbuf_len);
+ con->seq = pmsg.seq + 1;
+ } else {
/*
* While actively printing out messages, if another printk()
* were to occur on another CPU, it may wait for this one to
* finish. This task can not be preempted if there is a
* waiter waiting to take over.
+ *
+ * Interrupts are disabled because the hand over to a waiter
+ * must not be interrupted until the hand over is completed
+ * (@console_waiter is cleared).
*/
+ printk_safe_enter_irqsave(flags);
console_lock_spinning_enable();
- stop_critical_timings(); /* don't trace print latency */
- call_console_drivers(ext_text, ext_len, text, len);
+ /* Do not trace print latency. */
+ stop_critical_timings();
+
+ printk_legacy_allow_spinlock_enter();
+ con->write(con, outbuf, pmsg.outbuf_len);
+ printk_legacy_allow_spinlock_exit();
+
start_critical_timings();
- if (console_lock_spinning_disable_and_check()) {
- printk_safe_exit_irqrestore(flags);
- return;
- }
+ con->seq = pmsg.seq + 1;
+ *handover = console_lock_spinning_disable_and_check(cookie);
printk_safe_exit_irqrestore(flags);
-
- if (do_cond_resched)
- cond_resched();
}
+skip:
+ return true;
+}
- console_locked = 0;
+#else
- raw_spin_unlock(&logbuf_lock);
+static bool console_emit_next_record(struct console *con, bool *handover, int cookie)
+{
+ *handover = false;
+ return false;
+}
- up_console_sem();
+static inline void printk_kthreads_check_locked(void) { }
+
+#endif /* CONFIG_PRINTK */
+
+/*
+ * Print out all remaining records to all consoles.
+ *
+ * @do_cond_resched is set by the caller. It can be true only in schedulable
+ * context.
+ *
+ * @next_seq is set to the sequence number after the last available record.
+ * The value is valid only when this function returns true. It means that all
+ * usable consoles are completely flushed.
+ *
+ * @handover will be set to true if a printk waiter has taken over the
+ * console_lock, in which case the caller is no longer holding the
+ * console_lock. Otherwise it is set to false.
+ *
+ * Returns true when there was at least one usable console and all messages
+ * were flushed to all usable consoles. A returned false informs the caller
+ * that everything was not flushed (either there were no usable consoles or
+ * another context has taken over printing or it is a panic situation and this
+ * is not the panic CPU). Regardless the reason, the caller should assume it
+ * is not useful to immediately try again.
+ *
+ * Requires the console_lock.
+ */
+static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
+{
+ struct console_flush_type ft;
+ bool any_usable = false;
+ struct console *con;
+ bool any_progress;
+ int cookie;
+
+ *next_seq = 0;
+ *handover = false;
+
+ do {
+ any_progress = false;
+
+ printk_get_console_flush_type(&ft);
+
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(con) {
+ short flags = console_srcu_read_flags(con);
+ u64 printk_seq;
+ bool progress;
+
+ /*
+ * console_flush_all() is only responsible for nbcon
+ * consoles when the nbcon consoles cannot print via
+ * their atomic or threaded flushing.
+ */
+ if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
+ continue;
+
+ if (!console_is_usable(con, flags, !do_cond_resched))
+ continue;
+ any_usable = true;
+
+ if (flags & CON_NBCON) {
+ progress = nbcon_legacy_emit_next_record(con, handover, cookie,
+ !do_cond_resched);
+ printk_seq = nbcon_seq_read(con);
+ } else {
+ progress = console_emit_next_record(con, handover, cookie);
+ printk_seq = con->seq;
+ }
+
+ /*
+ * If a handover has occurred, the SRCU read lock
+ * is already released.
+ */
+ if (*handover)
+ return false;
+
+ /* Track the next of the highest seq flushed. */
+ if (printk_seq > *next_seq)
+ *next_seq = printk_seq;
+
+ if (!progress)
+ continue;
+ any_progress = true;
+
+ /* Allow panic_cpu to take over the consoles safely. */
+ if (other_cpu_in_panic())
+ goto abandon;
+
+ if (do_cond_resched)
+ cond_resched();
+ }
+ console_srcu_read_unlock(cookie);
+ } while (any_progress);
+
+ return any_usable;
+
+abandon:
+ console_srcu_read_unlock(cookie);
+ return false;
+}
+
+static void __console_flush_and_unlock(void)
+{
+ bool do_cond_resched;
+ bool handover;
+ bool flushed;
+ u64 next_seq;
/*
- * Someone could have filled up the buffer again, so re-check if there's
- * something to flush. In case we cannot trylock the console_sem again,
- * there's a new owner and the console_unlock() from them will do the
- * flush, no worries.
+ * Console drivers are called with interrupts disabled, so
+ * @console_may_schedule should be cleared before; however, we may
+ * end up dumping a lot of lines, for example, if called from
+ * console registration path, and should invoke cond_resched()
+ * between lines if allowable. Not doing so can cause a very long
+ * scheduling stall on a slow console leading to RCU stall and
+ * softlockup warnings which exacerbate the issue with more
+ * messages practically incapacitating the system. Therefore, create
+ * a local to use for the printing loop.
*/
- raw_spin_lock(&logbuf_lock);
- retry = prb_read_valid(prb, console_seq, NULL);
- raw_spin_unlock(&logbuf_lock);
- printk_safe_exit_irqrestore(flags);
+ do_cond_resched = console_may_schedule;
+
+ do {
+ console_may_schedule = 0;
+
+ flushed = console_flush_all(do_cond_resched, &next_seq, &handover);
+ if (!handover)
+ __console_unlock();
+
+ /*
+ * Abort if there was a failure to flush all messages to all
+ * usable consoles. Either it is not possible to flush (in
+ * which case it would be an infinite loop of retrying) or
+ * another context has taken over printing.
+ */
+ if (!flushed)
+ break;
- if (retry && console_trylock())
- goto again;
+ /*
+ * Some context may have added new records after
+ * console_flush_all() but before unlocking the console.
+ * Re-check if there is a new record to flush. If the trylock
+ * fails, another context is already handling the printing.
+ */
+ } while (prb_read_valid(prb, next_seq, NULL) && console_trylock());
+}
+
+/**
+ * console_unlock - unblock the legacy console subsystem from printing
+ *
+ * Releases the console_lock which the caller holds to block printing of
+ * the legacy console subsystem.
+ *
+ * While the console_lock was held, console output may have been buffered
+ * by printk(). If this is the case, console_unlock() emits the output on
+ * legacy consoles prior to releasing the lock.
+ *
+ * console_unlock(); may be called from any context.
+ */
+void console_unlock(void)
+{
+ struct console_flush_type ft;
+
+ printk_get_console_flush_type(&ft);
+ if (ft.legacy_direct)
+ __console_flush_and_unlock();
+ else
+ __console_unlock();
}
EXPORT_SYMBOL(console_unlock);
@@ -2621,13 +3330,45 @@ EXPORT_SYMBOL(console_conditional_schedule);
void console_unblank(void)
{
+ bool found_unblank = false;
struct console *c;
+ int cookie;
+
+ /*
+ * First check if there are any consoles implementing the unblank()
+ * callback. If not, there is no reason to continue and take the
+ * console lock, which in particular can be dangerous if
+ * @oops_in_progress is set.
+ */
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(c) {
+ if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) {
+ found_unblank = true;
+ break;
+ }
+ }
+ console_srcu_read_unlock(cookie);
+ if (!found_unblank)
+ return;
/*
- * console_unblank can no longer be called in interrupt context unless
- * oops_in_progress is set to 1..
+ * Stop console printing because the unblank() callback may
+ * assume the console is not within its write() callback.
+ *
+ * If @oops_in_progress is set, this may be an atomic context.
+ * In that case, attempt a trylock as best-effort.
*/
if (oops_in_progress) {
+ /* Semaphores are not NMI-safe. */
+ if (in_nmi())
+ return;
+
+ /*
+ * Attempting to trylock the console lock can deadlock
+ * if another CPU was stopped while modifying the
+ * semaphore. "Hope and pray" that this is not the
+ * current situation.
+ */
if (down_trylock_console_sem() != 0)
return;
} else
@@ -2635,10 +3376,52 @@ void console_unblank(void)
console_locked = 1;
console_may_schedule = 0;
- for_each_console(c)
- if ((c->flags & CON_ENABLED) && c->unblank)
+
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(c) {
+ if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank)
c->unblank();
+ }
+ console_srcu_read_unlock(cookie);
+
console_unlock();
+
+ if (!oops_in_progress)
+ pr_flush(1000, true);
+}
+
+/*
+ * Rewind all consoles to the oldest available record.
+ *
+ * IMPORTANT: The function is safe only when called under
+ * console_lock(). It is not enforced because
+ * it is used as a best effort in panic().
+ */
+static void __console_rewind_all(void)
+{
+ struct console *c;
+ short flags;
+ int cookie;
+ u64 seq;
+
+ seq = prb_first_valid_seq(prb);
+
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(c) {
+ flags = console_srcu_read_flags(c);
+
+ if (flags & CON_NBCON) {
+ nbcon_seq_force(c, seq);
+ } else {
+ /*
+ * This assignment is safe only when called under
+ * console_lock(). On panic, legacy consoles are
+ * only best effort.
+ */
+ c->seq = seq;
+ }
+ }
+ console_srcu_read_unlock(cookie);
}
/**
@@ -2649,24 +3432,37 @@ void console_unblank(void)
*/
void console_flush_on_panic(enum con_flush_mode mode)
{
+ struct console_flush_type ft;
+ bool handover;
+ u64 next_seq;
+
+ /*
+ * Ignore the console lock and flush out the messages. Attempting a
+ * trylock would not be useful because:
+ *
+ * - if it is contended, it must be ignored anyway
+ * - console_lock() and console_trylock() block and fail
+ * respectively in panic for non-panic CPUs
+ * - semaphores are not NMI-safe
+ */
+
/*
- * If someone else is holding the console lock, trylock will fail
- * and may_schedule may be set. Ignore and proceed to unlock so
- * that messages are flushed out. As this can be called from any
- * context and we don't want to get preempted while flushing,
- * ensure may_schedule is cleared.
+ * If another context is holding the console lock,
+ * @console_may_schedule might be set. Clear it so that
+ * this context does not call cond_resched() while flushing.
*/
- console_trylock();
console_may_schedule = 0;
- if (mode == CONSOLE_REPLAY_ALL) {
- unsigned long flags;
+ if (mode == CONSOLE_REPLAY_ALL)
+ __console_rewind_all();
- logbuf_lock_irqsave(flags);
- console_seq = prb_first_valid_seq(prb);
- logbuf_unlock_irqrestore(flags);
- }
- console_unlock();
+ printk_get_console_flush_type(&ft);
+ if (ft.nbcon_atomic)
+ nbcon_atomic_flush_pending();
+
+ /* Flush legacy consoles once allowed, even when dangerous. */
+ if (legacy_allow_panic_sync)
+ console_flush_all(false, &next_seq, &handover);
}
/*
@@ -2676,15 +3472,25 @@ struct tty_driver *console_device(int *index)
{
struct console *c;
struct tty_driver *driver = NULL;
+ int cookie;
+ /*
+ * Take console_lock to serialize device() callback with
+ * other console operations. For example, fg_console is
+ * modified under console_lock when switching vt.
+ */
console_lock();
- for_each_console(c) {
+
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(c) {
if (!c->device)
continue;
driver = c->device(c, index);
if (driver)
break;
}
+ console_srcu_read_unlock(cookie);
+
console_unlock();
return driver;
}
@@ -2696,20 +3502,253 @@ struct tty_driver *console_device(int *index)
*/
void console_stop(struct console *console)
{
- console_lock();
- console->flags &= ~CON_ENABLED;
- console_unlock();
+ __pr_flush(console, 1000, true);
+ console_list_lock();
+ console_srcu_write_flags(console, console->flags & ~CON_ENABLED);
+ console_list_unlock();
+
+ /*
+ * Ensure that all SRCU list walks have completed. All contexts must
+ * be able to see that this console is disabled so that (for example)
+ * the caller can suspend the port without risk of another context
+ * using the port.
+ */
+ synchronize_srcu(&console_srcu);
}
EXPORT_SYMBOL(console_stop);
void console_start(struct console *console)
{
- console_lock();
- console->flags |= CON_ENABLED;
- console_unlock();
+ struct console_flush_type ft;
+ bool is_nbcon;
+
+ console_list_lock();
+ console_srcu_write_flags(console, console->flags | CON_ENABLED);
+ is_nbcon = console->flags & CON_NBCON;
+ console_list_unlock();
+
+ /*
+ * Ensure that all SRCU list walks have completed. The related
+ * printing context must be able to see it is enabled so that
+ * it is guaranteed to wake up and resume printing.
+ */
+ synchronize_srcu(&console_srcu);
+
+ printk_get_console_flush_type(&ft);
+ if (is_nbcon && ft.nbcon_offload)
+ nbcon_kthread_wake(console);
+ else if (ft.legacy_offload)
+ defer_console_output();
+
+ __pr_flush(console, 1000, true);
}
EXPORT_SYMBOL(console_start);
+#ifdef CONFIG_PRINTK
+static int unregister_console_locked(struct console *console);
+
+/* True when system boot is far enough to create printer threads. */
+static bool printk_kthreads_ready __ro_after_init;
+
+static struct task_struct *printk_legacy_kthread;
+
+static bool legacy_kthread_should_wakeup(void)
+{
+ struct console_flush_type ft;
+ struct console *con;
+ bool ret = false;
+ int cookie;
+
+ if (kthread_should_stop())
+ return true;
+
+ printk_get_console_flush_type(&ft);
+
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(con) {
+ short flags = console_srcu_read_flags(con);
+ u64 printk_seq;
+
+ /*
+ * The legacy printer thread is only responsible for nbcon
+ * consoles when the nbcon consoles cannot print via their
+ * atomic or threaded flushing.
+ */
+ if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
+ continue;
+
+ if (!console_is_usable(con, flags, false))
+ continue;
+
+ if (flags & CON_NBCON) {
+ printk_seq = nbcon_seq_read(con);
+ } else {
+ /*
+ * It is safe to read @seq because only this
+ * thread context updates @seq.
+ */
+ printk_seq = con->seq;
+ }
+
+ if (prb_read_valid(prb, printk_seq, NULL)) {
+ ret = true;
+ break;
+ }
+ }
+ console_srcu_read_unlock(cookie);
+
+ return ret;
+}
+
+static int legacy_kthread_func(void *unused)
+{
+ for (;;) {
+ wait_event_interruptible(legacy_wait, legacy_kthread_should_wakeup());
+
+ if (kthread_should_stop())
+ break;
+
+ console_lock();
+ __console_flush_and_unlock();
+ }
+
+ return 0;
+}
+
+static bool legacy_kthread_create(void)
+{
+ struct task_struct *kt;
+
+ lockdep_assert_console_list_lock_held();
+
+ kt = kthread_run(legacy_kthread_func, NULL, "pr/legacy");
+ if (WARN_ON(IS_ERR(kt))) {
+ pr_err("failed to start legacy printing thread\n");
+ return false;
+ }
+
+ printk_legacy_kthread = kt;
+
+ /*
+ * It is important that console printing threads are scheduled
+ * shortly after a printk call and with generous runtime budgets.
+ */
+ sched_set_normal(printk_legacy_kthread, -20);
+
+ return true;
+}
+
+/**
+ * printk_kthreads_shutdown - shutdown all threaded printers
+ *
+ * On system shutdown all threaded printers are stopped. This allows printk
+ * to transition back to atomic printing, thus providing a robust mechanism
+ * for the final shutdown/reboot messages to be output.
+ */
+static void printk_kthreads_shutdown(void)
+{
+ struct console *con;
+
+ console_list_lock();
+ if (printk_kthreads_running) {
+ printk_kthreads_running = false;
+
+ for_each_console(con) {
+ if (con->flags & CON_NBCON)
+ nbcon_kthread_stop(con);
+ }
+
+ /*
+ * The threads may have been stopped while printing a
+ * backlog. Flush any records left over.
+ */
+ nbcon_atomic_flush_pending();
+ }
+ console_list_unlock();
+}
+
+static struct syscore_ops printk_syscore_ops = {
+ .shutdown = printk_kthreads_shutdown,
+};
+
+/*
+ * If appropriate, start nbcon kthreads and set @printk_kthreads_running.
+ * If any kthreads fail to start, those consoles are unregistered.
+ *
+ * Must be called under console_list_lock().
+ */
+static void printk_kthreads_check_locked(void)
+{
+ struct hlist_node *tmp;
+ struct console *con;
+
+ lockdep_assert_console_list_lock_held();
+
+ if (!printk_kthreads_ready)
+ return;
+
+ if (have_legacy_console || have_boot_console) {
+ if (!printk_legacy_kthread &&
+ force_legacy_kthread() &&
+ !legacy_kthread_create()) {
+ /*
+ * All legacy consoles must be unregistered. If there
+ * are any nbcon consoles, they will set up their own
+ * kthread.
+ */
+ hlist_for_each_entry_safe(con, tmp, &console_list, node) {
+ if (con->flags & CON_NBCON)
+ continue;
+
+ unregister_console_locked(con);
+ }
+ }
+ } else if (printk_legacy_kthread) {
+ kthread_stop(printk_legacy_kthread);
+ printk_legacy_kthread = NULL;
+ }
+
+ /*
+ * Printer threads cannot be started as long as any boot console is
+ * registered because there is no way to synchronize the hardware
+ * registers between boot console code and regular console code.
+ * It can only be known that there will be no new boot consoles when
+ * an nbcon console is registered.
+ */
+ if (have_boot_console || !have_nbcon_console) {
+ /* Clear flag in case all nbcon consoles unregistered. */
+ printk_kthreads_running = false;
+ return;
+ }
+
+ if (printk_kthreads_running)
+ return;
+
+ hlist_for_each_entry_safe(con, tmp, &console_list, node) {
+ if (!(con->flags & CON_NBCON))
+ continue;
+
+ if (!nbcon_kthread_create(con))
+ unregister_console_locked(con);
+ }
+
+ printk_kthreads_running = true;
+}
+
+static int __init printk_set_kthreads_ready(void)
+{
+ register_syscore_ops(&printk_syscore_ops);
+
+ console_list_lock();
+ printk_kthreads_ready = true;
+ printk_kthreads_check_locked();
+ console_list_unlock();
+
+ return 0;
+}
+early_initcall(printk_set_kthreads_ready);
+#endif /* CONFIG_PRINTK */
+
static int __read_mostly keep_bootcon;
static int __init keep_bootcon_setup(char *str)
@@ -2722,6 +3761,21 @@ static int __init keep_bootcon_setup(char *str)
early_param("keep_bootcon", keep_bootcon_setup);
+static int console_call_setup(struct console *newcon, char *options)
+{
+ int err;
+
+ if (!newcon->setup)
+ return 0;
+
+ /* Synchronize with possible boot console. */
+ console_lock();
+ err = newcon->setup(newcon, options);
+ console_unlock();
+
+ return err;
+}
+
/*
* This is called by register_console() to try to match
* the newly registered console with any of the ones selected
@@ -2731,14 +3785,18 @@ early_param("keep_bootcon", keep_bootcon_setup);
* Care need to be taken with consoles that are statically
* enabled such as netconsole
*/
-static int try_enable_new_console(struct console *newcon, bool user_specified)
+static int try_enable_preferred_console(struct console *newcon,
+ bool user_specified)
{
struct console_cmdline *c;
int i, err;
for (i = 0, c = console_cmdline;
- i < MAX_CMDLINECONSOLES && c->name[0];
+ i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]);
i++, c++) {
+ /* Console not yet initialized? */
+ if (!c->name[0])
+ continue;
if (c->user_specified != user_specified)
continue;
if (!newcon->match ||
@@ -2756,15 +3814,13 @@ static int try_enable_new_console(struct console *newcon, bool user_specified)
if (_braille_register_console(newcon, c))
return 0;
- if (newcon->setup &&
- (err = newcon->setup(newcon, c->options)) != 0)
+ err = console_call_setup(newcon, c->options);
+ if (err)
return err;
}
newcon->flags |= CON_ENABLED;
- if (i == preferred_console) {
+ if (i == preferred_console)
newcon->flags |= CON_CONSDEV;
- has_preferred_console = true;
- }
return 0;
}
@@ -2779,6 +3835,100 @@ static int try_enable_new_console(struct console *newcon, bool user_specified)
return -ENOENT;
}
+/* Try to enable the console unconditionally */
+static void try_enable_default_console(struct console *newcon)
+{
+ if (newcon->index < 0)
+ newcon->index = 0;
+
+ if (console_call_setup(newcon, NULL) != 0)
+ return;
+
+ newcon->flags |= CON_ENABLED;
+
+ if (newcon->device)
+ newcon->flags |= CON_CONSDEV;
+}
+
+/* Return the starting sequence number for a newly registered console. */
+static u64 get_init_console_seq(struct console *newcon, bool bootcon_registered)
+{
+ struct console *con;
+ bool handover;
+ u64 init_seq;
+
+ if (newcon->flags & (CON_PRINTBUFFER | CON_BOOT)) {
+ /* Get a consistent copy of @syslog_seq. */
+ mutex_lock(&syslog_lock);
+ init_seq = syslog_seq;
+ mutex_unlock(&syslog_lock);
+ } else {
+ /* Begin with next message added to ringbuffer. */
+ init_seq = prb_next_seq(prb);
+
+ /*
+ * If any enabled boot consoles are due to be unregistered
+ * shortly, some may not be caught up and may be the same
+ * device as @newcon. Since it is not known which boot console
+ * is the same device, flush all consoles and, if necessary,
+ * start with the message of the enabled boot console that is
+ * the furthest behind.
+ */
+ if (bootcon_registered && !keep_bootcon) {
+ /*
+ * Hold the console_lock to stop console printing and
+ * guarantee safe access to console->seq.
+ */
+ console_lock();
+
+ /*
+ * Flush all consoles and set the console to start at
+ * the next unprinted sequence number.
+ */
+ if (!console_flush_all(true, &init_seq, &handover)) {
+ /*
+ * Flushing failed. Just choose the lowest
+ * sequence of the enabled boot consoles.
+ */
+
+ /*
+ * If there was a handover, this context no
+ * longer holds the console_lock.
+ */
+ if (handover)
+ console_lock();
+
+ init_seq = prb_next_seq(prb);
+ for_each_console(con) {
+ u64 seq;
+
+ if (!(con->flags & CON_BOOT) ||
+ !(con->flags & CON_ENABLED)) {
+ continue;
+ }
+
+ if (con->flags & CON_NBCON)
+ seq = nbcon_seq_read(con);
+ else
+ seq = con->seq;
+
+ if (seq < init_seq)
+ init_seq = seq;
+ }
+ }
+
+ console_unlock();
+ }
+ }
+
+ return init_seq;
+}
+
+#define console_first() \
+ hlist_entry(console_list.first, struct console, node)
+
+static int unregister_console_locked(struct console *console);
+
/*
* The console driver calls this routine during kernel initialization
* to register the console printing procedure with printk() and to
@@ -2800,64 +3950,76 @@ static int try_enable_new_console(struct console *newcon, bool user_specified)
*/
void register_console(struct console *newcon)
{
+ bool use_device_lock = (newcon->flags & CON_NBCON) && newcon->write_atomic;
+ bool bootcon_registered = false;
+ bool realcon_registered = false;
+ struct console *con;
unsigned long flags;
- struct console *bcon = NULL;
+ u64 init_seq;
int err;
- for_each_console(bcon) {
- if (WARN(bcon == newcon, "console '%s%d' already registered\n",
- bcon->name, bcon->index))
- return;
- }
+ console_list_lock();
- /*
- * before we register a new CON_BOOT console, make sure we don't
- * already have a valid console
- */
- if (newcon->flags & CON_BOOT) {
- for_each_console(bcon) {
- if (!(bcon->flags & CON_BOOT)) {
- pr_info("Too late to register bootconsole %s%d\n",
- newcon->name, newcon->index);
- return;
- }
+ for_each_console(con) {
+ if (WARN(con == newcon, "console '%s%d' already registered\n",
+ con->name, con->index)) {
+ goto unlock;
}
+
+ if (con->flags & CON_BOOT)
+ bootcon_registered = true;
+ else
+ realcon_registered = true;
}
- if (console_drivers && console_drivers->flags & CON_BOOT)
- bcon = console_drivers;
+ /* Do not register boot consoles when there already is a real one. */
+ if ((newcon->flags & CON_BOOT) && realcon_registered) {
+ pr_info("Too late to register bootconsole %s%d\n",
+ newcon->name, newcon->index);
+ goto unlock;
+ }
- if (!has_preferred_console || bcon || !console_drivers)
- has_preferred_console = preferred_console >= 0;
+ if (newcon->flags & CON_NBCON) {
+ /*
+ * Ensure the nbcon console buffers can be allocated
+ * before modifying any global data.
+ */
+ if (!nbcon_alloc(newcon))
+ goto unlock;
+ }
/*
- * See if we want to use this console driver. If we
- * didn't select a console we take the first one
- * that registers here.
+ * See if we want to enable this console driver by default.
+ *
+ * Nope when a console is preferred by the command line, device
+ * tree, or SPCR.
+ *
+ * The first real console with tty binding (driver) wins. More
+ * consoles might get enabled before the right one is found.
+ *
+ * Note that a console with tty binding will have CON_CONSDEV
+ * flag set and will be first in the list.
*/
- if (!has_preferred_console) {
- if (newcon->index < 0)
- newcon->index = 0;
- if (newcon->setup == NULL ||
- newcon->setup(newcon, NULL) == 0) {
- newcon->flags |= CON_ENABLED;
- if (newcon->device) {
- newcon->flags |= CON_CONSDEV;
- has_preferred_console = true;
- }
+ if (preferred_console < 0) {
+ if (hlist_empty(&console_list) || !console_first()->device ||
+ console_first()->flags & CON_BOOT) {
+ try_enable_default_console(newcon);
}
}
/* See if this console matches one we selected on the command line */
- err = try_enable_new_console(newcon, true);
+ err = try_enable_preferred_console(newcon, true);
/* If not, try to match against the platform default(s) */
if (err == -ENOENT)
- err = try_enable_new_console(newcon, false);
+ err = try_enable_preferred_console(newcon, false);
/* printk() messages are not printed to the Braille console. */
- if (err || newcon->flags & CON_BRL)
- return;
+ if (err || newcon->flags & CON_BRL) {
+ if (newcon->flags & CON_NBCON)
+ nbcon_free(newcon);
+ goto unlock;
+ }
/*
* If we have a bootconsole, and are switching to a real console,
@@ -2865,50 +4027,66 @@ void register_console(struct console *newcon)
* the real console are the same physical device, it's annoying to
* see the beginning boot messages twice
*/
- if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV))
+ if (bootcon_registered &&
+ ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) {
newcon->flags &= ~CON_PRINTBUFFER;
+ }
+
+ newcon->dropped = 0;
+ init_seq = get_init_console_seq(newcon, bootcon_registered);
+
+ if (newcon->flags & CON_NBCON) {
+ have_nbcon_console = true;
+ nbcon_seq_force(newcon, init_seq);
+ } else {
+ have_legacy_console = true;
+ newcon->seq = init_seq;
+ }
+
+ if (newcon->flags & CON_BOOT)
+ have_boot_console = true;
/*
- * Put this console in the list - keep the
- * preferred driver at the head of the list.
+ * If another context is actively using the hardware of this new
+ * console, it will not be aware of the nbcon synchronization. This
+ * is a risk that two contexts could access the hardware
+ * simultaneously if this new console is used for atomic printing
+ * and the other context is still using the hardware.
+ *
+ * Use the driver synchronization to ensure that the hardware is not
+ * in use while this new console transitions to being registered.
*/
- console_lock();
- if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) {
- newcon->next = console_drivers;
- console_drivers = newcon;
- if (newcon->next)
- newcon->next->flags &= ~CON_CONSDEV;
- /* Ensure this flag is always set for the head of the list */
+ if (use_device_lock)
+ newcon->device_lock(newcon, &flags);
+
+ /*
+ * Put this console in the list - keep the
+ * preferred driver at the head of the list.
+ */
+ if (hlist_empty(&console_list)) {
+ /* Ensure CON_CONSDEV is always set for the head. */
newcon->flags |= CON_CONSDEV;
+ hlist_add_head_rcu(&newcon->node, &console_list);
+
+ } else if (newcon->flags & CON_CONSDEV) {
+ /* Only the new head can have CON_CONSDEV set. */
+ console_srcu_write_flags(console_first(), console_first()->flags & ~CON_CONSDEV);
+ hlist_add_head_rcu(&newcon->node, &console_list);
+
} else {
- newcon->next = console_drivers->next;
- console_drivers->next = newcon;
+ hlist_add_behind_rcu(&newcon->node, console_list.first);
}
- if (newcon->flags & CON_EXTENDED)
- nr_ext_console_drivers++;
+ /*
+ * No need to synchronize SRCU here! The caller does not rely
+ * on all contexts being able to see the new console before
+ * register_console() completes.
+ */
+
+ /* This new console is now registered. */
+ if (use_device_lock)
+ newcon->device_unlock(newcon, flags);
- if (newcon->flags & CON_PRINTBUFFER) {
- /*
- * console_unlock(); will print out the buffered messages
- * for us.
- */
- logbuf_lock_irqsave(flags);
- /*
- * We're about to replay the log buffer. Only do this to the
- * just-registered console to avoid excessive message spam to
- * the already-registered consoles.
- *
- * Set exclusive_console with disabled interrupts to reduce
- * race window with eventual console_flush_on_panic() that
- * ignores console_lock.
- */
- exclusive_console = newcon;
- exclusive_console_stop_seq = console_seq;
- console_seq = syslog_seq;
- logbuf_unlock_irqrestore(flags);
- }
- console_unlock();
console_sysfs_notify();
/*
@@ -2918,30 +4096,39 @@ void register_console(struct console *newcon)
* users know there might be something in the kernel's log buffer that
* went to the bootconsole (that they do not see on the real console)
*/
- pr_info("%sconsole [%s%d] enabled\n",
- (newcon->flags & CON_BOOT) ? "boot" : "" ,
- newcon->name, newcon->index);
- if (bcon &&
+ con_printk(KERN_INFO, newcon, "enabled\n");
+ if (bootcon_registered &&
((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) &&
!keep_bootcon) {
- /* We need to iterate through all boot consoles, to make
- * sure we print everything out, before we unregister them.
- */
- for_each_console(bcon)
- if (bcon->flags & CON_BOOT)
- unregister_console(bcon);
+ struct hlist_node *tmp;
+
+ hlist_for_each_entry_safe(con, tmp, &console_list, node) {
+ if (con->flags & CON_BOOT)
+ unregister_console_locked(con);
+ }
}
+
+ /* Changed console list, may require printer threads to start/stop. */
+ printk_kthreads_check_locked();
+unlock:
+ console_list_unlock();
}
EXPORT_SYMBOL(register_console);
-int unregister_console(struct console *console)
+/* Must be called under console_list_lock(). */
+static int unregister_console_locked(struct console *console)
{
- struct console *con;
+ bool use_device_lock = (console->flags & CON_NBCON) && console->write_atomic;
+ bool found_legacy_con = false;
+ bool found_nbcon_con = false;
+ bool found_boot_con = false;
+ unsigned long flags;
+ struct console *c;
int res;
- pr_info("%sconsole [%s%d] disabled\n",
- (console->flags & CON_BOOT) ? "boot" : "" ,
- console->name, console->index);
+ lockdep_assert_console_list_lock_held();
+
+ con_printk(KERN_INFO, console, "disabled\n");
res = _braille_unregister_console(console);
if (res < 0)
@@ -2949,51 +4136,135 @@ int unregister_console(struct console *console)
if (res > 0)
return 0;
- res = -ENODEV;
- console_lock();
- if (console_drivers == console) {
- console_drivers=console->next;
- res = 0;
- } else {
- for_each_console(con) {
- if (con->next == console) {
- con->next = console->next;
- res = 0;
- break;
- }
- }
- }
+ if (!console_is_registered_locked(console))
+ res = -ENODEV;
+ else if (console_is_usable(console, console->flags, true))
+ __pr_flush(console, 1000, true);
+
+ /* Disable it unconditionally */
+ console_srcu_write_flags(console, console->flags & ~CON_ENABLED);
+
+ if (res < 0)
+ return res;
+
+ /*
+ * Use the driver synchronization to ensure that the hardware is not
+ * in use while this console transitions to being unregistered.
+ */
+ if (use_device_lock)
+ console->device_lock(console, &flags);
- if (res)
- goto out_disable_unlock;
+ hlist_del_init_rcu(&console->node);
- if (console->flags & CON_EXTENDED)
- nr_ext_console_drivers--;
+ if (use_device_lock)
+ console->device_unlock(console, flags);
/*
+ * <HISTORICAL>
* If this isn't the last console and it has CON_CONSDEV set, we
* need to set it on the next preferred console.
+ * </HISTORICAL>
+ *
+ * The above makes no sense as there is no guarantee that the next
+ * console has any device attached. Oh well....
*/
- if (console_drivers != NULL && console->flags & CON_CONSDEV)
- console_drivers->flags |= CON_CONSDEV;
+ if (!hlist_empty(&console_list) && console->flags & CON_CONSDEV)
+ console_srcu_write_flags(console_first(), console_first()->flags | CON_CONSDEV);
+
+ /*
+ * Ensure that all SRCU list walks have completed. All contexts
+ * must not be able to see this console in the list so that any
+ * exit/cleanup routines can be performed safely.
+ */
+ synchronize_srcu(&console_srcu);
+
+ if (console->flags & CON_NBCON)
+ nbcon_free(console);
- console->flags &= ~CON_ENABLED;
- console_unlock();
console_sysfs_notify();
if (console->exit)
res = console->exit(console);
+ /*
+ * With this console gone, the global flags tracking registered
+ * console types may have changed. Update them.
+ */
+ for_each_console(c) {
+ if (c->flags & CON_BOOT)
+ found_boot_con = true;
+
+ if (c->flags & CON_NBCON)
+ found_nbcon_con = true;
+ else
+ found_legacy_con = true;
+ }
+ if (!found_boot_con)
+ have_boot_console = found_boot_con;
+ if (!found_legacy_con)
+ have_legacy_console = found_legacy_con;
+ if (!found_nbcon_con)
+ have_nbcon_console = found_nbcon_con;
+
+ /* Changed console list, may require printer threads to start/stop. */
+ printk_kthreads_check_locked();
+
return res;
+}
-out_disable_unlock:
- console->flags &= ~CON_ENABLED;
- console_unlock();
+int unregister_console(struct console *console)
+{
+ int res;
+ console_list_lock();
+ res = unregister_console_locked(console);
+ console_list_unlock();
return res;
}
EXPORT_SYMBOL(unregister_console);
+/**
+ * console_force_preferred_locked - force a registered console preferred
+ * @con: The registered console to force preferred.
+ *
+ * Must be called under console_list_lock().
+ */
+void console_force_preferred_locked(struct console *con)
+{
+ struct console *cur_pref_con;
+
+ if (!console_is_registered_locked(con))
+ return;
+
+ cur_pref_con = console_first();
+
+ /* Already preferred? */
+ if (cur_pref_con == con)
+ return;
+
+ /*
+ * Delete, but do not re-initialize the entry. This allows the console
+ * to continue to appear registered (via any hlist_unhashed_lockless()
+ * checks), even though it was briefly removed from the console list.
+ */
+ hlist_del_rcu(&con->node);
+
+ /*
+ * Ensure that all SRCU list walks have completed so that the console
+ * can be added to the beginning of the console list and its forward
+ * list pointer can be re-initialized.
+ */
+ synchronize_srcu(&console_srcu);
+
+ con->flags |= CON_CONSDEV;
+ WARN_ON(!con->device);
+
+ /* Only the new head can have CON_CONSDEV set. */
+ console_srcu_write_flags(cur_pref_con, cur_pref_con->flags & ~CON_CONSDEV);
+ hlist_add_head_rcu(&con->node, &console_list);
+}
+EXPORT_SYMBOL(console_force_preferred_locked);
+
/*
* Initialize the console device. This is called *early*, so
* we can't necessarily depend on lots of kernel help here.
@@ -3036,14 +4307,16 @@ void __init console_init(void)
*
* To mitigate this problem somewhat, only unregister consoles whose memory
* intersects with the init section. Note that all other boot consoles will
- * get unregistred when the real preferred console is registered.
+ * get unregistered when the real preferred console is registered.
*/
static int __init printk_late_init(void)
{
+ struct hlist_node *tmp;
struct console *con;
int ret;
- for_each_console(con) {
+ console_list_lock();
+ hlist_for_each_entry_safe(con, tmp, &console_list, node) {
if (!(con->flags & CON_BOOT))
continue;
@@ -3060,20 +4333,144 @@ static int __init printk_late_init(void)
*/
pr_warn("bootconsole [%s%d] uses init memory and must be disabled even before the real one is ready\n",
con->name, con->index);
- unregister_console(con);
+ unregister_console_locked(con);
}
}
+ console_list_unlock();
+
ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL,
console_cpu_notify);
WARN_ON(ret < 0);
ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online",
console_cpu_notify, NULL);
WARN_ON(ret < 0);
+ printk_sysctl_init();
return 0;
}
late_initcall(printk_late_init);
#if defined CONFIG_PRINTK
+/* If @con is specified, only wait for that console. Otherwise wait for all. */
+static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress)
+{
+ unsigned long timeout_jiffies = msecs_to_jiffies(timeout_ms);
+ unsigned long remaining_jiffies = timeout_jiffies;
+ struct console_flush_type ft;
+ struct console *c;
+ u64 last_diff = 0;
+ u64 printk_seq;
+ short flags;
+ int cookie;
+ u64 diff;
+ u64 seq;
+
+ /* Sorry, pr_flush() will not work this early. */
+ if (system_state < SYSTEM_SCHEDULING)
+ return false;
+
+ might_sleep();
+
+ seq = prb_next_reserve_seq(prb);
+
+ /* Flush the consoles so that records up to @seq are printed. */
+ printk_get_console_flush_type(&ft);
+ if (ft.nbcon_atomic)
+ nbcon_atomic_flush_pending();
+ if (ft.legacy_direct) {
+ console_lock();
+ console_unlock();
+ }
+
+ for (;;) {
+ unsigned long begin_jiffies;
+ unsigned long slept_jiffies;
+
+ diff = 0;
+
+ /*
+ * Hold the console_lock to guarantee safe access to
+ * console->seq. Releasing console_lock flushes more
+ * records in case @seq is still not printed on all
+ * usable consoles.
+ *
+ * Holding the console_lock is not necessary if there
+ * are no legacy or boot consoles. However, such a
+ * console could register at any time. Always hold the
+ * console_lock as a precaution rather than
+ * synchronizing against register_console().
+ */
+ console_lock();
+
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(c) {
+ if (con && con != c)
+ continue;
+
+ flags = console_srcu_read_flags(c);
+
+ /*
+ * If consoles are not usable, it cannot be expected
+ * that they make forward progress, so only increment
+ * @diff for usable consoles.
+ */
+ if (!console_is_usable(c, flags, true) &&
+ !console_is_usable(c, flags, false)) {
+ continue;
+ }
+
+ if (flags & CON_NBCON) {
+ printk_seq = nbcon_seq_read(c);
+ } else {
+ printk_seq = c->seq;
+ }
+
+ if (printk_seq < seq)
+ diff += seq - printk_seq;
+ }
+ console_srcu_read_unlock(cookie);
+
+ if (diff != last_diff && reset_on_progress)
+ remaining_jiffies = timeout_jiffies;
+
+ console_unlock();
+
+ /* Note: @diff is 0 if there are no usable consoles. */
+ if (diff == 0 || remaining_jiffies == 0)
+ break;
+
+ /* msleep(1) might sleep much longer. Check time by jiffies. */
+ begin_jiffies = jiffies;
+ msleep(1);
+ slept_jiffies = jiffies - begin_jiffies;
+
+ remaining_jiffies -= min(slept_jiffies, remaining_jiffies);
+
+ last_diff = diff;
+ }
+
+ return (diff == 0);
+}
+
+/**
+ * pr_flush() - Wait for printing threads to catch up.
+ *
+ * @timeout_ms: The maximum time (in ms) to wait.
+ * @reset_on_progress: Reset the timeout if forward progress is seen.
+ *
+ * A value of 0 for @timeout_ms means no waiting will occur. A value of -1
+ * represents infinite waiting.
+ *
+ * If @reset_on_progress is true, the timeout will be reset whenever any
+ * printer has been seen to make some forward progress.
+ *
+ * Context: Process context. May sleep while acquiring console lock.
+ * Return: true if all usable printers are caught up.
+ */
+static bool pr_flush(int timeout_ms, bool reset_on_progress)
+{
+ return __pr_flush(NULL, timeout_ms, reset_on_progress);
+}
+
/*
* Delayed printk version, for scheduler-internal messages:
*/
@@ -3084,12 +4481,16 @@ static DEFINE_PER_CPU(int, printk_pending);
static void wake_up_klogd_work_func(struct irq_work *irq_work)
{
- int pending = __this_cpu_xchg(printk_pending, 0);
+ int pending = this_cpu_xchg(printk_pending, 0);
if (pending & PRINTK_PENDING_OUTPUT) {
- /* If trylock fails, someone else is doing the printing */
- if (console_trylock())
- console_unlock();
+ if (force_legacy_kthread()) {
+ if (printk_legacy_kthread)
+ wake_up_interruptible(&legacy_wait);
+ } else {
+ if (console_trylock())
+ console_unlock();
+ }
}
if (pending & PRINTK_PENDING_WAKEUP)
@@ -3099,41 +4500,78 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work)
static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
IRQ_WORK_INIT_LAZY(wake_up_klogd_work_func);
-void wake_up_klogd(void)
+static void __wake_up_klogd(int val)
{
if (!printk_percpu_data_ready())
return;
preempt_disable();
- if (waitqueue_active(&log_wait)) {
- this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
+ /*
+ * Guarantee any new records can be seen by tasks preparing to wait
+ * before this context checks if the wait queue is empty.
+ *
+ * The full memory barrier within wq_has_sleeper() pairs with the full
+ * memory barrier within set_current_state() of
+ * prepare_to_wait_event(), which is called after ___wait_event() adds
+ * the waiter but before it has checked the wait condition.
+ *
+ * This pairs with devkmsg_read:A and syslog_print:A.
+ */
+ if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */
+ (val & PRINTK_PENDING_OUTPUT)) {
+ this_cpu_or(printk_pending, val);
irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
}
preempt_enable();
}
-void defer_console_output(void)
+/**
+ * wake_up_klogd - Wake kernel logging daemon
+ *
+ * Use this function when new records have been added to the ringbuffer
+ * and the console printing of those records has already occurred or is
+ * known to be handled by some other context. This function will only
+ * wake the logging daemon.
+ *
+ * Context: Any context.
+ */
+void wake_up_klogd(void)
{
- if (!printk_percpu_data_ready())
- return;
-
- preempt_disable();
- __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
- irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
- preempt_enable();
+ __wake_up_klogd(PRINTK_PENDING_WAKEUP);
}
-int vprintk_deferred(const char *fmt, va_list args)
+/**
+ * defer_console_output - Wake kernel logging daemon and trigger
+ * console printing in a deferred context
+ *
+ * Use this function when new records have been added to the ringbuffer,
+ * this context is responsible for console printing those records, but
+ * the current context is not allowed to perform the console printing.
+ * Trigger an irq_work context to perform the console printing. This
+ * function also wakes the logging daemon.
+ *
+ * Context: Any context.
+ */
+void defer_console_output(void)
{
- int r;
+ /*
+ * New messages may have been added directly to the ringbuffer
+ * using vprintk_store(), so wake any waiters as well.
+ */
+ __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT);
+}
- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
+void printk_trigger_flush(void)
+{
defer_console_output();
+}
- return r;
+int vprintk_deferred(const char *fmt, va_list args)
+{
+ return vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
}
-int printk_deferred(const char *fmt, ...)
+int _printk_deferred(const char *fmt, ...)
{
va_list args;
int r;
@@ -3260,17 +4698,21 @@ const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason)
EXPORT_SYMBOL_GPL(kmsg_dump_reason_str);
/**
- * kmsg_dump - dump kernel log to kernel message dumpers.
+ * kmsg_dump_desc - dump kernel log to kernel message dumpers.
* @reason: the reason (oops, panic etc) for dumping
+ * @desc: a short string to describe what caused the panic or oops. Can be NULL
+ * if no additional description is available.
*
* Call each of the registered dumper's dump() callback, which can
* retrieve the kmsg records with kmsg_dump_get_line() or
* kmsg_dump_get_buffer().
*/
-void kmsg_dump(enum kmsg_dump_reason reason)
+void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc)
{
struct kmsg_dumper *dumper;
- unsigned long flags;
+ struct kmsg_dump_detail detail = {
+ .reason = reason,
+ .description = desc};
rcu_read_lock();
list_for_each_entry_rcu(dumper, &dump_list, list) {
@@ -3287,26 +4729,15 @@ void kmsg_dump(enum kmsg_dump_reason reason)
if (reason > max_reason)
continue;
- /* initialize iterator with data about the stored records */
- dumper->active = true;
-
- logbuf_lock_irqsave(flags);
- dumper->cur_seq = clear_seq;
- dumper->next_seq = prb_next_seq(prb);
- logbuf_unlock_irqrestore(flags);
-
/* invoke dumper which will iterate over records */
- dumper->dump(dumper, reason);
-
- /* reset iterator */
- dumper->active = false;
+ dumper->dump(dumper, &detail);
}
rcu_read_unlock();
}
/**
- * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version)
- * @dumper: registered kmsg dumper
+ * kmsg_dump_get_line - retrieve one kmsg log line
+ * @iter: kmsg dump iterator
* @syslog: include the "<4>" prefixes
* @line: buffer to copy the line to
* @size: maximum size of the buffer
@@ -3320,30 +4751,29 @@ void kmsg_dump(enum kmsg_dump_reason reason)
*
* A return value of FALSE indicates that there are no more records to
* read.
- *
- * The function is similar to kmsg_dump_get_line(), but grabs no locks.
*/
-bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
- char *line, size_t size, size_t *len)
+bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog,
+ char *line, size_t size, size_t *len)
{
+ u64 min_seq = latched_seq_read_nolock(&clear_seq);
struct printk_info info;
unsigned int line_count;
struct printk_record r;
size_t l = 0;
bool ret = false;
- prb_rec_init_rd(&r, &info, line, size);
+ if (iter->cur_seq < min_seq)
+ iter->cur_seq = min_seq;
- if (!dumper->active)
- goto out;
+ prb_rec_init_rd(&r, &info, line, size);
/* Read text or count text lines? */
if (line) {
- if (!prb_read_valid(prb, dumper->cur_seq, &r))
+ if (!prb_read_valid(prb, iter->cur_seq, &r))
goto out;
l = record_print_text(&r, syslog, printk_time);
} else {
- if (!prb_read_valid_info(prb, dumper->cur_seq,
+ if (!prb_read_valid_info(prb, iter->cur_seq,
&info, &line_count)) {
goto out;
}
@@ -3352,52 +4782,22 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
}
- dumper->cur_seq = r.info->seq + 1;
+ iter->cur_seq = r.info->seq + 1;
ret = true;
out:
if (len)
*len = l;
return ret;
}
-
-/**
- * kmsg_dump_get_line - retrieve one kmsg log line
- * @dumper: registered kmsg dumper
- * @syslog: include the "<4>" prefixes
- * @line: buffer to copy the line to
- * @size: maximum size of the buffer
- * @len: length of line placed into buffer
- *
- * Start at the beginning of the kmsg buffer, with the oldest kmsg
- * record, and copy one record into the provided buffer.
- *
- * Consecutive calls will return the next available record moving
- * towards the end of the buffer with the youngest messages.
- *
- * A return value of FALSE indicates that there are no more records to
- * read.
- */
-bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
- char *line, size_t size, size_t *len)
-{
- unsigned long flags;
- bool ret;
-
- logbuf_lock_irqsave(flags);
- ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len);
- logbuf_unlock_irqrestore(flags);
-
- return ret;
-}
EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
/**
* kmsg_dump_get_buffer - copy kmsg log lines
- * @dumper: registered kmsg dumper
+ * @iter: kmsg dump iterator
* @syslog: include the "<4>" prefixes
* @buf: buffer to copy the line to
* @size: maximum size of the buffer
- * @len: length of line placed into buffer
+ * @len_out: length of line placed into buffer
*
* Start at the end of the kmsg buffer and fill the provided buffer
* with as many of the *youngest* kmsg records that fit into it.
@@ -3410,114 +4810,232 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
* A return value of FALSE indicates that there are no more records to
* read.
*/
-bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
- char *buf, size_t size, size_t *len)
+bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog,
+ char *buf, size_t size, size_t *len_out)
{
+ u64 min_seq = latched_seq_read_nolock(&clear_seq);
struct printk_info info;
- unsigned int line_count;
struct printk_record r;
- unsigned long flags;
u64 seq;
u64 next_seq;
- size_t l = 0;
+ size_t len = 0;
bool ret = false;
bool time = printk_time;
- prb_rec_init_rd(&r, &info, buf, size);
-
- if (!dumper->active || !buf || !size)
+ if (!buf || !size)
goto out;
- logbuf_lock_irqsave(flags);
- if (dumper->cur_seq < prb_first_valid_seq(prb)) {
- /* messages are gone, move to first available one */
- dumper->cur_seq = prb_first_valid_seq(prb);
+ if (iter->cur_seq < min_seq)
+ iter->cur_seq = min_seq;
+
+ if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) {
+ if (info.seq != iter->cur_seq) {
+ /* messages are gone, move to first available one */
+ iter->cur_seq = info.seq;
+ }
}
/* last entry */
- if (dumper->cur_seq >= dumper->next_seq) {
- logbuf_unlock_irqrestore(flags);
+ if (iter->cur_seq >= iter->next_seq)
goto out;
- }
-
- /* calculate length of entire buffer */
- seq = dumper->cur_seq;
- while (prb_read_valid_info(prb, seq, &info, &line_count)) {
- if (r.info->seq >= dumper->next_seq)
- break;
- l += get_record_print_text_size(&info, line_count, syslog, time);
- seq = r.info->seq + 1;
- }
- /* move first record forward until length fits into the buffer */
- seq = dumper->cur_seq;
- while (l >= size && prb_read_valid_info(prb, seq,
- &info, &line_count)) {
- if (r.info->seq >= dumper->next_seq)
- break;
- l -= get_record_print_text_size(&info, line_count, syslog, time);
- seq = r.info->seq + 1;
- }
+ /*
+ * Find first record that fits, including all following records,
+ * into the user-provided buffer for this dump. Pass in size-1
+ * because this function (by way of record_print_text()) will
+ * not write more than size-1 bytes of text into @buf.
+ */
+ seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq,
+ size - 1, syslog, time);
- /* last message in next interation */
+ /*
+ * Next kmsg_dump_get_buffer() invocation will dump block of
+ * older records stored right before this one.
+ */
next_seq = seq;
- /* actually read text into the buffer now */
- l = 0;
- while (prb_read_valid(prb, seq, &r)) {
- if (r.info->seq >= dumper->next_seq)
- break;
+ prb_rec_init_rd(&r, &info, buf, size);
- l += record_print_text(&r, syslog, time);
+ prb_for_each_record(seq, prb, seq, &r) {
+ if (r.info->seq >= iter->next_seq)
+ break;
- /* adjust record to store to remaining buffer space */
- prb_rec_init_rd(&r, &info, buf + l, size - l);
+ len += record_print_text(&r, syslog, time);
- seq = r.info->seq + 1;
+ /* Adjust record to store to remaining buffer space. */
+ prb_rec_init_rd(&r, &info, buf + len, size - len);
}
- dumper->next_seq = next_seq;
+ iter->next_seq = next_seq;
ret = true;
- logbuf_unlock_irqrestore(flags);
out:
- if (len)
- *len = l;
+ if (len_out)
+ *len_out = len;
return ret;
}
EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
/**
- * kmsg_dump_rewind_nolock - reset the iterator (unlocked version)
- * @dumper: registered kmsg dumper
+ * kmsg_dump_rewind - reset the iterator
+ * @iter: kmsg dump iterator
*
* Reset the dumper's iterator so that kmsg_dump_get_line() and
* kmsg_dump_get_buffer() can be called again and used multiple
* times within the same dumper.dump() callback.
+ */
+void kmsg_dump_rewind(struct kmsg_dump_iter *iter)
+{
+ iter->cur_seq = latched_seq_read_nolock(&clear_seq);
+ iter->next_seq = prb_next_seq(prb);
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
+
+/**
+ * console_try_replay_all - try to replay kernel log on consoles
+ *
+ * Try to obtain lock on console subsystem and replay all
+ * available records in printk buffer on the consoles.
+ * Does nothing if lock is not obtained.
+ *
+ * Context: Any, except for NMI.
+ */
+void console_try_replay_all(void)
+{
+ struct console_flush_type ft;
+
+ printk_get_console_flush_type(&ft);
+ if (console_trylock()) {
+ __console_rewind_all();
+ if (ft.nbcon_atomic)
+ nbcon_atomic_flush_pending();
+ if (ft.nbcon_offload)
+ nbcon_kthreads_wake();
+ if (ft.legacy_offload)
+ defer_console_output();
+ /* Consoles are flushed as part of console_unlock(). */
+ console_unlock();
+ }
+}
+#endif
+
+#ifdef CONFIG_SMP
+static atomic_t printk_cpu_sync_owner = ATOMIC_INIT(-1);
+static atomic_t printk_cpu_sync_nested = ATOMIC_INIT(0);
+
+bool is_printk_cpu_sync_owner(void)
+{
+ return (atomic_read(&printk_cpu_sync_owner) == raw_smp_processor_id());
+}
+
+/**
+ * __printk_cpu_sync_wait() - Busy wait until the printk cpu-reentrant
+ * spinning lock is not owned by any CPU.
*
- * The function is similar to kmsg_dump_rewind(), but grabs no locks.
+ * Context: Any context.
*/
-void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
+void __printk_cpu_sync_wait(void)
{
- dumper->cur_seq = clear_seq;
- dumper->next_seq = prb_next_seq(prb);
+ do {
+ cpu_relax();
+ } while (atomic_read(&printk_cpu_sync_owner) != -1);
}
+EXPORT_SYMBOL(__printk_cpu_sync_wait);
/**
- * kmsg_dump_rewind - reset the iterator
- * @dumper: registered kmsg dumper
+ * __printk_cpu_sync_try_get() - Try to acquire the printk cpu-reentrant
+ * spinning lock.
*
- * Reset the dumper's iterator so that kmsg_dump_get_line() and
- * kmsg_dump_get_buffer() can be called again and used multiple
- * times within the same dumper.dump() callback.
+ * If no processor has the lock, the calling processor takes the lock and
+ * becomes the owner. If the calling processor is already the owner of the
+ * lock, this function succeeds immediately.
+ *
+ * Context: Any context. Expects interrupts to be disabled.
+ * Return: 1 on success, otherwise 0.
*/
-void kmsg_dump_rewind(struct kmsg_dumper *dumper)
+int __printk_cpu_sync_try_get(void)
{
- unsigned long flags;
+ int cpu;
+ int old;
+
+ cpu = smp_processor_id();
- logbuf_lock_irqsave(flags);
- kmsg_dump_rewind_nolock(dumper);
- logbuf_unlock_irqrestore(flags);
+ /*
+ * Guarantee loads and stores from this CPU when it is the lock owner
+ * are _not_ visible to the previous lock owner. This pairs with
+ * __printk_cpu_sync_put:B.
+ *
+ * Memory barrier involvement:
+ *
+ * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B,
+ * then __printk_cpu_sync_put:A can never read from
+ * __printk_cpu_sync_try_get:B.
+ *
+ * Relies on:
+ *
+ * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B
+ * of the previous CPU
+ * matching
+ * ACQUIRE from __printk_cpu_sync_try_get:A to
+ * __printk_cpu_sync_try_get:B of this CPU
+ */
+ old = atomic_cmpxchg_acquire(&printk_cpu_sync_owner, -1,
+ cpu); /* LMM(__printk_cpu_sync_try_get:A) */
+ if (old == -1) {
+ /*
+ * This CPU is now the owner and begins loading/storing
+ * data: LMM(__printk_cpu_sync_try_get:B)
+ */
+ return 1;
+
+ } else if (old == cpu) {
+ /* This CPU is already the owner. */
+ atomic_inc(&printk_cpu_sync_nested);
+ return 1;
+ }
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
+EXPORT_SYMBOL(__printk_cpu_sync_try_get);
-#endif
+/**
+ * __printk_cpu_sync_put() - Release the printk cpu-reentrant spinning lock.
+ *
+ * The calling processor must be the owner of the lock.
+ *
+ * Context: Any context. Expects interrupts to be disabled.
+ */
+void __printk_cpu_sync_put(void)
+{
+ if (atomic_read(&printk_cpu_sync_nested)) {
+ atomic_dec(&printk_cpu_sync_nested);
+ return;
+ }
+
+ /*
+ * This CPU is finished loading/storing data:
+ * LMM(__printk_cpu_sync_put:A)
+ */
+
+ /*
+ * Guarantee loads and stores from this CPU when it was the
+ * lock owner are visible to the next lock owner. This pairs
+ * with __printk_cpu_sync_try_get:A.
+ *
+ * Memory barrier involvement:
+ *
+ * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B,
+ * then __printk_cpu_sync_try_get:B reads from __printk_cpu_sync_put:A.
+ *
+ * Relies on:
+ *
+ * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B
+ * of this CPU
+ * matching
+ * ACQUIRE from __printk_cpu_sync_try_get:A to
+ * __printk_cpu_sync_try_get:B of the next CPU
+ */
+ atomic_set_release(&printk_cpu_sync_owner,
+ -1); /* LMM(__printk_cpu_sync_put:B) */
+}
+EXPORT_SYMBOL(__printk_cpu_sync_put);
+#endif /* CONFIG_SMP */