Diffstat (limited to 'kernel/trace/trace.c')
-rw-r--r-- | kernel/trace/trace.c | 3939
1 file changed, 2471 insertions, 1468 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b5815a022ecc..0e6d517e74e0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -13,13 +13,12 @@ * Copyright (C) 2004 Nadia Yvette Chambers */ #include <linux/ring_buffer.h> -#include <generated/utsrelease.h> +#include <linux/utsname.h> #include <linux/stacktrace.h> #include <linux/writeback.h> #include <linux/kallsyms.h> #include <linux/security.h> #include <linux/seq_file.h> -#include <linux/notifier.h> #include <linux/irqflags.h> #include <linux/debugfs.h> #include <linux/tracefs.h> @@ -27,6 +26,7 @@ #include <linux/hardirq.h> #include <linux/linkage.h> #include <linux/uaccess.h> +#include <linux/cleanup.h> #include <linux/vmalloc.h> #include <linux/ftrace.h> #include <linux/module.h> @@ -39,6 +39,7 @@ #include <linux/slab.h> #include <linux/ctype.h> #include <linux/init.h> +#include <linux/panic_notifier.h> #include <linux/poll.h> #include <linux/nmi.h> #include <linux/fs.h> @@ -49,15 +50,12 @@ #include <linux/irq_work.h> #include <linux/workqueue.h> +#include <asm/setup.h> /* COMMAND_LINE_SIZE */ + #include "trace.h" #include "trace_output.h" -/* - * On boot up, the ring buffer is set to the minimum size, so that - * we do not waste memory on systems that are not using tracing. - */ -bool ring_buffer_expanded; - +#ifdef CONFIG_FTRACE_STARTUP_TEST /* * We need to change this state when a selftest is running. * A selftest will lurk into the ring-buffer to count the @@ -73,7 +71,6 @@ static bool __read_mostly tracing_selftest_running; */ bool __read_mostly tracing_selftest_disabled; -#ifdef CONFIG_FTRACE_STARTUP_TEST void __init disable_tracing_selftest(const char *reason) { if (!tracing_selftest_disabled) { @@ -81,11 +78,15 @@ void __init disable_tracing_selftest(const char *reason) pr_info("Ftrace startup test is disabled due to %s\n", reason); } } +#else +#define tracing_selftest_running 0 +#define tracing_selftest_disabled 0 #endif /* Pipe tracepoints to printk */ -struct trace_iterator *tracepoint_print_iter; +static struct trace_iterator *tracepoint_print_iter; int tracepoint_printk; +static bool tracepoint_printk_stop_on_boot __initdata; static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key); /* For tracers that don't implement custom flags */ @@ -104,7 +105,7 @@ dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) * tracing is active, only save the comm when a trace event * occurred. */ -static DEFINE_PER_CPU(bool, trace_taskinfo_save); +DEFINE_PER_CPU(bool, trace_taskinfo_save); /* * Kill all tracing for good (never come back). @@ -130,9 +131,12 @@ cpumask_var_t __read_mostly tracing_buffer_mask; * /proc/sys/kernel/ftrace_dump_on_oops * Set 1 if you want to dump buffers of all CPUs * Set 2 if you want to dump the buffer of the CPU that triggered oops + * Set instance name if you want to dump the specific trace instance + * Multiple instance dump is also supported, and instances are separated + * by commas. 
*/ - -enum ftrace_dump_mode ftrace_dump_on_oops; +/* Set to string format zero to disable by default */ +char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0"; /* When set, tracing will stop when a WARN*() is hit */ int __disable_trace_on_warning; @@ -176,37 +180,57 @@ static union trace_eval_map_item *trace_eval_maps; int tracing_set_tracer(struct trace_array *tr, const char *buf); static void ftrace_trace_userstack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, int pc); + unsigned int trace_ctx); -#define MAX_TRACER_SIZE 100 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; static char *default_bootup_tracer; static bool allocate_snapshot; +static bool snapshot_at_boot; + +static char boot_instance_info[COMMAND_LINE_SIZE] __initdata; +static int boot_instance_index; + +static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata; +static int boot_snapshot_index; static int __init set_cmdline_ftrace(char *str) { - strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); + strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); default_bootup_tracer = bootup_tracer_buf; /* We are using ftrace early, expand it */ - ring_buffer_expanded = true; + trace_set_ring_buffer_expanded(NULL); return 1; } __setup("ftrace=", set_cmdline_ftrace); +int ftrace_dump_on_oops_enabled(void) +{ + if (!strcmp("0", ftrace_dump_on_oops)) + return 0; + else + return 1; +} + static int __init set_ftrace_dump_on_oops(char *str) { - if (*str++ != '=' || !*str) { - ftrace_dump_on_oops = DUMP_ALL; + if (!*str) { + strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE); return 1; } - if (!strcmp("orig_cpu", str)) { - ftrace_dump_on_oops = DUMP_ORIG; - return 1; - } + if (*str == ',') { + strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE); + strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1); + return 1; + } + + if (*str++ == '=') { + strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE); + return 1; + } - return 0; + return 0; } __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); @@ -220,20 +244,59 @@ __setup("traceoff_on_warning", stop_trace_on_warning); static int __init boot_alloc_snapshot(char *str) { - allocate_snapshot = true; - /* We also need the main ring buffer expanded */ - ring_buffer_expanded = true; + char *slot = boot_snapshot_info + boot_snapshot_index; + int left = sizeof(boot_snapshot_info) - boot_snapshot_index; + int ret; + + if (str[0] == '=') { + str++; + if (strlen(str) >= left) + return -1; + + ret = snprintf(slot, left, "%s\t", str); + boot_snapshot_index += ret; + } else { + allocate_snapshot = true; + /* We also need the main ring buffer expanded */ + trace_set_ring_buffer_expanded(NULL); + } return 1; } __setup("alloc_snapshot", boot_alloc_snapshot); +static int __init boot_snapshot(char *str) +{ + snapshot_at_boot = true; + boot_alloc_snapshot(str); + return 1; +} +__setup("ftrace_boot_snapshot", boot_snapshot); + + +static int __init boot_instance(char *str) +{ + char *slot = boot_instance_info + boot_instance_index; + int left = sizeof(boot_instance_info) - boot_instance_index; + int ret; + + if (strlen(str) >= left) + return -1; + + ret = snprintf(slot, left, "%s\t", str); + boot_instance_index += ret; + + return 1; +} +__setup("trace_instance=", boot_instance); + + static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; static int __init set_trace_boot_options(char *str) { - strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); - return 0; + strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); + return 1; } __setup("trace_options=", 
set_trace_boot_options); @@ -242,20 +305,31 @@ static char *trace_boot_clock __initdata; static int __init set_trace_boot_clock(char *str) { - strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); + strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); trace_boot_clock = trace_boot_clock_buf; - return 0; + return 1; } __setup("trace_clock=", set_trace_boot_clock); static int __init set_tracepoint_printk(char *str) { + /* Ignore the "tp_printk_stop_on_boot" param */ + if (*str == '_') + return 0; + if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) tracepoint_printk = 1; return 1; } __setup("tp_printk", set_tracepoint_printk); +static int __init set_tracepoint_printk_stop(char *str) +{ + tracepoint_printk_stop_on_boot = true; + return 1; +} +__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop); + unsigned long long ns2usecs(u64 nsec) { nsec += 500; @@ -408,7 +482,8 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export); TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \ TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \ TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \ - TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS) + TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \ + TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK) /* trace_options that are only supported by global_trace */ #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \ @@ -416,7 +491,7 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export); /* trace_flags that are default zero for instances */ #define ZEROED_TRACE_FLAGS \ - (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK) + (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK) /* * The global_trace is the descriptor that holds the top-level tracing @@ -426,24 +501,51 @@ static struct trace_array global_trace = { .trace_flags = TRACE_DEFAULT_FLAGS, }; +static struct trace_array *printk_trace = &global_trace; + +static __always_inline bool printk_binsafe(struct trace_array *tr) +{ + /* + * The binary format of traceprintk can cause a crash if used + * by a buffer from another boot. Force the use of the + * non binary version of trace_printk if the trace_printk + * buffer is a boot mapped ring buffer. + */ + return !(tr->flags & TRACE_ARRAY_FL_BOOT); +} + +static void update_printk_trace(struct trace_array *tr) +{ + if (printk_trace == tr) + return; + + printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK; + printk_trace = tr; + tr->trace_flags |= TRACE_ITER_TRACE_PRINTK; +} + +void trace_set_ring_buffer_expanded(struct trace_array *tr) +{ + if (!tr) + tr = &global_trace; + tr->ring_buffer_expanded = true; +} + LIST_HEAD(ftrace_trace_arrays); int trace_array_get(struct trace_array *this_tr) { struct trace_array *tr; - int ret = -ENODEV; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (tr == this_tr) { tr->ref++; - ret = 0; - break; + return 0; } } - mutex_unlock(&trace_types_lock); - return ret; + return -ENODEV; } static void __trace_array_put(struct trace_array *this_tr) @@ -454,6 +556,7 @@ static void __trace_array_put(struct trace_array *this_tr) /** * trace_array_put - Decrement the reference counter for this trace array. + * @this_tr : pointer to the trace array * * NOTE: Use this when we no longer need the trace array returned by * trace_array_get_by_name(). 
This ensures the trace array can be later @@ -488,48 +591,23 @@ int tracing_check_open_get_tr(struct trace_array *tr) return 0; } -int call_filter_check_discard(struct trace_event_call *call, void *rec, - struct trace_buffer *buffer, - struct ring_buffer_event *event) -{ - if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && - !filter_match_preds(call->filter, rec)) { - __trace_event_discard_commit(buffer, event); - return 1; - } - - return 0; -} - -void trace_free_pid_list(struct trace_pid_list *pid_list) -{ - vfree(pid_list->pids); - kfree(pid_list); -} - /** * trace_find_filtered_pid - check if a pid exists in a filtered_pid list * @filtered_pids: The list of pids to check * @search_pid: The PID to find in @filtered_pids * - * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis. + * Returns true if @search_pid is found in @filtered_pids, and false otherwise. */ bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) { - /* - * If pid_max changed after filtered_pids was created, we - * by default ignore all pids greater than the previous pid_max. - */ - if (search_pid >= filtered_pids->pid_max) - return false; - - return test_bit(search_pid, filtered_pids->pids); + return trace_pid_list_is_set(filtered_pids, search_pid); } /** * trace_ignore_this_task - should a task be ignored for tracing * @filtered_pids: The list of pids to check + * @filtered_no_pids: The list of pids not to be traced * @task: The task that should be ignored if not filtered * * Checks if @task should be traced or not from @filtered_pids. @@ -542,7 +620,7 @@ trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task) { /* - * If filterd_no_pids is not empty, and the task's pid is listed + * If filtered_no_pids is not empty, and the task's pid is listed * in filtered_no_pids, then return true. * Otherwise, if filtered_pids is empty, that means we can * trace all tasks. 
If it has content, then only trace pids @@ -580,15 +658,11 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list, return; } - /* Sorry, but we don't support pid_max changing after setting */ - if (task->pid >= pid_list->pid_max) - return; - /* "self" is set for forks, and NULL for exits */ if (self) - set_bit(task->pid, pid_list->pids); + trace_pid_list_set(pid_list, task->pid); else - clear_bit(task->pid, pid_list->pids); + trace_pid_list_clear(pid_list, task->pid); } /** @@ -605,18 +679,19 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list, */ void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) { - unsigned long pid = (unsigned long)v; + long pid = (unsigned long)v; + unsigned int next; (*pos)++; - /* pid already is +1 of the actual prevous bit */ - pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid); + /* pid already is +1 of the actual previous bit */ + if (trace_pid_list_next(pid_list, pid, &next) < 0) + return NULL; - /* Return pid + 1 to allow zero to be represented */ - if (pid < pid_list->pid_max) - return (void *)(pid + 1); + pid = next; - return NULL; + /* Return pid + 1 to allow zero to be represented */ + return (void *)(pid + 1); } /** @@ -633,12 +708,14 @@ void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos) { unsigned long pid; + unsigned int first; loff_t l = 0; - pid = find_first_bit(pid_list->pids, pid_list->pid_max); - if (pid >= pid_list->pid_max) + if (trace_pid_list_first(pid_list, &first) < 0) return NULL; + pid = first; + /* Return pid + 1 so that zero can be the exit value */ for (pid++; pid && l < *pos; pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l)) @@ -674,7 +751,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, unsigned long val; int nr_pids = 0; ssize_t read = 0; - ssize_t ret = 0; + ssize_t ret; loff_t pos; pid_t pid; @@ -687,55 +764,48 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, * the user. If the operation fails, then the current list is * not modified. 
*/ - pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); + pid_list = trace_pid_list_alloc(); if (!pid_list) { trace_parser_put(&parser); return -ENOMEM; } - pid_list->pid_max = READ_ONCE(pid_max); - - /* Only truncating will shrink pid_max */ - if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max) - pid_list->pid_max = filtered_pids->pid_max; - - pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3); - if (!pid_list->pids) { - trace_parser_put(&parser); - kfree(pid_list); - return -ENOMEM; - } - if (filtered_pids) { /* copy the current bits to the new max */ - for_each_set_bit(pid, filtered_pids->pids, - filtered_pids->pid_max) { - set_bit(pid, pid_list->pids); + ret = trace_pid_list_first(filtered_pids, &pid); + while (!ret) { + trace_pid_list_set(pid_list, pid); + ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); nr_pids++; } } + ret = 0; while (cnt > 0) { pos = 0; ret = trace_get_user(&parser, ubuf, cnt, &pos); - if (ret < 0 || !trace_parser_loaded(&parser)) + if (ret < 0) break; read += ret; ubuf += ret; cnt -= ret; + if (!trace_parser_loaded(&parser)) + break; + ret = -EINVAL; if (kstrtoul(parser.buffer, 0, &val)) break; - if (val >= pid_list->pid_max) - break; pid = (pid_t)val; - set_bit(pid, pid_list->pids); + if (trace_pid_list_set(pid_list, pid) < 0) { + ret = -1; + break; + } nr_pids++; trace_parser_clear(&parser); @@ -744,14 +814,13 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, trace_parser_put(&parser); if (ret < 0) { - trace_free_pid_list(pid_list); + trace_pid_list_free(pid_list); return ret; } if (!nr_pids) { /* Cleared the list of pids */ - trace_free_pid_list(pid_list); - read = ret; + trace_pid_list_free(pid_list); pid_list = NULL; } @@ -768,7 +837,7 @@ static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu) if (!buf->buffer) return trace_clock_local(); - ts = ring_buffer_time_stamp(buf->buffer, cpu); + ts = ring_buffer_time_stamp(buf->buffer); ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts); return ts; @@ -780,7 +849,7 @@ u64 ftrace_now(int cpu) } /** - * tracing_is_enabled - Show if global_trace has been disabled + * tracing_is_enabled - Show if global_trace has been enabled * * Shows if the global trace has been enabled or not. It uses the * mirror flag "buffer_disabled" to be used in fast paths such as for @@ -831,7 +900,7 @@ DEFINE_MUTEX(trace_types_lock); * The content of events may become garbage if we allow other process consumes * these events concurrently: * A) the page of the consumed events may become a normal page - * (not reader page) in ring buffer, and this page will be rewrited + * (not reader page) in ring buffer, and this page will be rewritten * by events producer. * B) The page of the consumed events may become a page for splice_read, * and this page will be returned to system. 
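The trace_pid_write() rework above drops the open-coded bitmap (and its pid_max ceiling) in favor of the opaque trace_pid_list API, and trace_pid_start()/trace_pid_next() keep the seq_file convention of returning pid + 1 so that NULL can end iteration while pid 0 stays representable. Below is a minimal userspace model of that copy loop and cursor convention; the pid_list type and helpers are simplified stand-ins for illustration, not the kernel's trace_pid_list implementation.

	#include <stdio.h>
	#include <stdbool.h>

	/* Toy stand-in for the kernel's opaque trace_pid_list (illustrative only). */
	#define PID_LIMIT 64
	struct pid_list { bool set[PID_LIMIT]; };

	static int pid_list_set(struct pid_list *l, unsigned int pid)
	{
		if (pid >= PID_LIMIT)
			return -1;	/* mirrors trace_pid_list_set() failing */
		l->set[pid] = true;
		return 0;
	}

	/* Find the first set pid at or after @pid; returns < 0 when exhausted. */
	static int pid_list_next(struct pid_list *l, unsigned int pid, unsigned int *next)
	{
		for (; pid < PID_LIMIT; pid++) {
			if (l->set[pid]) {
				*next = pid;
				return 0;
			}
		}
		return -1;
	}

	int main(void)
	{
		struct pid_list old = { 0 }, new = { 0 };
		unsigned int pid, first;
		int ret;

		pid_list_set(&old, 0);	/* pid 0 must remain representable */
		pid_list_set(&old, 7);
		pid_list_set(&old, 42);

		/* The copy loop from trace_pid_write(): walk the old list, fill the new. */
		ret = pid_list_next(&old, 0, &pid);
		while (!ret) {
			pid_list_set(&new, pid);
			ret = pid_list_next(&old, pid + 1, &pid);
		}

		/* The trace_pid_next() cursor: carry pid + 1 so 0 can mean "done". */
		if (!pid_list_next(&new, 0, &first)) {
			for (unsigned long cursor = first + 1; cursor; ) {
				printf("pid %lu\n", cursor - 1);
				cursor = pid_list_next(&new, (unsigned int)cursor, &pid) ?
					 0 : (unsigned long)pid + 1;
			}
		}
		return 0;
	}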
@@ -904,24 +973,26 @@ static inline void trace_access_lock_init(void) #endif #ifdef CONFIG_STACKTRACE -static void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs); +static void __ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, + unsigned int trace_ctx, + int skip, struct pt_regs *regs); static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs); + unsigned int trace_ctx, + int skip, struct pt_regs *regs); #else -static inline void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) +static inline void __ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { } static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned long trace_ctx, + int skip, struct pt_regs *regs) { } @@ -929,24 +1000,24 @@ static inline void ftrace_trace_stack(struct trace_array *tr, static __always_inline void trace_event_setup(struct ring_buffer_event *event, - int type, unsigned long flags, int pc) + int type, unsigned int trace_ctx) { struct trace_entry *ent = ring_buffer_event_data(event); - tracing_generic_entry_update(ent, type, flags, pc); + tracing_generic_entry_update(ent, type, trace_ctx); } static __always_inline struct ring_buffer_event * __trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct ring_buffer_event *event; event = ring_buffer_lock_reserve(buffer, len); if (event != NULL) - trace_event_setup(event, type, flags, pc); + trace_event_setup(event, type, trace_ctx); return event; } @@ -992,40 +1063,37 @@ __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *ev ring_buffer_write(buffer, event->array[0], &event->array[1]); /* Release the temp buffer */ this_cpu_dec(trace_buffered_event_cnt); + /* ring_buffer_unlock_commit() enables preemption */ + preempt_enable_notrace(); } else - ring_buffer_unlock_commit(buffer, event); + ring_buffer_unlock_commit(buffer); } -/** - * __trace_puts - write a constant string into the trace buffer. - * @ip: The address of the caller - * @str: The constant string to write - * @size: The size of the string. 
- */ -int __trace_puts(unsigned long ip, const char *str, int size) +int __trace_array_puts(struct trace_array *tr, unsigned long ip, + const char *str, int size) { struct ring_buffer_event *event; struct trace_buffer *buffer; struct print_entry *entry; - unsigned long irq_flags; + unsigned int trace_ctx; int alloc; - int pc; - if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) + if (!(tr->trace_flags & TRACE_ITER_PRINTK)) return 0; - pc = preempt_count(); + if (unlikely(tracing_selftest_running && tr == &global_trace)) + return 0; - if (unlikely(tracing_selftest_running || tracing_disabled)) + if (unlikely(tracing_disabled)) return 0; alloc = sizeof(*entry) + size + 2; /* possible \n added */ - local_save_flags(irq_flags); - buffer = global_trace.array_buffer.buffer; + trace_ctx = tracing_gen_ctx(); + buffer = tr->array_buffer.buffer; ring_buffer_nest_start(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, - irq_flags, pc); + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, + trace_ctx); if (!event) { size = 0; goto out; @@ -1044,11 +1112,23 @@ int __trace_puts(unsigned long ip, const char *str, int size) entry->buf[size] = '\0'; __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); + ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); out: ring_buffer_nest_end(buffer); return size; } +EXPORT_SYMBOL_GPL(__trace_array_puts); + +/** + * __trace_puts - write a constant string into the trace buffer. + * @ip: The address of the caller + * @str: The constant string to write + * @size: The size of the string. + */ +int __trace_puts(unsigned long ip, const char *str, int size) +{ + return __trace_array_puts(printk_trace, ip, str, size); +} EXPORT_SYMBOL_GPL(__trace_puts); /** @@ -1058,28 +1138,29 @@ EXPORT_SYMBOL_GPL(__trace_puts); */ int __trace_bputs(unsigned long ip, const char *str) { + struct trace_array *tr = READ_ONCE(printk_trace); struct ring_buffer_event *event; struct trace_buffer *buffer; struct bputs_entry *entry; - unsigned long irq_flags; + unsigned int trace_ctx; int size = sizeof(struct bputs_entry); int ret = 0; - int pc; - if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) - return 0; + if (!printk_binsafe(tr)) + return __trace_puts(ip, str, strlen(str)); - pc = preempt_count(); + if (!(tr->trace_flags & TRACE_ITER_PRINTK)) + return 0; if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; - local_save_flags(irq_flags); - buffer = global_trace.array_buffer.buffer; + trace_ctx = tracing_gen_ctx(); + buffer = tr->array_buffer.buffer; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, - irq_flags, pc); + trace_ctx); if (!event) goto out; @@ -1088,7 +1169,7 @@ int __trace_bputs(unsigned long ip, const char *str) entry->str = str; __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); + ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); ret = 1; out: @@ -1105,22 +1186,28 @@ static void tracing_snapshot_instance_cond(struct trace_array *tr, unsigned long flags; if (in_nmi()) { - internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); - internal_trace_puts("*** snapshot is being ignored ***\n"); + trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); + trace_array_puts(tr, "*** snapshot is being ignored ***\n"); return; } if (!tr->allocated_snapshot) { - internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n"); - internal_trace_puts("*** stopping trace 
here! ***\n"); - tracing_off(); + trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n"); + trace_array_puts(tr, "*** stopping trace here! ***\n"); + tracer_tracing_off(tr); return; } /* Note, snapshot can not be used when the tracer uses it */ if (tracer->use_max_tr) { - internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n"); - internal_trace_puts("*** Can not use snapshot (sorry) ***\n"); + trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n"); + trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); + return; + } + + if (tr->mapped) { + trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n"); + trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); return; } @@ -1143,7 +1230,7 @@ void tracing_snapshot_instance(struct trace_array *tr) * * Note, make sure to allocate the snapshot with either * a tracing_snapshot_alloc(), or by doing it manually - * with: echo 1 > /sys/kernel/debug/tracing/snapshot + * with: echo 1 > /sys/kernel/tracing/snapshot * * If the snapshot buffer is not allocated, it will stop tracing. * Basically making a permanent snapshot. @@ -1176,7 +1263,7 @@ void tracing_snapshot_cond(struct trace_array *tr, void *cond_data) EXPORT_SYMBOL_GPL(tracing_snapshot_cond); /** - * tracing_snapshot_cond_data - get the user data associated with a snapshot + * tracing_cond_snapshot_data - get the user data associated with a snapshot * @tr: The tracing instance * * When the user enables a conditional snapshot using @@ -1193,12 +1280,14 @@ void *tracing_cond_snapshot_data(struct trace_array *tr) { void *cond_data = NULL; + local_irq_disable(); arch_spin_lock(&tr->max_lock); if (tr->cond_snapshot) cond_data = tr->cond_snapshot->cond_data; arch_spin_unlock(&tr->max_lock); + local_irq_enable(); return cond_data; } @@ -1210,10 +1299,17 @@ static void set_buffer_entries(struct array_buffer *buf, unsigned long val); int tracing_alloc_snapshot_instance(struct trace_array *tr) { + int order; int ret; if (!tr->allocated_snapshot) { + /* Make the snapshot buffer have the same order as main buffer */ + order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); + ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); + if (ret < 0) + return ret; + /* allocate spare buffer */ ret = resize_buffer_duplicate_size(&tr->max_buffer, &tr->array_buffer, RING_BUFFER_ALL_CPUS); @@ -1233,12 +1329,57 @@ static void free_snapshot(struct trace_array *tr) * The max_tr ring buffer has some state (e.g. ring->clock) and * we want preserve it. 
*/ + ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0); ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); set_buffer_entries(&tr->max_buffer, 1); tracing_reset_online_cpus(&tr->max_buffer); tr->allocated_snapshot = false; } +static int tracing_arm_snapshot_locked(struct trace_array *tr) +{ + int ret; + + lockdep_assert_held(&trace_types_lock); + + spin_lock(&tr->snapshot_trigger_lock); + if (tr->snapshot == UINT_MAX || tr->mapped) { + spin_unlock(&tr->snapshot_trigger_lock); + return -EBUSY; + } + + tr->snapshot++; + spin_unlock(&tr->snapshot_trigger_lock); + + ret = tracing_alloc_snapshot_instance(tr); + if (ret) { + spin_lock(&tr->snapshot_trigger_lock); + tr->snapshot--; + spin_unlock(&tr->snapshot_trigger_lock); + } + + return ret; +} + +int tracing_arm_snapshot(struct trace_array *tr) +{ + int ret; + + mutex_lock(&trace_types_lock); + ret = tracing_arm_snapshot_locked(tr); + mutex_unlock(&trace_types_lock); + + return ret; +} + +void tracing_disarm_snapshot(struct trace_array *tr) +{ + spin_lock(&tr->snapshot_trigger_lock); + if (!WARN_ON(!tr->snapshot)) + tr->snapshot--; + spin_unlock(&tr->snapshot_trigger_lock); +} + /** * tracing_alloc_snapshot - allocate snapshot buffer. * @@ -1300,26 +1441,20 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update) { - struct cond_snapshot *cond_snapshot; - int ret = 0; + struct cond_snapshot *cond_snapshot __free(kfree) = + kzalloc(sizeof(*cond_snapshot), GFP_KERNEL); + int ret; - cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL); if (!cond_snapshot) return -ENOMEM; cond_snapshot->cond_data = cond_data; cond_snapshot->update = update; - mutex_lock(&trace_types_lock); - - ret = tracing_alloc_snapshot_instance(tr); - if (ret) - goto fail_unlock; + guard(mutex)(&trace_types_lock); - if (tr->current_trace->use_max_tr) { - ret = -EBUSY; - goto fail_unlock; - } + if (tr->current_trace->use_max_tr) + return -EBUSY; /* * The cond_snapshot can only change to NULL without the @@ -1329,23 +1464,20 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, * do safely with only holding the trace_types_lock and not * having to take the max_lock. 
*/ - if (tr->cond_snapshot) { - ret = -EBUSY; - goto fail_unlock; - } + if (tr->cond_snapshot) + return -EBUSY; + + ret = tracing_arm_snapshot_locked(tr); + if (ret) + return ret; + local_irq_disable(); arch_spin_lock(&tr->max_lock); - tr->cond_snapshot = cond_snapshot; + tr->cond_snapshot = no_free_ptr(cond_snapshot); arch_spin_unlock(&tr->max_lock); + local_irq_enable(); - mutex_unlock(&trace_types_lock); - - return ret; - - fail_unlock: - mutex_unlock(&trace_types_lock); - kfree(cond_snapshot); - return ret; + return 0; } EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable); @@ -1363,6 +1495,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr) { int ret = 0; + local_irq_disable(); arch_spin_lock(&tr->max_lock); if (!tr->cond_snapshot) @@ -1373,6 +1506,9 @@ int tracing_snapshot_cond_disable(struct trace_array *tr) } arch_spin_unlock(&tr->max_lock); + local_irq_enable(); + + tracing_disarm_snapshot(tr); return ret; } @@ -1415,6 +1551,8 @@ int tracing_snapshot_cond_disable(struct trace_array *tr) return false; } EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); +#define free_snapshot(tr) do { } while (0) +#define tracing_arm_snapshot_locked(tr) ({ -EBUSY; }) #endif /* CONFIG_TRACER_SNAPSHOT */ void tracer_tracing_off(struct trace_array *tr) @@ -1466,7 +1604,7 @@ void disable_trace_on_warning(void) bool tracer_tracing_is_on(struct trace_array *tr) { if (tr->array_buffer.buffer) - return ring_buffer_record_is_on(tr->array_buffer.buffer); + return ring_buffer_record_is_set_on(tr->array_buffer.buffer); return !tr->buffer_disabled; } @@ -1486,10 +1624,12 @@ static int __init set_buf_size(char *str) if (!str) return 0; buf_size = memparse(str, &str); - /* nr_entries can not be zero */ - if (buf_size == 0) - return 0; - trace_buf_size = buf_size; + /* + * nr_entries can not be zero and the startup + * tests require some buffer space. Therefore + * ensure we have at least 4096 bytes of buffer. 
+ */ + trace_buf_size = max(4096UL, buf_size); return 1; } __setup("trace_buf_size=", set_buf_size); @@ -1523,7 +1663,7 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) #undef C #define C(a, b) b -/* These must match the bit postions in trace_iterator_flags */ +/* These must match the bit positions in trace_iterator_flags */ static const char *trace_options[] = { TRACE_FLAGS NULL @@ -1542,6 +1682,7 @@ static struct { { ktime_get_mono_fast_ns, "mono", 1 }, { ktime_get_raw_fast_ns, "mono_raw", 1 }, { ktime_get_boot_fast_ns, "boot", 1 }, + { ktime_get_tai_fast_ns, "tai", 1 }, ARCH_TRACE_CLOCKS }; @@ -1670,23 +1811,24 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) { int len; - if (trace_seq_used(s) <= s->seq.readpos) + if (trace_seq_used(s) <= s->readpos) return -EBUSY; - len = trace_seq_used(s) - s->seq.readpos; + len = trace_seq_used(s) - s->readpos; if (cnt > len) cnt = len; - memcpy(buf, s->buffer + s->seq.readpos, cnt); + memcpy(buf, s->buffer + s->readpos, cnt); - s->seq.readpos += cnt; + s->readpos += cnt; return cnt; } unsigned long __read_mostly tracing_thresh; + +#ifdef CONFIG_TRACER_MAX_TRACE static const struct file_operations tracing_max_lat_fops; -#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \ - defined(CONFIG_FSNOTIFY) +#ifdef LATENCY_FS_NOTIFY static struct workqueue_struct *fsnotify_wq; @@ -1709,8 +1851,9 @@ static void trace_create_maxlat_file(struct trace_array *tr, { INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn); init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq); - tr->d_max_latency = trace_create_file("tracing_max_latency", 0644, - d_tracer, &tr->max_latency, + tr->d_max_latency = trace_create_file("tracing_max_latency", + TRACE_MODE_WRITE, + d_tracer, tr, &tracing_max_lat_fops); } @@ -1739,19 +1882,14 @@ void latency_fsnotify(struct trace_array *tr) irq_work_queue(&tr->fsnotify_irqwork); } -/* - * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \ - * defined(CONFIG_FSNOTIFY) - */ -#else +#else /* !LATENCY_FS_NOTIFY */ #define trace_create_maxlat_file(tr, d_tracer) \ - trace_create_file("tracing_max_latency", 0644, d_tracer, \ - &tr->max_latency, &tracing_max_lat_fops) + trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ + d_tracer, tr, &tracing_max_lat_fops) #endif -#ifdef CONFIG_TRACER_MAX_TRACE /* * Copy the new maximum trace into the separate maximum-trace * structure. 
(this way the maximum trace is permanently saved, @@ -1772,7 +1910,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) max_data->critical_start = data->critical_start; max_data->critical_end = data->critical_end; - strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN); + strscpy(max_data->comm, tsk->comm); max_data->pid = tsk->pid; /* * If tsk == current, then use current_uid(), as that does not use @@ -1826,15 +1964,19 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, ring_buffer_record_off(tr->max_buffer.buffer); #ifdef CONFIG_TRACER_SNAPSHOT - if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) - goto out_unlock; + if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) { + arch_spin_unlock(&tr->max_lock); + return; + } #endif swap(tr->array_buffer.buffer, tr->max_buffer.buffer); __update_max_tr(tr, tsk, cpu); - out_unlock: arch_spin_unlock(&tr->max_lock); + + /* Any waiters on the old snapshot buffer need to wake up */ + ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS); } /** @@ -1870,9 +2012,10 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) * place on this CPU. We fail to record, but we reset * the max trace buffer (no one writes directly to it) * and flag that it failed. + * Another reason is resize is in progress. */ trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_, - "Failed to swap buffers due to commit in progress\n"); + "Failed to swap buffers due to commit or resize in progress\n"); } WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); @@ -1880,16 +2023,49 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) __update_max_tr(tr, tsk, cpu); arch_spin_unlock(&tr->max_lock); } + #endif /* CONFIG_TRACER_MAX_TRACE */ +struct pipe_wait { + struct trace_iterator *iter; + int wait_index; +}; + +static bool wait_pipe_cond(void *data) +{ + struct pipe_wait *pwait = data; + struct trace_iterator *iter = pwait->iter; + + if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index) + return true; + + return iter->closed; +} + static int wait_on_pipe(struct trace_iterator *iter, int full) { + struct pipe_wait pwait; + int ret; + /* Iterators are static, they should be filled or empty */ if (trace_buffer_iter(iter, iter->cpu_file)) return 0; - return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, - full); + pwait.wait_index = atomic_read_acquire(&iter->wait_index); + pwait.iter = iter; + + ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full, + wait_pipe_cond, &pwait); + +#ifdef CONFIG_TRACER_MAX_TRACE + /* + * Make sure this is still the snapshot buffer, as if a snapshot were + * to happen, this would now be the main buffer. + */ + if (iter->snapshot) + iter->array_buffer = &iter->tr->max_buffer; +#endif + return ret; } #ifdef CONFIG_FTRACE_STARTUP_TEST @@ -1932,6 +2108,12 @@ static int run_tracer_selftest(struct tracer *type) if (!selftests_can_run) return save_selftest(type); + if (!tracing_is_on()) { + pr_warn("Selftest for tracer %s skipped due to tracing disabled\n", + type->name); + return 0; + } + /* * Run a selftest on this tracer. 
* Here we reset the trace buffer, and set the current @@ -1946,7 +2128,7 @@ static int run_tracer_selftest(struct tracer *type) #ifdef CONFIG_TRACER_MAX_TRACE if (type->use_max_tr) { /* If we expanded the buffers, make sure the max is expanded too */ - if (ring_buffer_expanded) + if (tr->ring_buffer_expanded) ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size, RING_BUFFER_ALL_CPUS); tr->allocated_snapshot = true; @@ -1972,7 +2154,7 @@ static int run_tracer_selftest(struct tracer *type) tr->allocated_snapshot = false; /* Shrink the max buffer again */ - if (ring_buffer_expanded) + if (tr->ring_buffer_expanded) ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); } @@ -1982,6 +2164,24 @@ static int run_tracer_selftest(struct tracer *type) return 0; } +static int do_run_tracer_selftest(struct tracer *type) +{ + int ret; + + /* + * Tests can take a long time, especially if they are run one after the + * other, as does happen during bootup when all the tracers are + * registered. This could cause the soft lockup watchdog to trigger. + */ + cond_resched(); + + tracing_selftest_running = true; + ret = run_tracer_selftest(type); + tracing_selftest_running = false; + + return ret; +} + static __init int init_trace_selftests(void) { struct trace_selftests *p, *n; @@ -1990,10 +2190,10 @@ static __init int init_trace_selftests(void) selftests_can_run = true; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); if (list_empty(&postponed_selftests)) - goto out; + return 0; pr_info("Running postponed tracer tests:\n"); @@ -2022,14 +2222,11 @@ static __init int init_trace_selftests(void) } tracing_selftest_running = false; - out: - mutex_unlock(&trace_types_lock); - return 0; } core_initcall(init_trace_selftests); #else -static inline int run_tracer_selftest(struct tracer *type) +static inline int do_run_tracer_selftest(struct tracer *type) { return 0; } @@ -2068,8 +2265,6 @@ int __init register_tracer(struct tracer *type) mutex_lock(&trace_types_lock); - tracing_selftest_running = true; - for (t = trace_types; t; t = t->next) { if (strcmp(type->name, t->name) == 0) { /* already found */ @@ -2098,7 +2293,7 @@ int __init register_tracer(struct tracer *type) /* store the tracer for __set_tracer_option */ type->flags->trace = type; - ret = run_tracer_selftest(type); + ret = do_run_tracer_selftest(type); if (ret < 0) goto out; @@ -2107,7 +2302,6 @@ int __init register_tracer(struct tracer *type) add_tracer_options(&global_trace, type); out: - tracing_selftest_running = false; mutex_unlock(&trace_types_lock); if (ret || !default_bootup_tracer) @@ -2165,11 +2359,32 @@ void tracing_reset_online_cpus(struct array_buffer *buf) ring_buffer_record_enable(buffer); } +static void tracing_reset_all_cpus(struct array_buffer *buf) +{ + struct trace_buffer *buffer = buf->buffer; + + if (!buffer) + return; + + ring_buffer_record_disable(buffer); + + /* Make sure all commits have finished */ + synchronize_rcu(); + + buf->time_start = buffer_ftrace_now(buf, buf->cpu); + + ring_buffer_reset(buffer); + + ring_buffer_record_enable(buffer); +} + /* Must have trace_types_lock held */ -void tracing_reset_all_online_cpus(void) +void tracing_reset_all_online_cpus_unlocked(void) { struct trace_array *tr; + lockdep_assert_held(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (!tr->clear_trace) continue; @@ -2181,74 +2396,11 @@ void tracing_reset_all_online_cpus(void) } } -static int *tgid_map; - -#define SAVED_CMDLINES_DEFAULT 128 -#define NO_CMDLINE_MAP UINT_MAX 
-static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; -struct saved_cmdlines_buffer { - unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; - unsigned *map_cmdline_to_pid; - unsigned cmdline_num; - int cmdline_idx; - char *saved_cmdlines; -}; -static struct saved_cmdlines_buffer *savedcmd; - -/* temporary disable recording */ -static atomic_t trace_record_taskinfo_disabled __read_mostly; - -static inline char *get_saved_cmdlines(int idx) -{ - return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN]; -} - -static inline void set_cmdline(int idx, const char *cmdline) -{ - strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN); -} - -static int allocate_cmdlines_buffer(unsigned int val, - struct saved_cmdlines_buffer *s) -{ - s->map_cmdline_to_pid = kmalloc_array(val, - sizeof(*s->map_cmdline_to_pid), - GFP_KERNEL); - if (!s->map_cmdline_to_pid) - return -ENOMEM; - - s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); - if (!s->saved_cmdlines) { - kfree(s->map_cmdline_to_pid); - return -ENOMEM; - } - - s->cmdline_idx = 0; - s->cmdline_num = val; - memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP, - sizeof(s->map_pid_to_cmdline)); - memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP, - val * sizeof(*s->map_cmdline_to_pid)); - - return 0; -} - -static int trace_create_savedcmd(void) +void tracing_reset_all_online_cpus(void) { - int ret; - - savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL); - if (!savedcmd) - return -ENOMEM; - - ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd); - if (ret < 0) { - kfree(savedcmd); - savedcmd = NULL; - return -ENOMEM; - } - - return 0; + mutex_lock(&trace_types_lock); + tracing_reset_all_online_cpus_unlocked(); + mutex_unlock(&trace_types_lock); } int is_tracing_stopped(void) @@ -2256,13 +2408,7 @@ int is_tracing_stopped(void) return global_trace.stop_count; } -/** - * tracing_start - quick start of the tracer - * - * If tracing is enabled but was stopped by tracing_stop, - * this will start the tracer back up. 
- */ -void tracing_start(void) +static void tracing_start_tr(struct trace_array *tr) { struct trace_buffer *buffer; unsigned long flags; @@ -2270,306 +2416,83 @@ void tracing_start(void) if (tracing_disabled) return; - raw_spin_lock_irqsave(&global_trace.start_lock, flags); - if (--global_trace.stop_count) { - if (global_trace.stop_count < 0) { + raw_spin_lock_irqsave(&tr->start_lock, flags); + if (--tr->stop_count) { + if (WARN_ON_ONCE(tr->stop_count < 0)) { /* Someone screwed up their debugging */ - WARN_ON_ONCE(1); - global_trace.stop_count = 0; + tr->stop_count = 0; } goto out; } /* Prevent the buffers from switching */ - arch_spin_lock(&global_trace.max_lock); + arch_spin_lock(&tr->max_lock); - buffer = global_trace.array_buffer.buffer; + buffer = tr->array_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); #ifdef CONFIG_TRACER_MAX_TRACE - buffer = global_trace.max_buffer.buffer; + buffer = tr->max_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); #endif - arch_spin_unlock(&global_trace.max_lock); - - out: - raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); -} - -static void tracing_start_tr(struct trace_array *tr) -{ - struct trace_buffer *buffer; - unsigned long flags; - - if (tracing_disabled) - return; - - /* If global, we need to also start the max tracer */ - if (tr->flags & TRACE_ARRAY_FL_GLOBAL) - return tracing_start(); - - raw_spin_lock_irqsave(&tr->start_lock, flags); - - if (--tr->stop_count) { - if (tr->stop_count < 0) { - /* Someone screwed up their debugging */ - WARN_ON_ONCE(1); - tr->stop_count = 0; - } - goto out; - } - - buffer = tr->array_buffer.buffer; - if (buffer) - ring_buffer_record_enable(buffer); + arch_spin_unlock(&tr->max_lock); out: raw_spin_unlock_irqrestore(&tr->start_lock, flags); } /** - * tracing_stop - quick stop of the tracer + * tracing_start - quick start of the tracer * - * Light weight way to stop tracing. Use in conjunction with - * tracing_start. + * If tracing is enabled but was stopped by tracing_stop, + * this will start the tracer back up. 
*/ -void tracing_stop(void) +void tracing_start(void) + +{ + return tracing_start_tr(&global_trace); +} + +static void tracing_stop_tr(struct trace_array *tr) { struct trace_buffer *buffer; unsigned long flags; - raw_spin_lock_irqsave(&global_trace.start_lock, flags); - if (global_trace.stop_count++) + raw_spin_lock_irqsave(&tr->start_lock, flags); + if (tr->stop_count++) goto out; /* Prevent the buffers from switching */ - arch_spin_lock(&global_trace.max_lock); + arch_spin_lock(&tr->max_lock); - buffer = global_trace.array_buffer.buffer; + buffer = tr->array_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); #ifdef CONFIG_TRACER_MAX_TRACE - buffer = global_trace.max_buffer.buffer; + buffer = tr->max_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); #endif - arch_spin_unlock(&global_trace.max_lock); - - out: - raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); -} - -static void tracing_stop_tr(struct trace_array *tr) -{ - struct trace_buffer *buffer; - unsigned long flags; - - /* If global, we need to also stop the max tracer */ - if (tr->flags & TRACE_ARRAY_FL_GLOBAL) - return tracing_stop(); - - raw_spin_lock_irqsave(&tr->start_lock, flags); - if (tr->stop_count++) - goto out; - - buffer = tr->array_buffer.buffer; - if (buffer) - ring_buffer_record_disable(buffer); + arch_spin_unlock(&tr->max_lock); out: raw_spin_unlock_irqrestore(&tr->start_lock, flags); } -static int trace_save_cmdline(struct task_struct *tsk) -{ - unsigned pid, idx; - - /* treat recording of idle task as a success */ - if (!tsk->pid) - return 1; - - if (unlikely(tsk->pid > PID_MAX_DEFAULT)) - return 0; - - /* - * It's not the end of the world if we don't get - * the lock, but we also don't want to spin - * nor do we want to disable interrupts, - * so if we miss here, then better luck next time. - */ - if (!arch_spin_trylock(&trace_cmdline_lock)) - return 0; - - idx = savedcmd->map_pid_to_cmdline[tsk->pid]; - if (idx == NO_CMDLINE_MAP) { - idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num; - - /* - * Check whether the cmdline buffer at idx has a pid - * mapped. We are going to overwrite that entry so we - * need to clear the map_pid_to_cmdline. Otherwise we - * would read the new comm for the old pid. 
- */ - pid = savedcmd->map_cmdline_to_pid[idx]; - if (pid != NO_CMDLINE_MAP) - savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP; - - savedcmd->map_cmdline_to_pid[idx] = tsk->pid; - savedcmd->map_pid_to_cmdline[tsk->pid] = idx; - - savedcmd->cmdline_idx = idx; - } - - set_cmdline(idx, tsk->comm); - - arch_spin_unlock(&trace_cmdline_lock); - - return 1; -} - -static void __trace_find_cmdline(int pid, char comm[]) -{ - unsigned map; - - if (!pid) { - strcpy(comm, "<idle>"); - return; - } - - if (WARN_ON_ONCE(pid < 0)) { - strcpy(comm, "<XXX>"); - return; - } - - if (pid > PID_MAX_DEFAULT) { - strcpy(comm, "<...>"); - return; - } - - map = savedcmd->map_pid_to_cmdline[pid]; - if (map != NO_CMDLINE_MAP) - strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN); - else - strcpy(comm, "<...>"); -} - -void trace_find_cmdline(int pid, char comm[]) -{ - preempt_disable(); - arch_spin_lock(&trace_cmdline_lock); - - __trace_find_cmdline(pid, comm); - - arch_spin_unlock(&trace_cmdline_lock); - preempt_enable(); -} - -int trace_find_tgid(int pid) -{ - if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT)) - return 0; - - return tgid_map[pid]; -} - -static int trace_save_tgid(struct task_struct *tsk) -{ - /* treat recording of idle task as a success */ - if (!tsk->pid) - return 1; - - if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT)) - return 0; - - tgid_map[tsk->pid] = tsk->tgid; - return 1; -} - -static bool tracing_record_taskinfo_skip(int flags) -{ - if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID)))) - return true; - if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on()) - return true; - if (!__this_cpu_read(trace_taskinfo_save)) - return true; - return false; -} - -/** - * tracing_record_taskinfo - record the task info of a task - * - * @task: task to record - * @flags: TRACE_RECORD_CMDLINE for recording comm - * TRACE_RECORD_TGID for recording tgid - */ -void tracing_record_taskinfo(struct task_struct *task, int flags) -{ - bool done; - - if (tracing_record_taskinfo_skip(flags)) - return; - - /* - * Record as much task information as possible. If some fail, continue - * to try to record the others. - */ - done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task); - done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task); - - /* If recording any information failed, retry again soon. */ - if (!done) - return; - - __this_cpu_write(trace_taskinfo_save, false); -} - /** - * tracing_record_taskinfo_sched_switch - record task info for sched_switch + * tracing_stop - quick stop of the tracer * - * @prev: previous task during sched_switch - * @next: next task during sched_switch - * @flags: TRACE_RECORD_CMDLINE for recording comm - * TRACE_RECORD_TGID for recording tgid + * Light weight way to stop tracing. Use in conjunction with + * tracing_start. */ -void tracing_record_taskinfo_sched_switch(struct task_struct *prev, - struct task_struct *next, int flags) -{ - bool done; - - if (tracing_record_taskinfo_skip(flags)) - return; - - /* - * Record as much task information as possible. If some fail, continue - * to try to record the others. - */ - done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev); - done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next); - done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev); - done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next); - - /* If recording any information failed, retry again soon. 
*/ - if (!done) - return; - - __this_cpu_write(trace_taskinfo_save, false); -} - -/* Helpers to record a specific task information */ -void tracing_record_cmdline(struct task_struct *task) -{ - tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE); -} - -void tracing_record_tgid(struct task_struct *task) +void tracing_stop(void) { - tracing_record_taskinfo(task, TRACE_RECORD_TGID); + return tracing_stop_tr(&global_trace); } /* @@ -2584,36 +2507,48 @@ enum print_line_t trace_handle_return(struct trace_seq *s) } EXPORT_SYMBOL_GPL(trace_handle_return); -void -tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, - unsigned long flags, int pc) +static unsigned short migration_disable_value(void) { - struct task_struct *tsk = current; - - entry->preempt_count = pc & 0xff; - entry->pid = (tsk) ? tsk->pid : 0; - entry->type = type; - entry->flags = -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | +#if defined(CONFIG_SMP) + return current->migration_disabled; #else - TRACE_FLAG_IRQS_NOSUPPORT | + return 0; #endif - ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | - ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | - ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | - (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); } -EXPORT_SYMBOL_GPL(tracing_generic_entry_update); + +unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) +{ + unsigned int trace_flags = irqs_status; + unsigned int pc; + + pc = preempt_count(); + + if (pc & NMI_MASK) + trace_flags |= TRACE_FLAG_NMI; + if (pc & HARDIRQ_MASK) + trace_flags |= TRACE_FLAG_HARDIRQ; + if (in_serving_softirq()) + trace_flags |= TRACE_FLAG_SOFTIRQ; + if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) + trace_flags |= TRACE_FLAG_BH_OFF; + + if (tif_need_resched()) + trace_flags |= TRACE_FLAG_NEED_RESCHED; + if (test_preempt_need_resched()) + trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; + if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY)) + trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; + return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | + (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; +} struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { - return __trace_buffer_lock_reserve(buffer, type, len, flags, pc); + return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); } DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); @@ -2648,8 +2583,11 @@ void trace_buffered_event_enable(void) for_each_tracing_cpu(cpu) { page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_NORETRY, 0); - if (!page) - goto failed; + /* This is just an optimization and can handle failures */ + if (!page) { + pr_err("Failed to allocate event buffer\n"); + break; + } event = page_address(page); memset(event, 0, sizeof(*event)); @@ -2663,10 +2601,6 @@ void trace_buffered_event_enable(void) WARN_ON_ONCE(1); preempt_enable(); } - - return; - failed: - trace_buffered_event_disable(); } static void enable_trace_buffered_event(void *data) @@ -2701,11 +2635,9 @@ void trace_buffered_event_disable(void) if (--trace_buffered_event_ref) return; - preempt_disable(); /* For each CPU, set the buffer as used. 
*/ - smp_call_function_many(tracing_buffer_mask, - disable_trace_buffered_event, NULL, 1); - preempt_enable(); + on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event, + NULL, true); /* Wait for all current users to finish */ synchronize_rcu(); @@ -2714,17 +2646,19 @@ void trace_buffered_event_disable(void) free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); per_cpu(trace_buffered_event, cpu) = NULL; } + /* - * Make sure trace_buffered_event is NULL before clearing - * trace_buffered_event_cnt. + * Wait for all CPUs that potentially started checking if they can use + * their event buffer only after the previous synchronize_rcu() call and + * they still read a valid pointer from trace_buffered_event. It must be + * ensured they don't see cleared trace_buffered_event_cnt else they + * could wrongly decide to use the pointed-to buffer which is now freed. */ - smp_wmb(); + synchronize_rcu(); - preempt_disable(); - /* Do the work on each cpu */ - smp_call_function_many(tracing_buffer_mask, - enable_trace_buffered_event, NULL, 1); - preempt_enable(); + /* For each CPU, relinquish the buffer */ + on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, + true); } static struct trace_buffer *temp_buffer; @@ -2733,28 +2667,70 @@ struct ring_buffer_event * trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, struct trace_event_file *trace_file, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct ring_buffer_event *entry; + struct trace_array *tr = trace_file->tr; int val; - *current_rb = trace_file->tr->array_buffer.buffer; - - if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags & - (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && - (entry = this_cpu_read(trace_buffered_event))) { - /* Try to use the per cpu buffer first */ - val = this_cpu_inc_return(trace_buffered_event_cnt); - if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) { - trace_event_setup(entry, type, flags, pc); - entry->array[0] = len; - return entry; + *current_rb = tr->array_buffer.buffer; + + if (!tr->no_filter_buffering_ref && + (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) { + preempt_disable_notrace(); + /* + * Filtering is on, so try to use the per cpu buffer first. + * This buffer will simulate a ring_buffer_event, + * where the type_len is zero and the array[0] will + * hold the full length. + * (see include/linux/ring-buffer.h for details on + * how the ring_buffer_event is structured). + * + * Using a temp buffer during filtering and copying it + * on a matched filter is quicker than writing directly + * into the ring buffer and then discarding it when + * it doesn't match. That is because the discard + * requires several atomic operations to get right. + * Copying on match and doing nothing on a failed match + * is still quicker than no copy on match, but having + * to discard out of the ring buffer on a failed match. + */ + if ((entry = __this_cpu_read(trace_buffered_event))) { + int max_len = PAGE_SIZE - struct_size(entry, array, 1); + + val = this_cpu_inc_return(trace_buffered_event_cnt); + + /* + * Preemption is disabled, but interrupts and NMIs + * can still come in now. If that happens after + * the above increment, then it will have to go + * back to the old method of allocating the event + * on the ring buffer, and if the filter fails, it + * will have to call ring_buffer_discard_commit() + * to remove it. 
+ * + * Need to also check the unlikely case that the + * length is bigger than the temp buffer size. + * If that happens, then the reserve is pretty much + * guaranteed to fail, as the ring buffer currently + * only allows events less than a page. But that may + * change in the future, so let the ring buffer reserve + * handle the failure in that case. + */ + if (val == 1 && likely(len <= max_len)) { + trace_event_setup(entry, type, trace_ctx); + entry->array[0] = len; + /* Return with preemption disabled */ + return entry; + } + this_cpu_dec(trace_buffered_event_cnt); } - this_cpu_dec(trace_buffered_event_cnt); + /* __trace_buffer_lock_reserve() disables preemption */ + preempt_enable_notrace(); } - entry = __trace_buffer_lock_reserve(*current_rb, - type, len, flags, pc); + entry = __trace_buffer_lock_reserve(*current_rb, type, len, + trace_ctx); /* * If tracing is off, but we have triggers enabled * we still need to look at the event data. Use the temp_buffer @@ -2763,14 +2739,14 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, */ if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { *current_rb = temp_buffer; - entry = __trace_buffer_lock_reserve(*current_rb, - type, len, flags, pc); + entry = __trace_buffer_lock_reserve(*current_rb, type, len, + trace_ctx); } return entry; } EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); -static DEFINE_SPINLOCK(tracepoint_iter_lock); +static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock); static DEFINE_MUTEX(tracepoint_printk_mutex); static void output_printk(struct trace_event_buffer *fbuffer) @@ -2798,24 +2774,24 @@ static void output_printk(struct trace_event_buffer *fbuffer) event = &fbuffer->trace_file->event_call->event; - spin_lock_irqsave(&tracepoint_iter_lock, flags); + raw_spin_lock_irqsave(&tracepoint_iter_lock, flags); trace_seq_init(&iter->seq); iter->ent = fbuffer->entry; event_call->event.funcs->trace(iter, 0, event); trace_seq_putc(&iter->seq, 0); printk("%s", iter->seq.buffer); - spin_unlock_irqrestore(&tracepoint_iter_lock, flags); + raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags); } -int tracepoint_printk_sysctl(struct ctl_table *table, int write, +int tracepoint_printk_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int save_tracepoint_printk; int ret; - mutex_lock(&tracepoint_printk_mutex); + guard(mutex)(&tracepoint_printk_mutex); save_tracepoint_printk = tracepoint_printk; ret = proc_dointvec(table, write, buffer, lenp, ppos); @@ -2828,29 +2804,38 @@ int tracepoint_printk_sysctl(struct ctl_table *table, int write, tracepoint_printk = 0; if (save_tracepoint_printk == tracepoint_printk) - goto out; + return ret; if (tracepoint_printk) static_key_enable(&tracepoint_printk_key.key); else static_key_disable(&tracepoint_printk_key.key); - out: - mutex_unlock(&tracepoint_printk_mutex); - return ret; } void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) { + enum event_trigger_type tt = ETT_NONE; + struct trace_event_file *file = fbuffer->trace_file; + + if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event, + fbuffer->entry, &tt)) + goto discard; + if (static_key_false(&tracepoint_printk_key.key)) output_printk(fbuffer); if (static_branch_unlikely(&trace_event_exports_enabled)) ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); - event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer, - fbuffer->event, fbuffer->entry, - fbuffer->flags, fbuffer->pc, fbuffer->regs); + + 
trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer, + fbuffer->event, fbuffer->trace_ctx, fbuffer->regs); + +discard: + if (tt) + event_triggers_post_call(file, tt); + } EXPORT_SYMBOL_GPL(trace_event_buffer_commit); @@ -2866,7 +2851,7 @@ EXPORT_SYMBOL_GPL(trace_event_buffer_commit); void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc, + unsigned int trace_ctx, struct pt_regs *regs) { __buffer_unlock_commit(buffer, event); @@ -2877,8 +2862,8 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, * and mmiotrace, but that's ok if they lose a function or * two. They are not that meaningful. */ - ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs); - ftrace_trace_userstack(tr, buffer, flags, pc); + ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); + ftrace_trace_userstack(tr, buffer, trace_ctx); } /* @@ -2892,28 +2877,24 @@ trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, } void -trace_function(struct trace_array *tr, - unsigned long ip, unsigned long parent_ip, unsigned long flags, - int pc) +trace_function(struct trace_array *tr, unsigned long ip, unsigned long + parent_ip, unsigned int trace_ctx) { - struct trace_event_call *call = &event_function; struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), - flags, pc); + trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); entry->ip = ip; entry->parent_ip = parent_ip; - if (!call_filter_check_discard(call, entry, buffer, event)) { - if (static_branch_unlikely(&trace_function_exports_enabled)) - ftrace_exports(event, TRACE_EXPORT_FUNCTION); - __buffer_unlock_commit(buffer, event); - } + if (static_branch_unlikely(&trace_function_exports_enabled)) + ftrace_exports(event, TRACE_EXPORT_FUNCTION); + __buffer_unlock_commit(buffer, event); } #ifdef CONFIG_STACKTRACE @@ -2921,7 +2902,7 @@ trace_function(struct trace_array *tr, /* Allow 4 levels of nesting: normal, softirq, irq, NMI */ #define FTRACE_KSTACK_NESTING 4 -#define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING) +#define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING) struct ftrace_stack { unsigned long calls[FTRACE_KSTACK_ENTRIES]; @@ -2935,11 +2916,11 @@ struct ftrace_stacks { static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); static DEFINE_PER_CPU(int, ftrace_stack_reserve); -static void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) +static void __ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { - struct trace_event_call *call = &event_kernel_stack; struct ring_buffer_event *event; unsigned int size, nr_entries; struct ftrace_stack *fstack; @@ -2982,18 +2963,32 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, nr_entries = stack_trace_save(fstack->calls, size, skip); } - size = nr_entries * sizeof(unsigned long); +#ifdef CONFIG_DYNAMIC_FTRACE + /* Mark entry of stack trace as trampoline code */ + if (tr->ops && tr->ops->trampoline) { + unsigned long tramp_start = tr->ops->trampoline; + unsigned long tramp_end = tramp_start + tr->ops->trampoline_size; + unsigned long *calls = fstack->calls; + + for (int i = 0; i < nr_entries; i++) { + if (calls[i] >= tramp_start && calls[i] < 
tramp_end) + calls[i] = FTRACE_TRAMPOLINE_MARKER; + } + } +#endif + event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, - sizeof(*entry) + size, flags, pc); + struct_size(entry, caller, nr_entries), + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); - memcpy(&entry->caller, fstack->calls, size); entry->size = nr_entries; + memcpy(&entry->caller, fstack->calls, + flex_array_size(entry, caller, nr_entries)); - if (!call_filter_check_discard(call, entry, buffer, event)) - __buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); out: /* Again, don't let gcc optimize things here */ @@ -3005,37 +3000,40 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { if (!(tr->trace_flags & TRACE_ITER_STACKTRACE)) return; - __ftrace_trace_stack(buffer, flags, skip, pc, regs); + __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs); } -void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, - int pc) +void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, + int skip) { struct trace_buffer *buffer = tr->array_buffer.buffer; if (rcu_is_watching()) { - __ftrace_trace_stack(buffer, flags, skip, pc, NULL); + __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); return; } + if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY))) + return; + /* - * When an NMI triggers, RCU is enabled via rcu_nmi_enter(), + * When an NMI triggers, RCU is enabled via ct_nmi_enter(), * but if the above rcu_is_watching() failed, then the NMI - * triggered someplace critical, and rcu_irq_enter() should + * triggered someplace critical, and ct_irq_enter() should * not be called from NMI. */ if (unlikely(in_nmi())) return; - rcu_irq_enter_irqson(); - __ftrace_trace_stack(buffer, flags, skip, pc, NULL); - rcu_irq_exit_irqson(); + ct_irq_enter_irqson(); + __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); + ct_irq_exit_irqson(); } /** @@ -3044,19 +3042,15 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, */ void trace_dump_stack(int skip) { - unsigned long flags; - if (tracing_disabled || tracing_selftest_running) return; - local_save_flags(flags); - #ifndef CONFIG_UNWINDER_ORC /* Skip 1 to skip this function. 
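 * The extra skip is inside #ifndef CONFIG_UNWINDER_ORC because the
 * ORC unwinder does not record this function's own frame; only the
 * other unwinders need it skipped.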
*/ skip++; #endif - __ftrace_trace_stack(global_trace.array_buffer.buffer, - flags, skip, preempt_count(), NULL); + __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer, + tracing_gen_ctx(), skip, NULL); } EXPORT_SYMBOL_GPL(trace_dump_stack); @@ -3065,9 +3059,8 @@ static DEFINE_PER_CPU(int, user_stack_count); static void ftrace_trace_userstack(struct trace_array *tr, - struct trace_buffer *buffer, unsigned long flags, int pc) + struct trace_buffer *buffer, unsigned int trace_ctx) { - struct trace_event_call *call = &event_user_stack; struct ring_buffer_event *event; struct userstack_entry *entry; @@ -3092,7 +3085,7 @@ ftrace_trace_userstack(struct trace_array *tr, __this_cpu_inc(user_stack_count); event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) goto out_drop_count; entry = ring_buffer_event_data(event); @@ -3101,8 +3094,7 @@ ftrace_trace_userstack(struct trace_array *tr, memset(&entry->caller, 0, sizeof(entry->caller)); stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES); - if (!call_filter_check_discard(call, entry, buffer, event)) - __buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); out_drop_count: __this_cpu_dec(user_stack_count); @@ -3112,20 +3104,54 @@ ftrace_trace_userstack(struct trace_array *tr, #else /* CONFIG_USER_STACKTRACE_SUPPORT */ static void ftrace_trace_userstack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, int pc) + unsigned int trace_ctx) { } #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ #endif /* CONFIG_STACKTRACE */ +static inline void +func_repeats_set_delta_ts(struct func_repeats_entry *entry, + unsigned long long delta) +{ + entry->bottom_delta_ts = delta & U32_MAX; + entry->top_delta_ts = (delta >> 32); +} + +void trace_last_func_repeats(struct trace_array *tr, + struct trace_func_repeats *last_info, + unsigned int trace_ctx) +{ + struct trace_buffer *buffer = tr->array_buffer.buffer; + struct func_repeats_entry *entry; + struct ring_buffer_event *event; + u64 delta; + + event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS, + sizeof(*entry), trace_ctx); + if (!event) + return; + + delta = ring_buffer_event_time_stamp(buffer, event) - + last_info->ts_last_call; + + entry = ring_buffer_event_data(event); + entry->ip = last_info->ip; + entry->parent_ip = last_info->parent_ip; + entry->count = last_info->count; + func_repeats_set_delta_ts(entry, delta); + + __buffer_unlock_commit(buffer, event); +} + /* created for use with alloc_percpu */ struct trace_buffer_struct { int nesting; char buffer[4][TRACE_BUF_SIZE]; }; -static struct trace_buffer_struct *trace_percpu_buffer; +static struct trace_buffer_struct __percpu *trace_percpu_buffer; /* * This allows for lockless recording. 
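 * (The four per-CPU buffers line up with the four nesting levels:
 * task, softirq, irq and NMI.) A minimal usage sketch, matching how
 * the trace_vprintk() helpers below use it:
 *
 *	buf = get_trace_buf();		// claims buffer[nesting], nesting++
 *	if (buf) {
 *		len = vscnprintf(buf, TRACE_BUF_SIZE, fmt, args);
 *		...
 *		put_trace_buf();	// nesting--
 *	}
 *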
If we're nested too deeply, then @@ -3135,7 +3161,7 @@ static char *get_trace_buf(void) { struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); - if (!buffer || buffer->nesting >= 4) + if (!trace_percpu_buffer || buffer->nesting >= 4) return NULL; buffer->nesting++; @@ -3154,7 +3180,7 @@ static void put_trace_buf(void) static int alloc_percpu_trace_buffer(void) { - struct trace_buffer_struct *buffers; + struct trace_buffer_struct __percpu *buffers; if (trace_percpu_buffer) return 0; @@ -3195,7 +3221,7 @@ void trace_printk_init_buffers(void) pr_warn("**********************************************************\n"); /* Expand the buffers to set size */ - tracing_update_buffers(); + tracing_update_buffers(&global_trace); buffers_allocated = 1; @@ -3237,14 +3263,16 @@ static void trace_printk_start_stop_comm(int enabled) */ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { - struct trace_event_call *call = &event_bprint; struct ring_buffer_event *event; struct trace_buffer *buffer; - struct trace_array *tr = &global_trace; + struct trace_array *tr = READ_ONCE(printk_trace); struct bprint_entry *entry; - unsigned long flags; + unsigned int trace_ctx; char *tbuffer; - int len = 0, size, pc; + int len = 0, size; + + if (!printk_binsafe(tr)) + return trace_vprintk(ip, fmt, args); if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; @@ -3252,7 +3280,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) /* Don't pollute graph traces with trace_vprintk internals */ pause_graph_tracing(); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); tbuffer = get_trace_buf(); @@ -3266,12 +3294,11 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) goto out_put; - local_save_flags(flags); size = sizeof(*entry) + sizeof(u32) * len; buffer = tr->array_buffer.buffer; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, - flags, pc); + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3279,10 +3306,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) entry->fmt = fmt; memcpy(entry->buf, tbuffer, sizeof(u32) * len); - if (!call_filter_check_discard(call, entry, buffer, event)) { - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL); - } + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); out: ring_buffer_nest_end(buffer); @@ -3302,20 +3327,19 @@ static int __trace_array_vprintk(struct trace_buffer *buffer, unsigned long ip, const char *fmt, va_list args) { - struct trace_event_call *call = &event_print; struct ring_buffer_event *event; - int len = 0, size, pc; + int len = 0, size; struct print_entry *entry; - unsigned long flags; + unsigned int trace_ctx; char *tbuffer; - if (tracing_disabled || tracing_selftest_running) + if (tracing_disabled) return 0; /* Don't pollute graph traces with trace_vprintk internals */ pause_graph_tracing(); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); @@ -3327,21 +3351,18 @@ __trace_array_vprintk(struct trace_buffer *buffer, len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); - local_save_flags(flags); size = sizeof(*entry) + len + 1; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - flags, pc); + trace_ctx); if (!event) goto out; entry = 
ring_buffer_event_data(event); entry->ip = ip; memcpy(&entry->buf, tbuffer, len + 1); - if (!call_filter_check_discard(call, entry, buffer, event)) { - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL); - } + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); out: ring_buffer_nest_end(buffer); @@ -3358,6 +3379,9 @@ __printf(3, 0) int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { + if (tracing_selftest_running && tr == &global_trace) + return 0; + return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); } @@ -3373,7 +3397,7 @@ int trace_array_vprintk(struct trace_array *tr, * buffer (use trace_printk() for that), as writing into the top level * buffer should only have events that can be individually disabled. * trace_printk() is only used for debugging a kernel, and should not - * be ever encorporated in normal use. + * be ever incorporated in normal use. * * trace_array_printk() can be used, as it will not add noise to the * top level tracing buffer. @@ -3433,7 +3457,7 @@ int trace_array_printk_buf(struct trace_buffer *buffer, int ret; va_list ap; - if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) + if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK)) return 0; va_start(ap, fmt); @@ -3445,7 +3469,7 @@ int trace_array_printk_buf(struct trace_buffer *buffer, __printf(2, 0) int trace_vprintk(unsigned long ip, const char *fmt, va_list args) { - return trace_array_vprintk(&global_trace, ip, fmt, args); + return trace_array_vprintk(printk_trace, ip, fmt, args); } EXPORT_SYMBOL_GPL(trace_vprintk); @@ -3543,6 +3567,198 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, return next; } +#define STATIC_FMT_BUF_SIZE 128 +static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; + +char *trace_iter_expand_format(struct trace_iterator *iter) +{ + char *tmp; + + /* + * iter->tr is NULL when used with tp_printk, which makes + * this get called where it is not safe to call krealloc(). + */ + if (!iter->tr || iter->fmt == static_fmt_buf) + return NULL; + + tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, + GFP_KERNEL); + if (tmp) { + iter->fmt_size += STATIC_FMT_BUF_SIZE; + iter->fmt = tmp; + } + + return tmp; +} + +/* Returns true if the string is safe to dereference from an event */ +static bool trace_safe_str(struct trace_iterator *iter, const char *str) +{ + unsigned long addr = (unsigned long)str; + struct trace_event *trace_event; + struct trace_event_call *event; + + /* OK if part of the event data */ + if ((addr >= (unsigned long)iter->ent) && + (addr < (unsigned long)iter->ent + iter->ent_size)) + return true; + + /* OK if part of the temp seq buffer */ + if ((addr >= (unsigned long)iter->tmp_seq.buffer) && + (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE)) + return true; + + /* Core rodata can not be freed */ + if (is_kernel_rodata(addr)) + return true; + + if (trace_is_tracepoint_string(str)) + return true; + + /* + * Now this could be a module event, referencing core module + * data, which is OK. 
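 * For example, a module's trace event may print a string literal
 * kept in that module's .rodata; the within_module_core() check
 * below accepts exactly that case.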
+ */ + if (!iter->ent) + return false; + + trace_event = ftrace_find_event(iter->ent->type); + if (!trace_event) + return false; + + event = container_of(trace_event, struct trace_event_call, event); + if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module) + return false; + + /* Would rather have rodata, but this will suffice */ + if (within_module_core(addr, event->module)) + return true; + + return false; +} + +/** + * ignore_event - Check dereferenced fields while writing to the seq buffer + * @iter: The iterator that holds the seq buffer and the event being printed + * + * At boot up, test_event_printk() will flag any event that dereferences + * a string with "%s" that does not exist in the ring buffer. It may still + * be valid, as the string may point to a static string in the kernel + * rodata that never gets freed. But if the string pointer is pointing + * to something that was allocated, there's a chance that it can be freed + * by the time the user reads the trace. This would cause a bad memory + * access by the kernel and possibly crash the system. + * + * This function will check if the event has any fields flagged as needing + * to be checked at runtime and perform those checks. + * + * If it is found that a field is unsafe, it will write into the @iter->seq + * a message stating what was found to be unsafe. + * + * @return: true if the event is unsafe and should be ignored, + * false otherwise. + */ +bool ignore_event(struct trace_iterator *iter) +{ + struct ftrace_event_field *field; + struct trace_event *trace_event; + struct trace_event_call *event; + struct list_head *head; + struct trace_seq *seq; + const void *ptr; + + trace_event = ftrace_find_event(iter->ent->type); + + seq = &iter->seq; + + if (!trace_event) { + trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type); + return true; + } + + event = container_of(trace_event, struct trace_event_call, event); + if (!(event->flags & TRACE_EVENT_FL_TEST_STR)) + return false; + + head = trace_get_fields(event); + if (!head) { + trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n", + trace_event_name(event)); + return true; + } + + /* Offsets are from the iter->ent that points to the raw event */ + ptr = iter->ent; + + list_for_each_entry(field, head, link) { + const char *str; + bool good; + + if (!field->needs_test) + continue; + + str = *(const char **)(ptr + field->offset); + + good = trace_safe_str(iter, str); + + /* + * If you hit this warning, it is likely that the + * trace event in question used %s on a string that + * was saved at the time of the event, but may not be + * around when the trace is read. Use __string(), + * __assign_str() and __get_str() helpers in the TRACE_EVENT() + * instead. See samples/trace_events/trace-events-sample.h + * for reference. 
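 *
 * A minimal sketch of the safe pattern (a hypothetical event
 * modeled on that sample file; kernels before v6.10 passed a
 * second source argument to __assign_str()):
 *
 *	TRACE_EVENT(foo_bar,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);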
+ */ + if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'", + trace_event_name(event), field->name)) { + trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n", + trace_event_name(event), field->name); + return true; + } + } + return false; +} + +const char *trace_event_format(struct trace_iterator *iter, const char *fmt) +{ + const char *p, *new_fmt; + char *q; + + if (WARN_ON_ONCE(!fmt)) + return fmt; + + if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR) + return fmt; + + p = fmt; + new_fmt = q = iter->fmt; + while (*p) { + if (unlikely(q - new_fmt + 3 > iter->fmt_size)) { + if (!trace_iter_expand_format(iter)) + return fmt; + + q += iter->fmt - new_fmt; + new_fmt = iter->fmt; + } + + *q++ = *p++; + + /* Replace %p with %px */ + if (p[-1] == '%') { + if (p[0] == '%') { + *q++ = *p++; + } else if (p[0] == 'p' && !isalnum(p[1])) { + *q++ = *p++; + *q++ = 'x'; + } + } + } + *q = '\0'; + + return new_fmt; +} + #define STATIC_TEMP_BUF_SIZE 128 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); @@ -3661,6 +3877,8 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) break; entries++; ring_buffer_iter_advance(buf_iter); + /* This could be a big loop */ + cond_resched(); } per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; @@ -3679,15 +3897,16 @@ static void *s_start(struct seq_file *m, loff_t *pos) loff_t l = 0; int cpu; - /* - * copy the tracer to avoid using a global lock all around. - * iter->trace is a copy of current_trace, the pointer to the - * name may be used instead of a strcmp(), as iter->trace->name - * will point to the same string as current_trace->name. - */ mutex_lock(&trace_types_lock); - if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) - *iter->trace = *tr->current_trace; + if (unlikely(tr->current_trace != iter->trace)) { + /* Close iter->trace before switching to the new current tracer */ + if (iter->trace->close) + iter->trace->close(iter); + iter->trace = tr->current_trace; + /* Reopen the new current tracer */ + if (iter->trace->open) + iter->trace->open(iter); + } mutex_unlock(&trace_types_lock); #ifdef CONFIG_TRACER_MAX_TRACE @@ -3695,9 +3914,6 @@ static void *s_start(struct seq_file *m, loff_t *pos) return ERR_PTR(-EBUSY); #endif - if (!iter->snapshot) - atomic_inc(&trace_record_taskinfo_disabled); - if (*pos != iter->pos) { iter->ent = NULL; iter->cpu = 0; @@ -3740,9 +3956,6 @@ static void s_stop(struct seq_file *m, void *p) return; #endif - if (!iter->snapshot) - atomic_dec(&trace_record_taskinfo_disabled); - trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); } @@ -3813,13 +4026,14 @@ unsigned long trace_total_entries(struct trace_array *tr) static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n" - "# / _-----=> irqs-off \n" + "# / _-----=> irqs-off/BH-disabled\n" "# | / _----=> need-resched \n" "# || / _---=> hardirq/softirq \n" "# ||| / _--=> preempt-depth \n" - "# |||| / delay \n" - "# cmd pid ||||| time | caller \n" - "# \\ / ||||| \\ | / \n"); + "# |||| / _-=> migrate-disable \n" + "# ||||| / delay \n" + "# cmd pid |||||| time | caller \n" + "# \\ / |||||| \\ | / \n"); } static void print_event_info(struct array_buffer *buf, struct seq_file *m) @@ -3848,18 +4062,19 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; - const char *space = " "; + static const char space[] = " "; int prec = tgid ? 
12 : 2; print_event_info(buf, m); - seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); + seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); - seq_printf(m, "# %.*s||| / delay\n", prec, space); - seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); - seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); + seq_printf(m, "# %.*s||| / _-=> migrate-disable \n", prec, space); + seq_printf(m, "# %.*s|||| / delay\n", prec, space); + seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); + seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); } void @@ -3871,14 +4086,12 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) struct tracer *type = iter->trace; unsigned long entries; unsigned long total; - const char *name = "preemption"; - - name = type->name; + const char *name = type->name; get_total_entries(buf, &total, &entries); seq_printf(m, "# %s latency trace v1.1.5 on %s\n", - name, UTS_RELEASE); + name, init_utsname()->release); seq_puts(m, "# -----------------------------------" "---------------------------------\n"); seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |" @@ -3887,17 +4100,12 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) entries, total, buf->cpu, -#if defined(CONFIG_PREEMPT_NONE) - "server", -#elif defined(CONFIG_PREEMPT_VOLUNTARY) - "desktop", -#elif defined(CONFIG_PREEMPT) - "preempt", -#elif defined(CONFIG_PREEMPT_RT) - "preempt_rt", -#else + preempt_model_none() ? "server" : + preempt_model_voluntary() ? "desktop" : + preempt_model_full() ? "preempt" : + preempt_model_lazy() ? "lazy" : + preempt_model_rt() ? "preempt_rt" : "unknown", -#endif /* These are reserved for later use */ 0, 0, 0, 0); #ifdef CONFIG_SMP @@ -3977,8 +4185,20 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; - if (event) + if (event) { + if (tr->trace_flags & TRACE_ITER_FIELDS) + return print_event_fields(iter, event); + /* + * For TRACE_EVENT() events, the print_fmt is not + * safe to use if the array has delta offsets. + * Force printing via the fields. + */ + if ((tr->text_delta || tr->data_delta) && + event->type > __TRACE_LAST_TYPE) + return print_event_fields(iter, event); + return event->funcs->trace(iter, sym_flags, event); + } trace_seq_printf(s, "Unknown type %d\n", entry->type); @@ -4271,7 +4491,11 @@ static int s_show(struct seq_file *m, void *v) iter->leftover = ret; } else { - print_trace_line(iter); + ret = print_trace_line(iter); + if (ret == TRACE_TYPE_PARTIAL_LINE) { + iter->seq.full = 0; + trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); + } ret = trace_print_seq(m, &iter->seq); /* * If we overflow the seq_file buffer, then it will @@ -4304,6 +4528,25 @@ static const struct seq_operations tracer_seq_ops = { .show = s_show, }; +/* + * Note, as iter itself can be allocated and freed in different + * ways, this function is only used to free its content, and not + * the iterator itself. The only requirement on all the allocations + * is that they must zero all fields (kzalloc), as freeing works with + * either allocated content or NULL. 
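 *
 * iter->fmt, for example, may be NULL (never allocated), may point
 * at static_fmt_buf (the tp_printk case), or may be a krealloc()'d
 * buffer; the check below keeps the static buffer away from kfree().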
+ */ +static void free_trace_iter_content(struct trace_iterator *iter) +{ + /* The fmt is either NULL, allocated or points to static_fmt_buf */ + if (iter->fmt != static_fmt_buf) + kfree(iter->fmt); + + kfree(iter->temp); + kfree(iter->buffer_iter); + mutex_destroy(&iter->mutex); + free_cpumask_var(iter->started); +} + static struct trace_iterator * __tracing_open(struct inode *inode, struct file *file, bool snapshot) { @@ -4336,15 +4579,17 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) iter->temp_size = 128; /* - * We make a copy of the current tracer to avoid concurrent - * changes on it while we are reading. + * trace_event_printf() may need to modify given format + * string to replace %p with %px so that it shows real address + * instead of hash value. However, that is only for the event + * tracing, other tracer may not need. Defer the allocation + * until it is needed. */ - mutex_lock(&trace_types_lock); - iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL); - if (!iter->trace) - goto fail; + iter->fmt = NULL; + iter->fmt_size = 0; - *iter->trace = *tr->current_trace; + mutex_lock(&trace_types_lock); + iter->trace = tr->current_trace; if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) goto fail; @@ -4409,9 +4654,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) fail: mutex_unlock(&trace_types_lock); - kfree(iter->trace); - kfree(iter->temp); - kfree(iter->buffer_iter); + free_trace_iter_content(iter); release: seq_release_private(inode, file); return ERR_PTR(-ENOMEM); @@ -4452,6 +4695,60 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp) return 0; } +/* + * The private pointer of the inode is the trace_event_file. + * Update the tr ref count associated to it. + */ +int tracing_open_file_tr(struct inode *inode, struct file *filp) +{ + struct trace_event_file *file = inode->i_private; + int ret; + + ret = tracing_check_open_get_tr(file->tr); + if (ret) + return ret; + + mutex_lock(&event_mutex); + + /* Fail if the file is marked for removal */ + if (file->flags & EVENT_FILE_FL_FREED) { + trace_array_put(file->tr); + ret = -ENODEV; + } else { + event_file_get(file); + } + + mutex_unlock(&event_mutex); + if (ret) + return ret; + + filp->private_data = inode->i_private; + + return 0; +} + +int tracing_release_file_tr(struct inode *inode, struct file *filp) +{ + struct trace_event_file *file = inode->i_private; + + trace_array_put(file->tr); + event_file_put(file); + + return 0; +} + +int tracing_single_release_file_tr(struct inode *inode, struct file *filp) +{ + tracing_release_file_tr(inode, filp); + return single_release(inode, filp); +} + +static int tracing_mark_open(struct inode *inode, struct file *filp) +{ + stream_open(inode, filp); + return tracing_open_generic_tr(inode, filp); +} + static int tracing_release(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; @@ -4484,17 +4781,13 @@ static int tracing_release(struct inode *inode, struct file *file) mutex_unlock(&trace_types_lock); - mutex_destroy(&iter->mutex); - free_cpumask_var(iter->started); - kfree(iter->temp); - kfree(iter->trace); - kfree(iter->buffer_iter); + free_trace_iter_content(iter); seq_release_private(inode, file); return 0; } -static int tracing_release_generic_tr(struct inode *inode, struct file *file) +int tracing_release_generic_tr(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; @@ -4559,6 +4852,11 @@ static int tracing_open(struct inode *inode, struct file *file) 
static bool trace_ok_for_array(struct tracer *t, struct trace_array *tr) { +#ifdef CONFIG_TRACER_SNAPSHOT + /* arrays with mapped buffer range do not have snapshots */ + if (tr->range_addr_start && t->use_max_tr) + return false; +#endif return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances; } @@ -4651,7 +4949,7 @@ static int show_traces_open(struct inode *inode, struct file *file) return 0; } -static int show_traces_release(struct inode *inode, struct file *file) +static int tracing_seq_release(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; @@ -4681,6 +4979,8 @@ loff_t tracing_lseek(struct file *file, loff_t offset, int whence) static const struct file_operations tracing_fops = { .open = tracing_open, .read = seq_read, + .read_iter = seq_read_iter, + .splice_read = copy_splice_read, .write = tracing_write_stub, .llseek = tracing_lseek, .release = tracing_release, @@ -4690,7 +4990,7 @@ static const struct file_operations show_traces_fops = { .open = show_traces_open, .read = seq_read, .llseek = seq_lseek, - .release = show_traces_release, + .release = tracing_seq_release, }; static ssize_t @@ -4740,11 +5040,17 @@ int tracing_set_cpumask(struct trace_array *tr, !cpumask_test_cpu(cpu, tracing_cpumask_new)) { atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); +#ifdef CONFIG_TRACER_MAX_TRACE + ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu); +#endif } if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && cpumask_test_cpu(cpu, tracing_cpumask_new)) { atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); +#ifdef CONFIG_TRACER_MAX_TRACE + ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu); +#endif } } arch_spin_unlock(&tr->max_lock); @@ -4763,7 +5069,10 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, cpumask_var_t tracing_cpumask_new; int err; - if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) + if (count == 0 || count > KMALLOC_MAX_SIZE) + return -EINVAL; + + if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) return -ENOMEM; err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); @@ -4799,7 +5108,8 @@ static int tracing_trace_options_show(struct seq_file *m, void *v) u32 tracer_flags; int i; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); + tracer_flags = tr->current_trace->flags->val; trace_opts = tr->current_trace->flags->opts; @@ -4816,7 +5126,6 @@ static int tracing_trace_options_show(struct seq_file *m, void *v) else seq_printf(m, "no%s\n", trace_opts[i].name); } - mutex_unlock(&trace_types_lock); return 0; } @@ -4869,7 +5178,8 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set) int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) { if ((mask == TRACE_ITER_RECORD_TGID) || - (mask == TRACE_ITER_RECORD_CMD)) + (mask == TRACE_ITER_RECORD_CMD) || + (mask == TRACE_ITER_TRACE_PRINTK)) lockdep_assert_held(&event_mutex); /* do nothing if flag is already set */ @@ -4881,6 +5191,25 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) if (tr->current_trace->flag_changed(tr, mask, !!enabled)) return -EINVAL; + if (mask == TRACE_ITER_TRACE_PRINTK) { + if (enabled) { + update_printk_trace(tr); + } else { + /* + * The global_trace cannot clear this. + * Its flag only gets cleared if another instance sets it. 
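 * (So clearing the flag is rejected with -EINVAL while global_trace
 * owns trace_printk(), and clearing it on any other owning instance
 * hands the role back to global_trace below.)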
+ */ + if (printk_trace == &global_trace) + return -EINVAL; + /* + * An instance must always have it set. + * By default, that's the global_trace instance. + */ + if (printk_trace == tr) + update_printk_trace(&global_trace); + } + } + if (enabled) tr->trace_flags |= mask; else @@ -4890,11 +5219,8 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) trace_event_enable_cmd_record(enabled); if (mask == TRACE_ITER_RECORD_TGID) { - if (!tgid_map) - tgid_map = kvcalloc(PID_MAX_DEFAULT + 1, - sizeof(*tgid_map), - GFP_KERNEL); - if (!tgid_map) { + + if (trace_alloc_tgid_map() < 0) { tr->trace_flags &= ~TRACE_ITER_RECORD_TGID; return -ENOMEM; } @@ -5034,6 +5360,10 @@ static const struct file_operations tracing_iter_fops = { static const char readme_msg[] = "tracing mini-HOWTO:\n\n" + "By default tracefs removes all OTH file permission bits.\n" + "When mounting tracefs an optional group id can be specified\n" + "which adds the group to every directory and file in tracefs:\n\n" + "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n" "# echo 0 > tracing_on : quick way to disable tracing\n" "# echo 1 > tracing_on : quick way to re-enable tracing\n\n" " Important files:\n" @@ -5045,7 +5375,7 @@ static const char readme_msg[] = " error_log\t- error log for failed commands (that support it)\n" " buffer_size_kb\t- view and modify size of per cpu buffer\n" " buffer_total_size_kb - view total size of all cpu buffers\n\n" - " trace_clock\t\t-change the clock used to order events\n" + " trace_clock\t\t- change the clock used to order events\n" " local: Per cpu clock but may not be synced across CPUs\n" " global: Synced across CPUs but slows tracing down.\n" " counter: Not a clock, but just an increment\n" @@ -5054,7 +5384,7 @@ static const char readme_msg[] = #ifdef CONFIG_X86_64 " x86-tsc: TSC cycle counter\n" #endif - "\n timestamp_mode\t-view the mode used to timestamp events\n" + "\n timestamp_mode\t- view the mode used to timestamp events\n" " delta: Delta difference against a buffer-wide timestamp\n" " absolute: Absolute (standalone) timestamp\n" "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" @@ -5142,14 +5472,22 @@ static const char readme_msg[] = " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif -#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) +#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \ + defined(CONFIG_FPROBE_EVENTS) "\t accepts: event-definitions (one definition per line)\n" +#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) - "\t Format: p[:[<group>/]<event>] <place> [<args>]\n" - "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n" + "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n" + "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n" +#endif +#ifdef CONFIG_FPROBE_EVENTS + "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n" + "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n" +#endif #ifdef CONFIG_HIST_TRIGGERS "\t s:[synthetic/]<event> <field> [<field>]\n" #endif - "\t -:[<group>/]<event>\n" + "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n" + "\t -:[<group>/][<event>]\n" #ifdef CONFIG_KPROBE_EVENTS "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n" "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n" @@ -5158,22 +5496,30 @@ static const char readme_msg[] = " place 
(uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n" #endif "\t args: <name>=fetcharg[:type]\n" - "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n" + "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n" #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n" +#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS + "\t <argname>[->field[->field|.field...]],\n" +#endif #else "\t $stack<index>, $stack, $retval, $comm,\n" #endif "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n" - "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n" + "\t kernel return probes support: $retval, $arg<N>, $comm\n" + "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n" - "\t <type>\\[<array-size>\\]\n" + "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n" #ifdef CONFIG_HIST_TRIGGERS "\t field: <stype> <name>;\n" "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n" "\t [unsigned] char/int/long\n" #endif + "\t efield: For event probes ('e' types), the field is one of the fields\n" + "\t of the <attached-group>/<attached-event>.\n" #endif + " set_event\t\t- Enables events by name written into it\n" + "\t\t\t Can enable module events via: :mod:<module>\n" " events/\t\t- Directory containing all trace event subsystems:\n" " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" " events/<system>/\t- Directory containing all trace events for <system>:\n" @@ -5220,18 +5566,34 @@ static const char readme_msg[] = #ifdef CONFIG_HIST_TRIGGERS " hist trigger\t- If set, event hits are aggregated into a hash table\n" "\t Format: hist:keys=<field1[,field2,...]>\n" + "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n" "\t [:values=<field1[,field2,...]>]\n" "\t [:sort=<field1[,field2,...]>]\n" "\t [:size=#entries]\n" "\t [:pause][:continue][:clear]\n" "\t [:name=histname1]\n" + "\t [:nohitcount]\n" "\t [:<handler>.<action>]\n" "\t [if <filter>]\n\n" + "\t Note, special fields can be used as well:\n" + "\t common_timestamp - to record current timestamp\n" + "\t common_cpu - to record the CPU the event happened on\n" + "\n" + "\t A hist trigger variable can be:\n" + "\t - a reference to a field e.g. x=current_timestamp,\n" + "\t - a reference to another variable e.g. y=$x,\n" + "\t - a numeric literal: e.g. ms_per_sec=1000,\n" + "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n" + "\n" + "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n" + "\t multiplication(*) and division(/) operators. An operand can be either a\n" + "\t variable reference, field or numeric literal.\n" + "\n" "\t When a matching event is hit, an entry is added to a hash\n" "\t table using the key(s) and value(s) named, and the value of a\n" "\t sum called 'hitcount' is incremented. Keys and values\n" "\t correspond to fields in the event's format description. Keys\n" - "\t can be any field, or the special string 'stacktrace'.\n" + "\t can be any field, or the special string 'common_stacktrace'.\n" "\t Compound keys consisting of up to two fields can be specified\n" "\t by the 'keys' keyword. Values must correspond to numeric\n" "\t fields. 
Sort keys consisting of up to two fields can be\n" @@ -5256,7 +5618,10 @@ static const char readme_msg[] = "\t .execname display a common_pid as a program name\n" "\t .syscall display a syscall id as a syscall name\n" "\t .log2 display log2 value rather than raw number\n" - "\t .usecs display a common_timestamp in microseconds\n\n" + "\t .buckets=size display values in groups of size rather than raw number\n" + "\t .usecs display a common_timestamp in microseconds\n" + "\t .percent display a number of percentage value\n" + "\t .graph display a bar-graph of a value\n\n" "\t The 'pause' parameter can be used to pause an existing hist\n" "\t trigger or to start a hist trigger but not log any events\n" "\t until told to do so. 'continue' can be used to start or\n" @@ -5264,6 +5629,8 @@ static const char readme_msg[] = "\t The 'clear' parameter will clear the contents of a running\n" "\t hist trigger and leave its current paused/active state\n" "\t unchanged.\n\n" + "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n" + "\t raw hitcount in the histogram.\n\n" "\t The enable_hist and disable_hist triggers can be used to\n" "\t have one event conditionally start and stop another event's\n" "\t already-attached hist trigger. The syntax is analogous to\n" @@ -5284,7 +5651,7 @@ static const char readme_msg[] = #ifdef CONFIG_SYNTH_EVENTS " events/synthetic_events\t- Create/append/remove/show synthetic events\n" "\t Write into this file to define/undefine new synthetic events.\n" - "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n" + "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n" #endif #endif ; @@ -5303,231 +5670,6 @@ static const struct file_operations tracing_readme_fops = { .llseek = generic_file_llseek, }; -static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos) -{ - int *ptr = v; - - if (*pos || m->count) - ptr++; - - (*pos)++; - - for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) { - if (trace_find_tgid(*ptr)) - return ptr; - } - - return NULL; -} - -static void *saved_tgids_start(struct seq_file *m, loff_t *pos) -{ - void *v; - loff_t l = 0; - - if (!tgid_map) - return NULL; - - v = &tgid_map[0]; - while (l <= *pos) { - v = saved_tgids_next(m, v, &l); - if (!v) - return NULL; - } - - return v; -} - -static void saved_tgids_stop(struct seq_file *m, void *v) -{ -} - -static int saved_tgids_show(struct seq_file *m, void *v) -{ - int pid = (int *)v - tgid_map; - - seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid)); - return 0; -} - -static const struct seq_operations tracing_saved_tgids_seq_ops = { - .start = saved_tgids_start, - .stop = saved_tgids_stop, - .next = saved_tgids_next, - .show = saved_tgids_show, -}; - -static int tracing_saved_tgids_open(struct inode *inode, struct file *filp) -{ - int ret; - - ret = tracing_check_open_get_tr(NULL); - if (ret) - return ret; - - return seq_open(filp, &tracing_saved_tgids_seq_ops); -} - - -static const struct file_operations tracing_saved_tgids_fops = { - .open = tracing_saved_tgids_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos) -{ - unsigned int *ptr = v; - - if (*pos || m->count) - ptr++; - - (*pos)++; - - for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num]; - ptr++) { - if (*ptr == -1 || *ptr == NO_CMDLINE_MAP) - continue; - - return ptr; - } - - return NULL; -} - -static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos) -{ - 
void *v; - loff_t l = 0; - - preempt_disable(); - arch_spin_lock(&trace_cmdline_lock); - - v = &savedcmd->map_cmdline_to_pid[0]; - while (l <= *pos) { - v = saved_cmdlines_next(m, v, &l); - if (!v) - return NULL; - } - - return v; -} - -static void saved_cmdlines_stop(struct seq_file *m, void *v) -{ - arch_spin_unlock(&trace_cmdline_lock); - preempt_enable(); -} - -static int saved_cmdlines_show(struct seq_file *m, void *v) -{ - char buf[TASK_COMM_LEN]; - unsigned int *pid = v; - - __trace_find_cmdline(*pid, buf); - seq_printf(m, "%d %s\n", *pid, buf); - return 0; -} - -static const struct seq_operations tracing_saved_cmdlines_seq_ops = { - .start = saved_cmdlines_start, - .next = saved_cmdlines_next, - .stop = saved_cmdlines_stop, - .show = saved_cmdlines_show, -}; - -static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp) -{ - int ret; - - ret = tracing_check_open_get_tr(NULL); - if (ret) - return ret; - - return seq_open(filp, &tracing_saved_cmdlines_seq_ops); -} - -static const struct file_operations tracing_saved_cmdlines_fops = { - .open = tracing_saved_cmdlines_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static ssize_t -tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - char buf[64]; - int r; - - arch_spin_lock(&trace_cmdline_lock); - r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num); - arch_spin_unlock(&trace_cmdline_lock); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - -static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) -{ - kfree(s->saved_cmdlines); - kfree(s->map_cmdline_to_pid); - kfree(s); -} - -static int tracing_resize_saved_cmdlines(unsigned int val) -{ - struct saved_cmdlines_buffer *s, *savedcmd_temp; - - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return -ENOMEM; - - if (allocate_cmdlines_buffer(val, s) < 0) { - kfree(s); - return -ENOMEM; - } - - arch_spin_lock(&trace_cmdline_lock); - savedcmd_temp = savedcmd; - savedcmd = s; - arch_spin_unlock(&trace_cmdline_lock); - free_saved_cmdlines_buffer(savedcmd_temp); - - return 0; -} - -static ssize_t -tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - unsigned long val; - int ret; - - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); - if (ret) - return ret; - - /* must have at least 1 entry or less than PID_MAX_DEFAULT */ - if (!val || val > PID_MAX_DEFAULT) - return -EINVAL; - - ret = tracing_resize_saved_cmdlines((unsigned int)val); - if (ret < 0) - return ret; - - *ppos += cnt; - - return cnt; -} - -static const struct file_operations tracing_saved_cmdlines_size_fops = { - .open = tracing_open_generic, - .read = tracing_saved_cmdlines_size_read, - .write = tracing_saved_cmdlines_size_write, -}; - #ifdef CONFIG_TRACE_EVAL_MAP_FILE static union trace_eval_map_item * update_eval_map(union trace_eval_map_item *ptr) @@ -5650,7 +5792,7 @@ trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, return; } - mutex_lock(&trace_eval_mutex); + guard(mutex)(&trace_eval_mutex); if (!trace_eval_maps) trace_eval_maps = map_array; @@ -5674,13 +5816,11 @@ trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, map_array++; } memset(map_array, 0, sizeof(*map_array)); - - mutex_unlock(&trace_eval_mutex); } static void trace_create_eval_file(struct dentry *d_tracer) { - trace_create_file("eval_map", 0444, d_tracer, + trace_create_file("eval_map", TRACE_MODE_READ, d_tracer, 
NULL, &tracing_eval_map_fops); } @@ -5734,6 +5874,15 @@ static void set_buffer_entries(struct array_buffer *buf, unsigned long val) per_cpu_ptr(buf->data, cpu)->entries = val; } +static void update_buffer_entries(struct array_buffer *buf, int cpu) +{ + if (cpu == RING_BUFFER_ALL_CPUS) { + set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0)); + } else { + per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu); + } +} + #ifdef CONFIG_TRACER_MAX_TRACE /* resize @tr's buffer to the size of @size_tr's entries */ static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, @@ -5772,19 +5921,21 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, * we use the size that was given, and we can forget about * expanding it later. */ - ring_buffer_expanded = true; + trace_set_ring_buffer_expanded(tr); /* May be called before buffers are initialized */ if (!tr->array_buffer.buffer) return 0; + /* Do not allow tracing while resizing ring buffer */ + tracing_stop_tr(tr); + ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); if (ret < 0) - return ret; + goto out_start; #ifdef CONFIG_TRACER_MAX_TRACE - if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) || - !tr->current_trace->use_max_tr) + if (!tr->allocated_snapshot) goto out; ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); @@ -5809,53 +5960,55 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, WARN_ON(1); tracing_disabled = 1; } - return ret; + goto out_start; } - if (cpu == RING_BUFFER_ALL_CPUS) - set_buffer_entries(&tr->max_buffer, size); - else - per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size; + update_buffer_entries(&tr->max_buffer, cpu); out: #endif /* CONFIG_TRACER_MAX_TRACE */ - if (cpu == RING_BUFFER_ALL_CPUS) - set_buffer_entries(&tr->array_buffer, size); - else - per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size; - + update_buffer_entries(&tr->array_buffer, cpu); + out_start: + tracing_start_tr(tr); return ret; } ssize_t tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu_id) { - int ret = size; - - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); if (cpu_id != RING_BUFFER_ALL_CPUS) { /* make sure, this cpu is enabled in the mask */ - if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) { - ret = -EINVAL; - goto out; - } + if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) + return -EINVAL; } - ret = __tracing_resize_ring_buffer(tr, size, cpu_id); - if (ret < 0) - ret = -ENOMEM; + return __tracing_resize_ring_buffer(tr, size, cpu_id); +} -out: - mutex_unlock(&trace_types_lock); +static void update_last_data(struct trace_array *tr) +{ + if (!tr->text_delta && !tr->data_delta) + return; - return ret; -} + /* + * Need to clear all CPU buffers as there cannot be events + * from the previous boot mixed with events with this boot + * as that will cause a confusing trace. Need to clear all + * CPU buffers, even for those that may currently be offline. + */ + tracing_reset_all_cpus(&tr->array_buffer); + /* Using current data now */ + tr->text_delta = 0; + tr->data_delta = 0; +} /** * tracing_update_buffers - used by tracing facility to expand ring buffers + * @tr: The tracing instance * * To save on memory when the tracing is never used on a system with it * configured in. The ring buffers are set to a minimum size. But once @@ -5864,13 +6017,16 @@ out: * * This function is to be called when a tracer is about to be used. 
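 *
 * A typical call site looks like (a sketch, not from this patch):
 *
 *	ret = tracing_update_buffers(tr);
 *	if (ret < 0)
 *		return ret;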
*/ -int tracing_update_buffers(void) +int tracing_update_buffers(struct trace_array *tr) { int ret = 0; mutex_lock(&trace_types_lock); - if (!ring_buffer_expanded) - ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size, + + update_last_data(tr); + + if (!tr->ring_buffer_expanded) + ret = __tracing_resize_ring_buffer(tr, trace_buf_size, RING_BUFFER_ALL_CPUS); mutex_unlock(&trace_types_lock); @@ -5890,7 +6046,7 @@ static void tracing_set_nop(struct trace_array *tr) { if (tr->current_trace == &nop_trace) return; - + tr->current_trace->enabled--; if (tr->current_trace->reset) @@ -5899,12 +6055,18 @@ static void tracing_set_nop(struct trace_array *tr) tr->current_trace = &nop_trace; } +static bool tracer_options_updated; + static void add_tracer_options(struct trace_array *tr, struct tracer *t) { /* Only enable if the directory has been created already. */ if (!tr->dir) return; + /* Only create trace option files after update_tracer_options finish */ + if (!tracer_options_updated) + return; + create_trace_option_files(tr, t); } @@ -5914,15 +6076,17 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) #ifdef CONFIG_TRACER_MAX_TRACE bool had_max_tr; #endif - int ret = 0; + int ret; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); + + update_last_data(tr); - if (!ring_buffer_expanded) { + if (!tr->ring_buffer_expanded) { ret = __tracing_resize_ring_buffer(tr, trace_buf_size, RING_BUFFER_ALL_CPUS); if (ret < 0) - goto out; + return ret; ret = 0; } @@ -5930,41 +6094,37 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) if (strcmp(t->name, buf) == 0) break; } - if (!t) { - ret = -EINVAL; - goto out; - } + if (!t) + return -EINVAL; + if (t == tr->current_trace) - goto out; + return 0; #ifdef CONFIG_TRACER_SNAPSHOT if (t->use_max_tr) { + local_irq_disable(); arch_spin_lock(&tr->max_lock); - if (tr->cond_snapshot) - ret = -EBUSY; + ret = tr->cond_snapshot ? 
-EBUSY : 0; arch_spin_unlock(&tr->max_lock); + local_irq_enable(); if (ret) - goto out; + return ret; } #endif /* Some tracers won't work on kernel command line */ if (system_state < SYSTEM_RUNNING && t->noboot) { pr_warn("Tracer '%s' is not allowed on command line, ignored\n", t->name); - goto out; + return -EINVAL; } /* Some tracers are only allowed for the top level buffer */ - if (!trace_ok_for_array(t, tr)) { - ret = -EINVAL; - goto out; - } + if (!trace_ok_for_array(t, tr)) + return -EINVAL; /* If trace pipe files are being read, we can't change the tracer */ - if (tr->trace_ref) { - ret = -EBUSY; - goto out; - } + if (tr->trace_ref) + return -EBUSY; trace_branch_disable(); @@ -5973,12 +6133,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) if (tr->current_trace->reset) tr->current_trace->reset(tr); +#ifdef CONFIG_TRACER_MAX_TRACE + had_max_tr = tr->current_trace->use_max_tr; + /* Current trace needs to be nop_trace before synchronize_rcu */ tr->current_trace = &nop_trace; -#ifdef CONFIG_TRACER_MAX_TRACE - had_max_tr = tr->allocated_snapshot; - if (had_max_tr && !t->use_max_tr) { /* * We need to make sure that the update_max_tr sees that @@ -5989,30 +6149,34 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) */ synchronize_rcu(); free_snapshot(tr); + tracing_disarm_snapshot(tr); } -#endif -#ifdef CONFIG_TRACER_MAX_TRACE - if (t->use_max_tr && !had_max_tr) { - ret = tracing_alloc_snapshot_instance(tr); - if (ret < 0) - goto out; + if (!had_max_tr && t->use_max_tr) { + ret = tracing_arm_snapshot_locked(tr); + if (ret) + return ret; } +#else + tr->current_trace = &nop_trace; #endif if (t->init) { ret = tracer_init(t, tr); - if (ret) - goto out; + if (ret) { +#ifdef CONFIG_TRACER_MAX_TRACE + if (t->use_max_tr) + tracing_disarm_snapshot(tr); +#endif + return ret; + } } tr->current_trace = t; tr->current_trace->enabled++; trace_branch_enable(tr); - out: - mutex_unlock(&trace_types_lock); - return ret; + return 0; } static ssize_t @@ -6021,7 +6185,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, { struct trace_array *tr = filp->private_data; char buf[MAX_TRACER_SIZE+1]; - int i; + char *name; size_t ret; int err; @@ -6035,11 +6199,9 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, buf[cnt] = 0; - /* strip ending whitespace. 
*/ - for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) - buf[i] = 0; + name = strim(buf); - err = tracing_set_tracer(tr, buf); + err = tracing_set_tracer(tr, name); if (err) return err; @@ -6092,46 +6254,72 @@ tracing_thresh_write(struct file *filp, const char __user *ubuf, struct trace_array *tr = filp->private_data; int ret; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos); if (ret < 0) - goto out; + return ret; if (tr->current_trace->update_thresh) { ret = tr->current_trace->update_thresh(tr); if (ret < 0) - goto out; + return ret; } - ret = cnt; -out: - mutex_unlock(&trace_types_lock); - - return ret; + return cnt; } -#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) +#ifdef CONFIG_TRACER_MAX_TRACE static ssize_t tracing_max_lat_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos); + struct trace_array *tr = filp->private_data; + + return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos); } static ssize_t tracing_max_lat_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos); + struct trace_array *tr = filp->private_data; + + return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos); } #endif +static int open_pipe_on_cpu(struct trace_array *tr, int cpu) +{ + if (cpu == RING_BUFFER_ALL_CPUS) { + if (cpumask_empty(tr->pipe_cpumask)) { + cpumask_setall(tr->pipe_cpumask); + return 0; + } + } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) { + cpumask_set_cpu(cpu, tr->pipe_cpumask); + return 0; + } + return -EBUSY; +} + +static void close_pipe_on_cpu(struct trace_array *tr, int cpu) +{ + if (cpu == RING_BUFFER_ALL_CPUS) { + WARN_ON(!cpumask_full(tr->pipe_cpumask)); + cpumask_clear(tr->pipe_cpumask); + } else { + WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask)); + cpumask_clear_cpu(cpu, tr->pipe_cpumask); + } +} + static int tracing_open_pipe(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; struct trace_iterator *iter; + int cpu; int ret; ret = tracing_check_open_get_tr(tr); @@ -6139,13 +6327,16 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) return ret; mutex_lock(&trace_types_lock); + cpu = tracing_get_cpu(inode); + ret = open_pipe_on_cpu(tr, cpu); + if (ret) + goto fail_pipe_on_cpu; /* create a buffer to store the information to pass to userspace */ iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) { ret = -ENOMEM; - __trace_array_put(tr); - goto out; + goto fail_alloc_iter; } trace_seq_init(&iter->seq); @@ -6168,7 +6359,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) iter->tr = tr; iter->array_buffer = &tr->array_buffer; - iter->cpu_file = tracing_get_cpu(inode); + iter->cpu_file = cpu; mutex_init(&iter->mutex); filp->private_data = iter; @@ -6178,12 +6369,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) nonseekable_open(inode, filp); tr->trace_ref++; -out: + mutex_unlock(&trace_types_lock); return ret; fail: kfree(iter); +fail_alloc_iter: + close_pipe_on_cpu(tr, cpu); +fail_pipe_on_cpu: __trace_array_put(tr); mutex_unlock(&trace_types_lock); return ret; @@ -6200,11 +6394,10 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) if (iter->trace->pipe_close) iter->trace->pipe_close(iter); - + close_pipe_on_cpu(tr, iter->cpu_file); mutex_unlock(&trace_types_lock); - 
free_cpumask_var(iter->started); - mutex_destroy(&iter->mutex); + free_trace_iter_content(iter); kfree(iter); trace_array_put(tr); @@ -6228,7 +6421,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl return EPOLLIN | EPOLLRDNORM; else return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, - filp, poll_table); + filp, poll_table, iter->tr->buffer_percent); } static __poll_t @@ -6291,42 +6484,37 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, * This is just a matter of traces coherency, the ring buffer itself * is protected. */ - mutex_lock(&iter->mutex); + guard(mutex)(&iter->mutex); /* return any leftover data */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); if (sret != -EBUSY) - goto out; + return sret; trace_seq_init(&iter->seq); if (iter->trace->read) { sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); if (sret) - goto out; + return sret; } waitagain: sret = tracing_wait_pipe(filp); if (sret <= 0) - goto out; + return sret; /* stop when tracing is finished */ - if (trace_empty(iter)) { - sret = 0; - goto out; - } + if (trace_empty(iter)) + return 0; - if (cnt >= PAGE_SIZE) - cnt = PAGE_SIZE - 1; + if (cnt >= TRACE_SEQ_BUFFER_SIZE) + cnt = TRACE_SEQ_BUFFER_SIZE - 1; /* reset all but tr, trace, and overruns */ - memset(&iter->seq, 0, - sizeof(struct trace_iterator) - - offsetof(struct trace_iterator, seq)); + trace_iterator_reset(iter); cpumask_clear(iter->started); trace_seq_init(&iter->seq); - iter->pos = -1; trace_event_read_lock(); trace_access_lock(iter->cpu_file); @@ -6336,7 +6524,20 @@ waitagain: ret = print_trace_line(iter); if (ret == TRACE_TYPE_PARTIAL_LINE) { - /* don't print partial lines */ + /* + * If one print_trace_line() fills the entire trace_seq in one shot, + * trace_seq_to_user() will return -EBUSY because save_len == 0. + * In this case, we need to consume it; otherwise the loop will peek at + * this event next time, resulting in an infinite loop.
+ */ + if (save_len == 0) { + iter->seq.full = 0; + trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); + trace_consume(iter); + break; + } + + /* In other cases, don't print partial lines */ iter->seq.seq.len = save_len; break; } @@ -6359,7 +6560,7 @@ waitagain: /* Now copy what we have to the user */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); - if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq)) + if (iter->seq.readpos >= trace_seq_used(&iter->seq)) trace_seq_init(&iter->seq); /* @@ -6369,9 +6570,6 @@ waitagain: if (sret == -EBUSY) goto waitagain; -out: - mutex_unlock(&iter->mutex); - return sret; } @@ -6545,7 +6743,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, } if (buf_size_same) { - if (!ring_buffer_expanded) + if (!tr->ring_buffer_expanded) r = sprintf(buf, "%lu (expanded: %lu)\n", size >> 10, trace_buf_size >> 10); @@ -6602,10 +6800,10 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf, mutex_lock(&trace_types_lock); for_each_tracing_cpu(cpu) { size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10; - if (!ring_buffer_expanded) + if (!tr->ring_buffer_expanded) expanded_size += trace_buf_size >> 10; } - if (ring_buffer_expanded) + if (tr->ring_buffer_expanded) r = sprintf(buf, "%lu\n", size); else r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size); @@ -6615,6 +6813,37 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf, } static ssize_t +tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = filp->private_data; + struct seq_buf seq; + char buf[64]; + + seq_buf_init(&seq, buf, 64); + + seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta); + seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq)); +} + +static int tracing_buffer_meta_open(struct inode *inode, struct file *filp) +{ + struct trace_array *tr = inode->i_private; + int cpu = tracing_get_cpu(inode); + int ret; + + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; + + ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu); + if (ret < 0) + __trace_array_put(tr); + return ret; +} + +static ssize_t tracing_free_buffer_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -6644,6 +6873,8 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) return 0; } +#define TRACE_MARKER_MAX_SIZE 4096 + static ssize_t tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) @@ -6653,9 +6884,9 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, enum event_trigger_type tt = ETT_NONE; struct trace_buffer *buffer; struct print_entry *entry; - unsigned long irq_flags; + int meta_size; ssize_t written; - int size; + size_t size; int len; /* Used in tracing_mark_raw_write() as well */ @@ -6668,13 +6899,15 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (!(tr->trace_flags & TRACE_ITER_MARKERS)) return -EINVAL; - if (cnt > TRACE_BUF_SIZE) - cnt = TRACE_BUF_SIZE; + if ((ssize_t)cnt < 0) + return -EINVAL; - BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); + if (cnt > TRACE_MARKER_MAX_SIZE) + cnt = TRACE_MARKER_MAX_SIZE; - local_save_flags(irq_flags); - size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */ + meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */ + again: + size = cnt + meta_size; /* If less than "<faulted>", then make sure we can still add that */ if (cnt < 
FAULTED_SIZE) @@ -6682,10 +6915,26 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - irq_flags, preempt_count()); - if (unlikely(!event)) + tracing_gen_ctx()); + if (unlikely(!event)) { + /* + * If the size was greater than what was allowed, then + * make it smaller and try again. + */ + if (size > ring_buffer_max_event_size(buffer)) { + /* cnt < FAULTED size should never be bigger than max */ + if (WARN_ON_ONCE(cnt < FAULTED_SIZE)) + return -EBADF; + cnt = ring_buffer_max_event_size(buffer) - meta_size; + /* The above should only happen once */ + if (WARN_ON_ONCE(cnt + meta_size == size)) + return -EBADF; + goto again; + } + /* Ring buffer disabled, return as if not open for write */ return -EBADF; + } entry = ring_buffer_event_data(event); entry->ip = _THIS_IP_; @@ -6701,7 +6950,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) { /* do not add \n before testing triggers, but add \0 */ entry->buf[cnt] = '\0'; - tt = event_triggers_call(tr->trace_marker_file, entry, event); + tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event); } if (entry->buf[cnt - 1] != '\n') { @@ -6717,15 +6966,9 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (tt) event_triggers_post_call(tr->trace_marker_file, tt); - if (written > 0) - *fpos += written; - return written; } -/* Limit it for now to 3K (including tag) */ -#define RAW_DATA_MAX_SIZE (1024*3) - static ssize_t tracing_mark_raw_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) @@ -6734,7 +6977,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, struct ring_buffer_event *event; struct trace_buffer *buffer; struct raw_data_entry *entry; - unsigned long irq_flags; ssize_t written; int size; int len; @@ -6748,22 +6990,20 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, return -EINVAL; /* The marker must at least have a tag id */ - if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE) + if (cnt < sizeof(unsigned int)) return -EINVAL; - if (cnt > TRACE_BUF_SIZE) - cnt = TRACE_BUF_SIZE; - - BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); - - local_save_flags(irq_flags); size = sizeof(*entry) + cnt; if (cnt < FAULT_SIZE_ID) size += FAULT_SIZE_ID - cnt; buffer = tr->array_buffer.buffer; + + if (size > ring_buffer_max_event_size(buffer)) + return -EINVAL; + event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, - irq_flags, preempt_count()); + tracing_gen_ctx()); if (!event) /* Ring buffer disabled, return as if not open for write */ return -EBADF; @@ -6780,9 +7020,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, __buffer_unlock_commit(buffer, event); - if (written > 0) - *fpos += written; - return written; } @@ -6911,41 +7148,39 @@ static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file) return ret; } -int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs) +u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe) { - int ret = 0; - - mutex_lock(&trace_types_lock); + if (rbe == this_cpu_read(trace_buffered_event)) + return ring_buffer_time_stamp(buffer); - if (abs && tr->time_stamp_abs_ref++) - goto out; + return ring_buffer_event_time_stamp(buffer, rbe); +} - if (!abs) { - if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) { - ret = -EINVAL; - goto out; - } +/* + * Set 
or disable using the per CPU trace_buffered_event when possible. + */ +int tracing_set_filter_buffering(struct trace_array *tr, bool set) +{ + guard(mutex)(&trace_types_lock); - if (--tr->time_stamp_abs_ref) - goto out; + if (set && tr->no_filter_buffering_ref++) + return 0; - ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs); + if (!set) { + if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) + return -EINVAL; -#ifdef CONFIG_TRACER_MAX_TRACE - if (tr->max_buffer.buffer) - ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs); -#endif - out: - mutex_unlock(&trace_types_lock); + --tr->no_filter_buffering_ref; + } - return ret; + return 0; } struct ftrace_buffer_info { struct trace_iterator iter; void *spare; unsigned int spare_cpu; + unsigned int spare_size; unsigned int read; }; @@ -6991,6 +7226,11 @@ out: return ret; } +static void tracing_swap_cpu_buffer(void *tr) +{ + update_max_tr_single((struct trace_array *)tr, current, smp_processor_id()); +} + static ssize_t tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) @@ -7001,7 +7241,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, unsigned long val; int ret; - ret = tracing_update_buffers(); + ret = tracing_update_buffers(tr); if (ret < 0) return ret; @@ -7009,51 +7249,51 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, if (ret) return ret; - mutex_lock(&trace_types_lock); + guard(mutex)(&trace_types_lock); - if (tr->current_trace->use_max_tr) { - ret = -EBUSY; - goto out; - } + if (tr->current_trace->use_max_tr) + return -EBUSY; + local_irq_disable(); arch_spin_lock(&tr->max_lock); if (tr->cond_snapshot) ret = -EBUSY; arch_spin_unlock(&tr->max_lock); + local_irq_enable(); if (ret) - goto out; + return ret; switch (val) { case 0: - if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { - ret = -EINVAL; - break; - } + if (iter->cpu_file != RING_BUFFER_ALL_CPUS) + return -EINVAL; if (tr->allocated_snapshot) free_snapshot(tr); break; case 1: /* Only allow per-cpu swap if the ring buffer supports it */ #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP - if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { - ret = -EINVAL; - break; - } + if (iter->cpu_file != RING_BUFFER_ALL_CPUS) + return -EINVAL; #endif if (tr->allocated_snapshot) ret = resize_buffer_duplicate_size(&tr->max_buffer, &tr->array_buffer, iter->cpu_file); - else - ret = tracing_alloc_snapshot_instance(tr); - if (ret < 0) - break; - local_irq_disable(); + + ret = tracing_arm_snapshot_locked(tr); + if (ret) + return ret; + /* Now, we're going to swap */ - if (iter->cpu_file == RING_BUFFER_ALL_CPUS) + if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { + local_irq_disable(); update_max_tr(tr, current, smp_processor_id(), NULL); - else - update_max_tr_single(tr, current, iter->cpu_file); - local_irq_enable(); + local_irq_enable(); + } else { + smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer, + (void *)tr, 1); + } + tracing_disarm_snapshot(tr); break; default: if (tr->allocated_snapshot) { @@ -7069,8 +7309,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, *ppos += cnt; ret = cnt; } -out: - mutex_unlock(&trace_types_lock); + return ret; } @@ -7132,20 +7371,22 @@ static const struct file_operations tracing_thresh_fops = { .llseek = generic_file_llseek, }; -#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) +#ifdef CONFIG_TRACER_MAX_TRACE static const struct file_operations tracing_max_lat_fops = { - .open = tracing_open_generic, +
.open = tracing_open_generic_tr, .read = tracing_max_lat_read, .write = tracing_max_lat_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; #endif static const struct file_operations set_tracer_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = tracing_set_trace_read, .write = tracing_set_trace_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations tracing_pipe_fops = { @@ -7154,7 +7395,6 @@ static const struct file_operations tracing_pipe_fops = { .read = tracing_read_pipe, .splice_read = tracing_splice_read_pipe, .release = tracing_release_pipe, - .llseek = no_llseek, }; static const struct file_operations tracing_entries_fops = { @@ -7165,6 +7405,13 @@ static const struct file_operations tracing_entries_fops = { .release = tracing_release_generic_tr, }; +static const struct file_operations tracing_buffer_meta_fops = { + .open = tracing_buffer_meta_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_seq_release, +}; + static const struct file_operations tracing_total_entries_fops = { .open = tracing_open_generic_tr, .read = tracing_total_entries_read, @@ -7179,16 +7426,14 @@ static const struct file_operations tracing_free_buffer_fops = { }; static const struct file_operations tracing_mark_fops = { - .open = tracing_open_generic_tr, + .open = tracing_mark_open, .write = tracing_mark_write, - .llseek = generic_file_llseek, .release = tracing_release_generic_tr, }; static const struct file_operations tracing_mark_raw_fops = { - .open = tracing_open_generic_tr, + .open = tracing_mark_open, .write = tracing_mark_raw_write, - .llseek = generic_file_llseek, .release = tracing_release_generic_tr, }; @@ -7207,6 +7452,13 @@ static const struct file_operations trace_time_stamp_mode_fops = { .release = tracing_single_release_tr, }; +static const struct file_operations last_boot_fops = { + .open = tracing_open_generic_tr, + .read = tracing_last_boot_read, + .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, +}; + #ifdef CONFIG_TRACER_SNAPSHOT static const struct file_operations snapshot_fops = { .open = tracing_snapshot_open, @@ -7221,11 +7473,95 @@ static const struct file_operations snapshot_raw_fops = { .read = tracing_buffers_read, .release = tracing_buffers_release, .splice_read = tracing_buffers_splice_read, - .llseek = no_llseek, }; #endif /* CONFIG_TRACER_SNAPSHOT */ +/* + * trace_min_max_write - Write a u64 value to a trace_min_max_param struct + * @filp: The active open file structure + * @ubuf: The userspace provided buffer to read the value from + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function implements the write interface for a struct trace_min_max_param. + * The filp->private_data must point to a trace_min_max_param structure that + * defines where to write the value, the min and the max acceptable values, + * and a lock to protect the write.
+ */ +static ssize_t +trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) +{ + struct trace_min_max_param *param = filp->private_data; + u64 val; + int err; + + if (!param) + return -EFAULT; + + err = kstrtoull_from_user(ubuf, cnt, 10, &val); + if (err) + return err; + + if (param->lock) + mutex_lock(param->lock); + + if (param->min && val < *param->min) + err = -EINVAL; + + if (param->max && val > *param->max) + err = -EINVAL; + + if (!err) + *param->val = val; + + if (param->lock) + mutex_unlock(param->lock); + + if (err) + return err; + + return cnt; +} + +/* + * trace_min_max_read - Read a u64 value from a trace_min_max_param struct + * @filp: The active open file structure + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function implements the read interface for a struct trace_min_max_param. + * The filp->private_data must point to a trace_min_max_param struct with valid + * data. + */ +static ssize_t +trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +{ + struct trace_min_max_param *param = filp->private_data; + char buf[U64_STR_SIZE]; + int len; + u64 val; + + if (!param) + return -EFAULT; + + val = *param->val; + + if (cnt > sizeof(buf)) + cnt = sizeof(buf); + + len = snprintf(buf, sizeof(buf), "%llu\n", val); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); +} + +const struct file_operations trace_min_max_fops = { + .open = tracing_open_generic, + .read = trace_min_max_read, + .write = trace_min_max_write, +}; + #define TRACING_LOG_ERRS_MAX 8 #define TRACING_LOG_LOC_MAX 128 @@ -7234,7 +7570,7 @@ static const struct file_operations snapshot_raw_fops = { struct err_info { const char **errs; /* ptr to loc-specific array of err strings */ u8 type; /* index into errs -> specific err string */ - u8 pos; /* MAX_FILTER_STR_VAL = 256 */ + u16 pos; /* caret position */ u64 ts; }; @@ -7242,25 +7578,53 @@ struct tracing_log_err { struct list_head list; struct err_info info; char loc[TRACING_LOG_LOC_MAX]; /* err location */ - char cmd[MAX_FILTER_STR_VAL]; /* what caused err */ + char *cmd; /* what caused err */ }; static DEFINE_MUTEX(tracing_err_log_lock); -static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) +static struct tracing_log_err *alloc_tracing_log_err(int len) { struct tracing_log_err *err; + err = kzalloc(sizeof(*err), GFP_KERNEL); + if (!err) + return ERR_PTR(-ENOMEM); + + err->cmd = kzalloc(len, GFP_KERNEL); + if (!err->cmd) { + kfree(err); + return ERR_PTR(-ENOMEM); + } + + return err; +} + +static void free_tracing_log_err(struct tracing_log_err *err) +{ + kfree(err->cmd); + kfree(err); +} + +static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr, + int len) +{ + struct tracing_log_err *err; + char *cmd; + if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { - err = kzalloc(sizeof(*err), GFP_KERNEL); - if (!err) - err = ERR_PTR(-ENOMEM); - tr->n_err_log_entries++; + err = alloc_tracing_log_err(len); + if (PTR_ERR(err) != -ENOMEM) + tr->n_err_log_entries++; return err; } - + cmd = kzalloc(len, GFP_KERNEL); + if (!cmd) + return ERR_PTR(-ENOMEM); err = list_first_entry(&tr->err_log, struct tracing_log_err, list); + kfree(err->cmd); + err->cmd = cmd; list_del(&err->list); return err; @@ -7271,11 +7635,11 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) * @cmd: The tracing command that caused the error * @str: The string to 
position the caret at within @cmd * - * Finds the position of the first occurence of @str within @cmd. The + * Finds the position of the first occurrence of @str within @cmd. The * return value can be passed to tracing_log_err() for caret placement * within @cmd. * - * Returns the index within @cmd of the first occurence of @str or 0 + * Returns the index within @cmd of the first occurrence of @str or 0 * if @str was not found. */ unsigned int err_pos(char *cmd, const char *str) @@ -7321,22 +7685,24 @@ unsigned int err_pos(char *cmd, const char *str) */ void tracing_log_err(struct trace_array *tr, const char *loc, const char *cmd, - const char **errs, u8 type, u8 pos) + const char **errs, u8 type, u16 pos) { struct tracing_log_err *err; + int len = 0; if (!tr) tr = &global_trace; - mutex_lock(&tracing_err_log_lock); - err = get_tracing_log_err(tr); - if (PTR_ERR(err) == -ENOMEM) { - mutex_unlock(&tracing_err_log_lock); + len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1; + + guard(mutex)(&tracing_err_log_lock); + + err = get_tracing_log_err(tr, len); + if (PTR_ERR(err) == -ENOMEM) return; - } snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc); - snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd); + snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd); err->info.errs = errs; err->info.type = type; @@ -7344,7 +7710,6 @@ void tracing_log_err(struct trace_array *tr, err->info.ts = local_clock(); list_add_tail(&err->list, &tr->err_log); - mutex_unlock(&tracing_err_log_lock); } static void clear_tracing_err_log(struct trace_array *tr) @@ -7354,7 +7719,7 @@ static void clear_tracing_err_log(struct trace_array *tr) mutex_lock(&tracing_err_log_lock); list_for_each_entry_safe(err, next, &tr->err_log, list) { list_del(&err->list); - kfree(err); + free_tracing_log_err(err); } tr->n_err_log_entries = 0; @@ -7382,9 +7747,9 @@ static void tracing_err_log_seq_stop(struct seq_file *m, void *v) mutex_unlock(&tracing_err_log_lock); } -static void tracing_err_log_show_pos(struct seq_file *m, u8 pos) +static void tracing_err_log_show_pos(struct seq_file *m, u16 pos) { - u8 i; + u16 i; for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++) seq_putc(m, ' '); @@ -7467,7 +7832,7 @@ static const struct file_operations tracing_err_log_fops = { .open = tracing_err_log_open, .write = tracing_err_log_write, .read = seq_read, - .llseek = seq_lseek, + .llseek = tracing_lseek, .release = tracing_err_log_release, }; @@ -7525,6 +7890,8 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, { struct ftrace_buffer_info *info = filp->private_data; struct trace_iterator *iter = &info->iter; + void *trace_data; + int page_size; ssize_t ret = 0; ssize_t size; @@ -7536,6 +7903,17 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, return -EBUSY; #endif + page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); + + /* Make sure the spare matches the current sub buffer size */ + if (info->spare) { + if (page_size != info->spare_size) { + ring_buffer_free_read_page(iter->array_buffer->buffer, + info->spare_cpu, info->spare); + info->spare = NULL; + } + } + if (!info->spare) { info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer, iter->cpu_file); @@ -7544,25 +7922,26 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, info->spare = NULL; } else { info->spare_cpu = iter->cpu_file; + info->spare_size = page_size; } } if (!info->spare) return ret; /* Do we have previous read data to read? 
*/ - if (info->read < PAGE_SIZE) + if (info->read < page_size) goto read; again: trace_access_lock(iter->cpu_file); ret = ring_buffer_read_page(iter->array_buffer->buffer, - &info->spare, + info->spare, count, iter->cpu_file, 0); trace_access_unlock(iter->cpu_file); if (ret < 0) { - if (trace_empty(iter)) { + if (trace_empty(iter) && !iter->closed) { if ((filp->f_flags & O_NONBLOCK)) return -EAGAIN; @@ -7577,11 +7956,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, info->read = 0; read: - size = PAGE_SIZE - info->read; + size = page_size - info->read; if (size > count) size = count; - - ret = copy_to_user(ubuf, info->spare + info->read, size); + trace_data = ring_buffer_read_page_data(info->spare); + ret = copy_to_user(ubuf, trace_data + info->read, size); if (ret == size) return -EFAULT; @@ -7593,6 +7972,20 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, return size; } +static int tracing_buffers_flush(struct file *file, fl_owner_t id) +{ + struct ftrace_buffer_info *info = file->private_data; + struct trace_iterator *iter = &info->iter; + + iter->closed = true; + /* Make sure the waiters see the new wait_index */ + (void)atomic_fetch_inc_release(&iter->wait_index); + + ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); + + return 0; +} + static int tracing_buffers_release(struct inode *inode, struct file *file) { struct ftrace_buffer_info *info = file->private_data; @@ -7686,6 +8079,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, .spd_release = buffer_spd_release, }; struct buffer_ref *ref; + bool woken = false; + int page_size; int entries, i; ssize_t ret = 0; @@ -7694,13 +8089,14 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, return -EBUSY; #endif - if (*ppos & (PAGE_SIZE - 1)) + page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); + if (*ppos & (page_size - 1)) return -EINVAL; - if (len & (PAGE_SIZE - 1)) { - if (len < PAGE_SIZE) + if (len & (page_size - 1)) { + if (len < page_size) return -EINVAL; - len &= PAGE_MASK; + len &= (~(page_size - 1)); } if (splice_grow_spd(pipe, &spd)) @@ -7710,7 +8106,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, trace_access_lock(iter->cpu_file); entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); - for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) { + for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) { struct page *page; int r; @@ -7731,7 +8127,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, } ref->cpu = iter->cpu_file; - r = ring_buffer_read_page(ref->buffer, &ref->page, + r = ring_buffer_read_page(ref->buffer, ref->page, len, iter->cpu_file, 1); if (r < 0) { ring_buffer_free_read_page(ref->buffer, ref->cpu, @@ -7740,14 +8136,14 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, break; } - page = virt_to_page(ref->page); + page = virt_to_page(ring_buffer_read_page_data(ref->page)); spd.pages[i] = page; - spd.partial[i].len = PAGE_SIZE; + spd.partial[i].len = page_size; spd.partial[i].offset = 0; spd.partial[i].private = (unsigned long)ref; spd.nr_pages++; - *ppos += PAGE_SIZE; + *ppos += page_size; entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); } @@ -7757,17 +8153,28 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, /* did we read anything? 
*/ if (!spd.nr_pages) { + if (ret) goto out; + if (woken) + goto out; + ret = -EAGAIN; if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) goto out; - ret = wait_on_pipe(iter, iter->tr->buffer_percent); + ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent); if (ret) goto out; + /* No need to wait after waking up when tracing is off */ + if (!tracer_tracing_is_on(iter->tr)) + goto out; + + /* Iterate one more time to collect any new data then exit */ + woken = true; + goto again; } @@ -7778,13 +8185,126 @@ out: return ret; } +static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct ftrace_buffer_info *info = file->private_data; + struct trace_iterator *iter = &info->iter; + int err; + + if (cmd == TRACE_MMAP_IOCTL_GET_READER) { + if (!(file->f_flags & O_NONBLOCK)) { + err = ring_buffer_wait(iter->array_buffer->buffer, + iter->cpu_file, + iter->tr->buffer_percent, + NULL, NULL); + if (err) + return err; + } + + return ring_buffer_map_get_reader(iter->array_buffer->buffer, + iter->cpu_file); + } else if (cmd) { + return -ENOTTY; + } + + /* + * An ioctl call with cmd 0 to the ring buffer file will wake up all + * waiters + */ + mutex_lock(&trace_types_lock); + + /* Make sure the waiters see the new wait_index */ + (void)atomic_fetch_inc_release(&iter->wait_index); + + ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); + + mutex_unlock(&trace_types_lock); + return 0; +} + +#ifdef CONFIG_TRACER_MAX_TRACE +static int get_snapshot_map(struct trace_array *tr) +{ + int err = 0; + + /* + * Called with mmap_lock held. lockdep would be unhappy if we would now + * take trace_types_lock. Instead use the specific + * snapshot_trigger_lock. + */ + spin_lock(&tr->snapshot_trigger_lock); + + if (tr->snapshot || tr->mapped == UINT_MAX) + err = -EBUSY; + else + tr->mapped++; + + spin_unlock(&tr->snapshot_trigger_lock); + + /* Wait for update_max_tr() to observe iter->tr->mapped */ + if (tr->mapped == 1) + synchronize_rcu(); + + return err; + +} +static void put_snapshot_map(struct trace_array *tr) +{ + spin_lock(&tr->snapshot_trigger_lock); + if (!WARN_ON(!tr->mapped)) + tr->mapped--; + spin_unlock(&tr->snapshot_trigger_lock); +} +#else +static inline int get_snapshot_map(struct trace_array *tr) { return 0; } +static inline void put_snapshot_map(struct trace_array *tr) { } +#endif + +static void tracing_buffers_mmap_close(struct vm_area_struct *vma) +{ + struct ftrace_buffer_info *info = vma->vm_file->private_data; + struct trace_iterator *iter = &info->iter; + + WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file)); + put_snapshot_map(iter->tr); +} + +static const struct vm_operations_struct tracing_buffers_vmops = { + .close = tracing_buffers_mmap_close, +}; + +static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct ftrace_buffer_info *info = filp->private_data; + struct trace_iterator *iter = &info->iter; + int ret = 0; + + /* Currently the boot mapped buffer is not supported for mmap */ + if (iter->tr->flags & TRACE_ARRAY_FL_BOOT) + return -ENODEV; + + ret = get_snapshot_map(iter->tr); + if (ret) + return ret; + + ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma); + if (ret) + put_snapshot_map(iter->tr); + + vma->vm_ops = &tracing_buffers_vmops; + + return ret; +} + static const struct file_operations tracing_buffers_fops = { .open = tracing_buffers_open, .read = tracing_buffers_read, .poll = tracing_buffers_poll, .release = 
tracing_buffers_release, + .flush = tracing_buffers_flush, .splice_read = tracing_buffers_splice_read, - .llseek = no_llseek, + .unlocked_ioctl = tracing_buffers_ioctl, + .mmap = tracing_buffers_mmap, }; static ssize_t @@ -7825,7 +8345,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem); - t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu)); + t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer)); usec_rem = do_div(t, USEC_PER_SEC); trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); } else { @@ -7834,7 +8354,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); trace_seq_printf(s, "now ts: %llu\n", - ring_buffer_time_stamp(trace_buf->buffer, cpu)); + ring_buffer_time_stamp(trace_buf->buffer)); } cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu); @@ -7868,15 +8388,22 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf, char *buf; int r; - /* 256 should be plenty to hold the amount needed */ - buf = kmalloc(256, GFP_KERNEL); + /* 512 should be plenty to hold the amount needed */ +#define DYN_INFO_BUF_SIZE 512 + + buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL); if (!buf) return -ENOMEM; - r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n", + r = scnprintf(buf, DYN_INFO_BUF_SIZE, + "%ld pages:%ld groups: %ld\n" + "ftrace boot update time = %llu (ns)\n" + "ftrace module total update time = %llu (ns)\n", ftrace_update_tot_cnt, ftrace_number_of_pages, - ftrace_number_of_groups); + ftrace_number_of_groups, + ftrace_update_time, + ftrace_total_mod_time); ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); kfree(buf); @@ -8005,8 +8532,13 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops; - if (glob[0] == '!') - return unregister_ftrace_function_probe_func(glob+1, tr, ops); + if (glob[0] == '!') { + ret = unregister_ftrace_function_probe_func(glob+1, tr, ops); + if (!ret) + tracing_disarm_snapshot(tr); + + return ret; + } if (!param) goto out_reg; @@ -8025,12 +8557,13 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, return ret; out_reg: - ret = tracing_alloc_snapshot_instance(tr); + ret = tracing_arm_snapshot(tr); if (ret < 0) goto out; ret = register_ftrace_function_probe(glob, tr, ops, count); - + if (ret < 0) + tracing_disarm_snapshot(tr); out: return ret < 0 ? 
ret : 0; } @@ -8109,28 +8642,33 @@ tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) } /* per cpu trace_pipe */ - trace_create_cpu_file("trace_pipe", 0444, d_cpu, + trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_pipe_fops); /* per cpu trace */ - trace_create_cpu_file("trace", 0644, d_cpu, + trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu, tr, cpu, &tracing_fops); - trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu, + trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_buffers_fops); - trace_create_cpu_file("stats", 0444, d_cpu, + trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_stats_fops); - trace_create_cpu_file("buffer_size_kb", 0444, d_cpu, + trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_entries_fops); + if (tr->range_addr_start) + trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu, + tr, cpu, &tracing_buffer_meta_fops); #ifdef CONFIG_TRACER_SNAPSHOT - trace_create_cpu_file("snapshot", 0644, d_cpu, - tr, cpu, &snapshot_fops); + if (!tr->range_addr_start) { + trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu, + tr, cpu, &snapshot_fops); - trace_create_cpu_file("snapshot_raw", 0444, d_cpu, - tr, cpu, &snapshot_raw_fops); + trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu, + tr, cpu, &snapshot_raw_fops); + } #endif } @@ -8183,12 +8721,33 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } +static int tracing_open_options(struct inode *inode, struct file *filp) +{ + struct trace_option_dentry *topt = inode->i_private; + int ret; + + ret = tracing_check_open_get_tr(topt->tr); + if (ret) + return ret; + + filp->private_data = inode->i_private; + return 0; +} + +static int tracing_release_options(struct inode *inode, struct file *file) +{ + struct trace_option_dentry *topt = file->private_data; + + trace_array_put(topt->tr); + return 0; +} static const struct file_operations trace_options_fops = { - .open = tracing_open_generic, + .open = tracing_open_options, .read = trace_options_read, .write = trace_options_write, .llseek = generic_file_llseek, + .release = tracing_release_options, }; /* @@ -8335,8 +8894,8 @@ create_trace_option_file(struct trace_array *tr, topt->opt = opt; topt->tr = tr; - topt->entry = trace_create_file(opt->name, 0644, t_options, topt, - &trace_options_fops); + topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE, + t_options, topt, &trace_options_fops); } @@ -8411,7 +8970,7 @@ create_trace_option_core_file(struct trace_array *tr, if (!t_options) return NULL; - return trace_create_file(option, 0644, t_options, + return trace_create_file(option, TRACE_MODE_WRITE, t_options, (void *)&tr->trace_flags_index[index], &trace_options_core_fops); } @@ -8472,6 +9031,8 @@ rb_simple_write(struct file *filp, const char __user *ubuf, tracer_tracing_off(tr); if (tr->current_trace->stop) tr->current_trace->stop(tr); + /* Wake up any waiters */ + ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS); } mutex_unlock(&trace_types_lock); } @@ -8518,9 +9079,6 @@ buffer_percent_write(struct file *filp, const char __user *ubuf, if (val > 100) return -EINVAL; - if (!val) - val = 1; - tr->buffer_percent = val; (*ppos)++; @@ -8536,6 +9094,103 @@ static const struct file_operations buffer_percent_fops = { .llseek = default_llseek, }; +static ssize_t +buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = 
filp->private_data; + size_t size; + char buf[64]; + int order; + int r; + + order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); + size = (PAGE_SIZE << order) / 1024; + + r = sprintf(buf, "%zd\n", size); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +buffer_subbuf_size_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = filp->private_data; + unsigned long val; + int old_order; + int order; + int pages; + int ret; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) + return ret; + + val *= 1024; /* value passed in is in KB */ + + pages = DIV_ROUND_UP(val, PAGE_SIZE); + order = fls(pages - 1); + + /* limit between 1 and 128 system pages */ + if (order < 0 || order > 7) + return -EINVAL; + + /* Do not allow tracing while changing the order of the ring buffer */ + tracing_stop_tr(tr); + + old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); + if (old_order == order) + goto out; + + ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order); + if (ret) + goto out; + +#ifdef CONFIG_TRACER_MAX_TRACE + + if (!tr->allocated_snapshot) + goto out_max; + + ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); + if (ret) { + /* Put back the old order */ + cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order); + if (WARN_ON_ONCE(cnt)) { + /* + * AARGH! We are left with different orders! + * The max buffer is our "snapshot" buffer. + * When a tracer needs a snapshot (one of the + * latency tracers), it swaps the max buffer + * with the saved snapshot. We succeeded in + * updating the order of the main buffer, but failed to + * update the order of the max buffer. But when we tried + * to reset the main buffer to the original size, we + * failed there too. This is very unlikely to + * happen, but if it does, warn and kill all + * tracing. + */ + tracing_disabled = 1; + } + goto out; + } + out_max: +#endif + (*ppos)++; + out: + if (ret) + cnt = ret; + tracing_start_tr(tr); + return cnt; +} + +static const struct file_operations buffer_subbuf_size_fops = { + .open = tracing_open_generic_tr, + .read = buffer_subbuf_size_read, + .write = buffer_subbuf_size_write, + .release = tracing_release_generic_tr, + .llseek = default_llseek, +}; + static struct dentry *trace_instance_dir; static void @@ -8550,7 +9205,21 @@ allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size buf->tr = tr; - buf->buffer = ring_buffer_alloc(size, rb_flags); + if (tr->range_addr_start && tr->range_addr_size) { + buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0, + tr->range_addr_start, + tr->range_addr_size); + + ring_buffer_last_boot_delta(buf->buffer, + &tr->text_delta, &tr->data_delta); + /* + * This is basically the same as a mapped buffer, + * with the same restrictions.
+ */ + tr->mapped++; + } else { + buf->buffer = ring_buffer_alloc(size, rb_flags); + } if (!buf->buffer) return -ENOMEM; @@ -8568,6 +9237,16 @@ allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size return 0; } +static void free_trace_buffer(struct array_buffer *buf) +{ + if (buf->buffer) { + ring_buffer_free(buf->buffer); + buf->buffer = NULL; + free_percpu(buf->data); + buf->data = NULL; + } +} + static int allocate_trace_buffers(struct trace_array *tr, int size) { int ret; @@ -8577,37 +9256,24 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) return ret; #ifdef CONFIG_TRACER_MAX_TRACE + /* Boot mapped buffer trace arrays do not have snapshot buffers */ + if (tr->range_addr_start) + return 0; + ret = allocate_trace_buffer(tr, &tr->max_buffer, allocate_snapshot ? size : 1); if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { - ring_buffer_free(tr->array_buffer.buffer); - tr->array_buffer.buffer = NULL; - free_percpu(tr->array_buffer.data); - tr->array_buffer.data = NULL; + free_trace_buffer(&tr->array_buffer); return -ENOMEM; } tr->allocated_snapshot = allocate_snapshot; - /* - * Only the top level trace array gets its snapshot allocated - * from the kernel command line. - */ allocate_snapshot = false; #endif return 0; } -static void free_trace_buffer(struct array_buffer *buf) -{ - if (buf->buffer) { - ring_buffer_free(buf->buffer); - buf->buffer = NULL; - free_percpu(buf->data); - buf->data = NULL; - } -} - static void free_trace_buffers(struct trace_array *tr) { if (!tr) @@ -8640,6 +9306,7 @@ static void __update_tracer_options(struct trace_array *tr) static void update_tracer_options(struct trace_array *tr) { mutex_lock(&trace_types_lock); + tracer_options_updated = true; __update_tracer_options(tr); mutex_unlock(&trace_types_lock); } @@ -8681,8 +9348,10 @@ static int trace_array_create_dir(struct trace_array *tr) return -EINVAL; ret = event_trace_add_tracer(tr->dir, tr); - if (ret) + if (ret) { tracefs_remove(tr->dir); + return ret; + } init_tracer_tracefs(tr, tr->dir); __update_tracer_options(tr); @@ -8690,7 +9359,10 @@ static int trace_array_create_dir(struct trace_array *tr) return ret; } -static struct trace_array *trace_array_create(const char *name) +static struct trace_array * +trace_array_create_systems(const char *name, const char *systems, + unsigned long range_addr_start, + unsigned long range_addr_size) { struct trace_array *tr; int ret; @@ -8707,6 +9379,19 @@ static struct trace_array *trace_array_create(const char *name) if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) goto out_free_tr; + if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) + goto out_free_tr; + + if (systems) { + tr->system_names = kstrdup_const(systems, GFP_KERNEL); + if (!tr->system_names) + goto out_free_tr; + } + + /* Only for boot up memory mapped ring buffers */ + tr->range_addr_start = range_addr_start; + tr->range_addr_size = range_addr_size; + tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; cpumask_copy(tr->tracing_cpumask, cpu_all_mask); @@ -8714,7 +9399,9 @@ static struct trace_array *trace_array_create(const char *name) raw_spin_lock_init(&tr->start_lock); tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; - +#ifdef CONFIG_TRACER_MAX_TRACE + spin_lock_init(&tr->snapshot_trigger_lock); +#endif tr->current_trace = &nop_trace; INIT_LIST_HEAD(&tr->systems); INIT_LIST_HEAD(&tr->events); INIT_LIST_HEAD(&tr->hist_vars); INIT_LIST_HEAD(&tr->err_log);
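The trace_array_create_systems() constructor taking shape in this hunk backs the public instance API, so a kernel module can create and log to its own trace instance without touching global_trace. A minimal sketch, patterned on samples/ftrace/sample-trace-array.c and updated for the two-argument trace_array_get_by_name() introduced later in this patch; the instance name and message are illustrative, not part of this change:

	#include <linux/kernel.h>
	#include <linux/module.h>
	#include <linux/trace.h>

	static struct trace_array *tr;

	static int __init sample_instance_init(void)
	{
		/* Look up or create the instance; this also takes a reference on it */
		tr = trace_array_get_by_name("sample-instance", NULL);
		if (!tr)
			return -ENOMEM;

		/* Allocate the temporary buffers trace_array_printk() needs */
		trace_array_init_printk(tr);

		/* This event goes only into the "sample-instance" ring buffer */
		trace_array_printk(tr, _THIS_IP_, "created instance in %s\n", __func__);
		return 0;
	}

	static void __exit sample_instance_exit(void)
	{
		/* Drop our reference, then remove the instance entirely */
		trace_array_put(tr);
		trace_array_destroy(tr);
	}

	module_init(sample_instance_init);
	module_exit(sample_instance_exit);
	MODULE_LICENSE("GPL");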
+#ifdef CONFIG_MODULES + INIT_LIST_HEAD(&tr->mod_events); +#endif + if (allocate_trace_buffers(tr, trace_buf_size) < 0) goto out_free_tr; + /* The ring buffer is expanded by default */ + trace_set_ring_buffer_expanded(tr); + if (ftrace_allocate_ftrace_ops(tr) < 0) goto out_free_tr; @@ -8748,38 +9442,68 @@ static struct trace_array *trace_array_create(const char *name) out_free_tr: ftrace_free_ftrace_ops(tr); free_trace_buffers(tr); + free_cpumask_var(tr->pipe_cpumask); free_cpumask_var(tr->tracing_cpumask); + kfree_const(tr->system_names); kfree(tr->name); kfree(tr); return ERR_PTR(ret); } +static struct trace_array *trace_array_create(const char *name) +{ + return trace_array_create_systems(name, NULL, 0, 0); +} + static int instance_mkdir(const char *name) { struct trace_array *tr; int ret; - mutex_lock(&event_mutex); - mutex_lock(&trace_types_lock); + guard(mutex)(&event_mutex); + guard(mutex)(&trace_types_lock); ret = -EEXIST; if (trace_array_find(name)) - goto out_unlock; + return -EEXIST; tr = trace_array_create(name); ret = PTR_ERR_OR_ZERO(tr); -out_unlock: - mutex_unlock(&trace_types_lock); - mutex_unlock(&event_mutex); return ret; } +static u64 map_pages(u64 start, u64 size) +{ + struct page **pages; + phys_addr_t page_start; + unsigned int page_count; + unsigned int i; + void *vaddr; + + page_count = DIV_ROUND_UP(size, PAGE_SIZE); + + page_start = start; + pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); + if (!pages) + return 0; + + for (i = 0; i < page_count; i++) { + phys_addr_t addr = page_start + i * PAGE_SIZE; + pages[i] = pfn_to_page(addr >> PAGE_SHIFT); + } + vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL); + kfree(pages); + + return (u64)(unsigned long)vaddr; +} + /** * trace_array_get_by_name - Create/Lookup a trace array, given its name. * @name: The name of the trace array to be looked up/created. + * @systems: A list of systems to create event directories for (NULL for all) * * Returns pointer to trace array with given name. * NULL, if it cannot be created. @@ -8793,28 +9517,27 @@ out_unlock: * trace_array_put() is called, user space can not delete it.
* */ -struct trace_array *trace_array_get_by_name(const char *name) +struct trace_array *trace_array_get_by_name(const char *name, const char *systems) { struct trace_array *tr; - mutex_lock(&event_mutex); - mutex_lock(&trace_types_lock); + guard(mutex)(&event_mutex); + guard(mutex)(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr->name && strcmp(tr->name, name) == 0) - goto out_unlock; + if (tr->name && strcmp(tr->name, name) == 0) { + tr->ref++; + return tr; + } } - tr = trace_array_create(name); + tr = trace_array_create_systems(name, systems, 0, 0); if (IS_ERR(tr)) tr = NULL; -out_unlock: - if (tr) + else tr->ref++; - mutex_unlock(&trace_types_lock); - mutex_unlock(&event_mutex); return tr; } EXPORT_SYMBOL_GPL(trace_array_get_by_name); @@ -8835,20 +9558,27 @@ static int __remove_instance(struct trace_array *tr) set_tracer_flag(tr, 1 << i, 0); } + if (printk_trace == tr) + update_printk_trace(&global_trace); + tracing_set_nop(tr); clear_ftrace_function_probes(tr); event_trace_del_tracer(tr); ftrace_clear_pids(tr); ftrace_destroy_function_files(tr); tracefs_remove(tr->dir); + free_percpu(tr->last_func_repeats); free_trace_buffers(tr); + clear_tracing_err_log(tr); for (i = 0; i < tr->nr_topts; i++) { kfree(tr->topts[i].topts); } kfree(tr->topts); + free_cpumask_var(tr->pipe_cpumask); free_cpumask_var(tr->tracing_cpumask); + kfree_const(tr->system_names); kfree(tr->name); kfree(tr); @@ -8858,48 +9588,36 @@ static int __remove_instance(struct trace_array *tr) int trace_array_destroy(struct trace_array *this_tr) { struct trace_array *tr; - int ret; if (!this_tr) return -EINVAL; - mutex_lock(&event_mutex); - mutex_lock(&trace_types_lock); + guard(mutex)(&event_mutex); + guard(mutex)(&trace_types_lock); - ret = -ENODEV; /* Making sure trace array exists before destroying it. 
*/ list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr == this_tr) { - ret = __remove_instance(tr); - break; - } + if (tr == this_tr) + return __remove_instance(tr); } - mutex_unlock(&trace_types_lock); - mutex_unlock(&event_mutex); - - return ret; + return -ENODEV; } EXPORT_SYMBOL_GPL(trace_array_destroy); static int instance_rmdir(const char *name) { struct trace_array *tr; - int ret; - mutex_lock(&event_mutex); - mutex_lock(&trace_types_lock); + guard(mutex)(&event_mutex); + guard(mutex)(&trace_types_lock); - ret = -ENODEV; tr = trace_array_find(name); - if (tr) - ret = __remove_instance(tr); - - mutex_unlock(&trace_types_lock); - mutex_unlock(&event_mutex); + if (!tr) + return -ENODEV; - return ret; + return __remove_instance(tr); } static __init void create_trace_instances(struct dentry *d_tracer) @@ -8912,49 +9630,45 @@ static __init void create_trace_instances(struct dentry *d_tracer) if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n")) return; - mutex_lock(&event_mutex); - mutex_lock(&trace_types_lock); + guard(mutex)(&event_mutex); + guard(mutex)(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (!tr->name) continue; if (MEM_FAIL(trace_array_create_dir(tr) < 0, "Failed to create instance directory\n")) - break; + return; } - - mutex_unlock(&trace_types_lock); - mutex_unlock(&event_mutex); } static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) { - struct trace_event_file *file; int cpu; - trace_create_file("available_tracers", 0444, d_tracer, + trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer, tr, &show_traces_fops); - trace_create_file("current_tracer", 0644, d_tracer, + trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer, tr, &set_tracer_fops); - trace_create_file("tracing_cpumask", 0644, d_tracer, + trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer, tr, &tracing_cpumask_fops); - trace_create_file("trace_options", 0644, d_tracer, + trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer, tr, &tracing_iter_fops); - trace_create_file("trace", 0644, d_tracer, + trace_create_file("trace", TRACE_MODE_WRITE, d_tracer, tr, &tracing_fops); - trace_create_file("trace_pipe", 0444, d_tracer, + trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer, tr, &tracing_pipe_fops); - trace_create_file("buffer_size_kb", 0644, d_tracer, + trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer, tr, &tracing_entries_fops); - trace_create_file("buffer_total_size_kb", 0444, d_tracer, + trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer, tr, &tracing_total_entries_fops); trace_create_file("free_buffer", 0200, d_tracer, @@ -8963,44 +9677,48 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) trace_create_file("trace_marker", 0220, d_tracer, tr, &tracing_mark_fops); - file = __find_event_file(tr, "ftrace", "print"); - if (file && file->dir) - trace_create_file("trigger", 0644, file->dir, file, - &event_trigger_fops); - tr->trace_marker_file = file; + tr->trace_marker_file = __find_event_file(tr, "ftrace", "print"); trace_create_file("trace_marker_raw", 0220, d_tracer, tr, &tracing_mark_raw_fops); - trace_create_file("trace_clock", 0644, d_tracer, tr, + trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr, &trace_clock_fops); - trace_create_file("tracing_on", 0644, d_tracer, + trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer, tr, &rb_simple_fops); - trace_create_file("timestamp_mode", 0444, d_tracer, tr, + 
trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr, &trace_time_stamp_mode_fops); tr->buffer_percent = 50; - trace_create_file("buffer_percent", 0444, d_tracer, + trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer, tr, &buffer_percent_fops); + trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer, + tr, &buffer_subbuf_size_fops); + create_trace_options_dir(tr); -#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) +#ifdef CONFIG_TRACER_MAX_TRACE trace_create_maxlat_file(tr, d_tracer); #endif if (ftrace_create_function_files(tr, d_tracer)) MEM_FAIL(1, "Could not allocate function filter files"); + if (tr->range_addr_start) { + trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer, + tr, &last_boot_fops); #ifdef CONFIG_TRACER_SNAPSHOT - trace_create_file("snapshot", 0644, d_tracer, - tr, &snapshot_fops); + } else { + trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer, + tr, &snapshot_fops); #endif + } - trace_create_file("error_log", 0644, d_tracer, + trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer, tr, &tracing_err_log_fops); for_each_tracing_cpu(cpu) @@ -9058,7 +9776,7 @@ int tracing_init_dentry(void) * As there may still be users that expect the tracing * files to exist in debugfs/tracing, we must automount * the tracefs file system there, so older tools still - * work with the newer kerenl. + * work with the newer kernel. */ tr->dir = debugfs_create_automount("tracing", NULL, trace_automount, NULL); @@ -9071,6 +9789,7 @@ extern struct trace_eval_map *__stop_ftrace_eval_maps[]; static struct workqueue_struct *eval_map_wq __initdata; static struct work_struct eval_map_work __initdata; +static struct work_struct tracerfs_init_work __initdata; static void __init eval_map_work_func(struct work_struct *work) { @@ -9096,6 +9815,8 @@ static int __init trace_eval_init(void) return 0; } +subsys_initcall(trace_eval_init); + static int __init trace_eval_sync(void) { /* Make sure the eval map updates are finished */ @@ -9108,6 +9829,24 @@ late_initcall_sync(trace_eval_sync); #ifdef CONFIG_MODULES + +bool module_exists(const char *module) +{ + /* All modules have the symbol __this_module */ + static const char this_mod[] = "__this_module"; + char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2]; + unsigned long val; + int n; + + n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod); + + if (n > sizeof(modname) - 1) + return false; + + val = module_kallsyms_lookup_name(modname); + return val != 0; +} + static void trace_module_add_evals(struct module *mod) { if (!mod->num_trace_evals) @@ -9132,7 +9871,7 @@ static void trace_module_remove_evals(struct module *mod) if (!mod->num_trace_evals) return; - mutex_lock(&trace_eval_mutex); + guard(mutex)(&trace_eval_mutex); map = trace_eval_maps; @@ -9144,12 +9883,10 @@ static void trace_module_remove_evals(struct module *mod) map = map->tail.next; } if (!map) - goto out; + return; *last = trace_eval_jmp_to_tail(map)->tail.next; kfree(map); - out: - mutex_unlock(&trace_eval_mutex); } #else static inline void trace_module_remove_evals(struct module *mod) { } @@ -9178,38 +9915,29 @@ static struct notifier_block trace_module_nb = { }; #endif /* CONFIG_MODULES */ -static __init int tracer_init_tracefs(void) +static __init void tracer_init_tracefs_work_func(struct work_struct *work) { - int ret; - - trace_access_lock_init(); - - ret = tracing_init_dentry(); - if (ret) - return 0; event_trace_init(); init_tracer_tracefs(&global_trace, NULL); 
ftrace_init_tracefs_toplevel(&global_trace, NULL); - trace_create_file("tracing_thresh", 0644, NULL, + trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL, &global_trace, &tracing_thresh_fops); - trace_create_file("README", 0444, NULL, + trace_create_file("README", TRACE_MODE_READ, NULL, NULL, &tracing_readme_fops); - trace_create_file("saved_cmdlines", 0444, NULL, + trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL, NULL, &tracing_saved_cmdlines_fops); - trace_create_file("saved_cmdlines_size", 0644, NULL, + trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL, NULL, &tracing_saved_cmdlines_size_fops); - trace_create_file("saved_tgids", 0444, NULL, + trace_create_file("saved_tgids", TRACE_MODE_READ, NULL, NULL, &tracing_saved_tgids_fops); - trace_eval_init(); - trace_create_eval_file(NULL); #ifdef CONFIG_MODULES @@ -9217,52 +9945,74 @@ static __init int tracer_init_tracefs(void) #endif #ifdef CONFIG_DYNAMIC_FTRACE - trace_create_file("dyn_ftrace_total_info", 0444, NULL, + trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL, NULL, &tracing_dyn_info_fops); #endif create_trace_instances(NULL); update_tracer_options(&global_trace); - - return 0; } -static int trace_panic_handler(struct notifier_block *this, - unsigned long event, void *unused) +static __init int tracer_init_tracefs(void) { - if (ftrace_dump_on_oops) - ftrace_dump(ftrace_dump_on_oops); - return NOTIFY_OK; -} + int ret; -static struct notifier_block trace_panic_notifier = { - .notifier_call = trace_panic_handler, - .next = NULL, - .priority = 150 /* priority: INT_MAX >= x >= 0 */ -}; + trace_access_lock_init(); -static int trace_die_handler(struct notifier_block *self, - unsigned long val, - void *data) -{ - switch (val) { - case DIE_OOPS: - if (ftrace_dump_on_oops) - ftrace_dump(ftrace_dump_on_oops); - break; - default: - break; + ret = tracing_init_dentry(); + if (ret) + return 0; + + if (eval_map_wq) { + INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func); + queue_work(eval_map_wq, &tracerfs_init_work); + } else { + tracer_init_tracefs_work_func(NULL); } - return NOTIFY_OK; + + rv_init_interface(); + + return 0; } +fs_initcall(tracer_init_tracefs); + +static int trace_die_panic_handler(struct notifier_block *self, + unsigned long ev, void *unused); + +static struct notifier_block trace_panic_notifier = { + .notifier_call = trace_die_panic_handler, + .priority = INT_MAX - 1, +}; + static struct notifier_block trace_die_notifier = { - .notifier_call = trace_die_handler, - .priority = 200 + .notifier_call = trace_die_panic_handler, + .priority = INT_MAX - 1, }; /* + * The idea is to execute the following die/panic callback early, in order + * to avoid showing irrelevant information in the trace (like other panic + * notifier functions); we are the 2nd to run, after hung_task/rcu_stall + * warnings get disabled (to prevent potential log flooding). + */ +static int trace_die_panic_handler(struct notifier_block *self, + unsigned long ev, void *unused) +{ + if (!ftrace_dump_on_oops_enabled()) + return NOTIFY_DONE; + + /* The die notifier requires DIE_OOPS to trigger */ + if (self == &trace_die_notifier && ev != DIE_OOPS) + return NOTIFY_DONE; + + ftrace_dump(DUMP_PARAM); + + return NOTIFY_DONE; +} + +/* * printk is set to max of 1024, we really don't need it that big. * Nothing should be printing 1000 characters anyway. 
*/ @@ -9298,12 +10048,12 @@ trace_printk_seq(struct trace_seq *s) trace_seq_init(s); } -void trace_init_global_iter(struct trace_iterator *iter) +static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr) { - iter->tr = &global_trace; + iter->tr = tr; iter->trace = iter->tr->current_trace; iter->cpu_file = RING_BUFFER_ALL_CPUS; - iter->array_buffer = &global_trace.array_buffer; + iter->array_buffer = &tr->array_buffer; if (iter->trace && iter->trace->open) iter->trace->open(iter); @@ -9315,24 +10065,27 @@ void trace_init_global_iter(struct trace_iterator *iter) /* Output in nanoseconds only if we are using a clock in nanoseconds. */ if (trace_clocks[iter->tr->clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; + + /* Can not use kmalloc for iter.temp and iter.fmt */ + iter->temp = static_temp_buf; + iter->temp_size = STATIC_TEMP_BUF_SIZE; + iter->fmt = static_fmt_buf; + iter->fmt_size = STATIC_FMT_BUF_SIZE; } -void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) +void trace_init_global_iter(struct trace_iterator *iter) +{ + trace_init_iter(iter, &global_trace); +} + +static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode) { /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; - static atomic_t dump_running; - struct trace_array *tr = &global_trace; unsigned int old_userobj; unsigned long flags; int cnt = 0, cpu; - /* Only allow one dump user at a time. */ - if (atomic_inc_return(&dump_running) != 1) { - atomic_dec(&dump_running); - return; - } - /* * Always turn off tracing when we dump. * We don't need to show trace output of what happens @@ -9341,16 +10094,12 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) * If the user does a sysrq-z, then they can re-enable * tracing with echo 1 > tracing_on. */ - tracing_off(); + tracer_tracing_off(tr); local_irq_save(flags); - printk_nmi_direct_enter(); /* Simulate the iterator */ - trace_init_global_iter(&iter); - /* Can not use kmalloc for iter.temp */ - iter.temp = static_temp_buf; - iter.temp_size = STATIC_TEMP_BUF_SIZE; + trace_init_iter(&iter, tr); for_each_tracing_cpu(cpu) { atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); @@ -9361,21 +10110,15 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) /* don't look at user memory in panic mode */ tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ; - switch (oops_dump_mode) { - case DUMP_ALL: - iter.cpu_file = RING_BUFFER_ALL_CPUS; - break; - case DUMP_ORIG: + if (dump_mode == DUMP_ORIG) iter.cpu_file = raw_smp_processor_id(); - break; - case DUMP_NONE: - goto out_enable; - default: - printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n"); + else iter.cpu_file = RING_BUFFER_ALL_CPUS; - } - printk(KERN_TRACE "Dumping ftrace buffer:\n"); + if (tr == &global_trace) + printk(KERN_TRACE "Dumping ftrace buffer:\n"); + else + printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name); /* Did function tracer already get disabled? 
@@ -9417,42 +10160,91 @@
 	else
 		printk(KERN_TRACE "---------------------------------\n");
 
- out_enable:
 	tr->trace_flags |= old_userobj;
 
 	for_each_tracing_cpu(cpu) {
 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
 	}
-	atomic_dec(&dump_running);
-	printk_nmi_direct_exit();
 	local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(ftrace_dump);
 
-int trace_run_command(const char *buf, int (*createfn)(int, char **))
+static void ftrace_dump_by_param(void)
 {
-	char **argv;
-	int argc, ret;
+	bool first_param = true;
+	char dump_param[MAX_TRACER_SIZE];
+	char *buf, *token, *inst_name;
+	struct trace_array *tr;
 
-	argc = 0;
-	ret = 0;
-	argv = argv_split(GFP_KERNEL, buf, &argc);
-	if (!argv)
-		return -ENOMEM;
+	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
+	buf = dump_param;
+
+	while ((token = strsep(&buf, ",")) != NULL) {
+		if (first_param) {
+			first_param = false;
+			if (!strcmp("0", token))
+				continue;
+			else if (!strcmp("1", token)) {
+				ftrace_dump_one(&global_trace, DUMP_ALL);
+				continue;
+			}
+			else if (!strcmp("2", token) ||
+				 !strcmp("orig_cpu", token)) {
+				ftrace_dump_one(&global_trace, DUMP_ORIG);
+				continue;
+			}
+		}
+
+		inst_name = strsep(&token, "=");
+		tr = trace_array_find(inst_name);
+		if (!tr) {
+			printk(KERN_TRACE "Instance %s not found\n", inst_name);
+			continue;
+		}
 
-	if (argc)
-		ret = createfn(argc, argv);
+		if (token && (!strcmp("2", token) ||
+			      !strcmp("orig_cpu", token)))
+			ftrace_dump_one(tr, DUMP_ORIG);
+		else
+			ftrace_dump_one(tr, DUMP_ALL);
+	}
+}
 
-	argv_free(argv);
+void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
+{
+	static atomic_t dump_running;
 
-	return ret;
+	/* Only allow one dump user at a time. */
+	if (atomic_inc_return(&dump_running) != 1) {
+		atomic_dec(&dump_running);
+		return;
+	}
+
+	switch (oops_dump_mode) {
+	case DUMP_ALL:
+		ftrace_dump_one(&global_trace, DUMP_ALL);
+		break;
+	case DUMP_ORIG:
+		ftrace_dump_one(&global_trace, DUMP_ORIG);
+		break;
+	case DUMP_PARAM:
+		ftrace_dump_by_param();
+		break;
+	case DUMP_NONE:
+		break;
+	default:
+		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
+		ftrace_dump_one(&global_trace, DUMP_ALL);
+	}
+
+	atomic_dec(&dump_running);
 }
+EXPORT_SYMBOL_GPL(ftrace_dump);
 
 #define WRITE_BUFSIZE  4096
 
 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
 				size_t count, loff_t *ppos,
-				int (*createfn)(int, char **))
+				int (*createfn)(const char *))
 {
 	char *kbuf, *buf, *tmp;
 	int ret = 0;
@@ -9500,7 +10292,7 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
 		if (tmp)
 			*tmp = '\0';
 
-		ret = trace_run_command(buf, createfn);
+		ret = createfn(buf);
 		if (ret)
 			goto out;
 		buf += size;
@@ -9515,6 +10307,168 @@ out:
 	return ret;
 }
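
Since ftrace_dump() keeps its EXPORT_SYMBOL_GPL, module code can still force a dump from a fatal error path; DUMP_PARAM is the only new mode and is meant for the die/panic path above. A minimal sketch of a caller (hypothetical function name; the enum and prototype come from linux/kernel.h in older trees, linux/ftrace.h in newer ones):

    #include <linux/kernel.h>	/* enum ftrace_dump_mode, ftrace_dump() */

    static void my_fatal_path(void)
    {
    	/* dumps every CPU of the global buffer; tracing is switched
    	 * off by the dump itself, as ftrace_dump_one() shows above */
    	ftrace_dump(DUMP_ALL);
    }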
+#ifdef CONFIG_TRACER_MAX_TRACE
+__init static bool tr_needs_alloc_snapshot(const char *name)
+{
+	char *test;
+	int len = strlen(name);
+	bool ret;
+
+	if (!boot_snapshot_index)
+		return false;
+
+	if (strncmp(name, boot_snapshot_info, len) == 0 &&
+	    boot_snapshot_info[len] == '\t')
+		return true;
+
+	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
+	if (!test)
+		return false;
+
+	sprintf(test, "\t%s\t", name);
+	ret = strstr(boot_snapshot_info, test) != NULL;
+	kfree(test);
+	return ret;
+}
+
+__init static void do_allocate_snapshot(const char *name)
+{
+	if (!tr_needs_alloc_snapshot(name))
+		return;
+
+	/*
+	 * When allocate_snapshot is set, the next call to
+	 * allocate_trace_buffers() (called by trace_array_get_by_name())
+	 * will allocate the snapshot buffer. That will also clear
+	 * this flag.
+	 */
+	allocate_snapshot = true;
+}
+#else
+static inline void do_allocate_snapshot(const char *name) { }
+#endif
+
+__init static void enable_instances(void)
+{
+	struct trace_array *tr;
+	char *curr_str;
+	char *name;
+	char *str;
+	char *tok;
+
+	/* A tab is always appended */
+	boot_instance_info[boot_instance_index - 1] = '\0';
+	str = boot_instance_info;
+
+	while ((curr_str = strsep(&str, "\t"))) {
+		phys_addr_t start = 0;
+		phys_addr_t size = 0;
+		unsigned long addr = 0;
+		bool traceprintk = false;
+		bool traceoff = false;
+		char *flag_delim;
+		char *addr_delim;
+
+		tok = strsep(&curr_str, ",");
+
+		flag_delim = strchr(tok, '^');
+		addr_delim = strchr(tok, '@');
+
+		if (addr_delim)
+			*addr_delim++ = '\0';
+
+		if (flag_delim)
+			*flag_delim++ = '\0';
+
+		name = tok;
+
+		if (flag_delim) {
+			char *flag;
+
+			while ((flag = strsep(&flag_delim, "^"))) {
+				if (strcmp(flag, "traceoff") == 0) {
+					traceoff = true;
+				} else if ((strcmp(flag, "printk") == 0) ||
+					   (strcmp(flag, "traceprintk") == 0) ||
+					   (strcmp(flag, "trace_printk") == 0)) {
+					traceprintk = true;
+				} else {
+					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
						flag, name);
+				}
+			}
+		}
+
+		tok = addr_delim;
+		if (tok && isdigit(*tok)) {
+			start = memparse(tok, &tok);
+			if (!start) {
+				pr_warn("Tracing: Invalid boot instance address for %s\n",
+					name);
+				continue;
+			}
+			if (*tok != ':') {
+				pr_warn("Tracing: No size specified for instance %s\n", name);
+				continue;
+			}
+			tok++;
+			size = memparse(tok, &tok);
+			if (!size) {
+				pr_warn("Tracing: Invalid boot instance size for %s\n",
+					name);
+				continue;
+			}
+		} else if (tok) {
+			if (!reserve_mem_find_by_name(tok, &start, &size)) {
+				start = 0;
+				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
+				continue;
+			}
+		}
+
+		if (start) {
+			addr = map_pages(start, size);
+			if (addr) {
+				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
+					name, &start, (unsigned long)size);
+			} else {
+				pr_warn("Tracing: Failed to map boot instance %s\n", name);
+				continue;
+			}
+		} else {
+			/* Only non mapped buffers have snapshot buffers */
+			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
+				do_allocate_snapshot(name);
+		}
+
+		tr = trace_array_create_systems(name, NULL, addr, size);
+		if (IS_ERR(tr)) {
+			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
+			continue;
+		}
+
+		if (traceoff)
+			tracer_tracing_off(tr);
+
+		if (traceprintk)
+			update_printk_trace(tr);
+
+		/*
+		 * If start is set, then this is a mapped buffer, and
+		 * cannot be deleted by user space, so keep the reference
+		 * to it.
+		 */
+		if (start) {
+			tr->flags |= TRACE_ARRAY_FL_BOOT;
+			tr->ref++;
+		}
+
+		while ((tok = strsep(&curr_str, ","))) {
+			early_enable_events(tr, tok, true);
+		}
+	}
+}
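
enable_instances() defines the grammar of the trace_instance= parameter: an instance name, optional ^-separated flags, an optional @ mapping (either a physical address:size pair or a reserve_mem label), then a comma-separated list of events to enable early. Illustrative command lines (the names "foo", "boot_map" and "trace" are made up; the parsing above is the authoritative reference):

    trace_instance=foo,sched:sched_switch            create "foo" and enable one event
    trace_instance=foo^traceoff^traceprintk           create "foo" stopped, trace_printk() routed to it
    trace_instance=boot_map@0x285400000:12M           back "boot_map" with fixed physical memory
    reserve_mem=12M:4096:trace trace_instance=boot_map@trace   the same via a reserve_mem region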
 
 __init static int tracer_alloc_buffers(void)
 {
 	int ring_buf_size;
@@ -9544,7 +10498,7 @@ __init static int tracer_alloc_buffers(void)
 	trace_printk_init_buffers();
 
 	/* To save memory, keep the ring buffer size to its minimum */
-	if (ring_buffer_expanded)
+	if (global_trace.ring_buffer_expanded)
 		ring_buf_size = trace_buf_size;
 	else
 		ring_buf_size = 1;
@@ -9561,7 +10515,7 @@ __init static int tracer_alloc_buffers(void)
 	 * buffer. The memory will be removed once the "instance" is removed.
 	 */
 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
-				      "trace/RB:preapre", trace_rb_cpu_prepare,
+				      "trace/RB:prepare", trace_rb_cpu_prepare,
 				      NULL);
 	if (ret < 0)
 		goto out_free_cpumask;
@@ -9574,12 +10528,14 @@ __init static int tracer_alloc_buffers(void)
 	if (trace_create_savedcmd() < 0)
 		goto out_free_temp_buffer;
 
+	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
+		goto out_free_savedcmd;
+
 	/* TODO: make the number of buffers hot pluggable with CPUS */
 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
-		goto out_free_savedcmd;
+		goto out_free_pipe_cpumask;
 	}
-
 	if (global_trace.buffer_disabled)
 		tracing_off();
@@ -9598,9 +10554,15 @@ __init static int tracer_alloc_buffers(void)
 	global_trace.current_trace = &nop_trace;
 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
-
+#ifdef CONFIG_TRACER_MAX_TRACE
+	spin_lock_init(&global_trace.snapshot_trigger_lock);
+#endif
 	ftrace_init_global_array_ops(&global_trace);
 
+#ifdef CONFIG_MODULES
+	INIT_LIST_HEAD(&global_trace.mod_events);
+#endif
+
 	init_trace_flags_index(&global_trace);
 
 	register_tracer(&nop_trace);
@@ -9630,8 +10592,10 @@ __init static int tracer_alloc_buffers(void)
 
 	return 0;
 
+out_free_pipe_cpumask:
+	free_cpumask_var(global_trace.pipe_cpumask);
 out_free_savedcmd:
-	free_saved_cmdlines_buffer(savedcmd);
+	trace_free_saved_cmdlines_buffer();
 out_free_temp_buffer:
 	ring_buffer_free(temp_buffer);
 out_rm_hp_state:
@@ -9644,11 +10608,37 @@ out:
 	return ret;
 }
 
+#ifdef CONFIG_FUNCTION_TRACER
+/* Used to set module cached ftrace filtering at boot up */
+__init struct trace_array *trace_get_global_array(void)
+{
+	return &global_trace;
+}
+#endif
+
+void __init ftrace_boot_snapshot(void)
+{
+#ifdef CONFIG_TRACER_MAX_TRACE
+	struct trace_array *tr;
+
+	if (!snapshot_at_boot)
+		return;
+
+	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+		if (!tr->allocated_snapshot)
+			continue;
+
+		tracing_snapshot_instance(tr);
+		trace_array_puts(tr, "** Boot snapshot taken **\n");
+	}
+#endif
+}
+
 void __init early_trace_init(void)
 {
 	if (tracepoint_printk) {
 		tracepoint_print_iter =
-			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
+			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
 		if (MEM_FAIL(!tracepoint_print_iter,
 			     "Failed to allocate trace iterator\n"))
 			tracepoint_printk = 0;
@@ -9656,14 +10646,19 @@
 			static_key_enable(&tracepoint_printk_key.key);
 	}
 	tracer_alloc_buffers();
+
+	init_events();
 }
 
 void __init trace_init(void)
 {
 	trace_event_init();
+
+	if (boot_instance_index)
+		enable_instances();
 }
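
enable_instances() only covers boot time; the same kind of instance can be created after boot through the exported trace_array API. A sketch with made-up names (in trees of this vintage trace_array_get_by_name() takes a second "systems" argument; older kernels take only the name):

    #include <linux/trace.h>		/* trace_array_get_by_name() and friends */
    #include <linux/trace_events.h>	/* trace_array_set_clr_event() */

    static struct trace_array *my_tr;

    static int __init my_instance_init(void)
    {
    	my_tr = trace_array_get_by_name("my_inst", NULL);
    	if (!my_tr)
    		return -ENOMEM;

    	/* runtime counterpart of the boot-time event list above */
    	trace_array_set_clr_event(my_tr, "sched", "sched_switch", true);
    	trace_array_printk(my_tr, _THIS_IP_, "instance ready\n");
    	return 0;
    }

    static void __exit my_instance_exit(void)
    {
    	trace_array_put(my_tr);		/* drop our reference ... */
    	trace_array_destroy(my_tr);	/* ... then remove the instance */
    }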
-__init static int clear_boot_tracer(void)
+__init static void clear_boot_tracer(void)
 {
 	/*
 	 * The default tracer at boot buffer is an init section.
@@ -9673,26 +10668,21 @@
 	 * about to be freed.
 	 */
 	if (!default_bootup_tracer)
-		return 0;
+		return;
 
 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
 	       default_bootup_tracer);
 	default_bootup_tracer = NULL;
-
-	return 0;
 }
 
-fs_initcall(tracer_init_tracefs);
-late_initcall_sync(clear_boot_tracer);
-
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-__init static int tracing_set_default_clock(void)
+__init static void tracing_set_default_clock(void)
 {
 	/* sched_clock_stable() is determined in late_initcall */
 	if (!trace_boot_clock && !sched_clock_stable()) {
 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
 			pr_warn("Can not set tracing clock due to lockdown\n");
-			return -EPERM;
+			return;
 		}
 
 		printk(KERN_WARNING
@@ -9702,8 +10692,21 @@ __init static int tracing_set_default_clock(void)
 		       "on the kernel command line\n");
 		tracing_set_clock(&global_trace, "global");
 	}
+}
+#else
+static inline void tracing_set_default_clock(void) { }
+#endif
 
+__init static int late_trace_init(void)
+{
+	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
+		static_key_disable(&tracepoint_printk_key.key);
+		tracepoint_printk = 0;
+	}
+
+	tracing_set_default_clock();
+	clear_boot_tracer();
 	return 0;
 }
-late_initcall_sync(tracing_set_default_clock);
-#endif
+
+late_initcall_sync(late_trace_init);
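
late_trace_init() also closes out boot-time tracepoint mirroring: when both tp_printk and tp_printk_stop_on_boot are given on the command line (the latter sets tracepoint_printk_stop_on_boot), tracepoints are printed through printk() during boot and the static key is switched off again here, just before the default clock and boot tracer are settled. A typical combination (event selection shown only for illustration):

    tp_printk tp_printk_stop_on_boot trace_event=initcall:*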