Diffstat (limited to 'kernel/trace/trace_events.c')
-rw-r--r--  kernel/trace/trace_events.c | 3859
1 file changed, 2997 insertions(+), 862 deletions(-)
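Among the changes below, ftrace_set_clr_event() gains a ":mod:<module>" suffix for set_event: if the named module is not loaded yet, the request is cached via cache_mod() so it can be applied once the module appears, and the cached entry is echoed back when set_event is read. A minimal user-space sketch of that interface, assuming tracefs is mounted at /sys/kernel/tracing and using "btrfs" purely as an example system/module name:

/*
 * Sketch only, not part of the diff below: exercise the ":mod:" suffix
 * that ftrace_set_clr_event() now accepts via the set_event file.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Example: enable all "btrfs" system events, now or when btrfs loads. */
	const char *cmd = "btrfs:*:mod:btrfs\n";
	int fd = open("/sys/kernel/tracing/set_event", O_WRONLY);

	if (fd < 0) {
		perror("open set_event");
		return 1;
	}
	/*
	 * If the module is already loaded, the matching events are enabled
	 * right away; otherwise the request is cached (cache_mod()) and is
	 * listed when reading set_event as "btrfs:*:mod:btrfs".
	 */
	if (write(fd, cmd, strlen(cmd)) != (ssize_t)strlen(cmd))
		perror("write set_event");
	close(fd);
	return 0;
}

Writing the same string prefixed with '!' disables the events, or drops a previously cached entry via remove_cache_mod() if the module was never loaded.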
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 7d854290bf81..b16a5a158040 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * event tracer * @@ -8,16 +9,23 @@ * */ +#define pr_fmt(fmt) fmt + #include <linux/workqueue.h> +#include <linux/security.h> #include <linux/spinlock.h> #include <linux/kthread.h> -#include <linux/debugfs.h> +#include <linux/tracefs.h> #include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> +#include <linux/sort.h> #include <linux/slab.h> #include <linux/delay.h> +#include <trace/events/sched.h> +#include <trace/syscall.h> + #include <asm/setup.h> #include "trace_output.h" @@ -27,35 +35,37 @@ DEFINE_MUTEX(event_mutex); -DEFINE_MUTEX(event_storage_mutex); -EXPORT_SYMBOL_GPL(event_storage_mutex); - -char event_storage[EVENT_STORAGE_SIZE]; -EXPORT_SYMBOL_GPL(event_storage); - LIST_HEAD(ftrace_events); +static LIST_HEAD(ftrace_generic_fields); static LIST_HEAD(ftrace_common_fields); +static bool eventdir_initialized; + +static LIST_HEAD(module_strings); + +struct module_string { + struct list_head next; + struct module *module; + char *str; +}; #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO) static struct kmem_cache *field_cachep; static struct kmem_cache *file_cachep; -#define SYSTEM_FL_FREE_NAME (1 << 31) - static inline int system_refcount(struct event_subsystem *system) { - return system->ref_count & ~SYSTEM_FL_FREE_NAME; + return system->ref_count; } static int system_refcount_inc(struct event_subsystem *system) { - return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME; + return system->ref_count++; } static int system_refcount_dec(struct event_subsystem *system) { - return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME; + return --system->ref_count; } /* Double loops, do not use break, only goto's work */ @@ -65,22 +75,14 @@ static int system_refcount_dec(struct event_subsystem *system) #define do_for_each_event_file_safe(tr, file) \ list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ - struct ftrace_event_file *___n; \ + struct trace_event_file *___n; \ list_for_each_entry_safe(file, ___n, &tr->events, list) #define while_for_each_event_file() \ } -static struct list_head * -trace_get_fields(struct ftrace_event_call *event_call) -{ - if (!event_call->class->get_fields) - return &event_call->class->fields; - return event_call->class->get_fields(event_call); -} - static struct ftrace_event_field * -__find_event_field(struct list_head *head, char *name) +__find_event_field(struct list_head *head, const char *name) { struct ftrace_event_field *field; @@ -93,22 +95,27 @@ __find_event_field(struct list_head *head, char *name) } struct ftrace_event_field * -trace_find_event_field(struct ftrace_event_call *call, char *name) +trace_find_event_field(struct trace_event_call *call, char *name) { struct ftrace_event_field *field; struct list_head *head; - field = __find_event_field(&ftrace_common_fields, name); + head = trace_get_fields(call); + field = __find_event_field(head, name); if (field) return field; - head = trace_get_fields(call); - return __find_event_field(head, name); + field = __find_event_field(&ftrace_generic_fields, name); + if (field) + return field; + + return __find_event_field(&ftrace_common_fields, name); } static int __trace_define_field(struct list_head *head, const char *type, const char *name, int offset, int size, - int is_signed, int filter_type) + int is_signed, int filter_type, int len, + int need_test) { struct 
ftrace_event_field *field; @@ -127,13 +134,15 @@ static int __trace_define_field(struct list_head *head, const char *type, field->offset = offset; field->size = size; field->is_signed = is_signed; + field->needs_test = need_test; + field->len = len; list_add(&field->link, head); return 0; } -int trace_define_field(struct ftrace_event_call *call, const char *type, +int trace_define_field(struct trace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type) { @@ -144,19 +153,56 @@ int trace_define_field(struct ftrace_event_call *call, const char *type, head = trace_get_fields(call); return __trace_define_field(head, type, name, offset, size, - is_signed, filter_type); + is_signed, filter_type, 0, 0); } EXPORT_SYMBOL_GPL(trace_define_field); +static int trace_define_field_ext(struct trace_event_call *call, const char *type, + const char *name, int offset, int size, int is_signed, + int filter_type, int len, int need_test) +{ + struct list_head *head; + + if (WARN_ON(!call->class)) + return 0; + + head = trace_get_fields(call); + return __trace_define_field(head, type, name, offset, size, + is_signed, filter_type, len, need_test); +} + +#define __generic_field(type, item, filter_type) \ + ret = __trace_define_field(&ftrace_generic_fields, #type, \ + #item, 0, 0, is_signed_type(type), \ + filter_type, 0, 0); \ + if (ret) \ + return ret; + #define __common_field(type, item) \ ret = __trace_define_field(&ftrace_common_fields, #type, \ "common_" #item, \ offsetof(typeof(ent), item), \ sizeof(ent.item), \ - is_signed_type(type), FILTER_OTHER); \ + is_signed_type(type), FILTER_OTHER, \ + 0, 0); \ if (ret) \ return ret; +static int trace_define_generic_fields(void) +{ + int ret; + + __generic_field(int, CPU, FILTER_CPU); + __generic_field(int, cpu, FILTER_CPU); + __generic_field(int, common_cpu, FILTER_CPU); + __generic_field(char *, COMM, FILTER_COMM); + __generic_field(char *, comm, FILTER_COMM); + __generic_field(char *, stacktrace, FILTER_STACKTRACE); + __generic_field(char *, STACKTRACE, FILTER_STACKTRACE); + + return ret; +} + static int trace_define_common_fields(void) { int ret; @@ -164,13 +210,14 @@ static int trace_define_common_fields(void) __common_field(unsigned short, type); __common_field(unsigned char, flags); + /* Holds both preempt_count and migrate_disable */ __common_field(unsigned char, preempt_count); __common_field(int, pid); return ret; } -static void trace_destroy_fields(struct ftrace_event_call *call) +static void trace_destroy_fields(struct trace_event_call *call) { struct ftrace_event_field *field, *next; struct list_head *head; @@ -182,41 +229,482 @@ static void trace_destroy_fields(struct ftrace_event_call *call) } } -int trace_event_raw_init(struct ftrace_event_call *call) +/* + * run-time version of trace_event_get_offsets_<call>() that returns the last + * accessible offset of trace fields excluding __dynamic_array bytes + */ +int trace_event_get_offsets(struct trace_event_call *call) +{ + struct ftrace_event_field *tail; + struct list_head *head; + + head = trace_get_fields(call); + /* + * head->next points to the last field with the largest offset, + * since it was added last by trace_define_field() + */ + tail = list_first_entry(head, struct ftrace_event_field, link); + return tail->offset + tail->size; +} + + +static struct trace_event_fields *find_event_field(const char *fmt, + struct trace_event_call *call) +{ + struct trace_event_fields *field = call->class->fields_array; + const char *p = fmt; + int len; + + 
if (!(len = str_has_prefix(fmt, "REC->"))) + return NULL; + fmt += len; + for (p = fmt; *p; p++) { + if (!isalnum(*p) && *p != '_') + break; + } + len = p - fmt; + + for (; field->type; field++) { + if (strncmp(field->name, fmt, len) || field->name[len]) + continue; + + return field; + } + return NULL; +} + +/* + * Check if the referenced field is an array and return true, + * as arrays are OK to dereference. + */ +static bool test_field(const char *fmt, struct trace_event_call *call) +{ + struct trace_event_fields *field; + + field = find_event_field(fmt, call); + if (!field) + return false; + + /* This is an array and is OK to dereference. */ + return strchr(field->type, '[') != NULL; +} + +/* Look for a string within an argument */ +static bool find_print_string(const char *arg, const char *str, const char *end) +{ + const char *r; + + r = strstr(arg, str); + return r && r < end; +} + +/* Return true if the argument pointer is safe */ +static bool process_pointer(const char *fmt, int len, struct trace_event_call *call) +{ + const char *r, *e, *a; + + e = fmt + len; + + /* Find the REC-> in the argument */ + r = strstr(fmt, "REC->"); + if (r && r < e) { + /* + * Addresses of events on the buffer, or an array on the buffer is + * OK to dereference. There's ways to fool this, but + * this is to catch common mistakes, not malicious code. + */ + a = strchr(fmt, '&'); + if ((a && (a < r)) || test_field(r, call)) + return true; + } else if (find_print_string(fmt, "__get_dynamic_array(", e)) { + return true; + } else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) { + return true; + } else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) { + return true; + } else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) { + return true; + } else if (find_print_string(fmt, "__get_sockaddr(", e)) { + return true; + } else if (find_print_string(fmt, "__get_rel_sockaddr(", e)) { + return true; + } + return false; +} + +/* Return true if the string is safe */ +static bool process_string(const char *fmt, int len, struct trace_event_call *call) +{ + struct trace_event_fields *field; + const char *r, *e, *s; + + e = fmt + len; + + /* + * There are several helper functions that return strings. + * If the argument contains a function, then assume its field is valid. + * It is considered that the argument has a function if it has: + * alphanumeric or '_' before a parenthesis. + */ + s = fmt; + do { + r = strstr(s, "("); + if (!r || r >= e) + break; + for (int i = 1; r - i >= s; i++) { + char ch = *(r - i); + if (isspace(ch)) + continue; + if (isalnum(ch) || ch == '_') + return true; + /* Anything else, this isn't a function */ + break; + } + /* A function could be wrapped in parenthesis, try the next one */ + s = r + 1; + } while (s < e); + + /* + * Check for arrays. If the argument has: foo[REC->val] + * then it is very likely that foo is an array of strings + * that are safe to use. + */ + r = strstr(s, "["); + if (r && r < e) { + r = strstr(r, "REC->"); + if (r && r < e) + return true; + } + + /* + * If there's any strings in the argument consider this arg OK as it + * could be: REC->field ? "foo" : "bar" and we don't want to get into + * verifying that logic here. 
+ */ + if (find_print_string(fmt, "\"", e)) + return true; + + /* Dereferenced strings are also valid like any other pointer */ + if (process_pointer(fmt, len, call)) + return true; + + /* Make sure the field is found */ + field = find_event_field(fmt, call); + if (!field) + return false; + + /* Test this field's string before printing the event */ + call->flags |= TRACE_EVENT_FL_TEST_STR; + field->needs_test = 1; + + return true; +} + +static void handle_dereference_arg(const char *arg_str, u64 string_flags, int len, + u64 *dereference_flags, int arg, + struct trace_event_call *call) +{ + if (string_flags & (1ULL << arg)) { + if (process_string(arg_str, len, call)) + *dereference_flags &= ~(1ULL << arg); + } else if (process_pointer(arg_str, len, call)) + *dereference_flags &= ~(1ULL << arg); + else + pr_warn("TRACE EVENT ERROR: Bad dereference argument: '%.*s'\n", + len, arg_str); +} + +/* + * Examine the print fmt of the event looking for unsafe dereference + * pointers using %p* that could be recorded in the trace event and + * much later referenced after the pointer was freed. Dereferencing + * pointers are OK, if it is dereferenced into the event itself. + */ +static void test_event_printk(struct trace_event_call *call) +{ + u64 dereference_flags = 0; + u64 string_flags = 0; + bool first = true; + const char *fmt; + int parens = 0; + char in_quote = 0; + int start_arg = 0; + int arg = 0; + int i, e; + + fmt = call->print_fmt; + + if (!fmt) + return; + + for (i = 0; fmt[i]; i++) { + switch (fmt[i]) { + case '\\': + i++; + if (!fmt[i]) + return; + continue; + case '"': + case '\'': + /* + * The print fmt starts with a string that + * is processed first to find %p* usage, + * then after the first string, the print fmt + * contains arguments that are used to check + * if the dereferenced %p* usage is safe. + */ + if (first) { + if (fmt[i] == '\'') + continue; + if (in_quote) { + arg = 0; + first = false; + /* + * If there was no %p* uses + * the fmt is OK. + */ + if (!dereference_flags) + return; + } + } + if (in_quote) { + if (in_quote == fmt[i]) + in_quote = 0; + } else { + in_quote = fmt[i]; + } + continue; + case '%': + if (!first || !in_quote) + continue; + i++; + if (!fmt[i]) + return; + switch (fmt[i]) { + case '%': + continue; + case 'p': + do_pointer: + /* Find dereferencing fields */ + switch (fmt[i + 1]) { + case 'B': case 'R': case 'r': + case 'b': case 'M': case 'm': + case 'I': case 'i': case 'E': + case 'U': case 'V': case 'N': + case 'a': case 'd': case 'D': + case 'g': case 't': case 'C': + case 'O': case 'f': + if (WARN_ONCE(arg == 63, + "Too many args for event: %s", + trace_event_name(call))) + return; + dereference_flags |= 1ULL << arg; + } + break; + default: + { + bool star = false; + int j; + + /* Increment arg if %*s exists. 
*/ + for (j = 0; fmt[i + j]; j++) { + if (isdigit(fmt[i + j]) || + fmt[i + j] == '.') + continue; + if (fmt[i + j] == '*') { + star = true; + /* Handle %*pbl case */ + if (!j && fmt[i + 1] == 'p') { + arg++; + i++; + goto do_pointer; + } + continue; + } + if ((fmt[i + j] == 's')) { + if (star) + arg++; + if (WARN_ONCE(arg == 63, + "Too many args for event: %s", + trace_event_name(call))) + return; + dereference_flags |= 1ULL << arg; + string_flags |= 1ULL << arg; + } + break; + } + break; + } /* default */ + + } /* switch */ + arg++; + continue; + case '(': + if (in_quote) + continue; + parens++; + continue; + case ')': + if (in_quote) + continue; + parens--; + if (WARN_ONCE(parens < 0, + "Paren mismatch for event: %s\narg='%s'\n%*s", + trace_event_name(call), + fmt + start_arg, + (i - start_arg) + 5, "^")) + return; + continue; + case ',': + if (in_quote || parens) + continue; + e = i; + i++; + while (isspace(fmt[i])) + i++; + + /* + * If start_arg is zero, then this is the start of the + * first argument. The processing of the argument happens + * when the end of the argument is found, as it needs to + * handle parenthesis and such. + */ + if (!start_arg) { + start_arg = i; + /* Balance out the i++ in the for loop */ + i--; + continue; + } + + if (dereference_flags & (1ULL << arg)) { + handle_dereference_arg(fmt + start_arg, string_flags, + e - start_arg, + &dereference_flags, arg, call); + } + + start_arg = i; + arg++; + /* Balance out the i++ in the for loop */ + i--; + } + } + + if (dereference_flags & (1ULL << arg)) { + handle_dereference_arg(fmt + start_arg, string_flags, + i - start_arg, + &dereference_flags, arg, call); + } + + /* + * If you triggered the below warning, the trace event reported + * uses an unsafe dereference pointer %p*. As the data stored + * at the trace event time may no longer exist when the trace + * event is printed, dereferencing to the original source is + * unsafe. The source of the dereference must be copied into the + * event itself, and the dereference must access the copy instead. + */ + if (WARN_ON_ONCE(dereference_flags)) { + arg = 1; + while (!(dereference_flags & 1)) { + dereference_flags >>= 1; + arg++; + } + pr_warn("event %s has unsafe dereference of argument %d\n", + trace_event_name(call), arg); + pr_warn("print_fmt: %s\n", fmt); + } +} + +int trace_event_raw_init(struct trace_event_call *call) { int id; - id = register_ftrace_event(&call->event); + id = register_trace_event(&call->event); if (!id) return -ENODEV; + test_event_printk(call); + return 0; } EXPORT_SYMBOL_GPL(trace_event_raw_init); -int ftrace_event_reg(struct ftrace_event_call *call, - enum trace_reg type, void *data) +bool trace_event_ignore_this_pid(struct trace_event_file *trace_file) { - struct ftrace_event_file *file = data; + struct trace_array *tr = trace_file->tr; + struct trace_pid_list *no_pid_list; + struct trace_pid_list *pid_list; + + pid_list = rcu_dereference_raw(tr->filtered_pids); + no_pid_list = rcu_dereference_raw(tr->filtered_no_pids); + + if (!pid_list && !no_pid_list) + return false; + + /* + * This is recorded at every sched_switch for this task. + * Thus, even if the task migrates the ignore value will be the same. 
+ */ + return this_cpu_read(tr->array_buffer.data->ignore_pid) != 0; +} +EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid); +void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, + struct trace_event_file *trace_file, + unsigned long len) +{ + struct trace_event_call *event_call = trace_file->event_call; + + if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) && + trace_event_ignore_this_pid(trace_file)) + return NULL; + + /* + * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables + * preemption (adding one to the preempt_count). Since we are + * interested in the preempt_count at the time the tracepoint was + * hit, we need to subtract one to offset the increment. + */ + fbuffer->trace_ctx = tracing_gen_ctx_dec(); + fbuffer->trace_file = trace_file; + + fbuffer->event = + trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file, + event_call->event.type, len, + fbuffer->trace_ctx); + if (!fbuffer->event) + return NULL; + + fbuffer->regs = NULL; + fbuffer->entry = ring_buffer_event_data(fbuffer->event); + return fbuffer->entry; +} +EXPORT_SYMBOL_GPL(trace_event_buffer_reserve); + +int trace_event_reg(struct trace_event_call *call, + enum trace_reg type, void *data) +{ + struct trace_event_file *file = data; + + WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT)); switch (type) { case TRACE_REG_REGISTER: - return tracepoint_probe_register(call->name, + return tracepoint_probe_register(call->tp, call->class->probe, file); case TRACE_REG_UNREGISTER: - tracepoint_probe_unregister(call->name, + tracepoint_probe_unregister(call->tp, call->class->probe, file); return 0; #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: - return tracepoint_probe_register(call->name, + return tracepoint_probe_register(call->tp, call->class->perf_probe, call); case TRACE_REG_PERF_UNREGISTER: - tracepoint_probe_unregister(call->name, + tracepoint_probe_unregister(call->tp, call->class->perf_probe, call); return 0; @@ -229,34 +717,58 @@ int ftrace_event_reg(struct ftrace_event_call *call, } return 0; } -EXPORT_SYMBOL_GPL(ftrace_event_reg); +EXPORT_SYMBOL_GPL(trace_event_reg); void trace_event_enable_cmd_record(bool enable) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct trace_array *tr; - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); + do_for_each_event_file(tr, file) { - if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) + if (!(file->flags & EVENT_FILE_FL_ENABLED)) continue; if (enable) { tracing_start_cmdline_record(); - set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); + set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } else { tracing_stop_cmdline_record(); - clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } } while_for_each_event_file(); - mutex_unlock(&event_mutex); } -static int __ftrace_event_enable_disable(struct ftrace_event_file *file, +void trace_event_enable_tgid_record(bool enable) +{ + struct trace_event_file *file; + struct trace_array *tr; + + lockdep_assert_held(&event_mutex); + + do_for_each_event_file(tr, file) { + if (!(file->flags & EVENT_FILE_FL_ENABLED)) + continue; + + if (enable) { + tracing_start_tgid_record(); + set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); + } else { + tracing_stop_tgid_record(); + clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, + &file->flags); + } + } while_for_each_event_file(); +} + +static int __ftrace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable) { - struct 
ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; + struct trace_array *tr = file->tr; + bool soft_mode = atomic_read(&file->sm_ref) != 0; int ret = 0; int disable; @@ -271,70 +783,94 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, * is set we do not want the event to be enabled before we * clear the bit. * - * When soft_disable is not set but the SOFT_MODE flag is, + * When soft_disable is not set but the soft_mode is, * we do nothing. Do not disable the tracepoint, otherwise - * "soft enable"s (clearing the SOFT_DISABLED bit) wont work. + * "soft enable"s (clearing the SOFT_DISABLED bit) won't work. */ if (soft_disable) { if (atomic_dec_return(&file->sm_ref) > 0) break; - disable = file->flags & FTRACE_EVENT_FL_SOFT_DISABLED; - clear_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags); + disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED; + soft_mode = false; + /* Disable use of trace_buffered_event */ + trace_buffered_event_disable(); } else - disable = !(file->flags & FTRACE_EVENT_FL_SOFT_MODE); + disable = !soft_mode; - if (disable && (file->flags & FTRACE_EVENT_FL_ENABLED)) { - clear_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags); - if (file->flags & FTRACE_EVENT_FL_RECORDED_CMD) { + if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) { + clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); + if (file->flags & EVENT_FILE_FL_RECORDED_CMD) { tracing_stop_cmdline_record(); - clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } - call->class->reg(call, TRACE_REG_UNREGISTER, file); + + if (file->flags & EVENT_FILE_FL_RECORDED_TGID) { + tracing_stop_tgid_record(); + clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); + } + + ret = call->class->reg(call, TRACE_REG_UNREGISTER, file); + + WARN_ON_ONCE(ret); } - /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ - if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) - set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + /* If in soft mode, just set the SOFT_DISABLE_BIT, else clear it */ + if (soft_mode) + set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); else - clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); break; case 1: /* * When soft_disable is set and enable is set, we want to * register the tracepoint for the event, but leave the event * as is. That means, if the event was already enabled, we do - * nothing (but set SOFT_MODE). If the event is disabled, we + * nothing (but set soft_mode). If the event is disabled, we * set SOFT_DISABLED before enabling the event tracepoint, so * it still seems to be disabled. */ if (!soft_disable) - clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); else { if (atomic_inc_return(&file->sm_ref) > 1) break; - set_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags); + soft_mode = true; + /* Enable use of trace_buffered_event */ + trace_buffered_event_enable(); } - if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) { + if (!(file->flags & EVENT_FILE_FL_ENABLED)) { + bool cmd = false, tgid = false; - /* Keep the event disabled, when going to SOFT_MODE. */ + /* Keep the event disabled, when going to soft mode. 
*/ if (soft_disable) - set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); - if (trace_flags & TRACE_ITER_RECORD_CMD) { + if (tr->trace_flags & TRACE_ITER(RECORD_CMD)) { + cmd = true; tracing_start_cmdline_record(); - set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); + set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); + } + + if (tr->trace_flags & TRACE_ITER(RECORD_TGID)) { + tgid = true; + tracing_start_tgid_record(); + set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); } + ret = call->class->reg(call, TRACE_REG_REGISTER, file); if (ret) { - tracing_stop_cmdline_record(); + if (cmd) + tracing_stop_cmdline_record(); + if (tgid) + tracing_stop_tgid_record(); pr_info("event trace: Could not enable event " - "%s\n", call->name); + "%s\n", trace_event_name(call)); break; } - set_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags); + set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); /* WAS_ENABLED gets set but never cleared. */ - call->flags |= TRACE_EVENT_FL_WAS_ENABLED; + set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags); } break; } @@ -342,20 +878,329 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, return ret; } -static int ftrace_event_enable_disable(struct ftrace_event_file *file, +int trace_event_enable_disable(struct trace_event_file *file, + int enable, int soft_disable) +{ + return __ftrace_event_enable_disable(file, enable, soft_disable); +} + +static int ftrace_event_enable_disable(struct trace_event_file *file, int enable) { return __ftrace_event_enable_disable(file, enable, 0); } +#ifdef CONFIG_MODULES +struct event_mod_load { + struct list_head list; + char *module; + char *match; + char *system; + char *event; +}; + +static void free_event_mod(struct event_mod_load *event_mod) +{ + list_del(&event_mod->list); + kfree(event_mod->module); + kfree(event_mod->match); + kfree(event_mod->system); + kfree(event_mod->event); + kfree(event_mod); +} + +static void clear_mod_events(struct trace_array *tr) +{ + struct event_mod_load *event_mod, *n; + + list_for_each_entry_safe(event_mod, n, &tr->mod_events, list) { + free_event_mod(event_mod); + } +} + +static int remove_cache_mod(struct trace_array *tr, const char *mod, + const char *match, const char *system, const char *event) +{ + struct event_mod_load *event_mod, *n; + int ret = -EINVAL; + + list_for_each_entry_safe(event_mod, n, &tr->mod_events, list) { + if (strcmp(event_mod->module, mod) != 0) + continue; + + if (match && strcmp(event_mod->match, match) != 0) + continue; + + if (system && + (!event_mod->system || strcmp(event_mod->system, system) != 0)) + continue; + + if (event && + (!event_mod->event || strcmp(event_mod->event, event) != 0)) + continue; + + free_event_mod(event_mod); + ret = 0; + } + + return ret; +} + +static int cache_mod(struct trace_array *tr, const char *mod, int set, + const char *match, const char *system, const char *event) +{ + struct event_mod_load *event_mod; + + /* If the module exists, then this just failed to find an event */ + if (module_exists(mod)) + return -EINVAL; + + /* See if this is to remove a cached filter */ + if (!set) + return remove_cache_mod(tr, mod, match, system, event); + + event_mod = kzalloc(sizeof(*event_mod), GFP_KERNEL); + if (!event_mod) + return -ENOMEM; + + INIT_LIST_HEAD(&event_mod->list); + event_mod->module = kstrdup(mod, GFP_KERNEL); + if (!event_mod->module) + goto out_free; + + if (match) { + event_mod->match = kstrdup(match, GFP_KERNEL); + if (!event_mod->match) + 
goto out_free; + } + + if (system) { + event_mod->system = kstrdup(system, GFP_KERNEL); + if (!event_mod->system) + goto out_free; + } + + if (event) { + event_mod->event = kstrdup(event, GFP_KERNEL); + if (!event_mod->event) + goto out_free; + } + + list_add(&event_mod->list, &tr->mod_events); + + return 0; + + out_free: + free_event_mod(event_mod); + + return -ENOMEM; +} +#else /* CONFIG_MODULES */ +static inline void clear_mod_events(struct trace_array *tr) { } +static int cache_mod(struct trace_array *tr, const char *mod, int set, + const char *match, const char *system, const char *event) +{ + return -EINVAL; +} +#endif + static void ftrace_clear_events(struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { ftrace_event_enable_disable(file, 0); } + clear_mod_events(tr); + mutex_unlock(&event_mutex); +} + +static void +event_filter_pid_sched_process_exit(void *data, struct task_struct *task) +{ + struct trace_pid_list *pid_list; + struct trace_array *tr = data; + + pid_list = rcu_dereference_raw(tr->filtered_pids); + trace_filter_add_remove_task(pid_list, NULL, task); + + pid_list = rcu_dereference_raw(tr->filtered_no_pids); + trace_filter_add_remove_task(pid_list, NULL, task); +} + +static void +event_filter_pid_sched_process_fork(void *data, + struct task_struct *self, + struct task_struct *task) +{ + struct trace_pid_list *pid_list; + struct trace_array *tr = data; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + trace_filter_add_remove_task(pid_list, self, task); + + pid_list = rcu_dereference_sched(tr->filtered_no_pids); + trace_filter_add_remove_task(pid_list, self, task); +} + +void trace_event_follow_fork(struct trace_array *tr, bool enable) +{ + if (enable) { + register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork, + tr, INT_MIN); + register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit, + tr, INT_MAX); + } else { + unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork, + tr); + unregister_trace_sched_process_free(event_filter_pid_sched_process_exit, + tr); + } +} + +static void +event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, + struct task_struct *prev, + struct task_struct *next, + unsigned int prev_state) +{ + struct trace_array *tr = data; + struct trace_pid_list *no_pid_list; + struct trace_pid_list *pid_list; + bool ret; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); + + /* + * Sched switch is funny, as we only want to ignore it + * in the notrace case if both prev and next should be ignored. 
+ */ + ret = trace_ignore_this_task(NULL, no_pid_list, prev) && + trace_ignore_this_task(NULL, no_pid_list, next); + + this_cpu_write(tr->array_buffer.data->ignore_pid, ret || + (trace_ignore_this_task(pid_list, NULL, prev) && + trace_ignore_this_task(pid_list, NULL, next))); +} + +static void +event_filter_pid_sched_switch_probe_post(void *data, bool preempt, + struct task_struct *prev, + struct task_struct *next, + unsigned int prev_state) +{ + struct trace_array *tr = data; + struct trace_pid_list *no_pid_list; + struct trace_pid_list *pid_list; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); + + this_cpu_write(tr->array_buffer.data->ignore_pid, + trace_ignore_this_task(pid_list, no_pid_list, next)); +} + +static void +event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task) +{ + struct trace_array *tr = data; + struct trace_pid_list *no_pid_list; + struct trace_pid_list *pid_list; + + /* Nothing to do if we are already tracing */ + if (!this_cpu_read(tr->array_buffer.data->ignore_pid)) + return; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); + + this_cpu_write(tr->array_buffer.data->ignore_pid, + trace_ignore_this_task(pid_list, no_pid_list, task)); +} + +static void +event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task) +{ + struct trace_array *tr = data; + struct trace_pid_list *no_pid_list; + struct trace_pid_list *pid_list; + + /* Nothing to do if we are not tracing */ + if (this_cpu_read(tr->array_buffer.data->ignore_pid)) + return; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); + + /* Set tracing if current is enabled */ + this_cpu_write(tr->array_buffer.data->ignore_pid, + trace_ignore_this_task(pid_list, no_pid_list, current)); +} + +static void unregister_pid_events(struct trace_array *tr) +{ + unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr); + unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr); + + unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr); + unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr); + + unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr); + unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr); + + unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr); + unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr); +} + +static void __ftrace_clear_event_pids(struct trace_array *tr, int type) +{ + struct trace_pid_list *pid_list; + struct trace_pid_list *no_pid_list; + struct trace_event_file *file; + int cpu; + + pid_list = rcu_dereference_protected(tr->filtered_pids, + lockdep_is_held(&event_mutex)); + no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, + lockdep_is_held(&event_mutex)); + + /* Make sure there's something to do */ + if (!pid_type_enabled(type, pid_list, no_pid_list)) + return; + + if (!still_need_pid_events(type, pid_list, no_pid_list)) { + unregister_pid_events(tr); + + list_for_each_entry(file, &tr->events, list) { + clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); + } + + for_each_possible_cpu(cpu) + per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false; + } + + if (type & TRACE_PIDS) + rcu_assign_pointer(tr->filtered_pids, NULL); + + if (type & TRACE_NO_PIDS) + 
rcu_assign_pointer(tr->filtered_no_pids, NULL); + + /* Wait till all users are no longer using pid filtering */ + tracepoint_synchronize_unregister(); + + if ((type & TRACE_PIDS) && pid_list) + trace_pid_list_free(pid_list); + + if ((type & TRACE_NO_PIDS) && no_pid_list) + trace_pid_list_free(no_pid_list); +} + +static void ftrace_clear_event_pids(struct trace_array *tr, int type) +{ + mutex_lock(&event_mutex); + __ftrace_clear_event_pids(tr, type); mutex_unlock(&event_mutex); } @@ -373,8 +1218,7 @@ static void __put_system(struct event_subsystem *system) kfree(filter->filter_string); kfree(filter); } - if (system->ref_count & SYSTEM_FL_FREE_NAME) - kfree(system->name); + kfree_const(system->name); kfree(system); } @@ -384,14 +1228,14 @@ static void __get_system(struct event_subsystem *system) system_refcount_inc(system); } -static void __get_system_dir(struct ftrace_subsystem_dir *dir) +static void __get_system_dir(struct trace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); dir->ref_count++; __get_system(dir->subsystem); } -static void __put_system_dir(struct ftrace_subsystem_dir *dir) +static void __put_system_dir(struct trace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); /* If the subsystem is about to be freed, the dir must be too */ @@ -402,40 +1246,54 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir) kfree(dir); } -static void put_system(struct ftrace_subsystem_dir *dir) +static void put_system(struct trace_subsystem_dir *dir) { mutex_lock(&event_mutex); __put_system_dir(dir); mutex_unlock(&event_mutex); } -/* - * Open and update trace_array ref count. - * Must have the current trace_array passed to it. - */ -static int tracing_open_generic_file(struct inode *inode, struct file *filp) +static void remove_subsystem(struct trace_subsystem_dir *dir) { - struct ftrace_event_file *file = inode->i_private; - struct trace_array *tr = file->tr; - int ret; + if (!dir) + return; - if (trace_array_get(tr) < 0) - return -ENODEV; + if (!--dir->nr_events) { + eventfs_remove_dir(dir->ei); + list_del(&dir->list); + __put_system_dir(dir); + } +} - ret = tracing_open_generic(inode, filp); - if (ret < 0) - trace_array_put(tr); - return ret; +void event_file_get(struct trace_event_file *file) +{ + refcount_inc(&file->ref); } -static int tracing_release_generic_file(struct inode *inode, struct file *filp) +void event_file_put(struct trace_event_file *file) { - struct ftrace_event_file *file = inode->i_private; - struct trace_array *tr = file->tr; + if (WARN_ON_ONCE(!refcount_read(&file->ref))) { + if (file->flags & EVENT_FILE_FL_FREED) + kmem_cache_free(file_cachep, file); + return; + } - trace_array_put(tr); + if (refcount_dec_and_test(&file->ref)) { + /* Count should only go to zero when it is freed */ + if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED))) + return; + kmem_cache_free(file_cachep, file); + } +} - return 0; +static void remove_event_file_dir(struct trace_event_file *file) +{ + eventfs_remove_dir(file->ei); + list_del(&file->list); + remove_subsystem(file->system); + free_event_filter(file->filter); + file->flags |= EVENT_FILE_FL_FREED; + event_file_put(file); } /* @@ -443,56 +1301,106 @@ static int tracing_release_generic_file(struct inode *inode, struct file *filp) */ static int __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, - const char *sub, const char *event, int set) + const char *sub, const char *event, int set, + const char *mod) { - struct ftrace_event_file *file; - struct ftrace_event_call *call; + struct 
trace_event_file *file; + struct trace_event_call *call; + char *module __free(kfree) = NULL; + const char *name; int ret = -EINVAL; + int eret = 0; + + if (mod) { + char *p; + + module = kstrdup(mod, GFP_KERNEL); + if (!module) + return -ENOMEM; + + /* Replace all '-' with '_' as that's what modules do */ + for (p = strchr(module, '-'); p; p = strchr(p + 1, '-')) + *p = '_'; + } list_for_each_entry(file, &tr->events, list) { call = file->event_call; - if (!call->name || !call->class || !call->class->reg) + /* If a module is specified, skip events that are not that module */ + if (module && (!call->module || strcmp(module_name(call->module), module))) + continue; + + name = trace_event_name(call); + + if (!name || !call->class || !call->class->reg) continue; if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) continue; if (match && - strcmp(match, call->name) != 0 && + strcmp(match, name) != 0 && strcmp(match, call->class->system) != 0) continue; if (sub && strcmp(sub, call->class->system) != 0) continue; - if (event && strcmp(event, call->name) != 0) + if (event && strcmp(event, name) != 0) continue; - ftrace_event_enable_disable(file, set); + ret = ftrace_event_enable_disable(file, set); - ret = 0; + /* + * Save the first error and return that. Some events + * may still have been enabled, but let the user + * know that something went wrong. + */ + if (ret && !eret) + eret = ret; + + ret = eret; } + /* + * If this is a module setting and nothing was found, + * check if the module was loaded. If it wasn't cache it. + */ + if (module && ret == -EINVAL && !eret) + ret = cache_mod(tr, module, set, match, sub, event); + return ret; } static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, - const char *sub, const char *event, int set) + const char *sub, const char *event, int set, + const char *mod) { int ret; mutex_lock(&event_mutex); - ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set); + ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set, mod); mutex_unlock(&event_mutex); return ret; } -static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) +int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) { - char *event = NULL, *sub = NULL, *match; + char *event = NULL, *sub = NULL, *match, *mod; + int ret; + + if (!tr) + return -ENOENT; + + /* Modules events can be appended with :mod:<module> */ + mod = strstr(buf, ":mod:"); + if (mod) { + *mod = '\0'; + /* move to the module name */ + mod += 5; + } /* * The buf format can be <subsystem>:<event-name> @@ -516,9 +1424,19 @@ static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) sub = NULL; if (!strlen(event) || strcmp(event, "*") == 0) event = NULL; + } else if (mod) { + /* Allow wildcard for no length or star */ + if (!strlen(match) || strcmp(match, "*") == 0) + match = NULL; } - return __ftrace_set_clr_event(tr, match, sub, event, set); + ret = __ftrace_set_clr_event(tr, match, sub, event, set, mod); + + /* Put back the colon to allow this to be called again */ + if (buf) + *(buf - 1) = ':'; + + return ret; } /** @@ -537,10 +1455,39 @@ int trace_set_clr_event(const char *system, const char *event, int set) { struct trace_array *tr = top_trace_array(); - return __ftrace_set_clr_event(tr, NULL, system, event, set); + if (!tr) + return -ENODEV; + + return __ftrace_set_clr_event(tr, NULL, system, event, set, NULL); } EXPORT_SYMBOL_GPL(trace_set_clr_event); +/** + * trace_array_set_clr_event - enable or disable an event for a trace array. 
+ * @tr: concerned trace array. + * @system: system name to match (NULL for any system) + * @event: event name to match (NULL for all events, within system) + * @enable: true to enable, false to disable + * + * This is a way for other parts of the kernel to enable or disable + * event recording. + * + * Returns 0 on success, -EINVAL if the parameters do not match any + * registered events. + */ +int trace_array_set_clr_event(struct trace_array *tr, const char *system, + const char *event, bool enable) +{ + int set; + + if (!tr) + return -ENOENT; + + set = (enable == true) ? 1 : 0; + return __ftrace_set_clr_event(tr, NULL, system, event, set, NULL); +} +EXPORT_SYMBOL_GPL(trace_array_set_clr_event); + /* 128 should be much more than enough */ #define EVENT_BUF_SIZE 127 @@ -556,7 +1503,7 @@ ftrace_event_write(struct file *file, const char __user *ubuf, if (!cnt) return 0; - ret = tracing_update_buffers(); + ret = tracing_update_buffers(tr); if (ret < 0) return ret; @@ -571,8 +1518,6 @@ ftrace_event_write(struct file *file, const char __user *ubuf, if (*parser.buffer == '!') set = 0; - parser.buffer[parser.idx] = 0; - ret = ftrace_set_clr_event(tr, parser.buffer + !set, set); if (ret) goto out_put; @@ -589,8 +1534,8 @@ ftrace_event_write(struct file *file, const char __user *ubuf, static void * t_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_file *file = v; - struct ftrace_event_call *call; + struct trace_event_file *file = v; + struct trace_event_call *call; struct trace_array *tr = m->private; (*pos)++; @@ -601,7 +1546,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos) * The ftrace subsystem is for showing formats only. * They can not be enabled or disabled via the event files. */ - if (call->class && call->class->reg) + if (call->class && call->class->reg && + !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) return file; } @@ -610,13 +1556,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos) static void *t_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct trace_array *tr = m->private; loff_t l; mutex_lock(&event_mutex); - file = list_entry(&tr->events, struct ftrace_event_file, list); + file = list_entry(&tr->events, struct trace_event_file, list); for (l = 0; l <= *pos; ) { file = t_next(m, file, &l); if (!file) @@ -625,47 +1571,87 @@ static void *t_start(struct seq_file *m, loff_t *pos) return file; } +enum set_event_iter_type { + SET_EVENT_FILE, + SET_EVENT_MOD, +}; + +struct set_event_iter { + enum set_event_iter_type type; + union { + struct trace_event_file *file; + struct event_mod_load *event_mod; + }; +}; + static void * s_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_file *file = v; + struct set_event_iter *iter = v; + struct trace_event_file *file; struct trace_array *tr = m->private; (*pos)++; - list_for_each_entry_continue(file, &tr->events, list) { - if (file->flags & FTRACE_EVENT_FL_ENABLED) - return file; + if (iter->type == SET_EVENT_FILE) { + file = iter->file; + list_for_each_entry_continue(file, &tr->events, list) { + if (file->flags & EVENT_FILE_FL_ENABLED) { + iter->file = file; + return iter; + } + } +#ifdef CONFIG_MODULES + iter->type = SET_EVENT_MOD; + iter->event_mod = list_entry(&tr->mod_events, struct event_mod_load, list); +#endif } +#ifdef CONFIG_MODULES + list_for_each_entry_continue(iter->event_mod, &tr->mod_events, list) + return iter; +#endif + + /* + * The iter is allocated in s_start() and passed via the 'v' + * parameter. 
To stop the iterator, NULL must be returned. But + * the return value is what the 'v' parameter in s_stop() receives + * and frees. Free iter here as it will no longer be used. + */ + kfree(iter); return NULL; } static void *s_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_file *file; struct trace_array *tr = m->private; + struct set_event_iter *iter; loff_t l; + iter = kzalloc(sizeof(*iter), GFP_KERNEL); mutex_lock(&event_mutex); + if (!iter) + return NULL; + + iter->type = SET_EVENT_FILE; + iter->file = list_entry(&tr->events, struct trace_event_file, list); - file = list_entry(&tr->events, struct ftrace_event_file, list); for (l = 0; l <= *pos; ) { - file = s_next(m, file, &l); - if (!file) + iter = s_next(m, iter, &l); + if (!iter) break; } - return file; + return iter; } static int t_show(struct seq_file *m, void *v) { - struct ftrace_event_file *file = v; - struct ftrace_event_call *call = file->event_call; + struct trace_event_file *file = v; + struct trace_event_call *call = file->event_call; if (strcmp(call->class->system, TRACE_SYSTEM) != 0) seq_printf(m, "%s:", call->class->system); - seq_printf(m, "%s\n", call->name); + seq_printf(m, "%s\n", trace_event_name(call)); return 0; } @@ -675,19 +1661,138 @@ static void t_stop(struct seq_file *m, void *p) mutex_unlock(&event_mutex); } +#ifdef CONFIG_MODULES +static int s_show(struct seq_file *m, void *v) +{ + struct set_event_iter *iter = v; + const char *system; + const char *event; + + if (iter->type == SET_EVENT_FILE) + return t_show(m, iter->file); + + /* When match is set, system and event are not */ + if (iter->event_mod->match) { + seq_printf(m, "%s:mod:%s\n", iter->event_mod->match, + iter->event_mod->module); + return 0; + } + + system = iter->event_mod->system ? : "*"; + event = iter->event_mod->event ? : "*"; + + seq_printf(m, "%s:%s:mod:%s\n", system, event, iter->event_mod->module); + + return 0; +} +#else /* CONFIG_MODULES */ +static int s_show(struct seq_file *m, void *v) +{ + struct set_event_iter *iter = v; + + return t_show(m, iter->file); +} +#endif + +static void s_stop(struct seq_file *m, void *v) +{ + kfree(v); + t_stop(m, NULL); +} + +static void * +__next(struct seq_file *m, void *v, loff_t *pos, int type) +{ + struct trace_array *tr = m->private; + struct trace_pid_list *pid_list; + + if (type == TRACE_PIDS) + pid_list = rcu_dereference_sched(tr->filtered_pids); + else + pid_list = rcu_dereference_sched(tr->filtered_no_pids); + + return trace_pid_next(pid_list, v, pos); +} + +static void * +p_next(struct seq_file *m, void *v, loff_t *pos) +{ + return __next(m, v, pos, TRACE_PIDS); +} + +static void * +np_next(struct seq_file *m, void *v, loff_t *pos) +{ + return __next(m, v, pos, TRACE_NO_PIDS); +} + +static void *__start(struct seq_file *m, loff_t *pos, int type) + __acquires(RCU) +{ + struct trace_pid_list *pid_list; + struct trace_array *tr = m->private; + + /* + * Grab the mutex, to keep calls to p_next() having the same + * tr->filtered_pids as p_start() has. + * If we just passed the tr->filtered_pids around, then RCU would + * have been enough, but doing that makes things more complex. 
+ */ + mutex_lock(&event_mutex); + rcu_read_lock_sched(); + + if (type == TRACE_PIDS) + pid_list = rcu_dereference_sched(tr->filtered_pids); + else + pid_list = rcu_dereference_sched(tr->filtered_no_pids); + + if (!pid_list) + return NULL; + + return trace_pid_start(pid_list, pos); +} + +static void *p_start(struct seq_file *m, loff_t *pos) + __acquires(RCU) +{ + return __start(m, pos, TRACE_PIDS); +} + +static void *np_start(struct seq_file *m, loff_t *pos) + __acquires(RCU) +{ + return __start(m, pos, TRACE_NO_PIDS); +} + +static void p_stop(struct seq_file *m, void *p) + __releases(RCU) +{ + rcu_read_unlock_sched(); + mutex_unlock(&event_mutex); +} + static ssize_t event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *file = filp->private_data; + struct trace_event_file *file; + unsigned long flags; char buf[4] = "0"; - if (file->flags & FTRACE_EVENT_FL_ENABLED && - !(file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) + mutex_lock(&event_mutex); + file = event_file_file(filp); + if (likely(file)) + flags = file->flags; + mutex_unlock(&event_mutex); + + if (!file) + return -ENODEV; + + if (flags & EVENT_FILE_FL_ENABLED && + !(flags & EVENT_FILE_FL_SOFT_DISABLED)) strcpy(buf, "1"); - if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED || - file->flags & FTRACE_EVENT_FL_SOFT_MODE) + if (atomic_read(&file->sm_ref) != 0) strcat(buf, "*"); strcat(buf, "\n"); @@ -699,27 +1804,28 @@ static ssize_t event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *file = filp->private_data; + struct trace_event_file *file; unsigned long val; int ret; - if (!file) - return -EINVAL; - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; - ret = tracing_update_buffers(); - if (ret < 0) - return ret; + guard(mutex)(&event_mutex); switch (val) { case 0: case 1: - mutex_lock(&event_mutex); + file = event_file_file(filp); + if (!file) + return -ENODEV; + ret = tracing_update_buffers(file->tr); + if (ret < 0) + return ret; ret = ftrace_event_enable_disable(file, val); - mutex_unlock(&event_mutex); + if (ret < 0) + return ret; break; default: @@ -728,30 +1834,31 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, *ppos += cnt; - return ret ? ret : cnt; + return cnt; } -static ssize_t -system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, - loff_t *ppos) +/* + * Returns: + * 0 : no events exist? 
+ * 1 : all events are disabled + * 2 : all events are enabled + * 3 : some events are enabled and some are enabled + */ +int trace_events_enabled(struct trace_array *tr, const char *system) { - const char set_to_char[4] = { '?', '0', '1', 'X' }; - struct ftrace_subsystem_dir *dir = filp->private_data; - struct event_subsystem *system = dir->subsystem; - struct ftrace_event_call *call; - struct ftrace_event_file *file; - struct trace_array *tr = dir->tr; - char buf[2]; + struct trace_event_call *call; + struct trace_event_file *file; int set = 0; - int ret; - mutex_lock(&event_mutex); + guard(mutex)(&event_mutex); + list_for_each_entry(file, &tr->events, list) { call = file->event_call; - if (!call->name || !call->class || !call->class->reg) + if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || + !trace_event_name(call) || !call->class || !call->class->reg) continue; - if (system && strcmp(call->class->system, system->name) != 0) + if (system && strcmp(call->class->system, system) != 0) continue; /* @@ -759,7 +1866,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, * or if all events or cleared, or if we have * a mixture. */ - set |= (1 << !!(file->flags & FTRACE_EVENT_FL_ENABLED)); + set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED)); /* * If we have a mixture, no need to look further. @@ -767,7 +1874,23 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, if (set == 3) break; } - mutex_unlock(&event_mutex); + + return set; +} + +static ssize_t +system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + const char set_to_char[4] = { '?', '0', '1', 'X' }; + struct trace_subsystem_dir *dir = filp->private_data; + struct event_subsystem *system = dir->subsystem; + struct trace_array *tr = dir->tr; + char buf[2]; + int set; + int ret; + + set = trace_events_enabled(tr, system ? 
system->name : NULL); buf[0] = set_to_char[set]; buf[1] = '\n'; @@ -781,7 +1904,7 @@ static ssize_t system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_subsystem_dir *dir = filp->private_data; + struct trace_subsystem_dir *dir = filp->private_data; struct event_subsystem *system = dir->subsystem; const char *name = NULL; unsigned long val; @@ -791,7 +1914,7 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, if (ret) return ret; - ret = tracing_update_buffers(); + ret = tracing_update_buffers(dir->tr); if (ret < 0) return ret; @@ -805,7 +1928,7 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, if (system) name = system->name; - ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val); + ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val, NULL); if (ret) goto out; @@ -825,73 +1948,49 @@ enum { static void *f_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_call *call = m->private; - struct ftrace_event_field *field; + struct trace_event_file *file = event_file_data(m->private); + struct trace_event_call *call = file->event_call; struct list_head *common_head = &ftrace_common_fields; struct list_head *head = trace_get_fields(call); + struct list_head *node = v; (*pos)++; switch ((unsigned long)v) { case FORMAT_HEADER: - if (unlikely(list_empty(common_head))) - return NULL; - - field = list_entry(common_head->prev, - struct ftrace_event_field, link); - return field; + node = common_head; + break; case FORMAT_FIELD_SEPERATOR: - if (unlikely(list_empty(head))) - return NULL; - - field = list_entry(head->prev, struct ftrace_event_field, link); - return field; + node = head; + break; case FORMAT_PRINTFMT: /* all done */ return NULL; } - field = v; - if (field->link.prev == common_head) + node = node->prev; + if (node == common_head) return (void *)FORMAT_FIELD_SEPERATOR; - else if (field->link.prev == head) + else if (node == head) return (void *)FORMAT_PRINTFMT; - - field = list_entry(field->link.prev, struct ftrace_event_field, link); - - return field; -} - -static void *f_start(struct seq_file *m, loff_t *pos) -{ - loff_t l = 0; - void *p; - - /* Start by showing the header */ - if (!*pos) - return (void *)FORMAT_HEADER; - - p = (void *)FORMAT_HEADER; - do { - p = f_next(m, p, &l); - } while (p && l < *pos); - - return p; + else + return node; } static int f_show(struct seq_file *m, void *v) { - struct ftrace_event_call *call = m->private; + struct trace_event_file *file = event_file_data(m->private); + struct trace_event_call *call = file->event_call; struct ftrace_event_field *field; const char *array_descriptor; switch ((unsigned long)v) { case FORMAT_HEADER: - seq_printf(m, "name: %s\n", call->name); + seq_printf(m, "name: %s\n", trace_event_name(call)); seq_printf(m, "ID: %d\n", call->event.type); - seq_printf(m, "format:\n"); + seq_puts(m, "format:\n"); return 0; case FORMAT_FIELD_SEPERATOR: @@ -904,8 +2003,7 @@ static int f_show(struct seq_file *m, void *v) return 0; } - field = v; - + field = list_entry(v, struct ftrace_event_field, link); /* * Smartly shows the array type(except dynamic array). 
* Normal: @@ -915,25 +2013,49 @@ static int f_show(struct seq_file *m, void *v) */ array_descriptor = strchr(field->type, '['); - if (!strncmp(field->type, "__data_loc", 10)) + if (str_has_prefix(field->type, "__data_loc")) array_descriptor = NULL; if (!array_descriptor) seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", field->type, field->name, field->offset, field->size, !!field->is_signed); - else - seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", + else if (field->len) + seq_printf(m, "\tfield:%.*s %s[%d];\toffset:%u;\tsize:%u;\tsigned:%d;\n", (int)(array_descriptor - field->type), field->type, field->name, - array_descriptor, field->offset, + field->len, field->offset, field->size, !!field->is_signed); + else + seq_printf(m, "\tfield:%.*s %s[];\toffset:%u;\tsize:%u;\tsigned:%d;\n", + (int)(array_descriptor - field->type), + field->type, field->name, + field->offset, field->size, !!field->is_signed); return 0; } +static void *f_start(struct seq_file *m, loff_t *pos) +{ + struct trace_event_file *file; + void *p = (void *)FORMAT_HEADER; + loff_t l = 0; + + /* ->stop() is called even if ->start() fails */ + mutex_lock(&event_mutex); + file = event_file_file(m->private); + if (!file) + return ERR_PTR(-ENODEV); + + while (l < *pos && p) + p = f_next(m, p, &l); + + return p; +} + static void f_stop(struct seq_file *m, void *p) { + mutex_unlock(&event_mutex); } static const struct seq_operations trace_format_seq_ops = { @@ -945,62 +2067,65 @@ static const struct seq_operations trace_format_seq_ops = { static int trace_format_open(struct inode *inode, struct file *file) { - struct ftrace_event_call *call = inode->i_private; struct seq_file *m; int ret; + /* Do we want to hide event format files on tracefs lockdown? 
*/ + ret = seq_open(file, &trace_format_seq_ops); if (ret < 0) return ret; m = file->private_data; - m->private = call; + m->private = file; return 0; } +#ifdef CONFIG_PERF_EVENTS static ssize_t event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_call *call = filp->private_data; - struct trace_seq *s; - int r; - - if (*ppos) - return 0; + int id = (long)event_file_data(filp); + char buf[32]; + int len; - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return -ENOMEM; + if (unlikely(!id)) + return -ENODEV; - trace_seq_init(s); - trace_seq_printf(s, "%d\n", call->event.type); + len = sprintf(buf, "%d\n", id); - r = simple_read_from_buffer(ubuf, cnt, ppos, - s->buffer, s->len); - kfree(s); - return r; + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); } +#endif static ssize_t event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_call *call = filp->private_data; + struct trace_event_file *file; struct trace_seq *s; - int r; + int r = -ENODEV; if (*ppos) return 0; s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) return -ENOMEM; trace_seq_init(s); - print_event_filter(call, s); - r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); + mutex_lock(&event_mutex); + file = event_file_file(filp); + if (file) + print_event_filter(file, s); + mutex_unlock(&event_mutex); + + if (file) + r = simple_read_from_buffer(ubuf, cnt, ppos, + s->buffer, trace_seq_used(s)); kfree(s); @@ -1011,25 +2136,28 @@ static ssize_t event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_call *call = filp->private_data; + struct trace_event_file *file; char *buf; - int err; + int err = -ENODEV; if (cnt >= PAGE_SIZE) return -EINVAL; - buf = (char *)__get_free_page(GFP_TEMPORARY); - if (!buf) - return -ENOMEM; + buf = memdup_user_nul(ubuf, cnt); + if (IS_ERR(buf)) + return PTR_ERR(buf); - if (copy_from_user(buf, ubuf, cnt)) { - free_page((unsigned long) buf); - return -EFAULT; + mutex_lock(&event_mutex); + file = event_file_file(filp); + if (file) { + if (file->flags & EVENT_FILE_FL_FREED) + err = -ENODEV; + else + err = apply_event_filter(file, buf); } - buf[cnt] = '\0'; + mutex_unlock(&event_mutex); - err = apply_event_filter(call, buf); - free_page((unsigned long) buf); + kfree(buf); if (err < 0) return err; @@ -1042,18 +2170,23 @@ static LIST_HEAD(event_subsystems); static int subsystem_open(struct inode *inode, struct file *filp) { + struct trace_subsystem_dir *dir = NULL, *iter_dir; + struct trace_array *tr = NULL, *iter_tr; struct event_subsystem *system = NULL; - struct ftrace_subsystem_dir *dir = NULL; /* Initialize for gcc */ - struct trace_array *tr; int ret; + if (tracing_is_disabled()) + return -ENODEV; + /* Make sure the system still exists */ - mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); - list_for_each_entry(tr, &ftrace_trace_arrays, list) { - list_for_each_entry(dir, &tr->systems, list) { - if (dir == inode->i_private) { + mutex_lock(&trace_types_lock); + list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) { + list_for_each_entry(iter_dir, &iter_tr->systems, list) { + if (iter_dir == inode->i_private) { /* Don't open systems with no events */ + tr = iter_tr; + dir = iter_dir; if (dir->nr_events) { __get_system_dir(dir); system = dir->subsystem; @@ -1063,15 +2196,12 @@ static int subsystem_open(struct inode *inode, struct file *filp) } } exit_loop: - mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + 
mutex_unlock(&event_mutex); if (!system) return -ENODEV; - /* Some versions of gcc think dir can be uninitialized here */ - WARN_ON(!dir); - /* Still need to increment the ref count of the system */ if (trace_array_get(tr) < 0) { put_system(dir); @@ -1089,36 +2219,29 @@ static int subsystem_open(struct inode *inode, struct file *filp) static int system_tr_open(struct inode *inode, struct file *filp) { - struct ftrace_subsystem_dir *dir; + struct trace_subsystem_dir *dir; struct trace_array *tr = inode->i_private; int ret; - if (trace_array_get(tr) < 0) - return -ENODEV; - /* Make a temporary dir that has no system but points to tr */ dir = kzalloc(sizeof(*dir), GFP_KERNEL); - if (!dir) { - trace_array_put(tr); + if (!dir) return -ENOMEM; - } - - dir->tr = tr; - ret = tracing_open_generic(inode, filp); + ret = tracing_open_generic_tr(inode, filp); if (ret < 0) { - trace_array_put(tr); kfree(dir); + return ret; } - + dir->tr = tr; filp->private_data = dir; - return ret; + return 0; } static int subsystem_release(struct inode *inode, struct file *file) { - struct ftrace_subsystem_dir *dir = file->private_data; + struct trace_subsystem_dir *dir = file->private_data; trace_array_put(dir->tr); @@ -1139,7 +2262,7 @@ static ssize_t subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_subsystem_dir *dir = filp->private_data; + struct trace_subsystem_dir *dir = filp->private_data; struct event_subsystem *system = dir->subsystem; struct trace_seq *s; int r; @@ -1154,7 +2277,8 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); print_subsystem_event_filter(system, s); - r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); + r = simple_read_from_buffer(ubuf, cnt, ppos, + s->buffer, trace_seq_used(s)); kfree(s); @@ -1165,25 +2289,19 @@ static ssize_t subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_subsystem_dir *dir = filp->private_data; + struct trace_subsystem_dir *dir = filp->private_data; char *buf; int err; if (cnt >= PAGE_SIZE) return -EINVAL; - buf = (char *)__get_free_page(GFP_TEMPORARY); - if (!buf) - return -ENOMEM; - - if (copy_from_user(buf, ubuf, cnt)) { - free_page((unsigned long) buf); - return -EFAULT; - } - buf[cnt] = '\0'; + buf = memdup_user_nul(ubuf, cnt); + if (IS_ERR(buf)) + return PTR_ERR(buf); err = apply_subsystem_event_filter(dir, buf); - free_page((unsigned long) buf); + kfree(buf); if (err < 0) return err; @@ -1193,9 +2311,9 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, } static ssize_t -show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +show_header_page_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - int (*func)(struct trace_seq *s) = filp->private_data; + struct trace_array *tr = filp->private_data; struct trace_seq *s; int r; @@ -1208,16 +2326,171 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) trace_seq_init(s); - func(s); - r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); + ring_buffer_print_page_header(tr->array_buffer.buffer, s); + r = simple_read_from_buffer(ubuf, cnt, ppos, + s->buffer, trace_seq_used(s)); kfree(s); return r; } +static ssize_t +show_header_event_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +{ + struct trace_seq *s; + int r; + + if (*ppos) + return 0; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + 
trace_seq_init(s); + + ring_buffer_print_entry_header(s); + r = simple_read_from_buffer(ubuf, cnt, ppos, + s->buffer, trace_seq_used(s)); + + kfree(s); + + return r; +} + +static void ignore_task_cpu(void *data) +{ + struct trace_array *tr = data; + struct trace_pid_list *pid_list; + struct trace_pid_list *no_pid_list; + + /* + * This function is called by on_each_cpu() while the + * event_mutex is held. + */ + pid_list = rcu_dereference_protected(tr->filtered_pids, + mutex_is_locked(&event_mutex)); + no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, + mutex_is_locked(&event_mutex)); + + this_cpu_write(tr->array_buffer.data->ignore_pid, + trace_ignore_this_task(pid_list, no_pid_list, current)); +} + +static void register_pid_events(struct trace_array *tr) +{ + /* + * Register a probe that is called before all other probes + * to set ignore_pid if next or prev do not match. + * Register a probe this is called after all other probes + * to only keep ignore_pid set if next pid matches. + */ + register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre, + tr, INT_MAX); + register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post, + tr, 0); + + register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, + tr, INT_MAX); + register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, + tr, 0); + + register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, + tr, INT_MAX); + register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, + tr, 0); + + register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre, + tr, INT_MAX); + register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post, + tr, 0); +} + +static ssize_t +event_pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, int type) +{ + struct seq_file *m = filp->private_data; + struct trace_array *tr = m->private; + struct trace_pid_list *filtered_pids = NULL; + struct trace_pid_list *other_pids = NULL; + struct trace_pid_list *pid_list; + struct trace_event_file *file; + ssize_t ret; + + if (!cnt) + return 0; + + ret = tracing_update_buffers(tr); + if (ret < 0) + return ret; + + guard(mutex)(&event_mutex); + + if (type == TRACE_PIDS) { + filtered_pids = rcu_dereference_protected(tr->filtered_pids, + lockdep_is_held(&event_mutex)); + other_pids = rcu_dereference_protected(tr->filtered_no_pids, + lockdep_is_held(&event_mutex)); + } else { + filtered_pids = rcu_dereference_protected(tr->filtered_no_pids, + lockdep_is_held(&event_mutex)); + other_pids = rcu_dereference_protected(tr->filtered_pids, + lockdep_is_held(&event_mutex)); + } + + ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt); + if (ret < 0) + return ret; + + if (type == TRACE_PIDS) + rcu_assign_pointer(tr->filtered_pids, pid_list); + else + rcu_assign_pointer(tr->filtered_no_pids, pid_list); + + list_for_each_entry(file, &tr->events, list) { + set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); + } + + if (filtered_pids) { + tracepoint_synchronize_unregister(); + trace_pid_list_free(filtered_pids); + } else if (pid_list && !other_pids) { + register_pid_events(tr); + } + + /* + * Ignoring of pids is done at task switch. But we have to + * check for those tasks that are currently running. + * Always do this in case a pid was appended or removed. 
+ */ + on_each_cpu(ignore_task_cpu, tr, 1); + + *ppos += ret; + + return ret; +} + +static ssize_t +ftrace_event_pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS); +} + +static ssize_t +ftrace_event_npid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS); +} + static int ftrace_event_avail_open(struct inode *inode, struct file *file); static int ftrace_event_set_open(struct inode *inode, struct file *file); +static int ftrace_event_set_pid_open(struct inode *inode, struct file *file); +static int ftrace_event_set_npid_open(struct inode *inode, struct file *file); +static int ftrace_event_release(struct inode *inode, struct file *file); static const struct seq_operations show_event_seq_ops = { .start = t_start, @@ -1229,8 +2502,22 @@ static const struct seq_operations show_event_seq_ops = { static const struct seq_operations show_set_event_seq_ops = { .start = s_start, .next = s_next, - .show = t_show, - .stop = t_stop, + .show = s_show, + .stop = s_stop, +}; + +static const struct seq_operations show_set_pid_seq_ops = { + .start = p_start, + .next = p_next, + .show = trace_pid_show, + .stop = p_stop, +}; + +static const struct seq_operations show_set_no_pid_seq_ops = { + .start = np_start, + .next = np_next, + .show = trace_pid_show, + .stop = p_stop, }; static const struct file_operations ftrace_avail_fops = { @@ -1245,14 +2532,30 @@ static const struct file_operations ftrace_set_event_fops = { .read = seq_read, .write = ftrace_event_write, .llseek = seq_lseek, - .release = seq_release, + .release = ftrace_event_release, +}; + +static const struct file_operations ftrace_set_event_pid_fops = { + .open = ftrace_event_set_pid_open, + .read = seq_read, + .write = ftrace_event_pid_write, + .llseek = seq_lseek, + .release = ftrace_event_release, +}; + +static const struct file_operations ftrace_set_event_notrace_pid_fops = { + .open = ftrace_event_set_npid_open, + .read = seq_read, + .write = ftrace_event_npid_write, + .llseek = seq_lseek, + .release = ftrace_event_release, }; static const struct file_operations ftrace_enable_fops = { - .open = tracing_open_generic_file, + .open = tracing_open_file_tr, .read = event_enable_read, .write = event_enable_write, - .release = tracing_release_generic_file, + .release = tracing_release_file_tr, .llseek = default_llseek, }; @@ -1263,16 +2566,18 @@ static const struct file_operations ftrace_event_format_fops = { .release = seq_release, }; +#ifdef CONFIG_PERF_EVENTS static const struct file_operations ftrace_event_id_fops = { - .open = tracing_open_generic, .read = event_id_read, .llseek = default_llseek, }; +#endif static const struct file_operations ftrace_event_filter_fops = { - .open = tracing_open_generic, + .open = tracing_open_file_tr, .read = event_filter_read, .write = event_filter_write, + .release = tracing_release_file_tr, .llseek = default_llseek, }; @@ -1300,10 +2605,18 @@ static const struct file_operations ftrace_tr_enable_fops = { .release = subsystem_release, }; -static const struct file_operations ftrace_show_header_fops = { - .open = tracing_open_generic, - .read = show_header, +static const struct file_operations ftrace_show_header_page_fops = { + .open = tracing_open_generic_tr, + .read = show_header_page_file, + .llseek = default_llseek, + .release = tracing_release_generic_tr, +}; + +static const struct file_operations 
ftrace_show_header_event_fops = { + .open = tracing_open_generic_tr, + .read = show_header_event_file, .llseek = default_llseek, + .release = tracing_release_generic_tr, }; static int @@ -1313,6 +2626,10 @@ ftrace_event_open(struct inode *inode, struct file *file, struct seq_file *m; int ret; + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + ret = seq_open(file, seq_ops); if (ret < 0) return ret; @@ -1323,11 +2640,21 @@ ftrace_event_open(struct inode *inode, struct file *file, return ret; } +static int ftrace_event_release(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + + return seq_release(inode, file); +} + static int ftrace_event_avail_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_event_seq_ops; + /* Checks for tracefs lockdown */ return ftrace_event_open(inode, file, seq_ops); } @@ -1336,12 +2663,62 @@ ftrace_event_set_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_set_event_seq_ops; struct trace_array *tr = inode->i_private; + int ret; + + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) ftrace_clear_events(tr); - return ftrace_event_open(inode, file, seq_ops); + ret = ftrace_event_open(inode, file, seq_ops); + if (ret < 0) + trace_array_put(tr); + return ret; +} + +static int +ftrace_event_set_pid_open(struct inode *inode, struct file *file) +{ + const struct seq_operations *seq_ops = &show_set_pid_seq_ops; + struct trace_array *tr = inode->i_private; + int ret; + + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; + + if ((file->f_mode & FMODE_WRITE) && + (file->f_flags & O_TRUNC)) + ftrace_clear_event_pids(tr, TRACE_PIDS); + + ret = ftrace_event_open(inode, file, seq_ops); + if (ret < 0) + trace_array_put(tr); + return ret; +} + +static int +ftrace_event_set_npid_open(struct inode *inode, struct file *file) +{ + const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops; + struct trace_array *tr = inode->i_private; + int ret; + + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; + + if ((file->f_mode & FMODE_WRITE) && + (file->f_flags & O_TRUNC)) + ftrace_clear_event_pids(tr, TRACE_NO_PIDS); + + ret = ftrace_event_open(inode, file, seq_ops); + if (ret < 0) + trace_array_put(tr); + return ret; } static struct event_subsystem * @@ -1357,15 +2734,9 @@ create_new_subsystem(const char *name) system->ref_count = 1; /* Only allocate if dynamic (kprobes and modules) */ - if (!core_kernel_data((unsigned long)name)) { - system->ref_count |= SYSTEM_FL_FREE_NAME; - system->name = kstrdup(name, GFP_KERNEL); - if (!system->name) - goto out_free; - } else - system->name = name; - - system->filter = NULL; + system->name = kstrdup_const(name, GFP_KERNEL); + if (!system->name) + goto out_free; system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL); if (!system->filter) @@ -1376,19 +2747,45 @@ create_new_subsystem(const char *name) return system; out_free: - if (system->ref_count & SYSTEM_FL_FREE_NAME) - kfree(system->name); + kfree_const(system->name); kfree(system); return NULL; } -static struct dentry * +static int system_callback(const char *name, umode_t *mode, void **data, + const struct file_operations **fops) +{ + if (strcmp(name, "filter") == 0) + *fops = &ftrace_subsystem_filter_fops; + + else if (strcmp(name, "enable") == 0) + *fops = &ftrace_system_enable_fops; + + else + return 0; + + *mode = 
TRACE_MODE_WRITE; + return 1; +} + +static struct eventfs_inode * event_subsystem_dir(struct trace_array *tr, const char *name, - struct ftrace_event_file *file, struct dentry *parent) + struct trace_event_file *file, struct eventfs_inode *parent) { - struct ftrace_subsystem_dir *dir; - struct event_subsystem *system; - struct dentry *entry; + struct event_subsystem *system, *iter; + struct trace_subsystem_dir *dir; + struct eventfs_inode *ei; + int nr_entries; + static struct eventfs_entry system_entries[] = { + { + .name = "filter", + .callback = system_callback, + }, + { + .name = "enable", + .callback = system_callback, + } + }; /* First see if we did not already create this dir */ list_for_each_entry(dir, &tr->systems, list) { @@ -1396,18 +2793,18 @@ event_subsystem_dir(struct trace_array *tr, const char *name, if (strcmp(system->name, name) == 0) { dir->nr_events++; file->system = dir; - return dir->entry; + return dir->ei; } } /* Now see if the system itself exists. */ - list_for_each_entry(system, &event_subsystems, list) { - if (strcmp(system->name, name) == 0) + system = NULL; + list_for_each_entry(iter, &event_subsystems, list) { + if (strcmp(iter->name, name) == 0) { + system = iter; break; + } } - /* Reset system variable when not found */ - if (&system->list == &event_subsystems) - system = NULL; dir = kmalloc(sizeof(*dir), GFP_KERNEL); if (!dir) @@ -1420,135 +2817,259 @@ event_subsystem_dir(struct trace_array *tr, const char *name, } else __get_system(system); - dir->entry = debugfs_create_dir(name, parent); - if (!dir->entry) { - pr_warning("Failed to create system directory %s\n", name); + /* ftrace only has directories no files */ + if (strcmp(name, "ftrace") == 0) + nr_entries = 0; + else + nr_entries = ARRAY_SIZE(system_entries); + + ei = eventfs_create_dir(name, parent, system_entries, nr_entries, dir); + if (IS_ERR(ei)) { + pr_warn("Failed to create system directory %s\n", name); __put_system(system); goto out_free; } + dir->ei = ei; dir->tr = tr; dir->ref_count = 1; dir->nr_events = 1; dir->subsystem = system; file->system = dir; - entry = debugfs_create_file("filter", 0644, dir->entry, dir, - &ftrace_subsystem_filter_fops); - if (!entry) { - kfree(system->filter); - system->filter = NULL; - pr_warning("Could not create debugfs '%s/filter' entry\n", name); - } - - trace_create_file("enable", 0644, dir->entry, dir, - &ftrace_system_enable_fops); - list_add(&dir->list, &tr->systems); - return dir->entry; + return dir->ei; out_free: kfree(dir); out_fail: /* Only print this message if failed on memory allocation */ if (!dir || !system) - pr_warning("No memory to create event subsystem %s\n", - name); + pr_warn("No memory to create event subsystem %s\n", name); return NULL; } static int -event_create_dir(struct dentry *parent, - struct ftrace_event_file *file, - const struct file_operations *id, - const struct file_operations *enable, - const struct file_operations *filter, - const struct file_operations *format) -{ - struct ftrace_event_call *call = file->event_call; - struct trace_array *tr = file->tr; +event_define_fields(struct trace_event_call *call) +{ struct list_head *head; - struct dentry *d_events; - int ret; + int ret = 0; /* - * If the trace point header did not define TRACE_SYSTEM - * then the system would be called "TRACE_SYSTEM". + * Other events may have the same class. Only update + * the fields if they are not already defined. 
*/ - if (strcmp(call->class->system, TRACE_SYSTEM) != 0) { - d_events = event_subsystem_dir(tr, call->class->system, file, parent); - if (!d_events) - return -ENOMEM; - } else - d_events = parent; + head = trace_get_fields(call); + if (list_empty(head)) { + struct trace_event_fields *field = call->class->fields_array; + unsigned int offset = sizeof(struct trace_entry); - file->dir = debugfs_create_dir(call->name, d_events); - if (!file->dir) { - pr_warning("Could not create debugfs '%s' directory\n", - call->name); - return -1; + for (; field->type; field++) { + if (field->type == TRACE_FUNCTION_TYPE) { + field->define_fields(call); + break; + } + + offset = ALIGN(offset, field->align); + ret = trace_define_field_ext(call, field->type, field->name, + offset, field->size, + field->is_signed, field->filter_type, + field->len, field->needs_test); + if (WARN_ON_ONCE(ret)) { + pr_err("error code is %d\n", ret); + break; + } + + offset += field->size; + } } - if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) - trace_create_file("enable", 0644, file->dir, file, - enable); + return ret; +} -#ifdef CONFIG_PERF_EVENTS - if (call->event.type && call->class->reg) - trace_create_file("id", 0444, file->dir, call, - id); -#endif +static int event_callback(const char *name, umode_t *mode, void **data, + const struct file_operations **fops) +{ + struct trace_event_file *file = *data; + struct trace_event_call *call = file->event_call; + + if (strcmp(name, "format") == 0) { + *mode = TRACE_MODE_READ; + *fops = &ftrace_event_format_fops; + return 1; + } /* - * Other events may have the same class. Only update - * the fields if they are not already defined. + * Only event directories that can be enabled should have + * triggers or filters, with the exception of the "print" + * event that can have a "trigger" file. 
*/ - head = trace_get_fields(call); - if (list_empty(head)) { - ret = call->class->define_fields(call); - if (ret < 0) { - pr_warning("Could not initialize trace point" - " events/%s\n", call->name); - return -1; + if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) { + if (call->class->reg && strcmp(name, "enable") == 0) { + *mode = TRACE_MODE_WRITE; + *fops = &ftrace_enable_fops; + return 1; + } + + if (strcmp(name, "filter") == 0) { + *mode = TRACE_MODE_WRITE; + *fops = &ftrace_event_filter_fops; + return 1; + } + } + + if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || + strcmp(trace_event_name(call), "print") == 0) { + if (strcmp(name, "trigger") == 0) { + *mode = TRACE_MODE_WRITE; + *fops = &event_trigger_fops; + return 1; } } - trace_create_file("filter", 0644, file->dir, call, - filter); - trace_create_file("format", 0444, file->dir, call, - format); +#ifdef CONFIG_PERF_EVENTS + if (call->event.type && call->class->reg && + strcmp(name, "id") == 0) { + *mode = TRACE_MODE_READ; + *data = (void *)(long)call->event.type; + *fops = &ftrace_event_id_fops; + return 1; + } +#endif +#ifdef CONFIG_HIST_TRIGGERS + if (strcmp(name, "hist") == 0) { + *mode = TRACE_MODE_READ; + *fops = &event_hist_fops; + return 1; + } +#endif +#ifdef CONFIG_HIST_TRIGGERS_DEBUG + if (strcmp(name, "hist_debug") == 0) { + *mode = TRACE_MODE_READ; + *fops = &event_hist_debug_fops; + return 1; + } +#endif +#ifdef CONFIG_TRACE_EVENT_INJECT + if (call->event.type && call->class->reg && + strcmp(name, "inject") == 0) { + *mode = 0200; + *fops = &event_inject_fops; + return 1; + } +#endif return 0; } -static void remove_subsystem(struct ftrace_subsystem_dir *dir) +/* The file is incremented on creation and freeing the enable file decrements it */ +static void event_release(const char *name, void *data) { - if (!dir) - return; + struct trace_event_file *file = data; - if (!--dir->nr_events) { - debugfs_remove_recursive(dir->entry); - list_del(&dir->list); - __put_system_dir(dir); + event_file_put(file); +} + +static int +event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) +{ + struct trace_event_call *call = file->event_call; + struct trace_array *tr = file->tr; + struct eventfs_inode *e_events; + struct eventfs_inode *ei; + const char *name; + int nr_entries; + int ret; + static struct eventfs_entry event_entries[] = { + { + .name = "enable", + .callback = event_callback, + .release = event_release, + }, + { + .name = "filter", + .callback = event_callback, + }, + { + .name = "trigger", + .callback = event_callback, + }, + { + .name = "format", + .callback = event_callback, + }, +#ifdef CONFIG_PERF_EVENTS + { + .name = "id", + .callback = event_callback, + }, +#endif +#ifdef CONFIG_HIST_TRIGGERS + { + .name = "hist", + .callback = event_callback, + }, +#endif +#ifdef CONFIG_HIST_TRIGGERS_DEBUG + { + .name = "hist_debug", + .callback = event_callback, + }, +#endif +#ifdef CONFIG_TRACE_EVENT_INJECT + { + .name = "inject", + .callback = event_callback, + }, +#endif + }; + + /* + * If the trace point header did not define TRACE_SYSTEM + * then the system would be called "TRACE_SYSTEM". This should + * never happen. 
+ */ + if (WARN_ON_ONCE(strcmp(call->class->system, TRACE_SYSTEM) == 0)) + return -ENODEV; + + e_events = event_subsystem_dir(tr, call->class->system, file, parent); + if (!e_events) + return -ENOMEM; + + nr_entries = ARRAY_SIZE(event_entries); + + name = trace_event_name(call); + ei = eventfs_create_dir(name, e_events, event_entries, nr_entries, file); + if (IS_ERR(ei)) { + pr_warn("Could not create tracefs '%s' directory\n", name); + return -1; } + + file->ei = ei; + + ret = event_define_fields(call); + if (ret < 0) { + pr_warn("Could not initialize trace point events/%s\n", name); + return ret; + } + + /* Gets decremented on freeing of the "enable" file */ + event_file_get(file); + + return 0; } -static void remove_event_from_tracers(struct ftrace_event_call *call) +static void remove_event_from_tracers(struct trace_event_call *call) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct trace_array *tr; do_for_each_event_file_safe(tr, file) { - if (file->event_call != call) continue; - list_del(&file->list); - debugfs_remove_recursive(file->dir); - remove_subsystem(file->system); - kmem_cache_free(file_cachep, file); - + remove_event_file_dir(file); /* * The do_for_each_event_file_safe() is * a double loop. After finding the call for this @@ -1559,14 +3080,18 @@ static void remove_event_from_tracers(struct ftrace_event_call *call) } while_for_each_event_file(); } -static void event_remove(struct ftrace_event_call *call) +static void event_remove(struct trace_event_call *call) { struct trace_array *tr; - struct ftrace_event_file *file; + struct trace_event_file *file; do_for_each_event_file(tr, file) { if (file->event_call != call) continue; + + if (file->flags & EVENT_FILE_FL_WAS_ENABLED) + tr->clear_trace = true; + ftrace_event_enable_disable(file, 0); /* * The do_for_each_event_file() is @@ -1578,30 +3103,31 @@ static void event_remove(struct ftrace_event_call *call) } while_for_each_event_file(); if (call->event.funcs) - __unregister_ftrace_event(&call->event); + __unregister_trace_event(&call->event); remove_event_from_tracers(call); list_del(&call->list); } -static int event_init(struct ftrace_event_call *call) +static int event_init(struct trace_event_call *call) { int ret = 0; + const char *name; - if (WARN_ON(!call->name)) + name = trace_event_name(call); + if (WARN_ON(!name)) return -EINVAL; if (call->class->raw_init) { ret = call->class->raw_init(call); if (ret < 0 && ret != -ENOSYS) - pr_warn("Could not initialize trace events/%s\n", - call->name); + pr_warn("Could not initialize trace events/%s\n", name); } return ret; } static int -__register_event(struct ftrace_event_call *call, struct module *mod) +__register_event(struct trace_event_call *call, struct module *mod) { int ret; @@ -1609,108 +3135,587 @@ __register_event(struct ftrace_event_call *call, struct module *mod) if (ret < 0) return ret; + down_write(&trace_event_sem); list_add(&call->list, &ftrace_events); - call->mod = mod; + up_write(&trace_event_sem); + + if (call->flags & TRACE_EVENT_FL_DYNAMIC) + atomic_set(&call->refcnt, 0); + else + call->module = mod; return 0; } -static struct ftrace_event_file * -trace_create_new_event(struct ftrace_event_call *call, +static char *eval_replace(char *ptr, struct trace_eval_map *map, int len) +{ + int rlen; + int elen; + + /* Find the length of the eval value as a string */ + elen = snprintf(ptr, 0, "%ld", map->eval_value); + /* Make sure there's enough room to replace the string with the value */ + if (len < elen) + return NULL; + + snprintf(ptr, elen 
+ 1, "%ld", map->eval_value); + + /* Get the rest of the string of ptr */ + rlen = strlen(ptr + len); + memmove(ptr + elen, ptr + len, rlen); + /* Make sure we end the new string */ + ptr[elen + rlen] = 0; + + return ptr + elen; +} + +static void update_event_printk(struct trace_event_call *call, + struct trace_eval_map *map) +{ + char *ptr; + int quote = 0; + int len = strlen(map->eval_string); + + for (ptr = call->print_fmt; *ptr; ptr++) { + if (*ptr == '\\') { + ptr++; + /* paranoid */ + if (!*ptr) + break; + continue; + } + if (*ptr == '"') { + quote ^= 1; + continue; + } + if (quote) + continue; + if (isdigit(*ptr)) { + /* skip numbers */ + do { + ptr++; + /* Check for alpha chars like ULL */ + } while (isalnum(*ptr)); + if (!*ptr) + break; + /* + * A number must have some kind of delimiter after + * it, and we can ignore that too. + */ + continue; + } + if (isalpha(*ptr) || *ptr == '_') { + if (strncmp(map->eval_string, ptr, len) == 0 && + !isalnum(ptr[len]) && ptr[len] != '_') { + ptr = eval_replace(ptr, map, len); + /* enum/sizeof string smaller than value */ + if (WARN_ON_ONCE(!ptr)) + return; + /* + * No need to decrement here, as eval_replace() + * returns the pointer to the character passed + * the eval, and two evals can not be placed + * back to back without something in between. + * We can skip that something in between. + */ + continue; + } + skip_more: + do { + ptr++; + } while (isalnum(*ptr) || *ptr == '_'); + if (!*ptr) + break; + /* + * If what comes after this variable is a '.' or + * '->' then we can continue to ignore that string. + */ + if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) { + ptr += *ptr == '.' ? 1 : 2; + if (!*ptr) + break; + goto skip_more; + } + /* + * Once again, we can skip the delimiter that came + * after the string. + */ + continue; + } + } +} + +static void add_str_to_module(struct module *module, char *str) +{ + struct module_string *modstr; + + modstr = kmalloc(sizeof(*modstr), GFP_KERNEL); + + /* + * If we failed to allocate memory here, then we'll just + * let the str memory leak when the module is removed. + * If this fails to allocate, there's worse problems than + * a leaked string on module removal. + */ + if (WARN_ON_ONCE(!modstr)) + return; + + modstr->module = module; + modstr->str = str; + + list_add(&modstr->next, &module_strings); +} + +#define ATTRIBUTE_STR "__attribute__(" +#define ATTRIBUTE_STR_LEN (sizeof(ATTRIBUTE_STR) - 1) + +/* Remove all __attribute__() from @type. Return allocated string or @type. */ +static char *sanitize_field_type(const char *type) +{ + char *attr, *tmp, *next, *ret = (char *)type; + int depth; + + next = (char *)type; + while ((attr = strstr(next, ATTRIBUTE_STR))) { + /* Retry if "__attribute__(" is a part of another word. 
*/ + if (attr != next && !isspace(attr[-1])) { + next = attr + ATTRIBUTE_STR_LEN; + continue; + } + + if (ret == type) { + ret = kstrdup(type, GFP_KERNEL); + if (WARN_ON_ONCE(!ret)) + return NULL; + attr = ret + (attr - type); + } + + /* the ATTRIBUTE_STR already has the first '(' */ + depth = 1; + next = attr + ATTRIBUTE_STR_LEN; + do { + tmp = strpbrk(next, "()"); + /* There is unbalanced parentheses */ + if (WARN_ON_ONCE(!tmp)) { + kfree(ret); + return (char *)type; + } + + if (*tmp == '(') + depth++; + else + depth--; + next = tmp + 1; + } while (depth > 0); + next = skip_spaces(next); + strcpy(attr, next); + next = attr; + } + return ret; +} + +static char *find_replacable_eval(const char *type, const char *eval_string, + int len) +{ + char *ptr; + + if (!eval_string) + return NULL; + + ptr = strchr(type, '['); + if (!ptr) + return NULL; + ptr++; + + if (!isalpha(*ptr) && *ptr != '_') + return NULL; + + if (strncmp(eval_string, ptr, len) != 0) + return NULL; + + return ptr; +} + +static void update_event_fields(struct trace_event_call *call, + struct trace_eval_map *map) +{ + struct ftrace_event_field *field; + const char *eval_string = NULL; + struct list_head *head; + int len = 0; + char *ptr; + char *str; + + /* Dynamic events should never have field maps */ + if (call->flags & TRACE_EVENT_FL_DYNAMIC) + return; + + if (map) { + eval_string = map->eval_string; + len = strlen(map->eval_string); + } + + head = trace_get_fields(call); + list_for_each_entry(field, head, link) { + str = sanitize_field_type(field->type); + if (!str) + return; + + ptr = find_replacable_eval(str, eval_string, len); + if (ptr) { + if (str == field->type) { + str = kstrdup(field->type, GFP_KERNEL); + if (WARN_ON_ONCE(!str)) + return; + ptr = str + (ptr - field->type); + } + + ptr = eval_replace(ptr, map, len); + /* enum/sizeof string smaller than value */ + if (WARN_ON_ONCE(!ptr)) { + kfree(str); + continue; + } + } + + if (str == field->type) + continue; + /* + * If the event is part of a module, then we need to free the string + * when the module is removed. Otherwise, it will stay allocated + * until a reboot. + */ + if (call->module) + add_str_to_module(call->module, str); + + field->type = str; + if (field->filter_type == FILTER_OTHER) + field->filter_type = filter_assign_type(field->type); + } +} + +/* Update all events for replacing eval and sanitizing */ +void trace_event_update_all(struct trace_eval_map **map, int len) +{ + struct trace_event_call *call, *p; + const char *last_system = NULL; + bool first = false; + bool updated; + int last_i; + int i; + + down_write(&trace_event_sem); + list_for_each_entry_safe(call, p, &ftrace_events, list) { + /* events are usually grouped together with systems */ + if (!last_system || call->class->system != last_system) { + first = true; + last_i = 0; + last_system = call->class->system; + } + + updated = false; + /* + * Since calls are grouped by systems, the likelihood that the + * next call in the iteration belongs to the same system as the + * previous call is high. As an optimization, we skip searching + * for a map[] that matches the call's system if the last call + * was from the same system. That's what last_i is for. If the + * call has the same system as the previous call, then last_i + * will be the index of the first map[] that has a matching + * system. 
+ */ + for (i = last_i; i < len; i++) { + if (call->class->system == map[i]->system) { + /* Save the first system if need be */ + if (first) { + last_i = i; + first = false; + } + update_event_printk(call, map[i]); + update_event_fields(call, map[i]); + updated = true; + } + } + /* If not updated yet, update field for sanitizing. */ + if (!updated) + update_event_fields(call, NULL); + cond_resched(); + } + up_write(&trace_event_sem); +} + +static bool event_in_systems(struct trace_event_call *call, + const char *systems) +{ + const char *system; + const char *p; + + if (!systems) + return true; + + system = call->class->system; + p = strstr(systems, system); + if (!p) + return false; + + if (p != systems && !isspace(*(p - 1)) && *(p - 1) != ',') + return false; + + p += strlen(system); + return !*p || isspace(*p) || *p == ','; +} + +#ifdef CONFIG_HIST_TRIGGERS +/* + * Wake up waiter on the hist_poll_wq from irq_work because the hist trigger + * may happen in any context. + */ +static void hist_poll_event_irq_work(struct irq_work *work) +{ + wake_up_all(&hist_poll_wq); +} + +DEFINE_IRQ_WORK(hist_poll_work, hist_poll_event_irq_work); +DECLARE_WAIT_QUEUE_HEAD(hist_poll_wq); +#endif + +static struct trace_event_file * +trace_create_new_event(struct trace_event_call *call, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_pid_list *no_pid_list; + struct trace_pid_list *pid_list; + struct trace_event_file *file; + unsigned int first; + + if (!event_in_systems(call, tr->system_names)) + return NULL; file = kmem_cache_alloc(file_cachep, GFP_TRACE); if (!file) - return NULL; + return ERR_PTR(-ENOMEM); + + pid_list = rcu_dereference_protected(tr->filtered_pids, + lockdep_is_held(&event_mutex)); + no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, + lockdep_is_held(&event_mutex)); + + if (!trace_pid_list_first(pid_list, &first) || + !trace_pid_list_first(no_pid_list, &first)) + file->flags |= EVENT_FILE_FL_PID_FILTER; file->event_call = call; file->tr = tr; atomic_set(&file->sm_ref, 0); + atomic_set(&file->tm_ref, 0); + INIT_LIST_HEAD(&file->triggers); list_add(&file->list, &tr->events); + refcount_set(&file->ref, 1); return file; } +#define MAX_BOOT_TRIGGERS 32 + +static struct boot_triggers { + const char *event; + char *trigger; +} bootup_triggers[MAX_BOOT_TRIGGERS]; + +static char bootup_trigger_buf[COMMAND_LINE_SIZE]; +static int nr_boot_triggers; + +static __init int setup_trace_triggers(char *str) +{ + char *trigger; + char *buf; + int i; + + strscpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE); + trace_set_ring_buffer_expanded(NULL); + disable_tracing_selftest("running event triggers"); + + buf = bootup_trigger_buf; + for (i = 0; i < MAX_BOOT_TRIGGERS; i++) { + trigger = strsep(&buf, ","); + if (!trigger) + break; + bootup_triggers[i].event = strsep(&trigger, "."); + bootup_triggers[i].trigger = trigger; + if (!bootup_triggers[i].trigger) + break; + } + + nr_boot_triggers = i; + return 1; +} +__setup("trace_trigger=", setup_trace_triggers); + /* Add an event to a trace directory */ static int -__trace_add_new_event(struct ftrace_event_call *call, - struct trace_array *tr, - const struct file_operations *id, - const struct file_operations *enable, - const struct file_operations *filter, - const struct file_operations *format) +__trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; file = trace_create_new_event(call, tr); + /* + * trace_create_new_event() returns 
ERR_PTR(-ENOMEM) if failed + * allocation, or NULL if the event is not part of the tr->system_names. + * When the event is not part of the tr->system_names, return zero, not + * an error. + */ if (!file) - return -ENOMEM; + return 0; + + if (IS_ERR(file)) + return PTR_ERR(file); - return event_create_dir(tr->event_dir, file, id, enable, filter, format); + if (eventdir_initialized) + return event_create_dir(tr->event_dir, file); + else + return event_define_fields(call); +} + +static void trace_early_triggers(struct trace_event_file *file, const char *name) +{ + int ret; + int i; + + for (i = 0; i < nr_boot_triggers; i++) { + if (strcmp(name, bootup_triggers[i].event)) + continue; + mutex_lock(&event_mutex); + ret = trigger_process_regex(file, bootup_triggers[i].trigger); + mutex_unlock(&event_mutex); + if (ret) + pr_err("Failed to register trigger '%s' on event %s\n", + bootup_triggers[i].trigger, + bootup_triggers[i].event); + } } /* - * Just create a decriptor for early init. A descriptor is required + * Just create a descriptor for early init. A descriptor is required * for enabling events at boot. We want to enable events before * the filesystem is initialized. */ -static __init int -__trace_early_add_new_event(struct ftrace_event_call *call, +static int +__trace_early_add_new_event(struct trace_event_call *call, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; + int ret; file = trace_create_new_event(call, tr); + /* + * trace_create_new_event() returns ERR_PTR(-ENOMEM) if failed + * allocation, or NULL if the event is not part of the tr->system_names. + * When the event is not part of the tr->system_names, return zero, not + * an error. + */ if (!file) - return -ENOMEM; + return 0; + + if (IS_ERR(file)) + return PTR_ERR(file); + + ret = event_define_fields(call); + if (ret) + return ret; + + trace_early_triggers(file, trace_event_name(call)); return 0; } struct ftrace_module_file_ops; -static void __add_event_to_tracers(struct ftrace_event_call *call, - struct ftrace_module_file_ops *file_ops); +static void __add_event_to_tracers(struct trace_event_call *call); /* Add an additional event_call dynamically */ -int trace_add_event_call(struct ftrace_event_call *call) +int trace_add_event_call(struct trace_event_call *call) { int ret; - mutex_lock(&trace_types_lock); - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); + + guard(mutex)(&trace_types_lock); ret = __register_event(call, NULL); - if (ret >= 0) - __add_event_to_tracers(call, NULL); + if (ret < 0) + return ret; - mutex_unlock(&event_mutex); - mutex_unlock(&trace_types_lock); + __add_event_to_tracers(call); return ret; } +EXPORT_SYMBOL_GPL(trace_add_event_call); /* * Must be called under locking of trace_types_lock, event_mutex and * trace_event_sem. */ -static void __trace_remove_event_call(struct ftrace_event_call *call) +static void __trace_remove_event_call(struct trace_event_call *call) { event_remove(call); trace_destroy_fields(call); - destroy_preds(call); +} + +static int probe_remove_event_call(struct trace_event_call *call) +{ + struct trace_array *tr; + struct trace_event_file *file; + +#ifdef CONFIG_PERF_EVENTS + if (call->perf_refcount) + return -EBUSY; +#endif + do_for_each_event_file(tr, file) { + if (file->event_call != call) + continue; + /* + * We can't rely on ftrace_event_enable_disable(enable => 0) + * we are going to do, soft mode can suppress + * TRACE_REG_UNREGISTER. 
+ */ + if (file->flags & EVENT_FILE_FL_ENABLED) + goto busy; + + if (file->flags & EVENT_FILE_FL_WAS_ENABLED) + tr->clear_trace = true; + /* + * The do_for_each_event_file_safe() is + * a double loop. After finding the call for this + * trace_array, we use break to jump to the next + * trace_array. + */ + break; + } while_for_each_event_file(); + + __trace_remove_event_call(call); + + return 0; + busy: + /* No need to clear the trace now */ + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + tr->clear_trace = false; + } + return -EBUSY; } /* Remove an event_call */ -void trace_remove_event_call(struct ftrace_event_call *call) +int trace_remove_event_call(struct trace_event_call *call) { + int ret; + + lockdep_assert_held(&event_mutex); + mutex_lock(&trace_types_lock); - mutex_lock(&event_mutex); down_write(&trace_event_sem); - __trace_remove_event_call(call); + ret = probe_remove_event_call(call); up_write(&trace_event_sem); - mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + + return ret; } +EXPORT_SYMBOL_GPL(trace_remove_event_call); #define for_each_event(event, start, end) \ for (event = start; \ @@ -1718,121 +3723,73 @@ void trace_remove_event_call(struct ftrace_event_call *call) event++) #ifdef CONFIG_MODULES - -static LIST_HEAD(ftrace_module_file_list); - -/* - * Modules must own their file_operations to keep up with - * reference counting. - */ -struct ftrace_module_file_ops { - struct list_head list; - struct module *mod; - struct file_operations id; - struct file_operations enable; - struct file_operations format; - struct file_operations filter; -}; - -static struct ftrace_module_file_ops * -find_ftrace_file_ops(struct ftrace_module_file_ops *file_ops, struct module *mod) +static void update_mod_cache(struct trace_array *tr, struct module *mod) { - /* - * As event_calls are added in groups by module, - * when we find one file_ops, we don't need to search for - * each call in that module, as the rest should be the - * same. Only search for a new one if the last one did - * not match. - */ - if (file_ops && mod == file_ops->mod) - return file_ops; + struct event_mod_load *event_mod, *n; - list_for_each_entry(file_ops, &ftrace_module_file_list, list) { - if (file_ops->mod == mod) - return file_ops; + list_for_each_entry_safe(event_mod, n, &tr->mod_events, list) { + if (strcmp(event_mod->module, mod->name) != 0) + continue; + + __ftrace_set_clr_event_nolock(tr, event_mod->match, + event_mod->system, + event_mod->event, 1, mod->name); + free_event_mod(event_mod); } - return NULL; } -static struct ftrace_module_file_ops * -trace_create_file_ops(struct module *mod) +static void update_cache_events(struct module *mod) { - struct ftrace_module_file_ops *file_ops; - - /* - * This is a bit of a PITA. To allow for correct reference - * counting, modules must "own" their file_operations. - * To do this, we allocate the file operations that will be - * used in the event directory. 
- */ - - file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL); - if (!file_ops) - return NULL; - - file_ops->mod = mod; - - file_ops->id = ftrace_event_id_fops; - file_ops->id.owner = mod; - - file_ops->enable = ftrace_enable_fops; - file_ops->enable.owner = mod; - - file_ops->filter = ftrace_event_filter_fops; - file_ops->filter.owner = mod; - - file_ops->format = ftrace_event_format_fops; - file_ops->format.owner = mod; - - list_add(&file_ops->list, &ftrace_module_file_list); + struct trace_array *tr; - return file_ops; + list_for_each_entry(tr, &ftrace_trace_arrays, list) + update_mod_cache(tr, mod); } static void trace_module_add_events(struct module *mod) { - struct ftrace_module_file_ops *file_ops = NULL; - struct ftrace_event_call **call, **start, **end; - - start = mod->trace_events; - end = mod->trace_events + mod->num_trace_events; + struct trace_event_call **call, **start, **end; - if (start == end) + if (!mod->num_trace_events) return; - file_ops = trace_create_file_ops(mod); - if (!file_ops) + /* Don't add infrastructure for mods without tracepoints */ + if (trace_module_has_bad_taint(mod)) { + pr_err("%s: module has bad taint, not creating trace events\n", + mod->name); return; + } + + start = mod->trace_events; + end = mod->trace_events + mod->num_trace_events; for_each_event(call, start, end) { __register_event(*call, mod); - __add_event_to_tracers(*call, file_ops); + __add_event_to_tracers(*call); } + + update_cache_events(mod); } static void trace_module_remove_events(struct module *mod) { - struct ftrace_module_file_ops *file_ops; - struct ftrace_event_call *call, *p; - bool clear_trace = false; + struct trace_event_call *call, *p; + struct module_string *modstr, *m; down_write(&trace_event_sem); list_for_each_entry_safe(call, p, &ftrace_events, list) { - if (call->mod == mod) { - if (call->flags & TRACE_EVENT_FL_WAS_ENABLED) - clear_trace = true; + if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module) + continue; + if (call->module == mod) __trace_remove_event_call(call); - } } - - /* Now free the file_operations */ - list_for_each_entry(file_ops, &ftrace_module_file_list, list) { - if (file_ops->mod == mod) - break; - } - if (&file_ops->list != &ftrace_module_file_list) { - list_del(&file_ops->list); - kfree(file_ops); + /* Check for any strings allocated for this module */ + list_for_each_entry_safe(modstr, m, &module_strings, next) { + if (modstr->module != mod) + continue; + list_del(&modstr->next); + kfree(modstr->str); + kfree(modstr); } up_write(&trace_event_sem); @@ -1844,8 +3801,7 @@ static void trace_module_remove_events(struct module *mod) * over from this module may be passed to the new module events and * unexpected results may occur. 
*/ - if (clear_trace) - tracing_reset_all_online_cpus(); + tracing_reset_all_online_cpus_unlocked(); } static int trace_module_notify(struct notifier_block *self, @@ -1853,8 +3809,8 @@ static int trace_module_notify(struct notifier_block *self, { struct module *mod = data; - mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); switch (val) { case MODULE_STATE_COMING: trace_module_add_events(mod); @@ -1863,78 +3819,147 @@ static int trace_module_notify(struct notifier_block *self, trace_module_remove_events(mod); break; } - mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); - return 0; + return NOTIFY_OK; } -static int -__trace_add_new_mod_event(struct ftrace_event_call *call, - struct trace_array *tr, - struct ftrace_module_file_ops *file_ops) +static struct notifier_block trace_module_nb = { + .notifier_call = trace_module_notify, + .priority = 1, /* higher than trace.c module notify */ +}; +#endif /* CONFIG_MODULES */ + +/* Create a new event directory structure for a trace directory. */ +static void +__trace_add_event_dirs(struct trace_array *tr) { - return __trace_add_new_event(call, tr, - &file_ops->id, &file_ops->enable, - &file_ops->filter, &file_ops->format); + struct trace_event_call *call; + int ret; + + lockdep_assert_held(&trace_event_sem); + + list_for_each_entry(call, &ftrace_events, list) { + ret = __trace_add_new_event(call, tr); + if (ret < 0) + pr_warn("Could not create directory for event %s\n", + trace_event_name(call)); + } } -#else -static inline struct ftrace_module_file_ops * -find_ftrace_file_ops(struct ftrace_module_file_ops *file_ops, struct module *mod) +/* Returns any file that matches the system and event */ +struct trace_event_file * +__find_event_file(struct trace_array *tr, const char *system, const char *event) { + struct trace_event_file *file; + struct trace_event_call *call; + const char *name; + + list_for_each_entry(file, &tr->events, list) { + + call = file->event_call; + name = trace_event_name(call); + + if (!name || !call->class) + continue; + + if (strcmp(event, name) == 0 && + strcmp(system, call->class->system) == 0) + return file; + } return NULL; } -static inline int trace_module_notify(struct notifier_block *self, - unsigned long val, void *data) + +/* Returns valid trace event files that match system and event */ +struct trace_event_file * +find_event_file(struct trace_array *tr, const char *system, const char *event) { - return 0; + struct trace_event_file *file; + + file = __find_event_file(tr, system, event); + if (!file || !file->event_call->class->reg || + file->event_call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) + return NULL; + + return file; } -static inline int -__trace_add_new_mod_event(struct ftrace_event_call *call, - struct trace_array *tr, - struct ftrace_module_file_ops *file_ops) + +/** + * trace_get_event_file - Find and return a trace event file + * @instance: The name of the trace instance containing the event + * @system: The name of the system containing the event + * @event: The name of the event + * + * Return a trace event file given the trace instance name, trace + * system, and trace event name. If the instance name is NULL, it + * refers to the top-level trace array. + * + * This function will look it up and return it if found, after calling + * trace_array_get() to prevent the instance from going away, and + * increment the event's module refcount to prevent it from being + * removed. 
+ * + * To release the file, call trace_put_event_file(), which will call + * trace_array_put() and decrement the event's module refcount. + * + * Return: The trace event on success, ERR_PTR otherwise. + */ +struct trace_event_file *trace_get_event_file(const char *instance, + const char *system, + const char *event) { - return -ENODEV; + struct trace_array *tr = top_trace_array(); + struct trace_event_file *file = NULL; + int ret = -EINVAL; + + if (instance) { + tr = trace_array_find_get(instance); + if (!tr) + return ERR_PTR(-ENOENT); + } else { + ret = trace_array_get(tr); + if (ret) + return ERR_PTR(ret); + } + + guard(mutex)(&event_mutex); + + file = find_event_file(tr, system, event); + if (!file) { + trace_array_put(tr); + return ERR_PTR(-EINVAL); + } + + /* Don't let event modules unload while in use */ + ret = trace_event_try_get_ref(file->event_call); + if (!ret) { + trace_array_put(tr); + return ERR_PTR(-EBUSY); + } + + return file; } -#endif /* CONFIG_MODULES */ +EXPORT_SYMBOL_GPL(trace_get_event_file); -/* Create a new event directory structure for a trace directory. */ -static void -__trace_add_event_dirs(struct trace_array *tr) +/** + * trace_put_event_file - Release a file from trace_get_event_file() + * @file: The trace event file + * + * If a file was retrieved using trace_get_event_file(), this should + * be called when it's no longer needed. It will cancel the previous + * trace_array_get() called by that function, and decrement the + * event's module refcount. + */ +void trace_put_event_file(struct trace_event_file *file) { - struct ftrace_module_file_ops *file_ops = NULL; - struct ftrace_event_call *call; - int ret; + mutex_lock(&event_mutex); + trace_event_put_ref(file->event_call); + mutex_unlock(&event_mutex); - list_for_each_entry(call, &ftrace_events, list) { - if (call->mod) { - /* - * Directories for events by modules need to - * keep module ref counts when opened (as we don't - * want the module to disappear when reading one - * of these files). The file_ops keep account of - * the module ref count. - */ - file_ops = find_ftrace_file_ops(file_ops, call->mod); - if (!file_ops) - continue; /* Warn? 
*/ - ret = __trace_add_new_mod_event(call, tr, file_ops); - if (ret < 0) - pr_warning("Could not create directory for event %s\n", - call->name); - continue; - } - ret = __trace_add_new_event(call, tr, - &ftrace_event_id_fops, - &ftrace_enable_fops, - &ftrace_event_filter_fops, - &ftrace_event_format_fops); - if (ret < 0) - pr_warning("Could not create directory for event %s\n", - call->name); - } + trace_array_put(file->tr); } +EXPORT_SYMBOL_GPL(trace_put_event_file); #ifdef CONFIG_DYNAMIC_FTRACE @@ -1943,122 +3968,156 @@ __trace_add_event_dirs(struct trace_array *tr) #define DISABLE_EVENT_STR "disable_event" struct event_probe_data { - struct ftrace_event_file *file; + struct trace_event_file *file; unsigned long count; int ref; bool enable; }; -static struct ftrace_event_file * -find_event_file(struct trace_array *tr, const char *system, const char *event) +static void update_event_probe(struct event_probe_data *data) { - struct ftrace_event_file *file; - struct ftrace_event_call *call; - - list_for_each_entry(file, &tr->events, list) { - - call = file->event_call; - - if (!call->name || !call->class || !call->class->reg) - continue; - - if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) - continue; - - if (strcmp(event, call->name) == 0 && - strcmp(system, call->class->system) == 0) - return file; - } - return NULL; + if (data->enable) + clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); + else + set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); } static void -event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data) +event_enable_probe(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr, struct ftrace_probe_ops *ops, + void *data) { - struct event_probe_data **pdata = (struct event_probe_data **)_data; - struct event_probe_data *data = *pdata; + struct ftrace_func_mapper *mapper = data; + struct event_probe_data *edata; + void **pdata; - if (!data) + pdata = ftrace_func_mapper_find_ip(mapper, ip); + if (!pdata || !*pdata) return; - if (data->enable) - clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags); - else - set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags); + edata = *pdata; + update_event_probe(edata); } static void -event_enable_count_probe(unsigned long ip, unsigned long parent_ip, void **_data) +event_enable_count_probe(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr, struct ftrace_probe_ops *ops, + void *data) { - struct event_probe_data **pdata = (struct event_probe_data **)_data; - struct event_probe_data *data = *pdata; + struct ftrace_func_mapper *mapper = data; + struct event_probe_data *edata; + void **pdata; - if (!data) + pdata = ftrace_func_mapper_find_ip(mapper, ip); + if (!pdata || !*pdata) return; - if (!data->count) + edata = *pdata; + + if (!edata->count) return; /* Skip if the event is in a state we want to switch to */ - if (data->enable == !(data->file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) + if (edata->enable == !(edata->file->flags & EVENT_FILE_FL_SOFT_DISABLED)) return; - if (data->count != -1) - (data->count)--; + if (edata->count != -1) + (edata->count)--; - event_enable_probe(ip, parent_ip, _data); + update_event_probe(edata); } static int event_enable_print(struct seq_file *m, unsigned long ip, - struct ftrace_probe_ops *ops, void *_data) + struct ftrace_probe_ops *ops, void *data) { - struct event_probe_data *data = _data; + struct ftrace_func_mapper *mapper = data; + struct event_probe_data *edata; + void **pdata; + + pdata = 
ftrace_func_mapper_find_ip(mapper, ip);
+
+	if (WARN_ON_ONCE(!pdata || !*pdata))
+		return 0;
+
+	edata = *pdata;
 
 	seq_printf(m, "%ps:", (void *)ip);
 
 	seq_printf(m, "%s:%s:%s",
-		   data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
-		   data->file->event_call->class->system,
-		   data->file->event_call->name);
+		   edata->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
+		   edata->file->event_call->class->system,
+		   trace_event_name(edata->file->event_call));
 
-	if (data->count == -1)
-		seq_printf(m, ":unlimited\n");
+	if (edata->count == -1)
+		seq_puts(m, ":unlimited\n");
 	else
-		seq_printf(m, ":count=%ld\n", data->count);
+		seq_printf(m, ":count=%ld\n", edata->count);
 
 	return 0;
 }
 
 static int
-event_enable_init(struct ftrace_probe_ops *ops, unsigned long ip,
-		  void **_data)
+event_enable_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
+		  unsigned long ip, void *init_data, void **data)
 {
-	struct event_probe_data **pdata = (struct event_probe_data **)_data;
-	struct event_probe_data *data = *pdata;
+	struct ftrace_func_mapper *mapper = *data;
+	struct event_probe_data *edata = init_data;
+	int ret;
 
-	data->ref++;
+	if (!mapper) {
+		mapper = allocate_ftrace_func_mapper();
+		if (!mapper)
+			return -ENODEV;
+		*data = mapper;
+	}
+
+	ret = ftrace_func_mapper_add_ip(mapper, ip, edata);
+	if (ret < 0)
+		return ret;
+
+	edata->ref++;
+
+	return 0;
+}
+
+static int free_probe_data(void *data)
+{
+	struct event_probe_data *edata = data;
+
+	edata->ref--;
+	if (!edata->ref) {
+		/* Remove soft mode */
+		__ftrace_event_enable_disable(edata->file, 0, 1);
+		trace_event_put_ref(edata->file->event_call);
+		kfree(edata);
+	}
 	return 0;
 }
 
 static void
-event_enable_free(struct ftrace_probe_ops *ops, unsigned long ip,
-		  void **_data)
+event_enable_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
+		  unsigned long ip, void *data)
 {
-	struct event_probe_data **pdata = (struct event_probe_data **)_data;
-	struct event_probe_data *data = *pdata;
+	struct ftrace_func_mapper *mapper = data;
+	struct event_probe_data *edata;
 
-	if (WARN_ON_ONCE(data->ref <= 0))
+	if (!ip) {
+		if (!mapper)
+			return;
+		free_ftrace_func_mapper(mapper, free_probe_data);
 		return;
-
-	data->ref--;
-	if (!data->ref) {
-		/* Remove the SOFT_MODE flag */
-		__ftrace_event_enable_disable(data->file, 0, 1);
-		module_put(data->file->event_call->mod);
-		kfree(data);
 	}
-	*pdata = NULL;
+
+	edata = ftrace_func_mapper_remove_ip(mapper, ip);
+
+	if (WARN_ON_ONCE(!edata))
+		return;
+
+	if (WARN_ON_ONCE(edata->ref <= 0))
+		return;
+
+	free_probe_data(edata);
 }
 
 static struct ftrace_probe_ops event_enable_probe_ops = {
@@ -2090,19 +4149,22 @@ static struct ftrace_probe_ops event_disable_count_probe_ops = {
 };
 
 static int
-event_enable_func(struct ftrace_hash *hash,
+event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
 		  char *glob, char *cmd, char *param, int enabled)
 {
-	struct trace_array *tr = top_trace_array();
-	struct ftrace_event_file *file;
+	struct trace_event_file *file;
 	struct ftrace_probe_ops *ops;
 	struct event_probe_data *data;
+	unsigned long count = -1;
 	const char *system;
 	const char *event;
 	char *number;
 	bool enable;
 	int ret;
 
+	if (!tr)
+		return -ENODEV;
+
 	/* hash funcs only work with set_ftrace_filter */
 	if (!enabled || !param)
 		return -EINVAL;
@@ -2113,12 +4175,11 @@ event_enable_func(struct ftrace_hash *hash,
 
 	event = strsep(&param, ":");
 
-	mutex_lock(&event_mutex);
+	guard(mutex)(&event_mutex);
 
-	ret = -EINVAL;
 	file = find_event_file(tr, system, event);
 	if (!file)
-		goto out;
+		return -EINVAL;
 
 	enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
0; @@ -2127,73 +4188,62 @@ event_enable_func(struct ftrace_hash *hash, else ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops; - if (glob[0] == '!') { - unregister_ftrace_function_probe_func(glob+1, ops); - ret = 0; - goto out; - } - - ret = -ENOMEM; - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - goto out; - - data->enable = enable; - data->count = -1; - data->file = file; - - if (!param) - goto out_reg; + if (glob[0] == '!') + return unregister_ftrace_function_probe_func(glob+1, tr, ops); - number = strsep(¶m, ":"); + if (param) { + number = strsep(¶m, ":"); - ret = -EINVAL; - if (!strlen(number)) - goto out_free; + if (!strlen(number)) + return -EINVAL; - /* - * We use the callback data field (which is a pointer) - * as our counter. - */ - ret = kstrtoul(number, 0, &data->count); - if (ret) - goto out_free; + /* + * We use the callback data field (which is a pointer) + * as our counter. + */ + ret = kstrtoul(number, 0, &count); + if (ret) + return ret; + } - out_reg: /* Don't let event modules unload while probe registered */ - ret = try_module_get(file->event_call->mod); - if (!ret) { - ret = -EBUSY; - goto out_free; - } + ret = trace_event_try_get_ref(file->event_call); + if (!ret) + return -EBUSY; ret = __ftrace_event_enable_disable(file, 1, 1); if (ret < 0) goto out_put; - ret = register_ftrace_function_probe(glob, ops, data); + + ret = -ENOMEM; + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + goto out_put; + + data->enable = enable; + data->count = count; + data->file = file; + + ret = register_ftrace_function_probe(glob, tr, ops, data); /* * The above returns on success the # of functions enabled, * but if it didn't find any functions it returns zero. * Consider no functions a failure too. */ - if (!ret) { - ret = -ENOENT; - goto out_disable; - } else if (ret < 0) - goto out_disable; + /* Just return zero, not the number of enabled functions */ - ret = 0; - out: - mutex_unlock(&event_mutex); - return ret; + if (ret > 0) + return 0; + + kfree(data); + + if (!ret) + ret = -ENOENT; - out_disable: __ftrace_event_enable_disable(file, 0, 1); out_put: - module_put(file->event_call->mod); - out_free: - kfree(data); - goto out; + trace_event_put_ref(file->event_call); + return ret; } static struct ftrace_func_command event_enable_cmd = { @@ -2223,52 +4273,47 @@ static inline int register_event_cmds(void) { return 0; } #endif /* CONFIG_DYNAMIC_FTRACE */ /* - * The top level array has already had its ftrace_event_file - * descriptors created in order to allow for early events to - * be recorded. This function is called after the debugfs has been - * initialized, and we now have to create the files associated - * to the events. + * The top level array and trace arrays created by boot-time tracing + * have already had its trace_event_file descriptors created in order + * to allow for early events to be recorded. + * This function is called after the tracefs has been initialized, + * and we now have to create the files associated to the events. 
*/ -static __init void -__trace_early_add_event_dirs(struct trace_array *tr) +static void __trace_early_add_event_dirs(struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; int ret; list_for_each_entry(file, &tr->events, list) { - ret = event_create_dir(tr->event_dir, file, - &ftrace_event_id_fops, - &ftrace_enable_fops, - &ftrace_event_filter_fops, - &ftrace_event_format_fops); + ret = event_create_dir(tr->event_dir, file); if (ret < 0) - pr_warning("Could not create directory for event %s\n", - file->event_call->name); + pr_warn("Could not create directory for event %s\n", + trace_event_name(file->event_call)); } } /* - * For early boot up, the top trace array requires to have - * a list of events that can be enabled. This must be done before - * the filesystem is set up in order to allow events to be traced - * early. + * For early boot up, the top trace array and the trace arrays created + * by boot-time tracing require to have a list of events that can be + * enabled. This must be done before the filesystem is set up in order + * to allow events to be traced early. */ -static __init void -__trace_early_add_events(struct trace_array *tr) +void __trace_early_add_events(struct trace_array *tr) { - struct ftrace_event_call *call; + struct trace_event_call *call; int ret; list_for_each_entry(call, &ftrace_events, list) { /* Early boot up should not have any modules loaded */ - if (WARN_ON_ONCE(call->mod)) + if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) && + WARN_ON_ONCE(call->module)) continue; ret = __trace_early_add_new_event(call, tr); if (ret < 0) - pr_warning("Could not create early event %s\n", - call->name); + pr_warn("Could not create early event %s\n", + trace_event_name(call)); } } @@ -2276,87 +4321,103 @@ __trace_early_add_events(struct trace_array *tr) static void __trace_remove_event_dirs(struct trace_array *tr) { - struct ftrace_event_file *file, *next; + struct trace_event_file *file, *next; - list_for_each_entry_safe(file, next, &tr->events, list) { - list_del(&file->list); - debugfs_remove_recursive(file->dir); - remove_subsystem(file->system); - kmem_cache_free(file_cachep, file); - } + list_for_each_entry_safe(file, next, &tr->events, list) + remove_event_file_dir(file); } -static void -__add_event_to_tracers(struct ftrace_event_call *call, - struct ftrace_module_file_ops *file_ops) +static void __add_event_to_tracers(struct trace_event_call *call) { struct trace_array *tr; - list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (file_ops) - __trace_add_new_mod_event(call, tr, file_ops); - else - __trace_add_new_event(call, tr, - &ftrace_event_id_fops, - &ftrace_enable_fops, - &ftrace_event_filter_fops, - &ftrace_event_format_fops); - } + list_for_each_entry(tr, &ftrace_trace_arrays, list) + __trace_add_new_event(call, tr); } -static struct notifier_block trace_module_nb = { - .notifier_call = trace_module_notify, - .priority = 0, -}; - -extern struct ftrace_event_call *__start_ftrace_events[]; -extern struct ftrace_event_call *__stop_ftrace_events[]; +extern struct trace_event_call *__start_ftrace_events[]; +extern struct trace_event_call *__stop_ftrace_events[]; static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; static __init int setup_trace_event(char *str) { - strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE); - ring_buffer_expanded = true; - tracing_selftest_disabled = true; + strscpy(bootup_event_buf, str, COMMAND_LINE_SIZE); + trace_set_ring_buffer_expanded(NULL); + disable_tracing_selftest("running event 
tracing"); return 1; } __setup("trace_event=", setup_trace_event); +static int events_callback(const char *name, umode_t *mode, void **data, + const struct file_operations **fops) +{ + if (strcmp(name, "enable") == 0) { + *mode = TRACE_MODE_WRITE; + *fops = &ftrace_tr_enable_fops; + return 1; + } + + if (strcmp(name, "header_page") == 0) { + *mode = TRACE_MODE_READ; + *fops = &ftrace_show_header_page_fops; + + } else if (strcmp(name, "header_event") == 0) { + *mode = TRACE_MODE_READ; + *fops = &ftrace_show_header_event_fops; + } else + return 0; + + return 1; +} + /* Expects to have event_mutex held when called */ static int create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) { - struct dentry *d_events; + struct eventfs_inode *e_events; struct dentry *entry; - - entry = debugfs_create_file("set_event", 0644, parent, - tr, &ftrace_set_event_fops); - if (!entry) { - pr_warning("Could not create debugfs 'set_event' entry\n"); + int nr_entries; + static struct eventfs_entry events_entries[] = { + { + .name = "enable", + .callback = events_callback, + }, + { + .name = "header_page", + .callback = events_callback, + }, + { + .name = "header_event", + .callback = events_callback, + }, + }; + + entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent, + tr, &ftrace_set_event_fops); + if (!entry) return -ENOMEM; - } - d_events = debugfs_create_dir("events", parent); - if (!d_events) { - pr_warning("Could not create debugfs 'events' directory\n"); + nr_entries = ARRAY_SIZE(events_entries); + + e_events = eventfs_create_events_dir("events", parent, events_entries, + nr_entries, tr); + if (IS_ERR(e_events)) { + pr_warn("Could not create tracefs 'events' directory\n"); return -ENOMEM; } - /* ring buffer internal formats */ - trace_create_file("header_page", 0444, d_events, - ring_buffer_print_page_header, - &ftrace_show_header_fops); + /* There are not as crucial, just warn if they are not created */ - trace_create_file("header_event", 0444, d_events, - ring_buffer_print_entry_header, - &ftrace_show_header_fops); + trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent, + tr, &ftrace_set_event_pid_fops); - trace_create_file("enable", 0644, d_events, - tr, &ftrace_tr_enable_fops); + trace_create_file("set_event_notrace_pid", + TRACE_MODE_WRITE, parent, tr, + &ftrace_set_event_notrace_pid_fops); - tr->event_dir = d_events; + tr->event_dir = e_events; return 0; } @@ -2368,27 +4429,31 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) * * When a new instance is created, it needs to set up its events * directory, as well as other files associated with events. It also - * creates the event hierachry in the @parent/events directory. + * creates the event hierarchy in the @parent/events directory. * * Returns 0 on success. + * + * Must be called with event_mutex held. */ int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr) { int ret; - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); ret = create_event_toplevel_files(parent, tr); if (ret) - goto out_unlock; + goto out; down_write(&trace_event_sem); - __trace_add_event_dirs(tr); + /* If tr already has the event list, it is initialized in early boot. 
*/ + if (unlikely(!list_empty(&tr->events))) + __trace_early_add_event_dirs(tr); + else + __trace_add_event_dirs(tr); up_write(&trace_event_sem); - out_unlock: - mutex_unlock(&event_mutex); - + out: return ret; } @@ -2401,56 +4466,90 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr) { int ret; - mutex_lock(&event_mutex); + guard(mutex)(&event_mutex); ret = create_event_toplevel_files(parent, tr); if (ret) - goto out_unlock; + return ret; down_write(&trace_event_sem); __trace_early_add_event_dirs(tr); up_write(&trace_event_sem); - out_unlock: - mutex_unlock(&event_mutex); - - return ret; + return 0; } +/* Must be called with event_mutex held */ int event_trace_del_tracer(struct trace_array *tr) { - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); + + /* Disable any event triggers and associated soft-disabled events */ + clear_event_triggers(tr); + + /* Clear the pid list */ + __ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS); /* Disable any running events */ - __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); + __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0, NULL); + + /* Make sure no more events are being executed */ + tracepoint_synchronize_unregister(); down_write(&trace_event_sem); __trace_remove_event_dirs(tr); - debugfs_remove_recursive(tr->event_dir); + eventfs_remove_events_dir(tr->event_dir); up_write(&trace_event_sem); tr->event_dir = NULL; - mutex_unlock(&event_mutex); - return 0; } static __init int event_trace_memsetup(void) { field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC); - file_cachep = KMEM_CACHE(ftrace_event_file, SLAB_PANIC); + file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC); return 0; } +__init void +early_enable_events(struct trace_array *tr, char *buf, bool disable_first) +{ + char *token; + int ret; + + while (true) { + token = strsep(&buf, ","); + + if (!token) + break; + + if (*token) { + /* Restarting syscalls requires that we stop them first */ + if (disable_first) + ftrace_set_clr_event(tr, token, 0); + + ret = ftrace_set_clr_event(tr, token, 1); + if (ret) + pr_warn("Failed to enable trace event: %s\n", token); + } + + /* Put back the comma to allow this to be called again */ + if (buf) + *(buf - 1) = ','; + } +} + static __init int event_trace_enable(void) { struct trace_array *tr = top_trace_array(); - struct ftrace_event_call **iter, *call; - char *buf = bootup_event_buf; - char *token; + struct trace_event_call **iter, *call; int ret; + if (!tr) + return -ENODEV; + for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) { call = *iter; @@ -2459,6 +4558,8 @@ static __init int event_trace_enable(void) list_add(&call->list, &ftrace_events); } + register_trigger_cmds(); + /* * We need the top trace array to have a working set of trace * points at early init, before the debug files and directories @@ -2467,63 +4568,89 @@ static __init int event_trace_enable(void) */ __trace_early_add_events(tr); - while (true) { - token = strsep(&buf, ","); - - if (!token) - break; - if (!*token) - continue; - - ret = ftrace_set_clr_event(tr, token, 1); - if (ret) - pr_warn("Failed to enable trace event: %s\n", token); - } + early_enable_events(tr, bootup_event_buf, false); trace_printk_start_comm(); register_event_cmds(); + return 0; } -static __init int event_trace_init(void) +/* + * event_trace_enable() is called from trace_event_init() first to + * initialize events and perhaps start any events that are on the + * command line. 
Unfortunately, there are some events that will not + * start this early, like the system call tracepoints that need + * to set the %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. But + * event_trace_enable() is called before pid 1 starts, and this flag + * is never set, making the syscall tracepoint never get reached, but + * the event is enabled regardless (and not doing anything). + */ +static __init int event_trace_enable_again(void) { struct trace_array *tr; - struct dentry *d_tracer; - struct dentry *entry; - int ret; tr = top_trace_array(); + if (!tr) + return -ENODEV; - d_tracer = tracing_init_dentry(); - if (!d_tracer) - return 0; + early_enable_events(tr, bootup_event_buf, true); - entry = debugfs_create_file("available_events", 0444, d_tracer, - tr, &ftrace_avail_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'available_events' entry\n"); + return 0; +} + +early_initcall(event_trace_enable_again); + +/* Init fields which doesn't related to the tracefs */ +static __init int event_trace_init_fields(void) +{ + if (trace_define_generic_fields()) + pr_warn("tracing: Failed to allocated generic fields"); if (trace_define_common_fields()) - pr_warning("tracing: Failed to allocate common fields"); + pr_warn("tracing: Failed to allocate common fields"); - ret = early_event_add_tracer(d_tracer, tr); + return 0; +} + +__init int event_trace_init(void) +{ + struct trace_array *tr; + int ret; + + tr = top_trace_array(); + if (!tr) + return -ENODEV; + + trace_create_file("available_events", TRACE_MODE_READ, + NULL, tr, &ftrace_avail_fops); + + ret = early_event_add_tracer(NULL, tr); if (ret) return ret; +#ifdef CONFIG_MODULES ret = register_module_notifier(&trace_module_nb); if (ret) - pr_warning("Failed to register trace events module notifier\n"); + pr_warn("Failed to register trace events module notifier\n"); +#endif + + eventdir_initialized = true; return 0; } -early_initcall(event_trace_memsetup); -core_initcall(event_trace_enable); -fs_initcall(event_trace_init); -#ifdef CONFIG_FTRACE_STARTUP_TEST +void __init trace_event_init(void) +{ + event_trace_memsetup(); + init_ftrace_syscalls(); + event_trace_enable(); + event_trace_init_fields(); +} + +#ifdef CONFIG_EVENT_TRACE_STARTUP_TEST static DEFINE_SPINLOCK(test_spinlock); static DEFINE_SPINLOCK(test_spinlock_irq); @@ -2555,8 +4682,11 @@ static __init int event_test_thread(void *unused) kfree(test_malloc); set_current_state(TASK_INTERRUPTIBLE); - while (!kthread_should_stop()) + while (!kthread_should_stop()) { schedule(); + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); return 0; } @@ -2579,14 +4709,16 @@ static __init void event_test_stuff(void) */ static __init void event_trace_self_tests(void) { - struct ftrace_subsystem_dir *dir; - struct ftrace_event_file *file; - struct ftrace_event_call *call; + struct trace_subsystem_dir *dir; + struct trace_event_file *file; + struct trace_event_call *call; struct event_subsystem *system; struct trace_array *tr; int ret; tr = top_trace_array(); + if (!tr) + return; pr_info("Running tests on trace events:\n"); @@ -2610,14 +4742,14 @@ static __init void event_trace_self_tests(void) continue; #endif - pr_info("Testing event %s: ", call->name); + pr_info("Testing event %s: ", trace_event_name(call)); /* * If an event is already enabled, someone is using * it and the self test should not be on. 
*/ - if (file->flags & FTRACE_EVENT_FL_ENABLED) { - pr_warning("Enabled event during self test!\n"); + if (file->flags & EVENT_FILE_FL_ENABLED) { + pr_warn("Enabled event during self test!\n"); WARN_ON_ONCE(1); continue; } @@ -2643,19 +4775,19 @@ static __init void event_trace_self_tests(void) pr_info("Testing event system %s: ", system->name); - ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1); + ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1, NULL); if (WARN_ON_ONCE(ret)) { - pr_warning("error enabling system %s\n", - system->name); + pr_warn("error enabling system %s\n", + system->name); continue; } event_test_stuff(); - ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0); + ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0, NULL); if (WARN_ON_ONCE(ret)) { - pr_warning("error disabling system %s\n", - system->name); + pr_warn("error disabling system %s\n", + system->name); continue; } @@ -2667,18 +4799,18 @@ static __init void event_trace_self_tests(void) pr_info("Running tests on all trace events:\n"); pr_info("Testing all events: "); - ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1); + ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1, NULL); if (WARN_ON_ONCE(ret)) { - pr_warning("error enabling all events\n"); + pr_warn("error enabling all events\n"); return; } event_test_stuff(); /* reset sysname */ - ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); + ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0, NULL); if (WARN_ON_ONCE(ret)) { - pr_warning("error disabling all events\n"); + pr_warn("error disabling all events\n"); return; } @@ -2689,19 +4821,20 @@ static __init void event_trace_self_tests(void) static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); -static void +static struct trace_event_file event_trace_file __initdata; + +static void __init function_test_events_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *regs) { + struct trace_buffer *buffer; struct ring_buffer_event *event; - struct ring_buffer *buffer; struct ftrace_entry *entry; - unsigned long flags; + unsigned int trace_ctx; long disabled; int cpu; - int pc; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); cpu = raw_smp_processor_id(); disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); @@ -2709,19 +4842,17 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, if (disabled != 1) goto out; - local_save_flags(flags); - - event = trace_current_buffer_lock_reserve(&buffer, - TRACE_FN, sizeof(*entry), - flags, pc); + event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file, + TRACE_FN, sizeof(*entry), + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); entry->ip = ip; entry->parent_ip = parent_ip; - trace_buffer_unlock_commit(buffer, event, flags, pc); - + event_trigger_unlock_commit(&event_trace_file, buffer, event, + entry, trace_ctx); out: atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); preempt_enable_notrace(); @@ -2730,12 +4861,16 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, static struct ftrace_ops trace_ops __initdata = { .func = function_test_events_call, - .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static __init void event_trace_self_test_with_function(void) { int ret; + + event_trace_file.tr = top_trace_array(); + if (WARN_ON(!event_trace_file.tr)) + return; + ret = register_ftrace_function(&trace_ops); if 
(WARN_ON(ret < 0)) { pr_info("Failed to enable function tracer for event tests\n"); |
