From 8c1a49aedb73fb2f15aaa32ad9e2e1c4289f45cb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 10 Jan 2014 11:13:54 -0500 Subject: tracing: Pass trace_array to set_flag callback As options (flags) may affect instances instead of being global the set_flag() callbacks need to receive the trace_array descriptor of the instance they will be modifying. Signed-off-by: Steven Rostedt --- kernel/trace/blktrace.c | 3 ++- kernel/trace/trace.c | 18 ++++++++++-------- kernel/trace/trace.h | 3 ++- kernel/trace/trace_functions.c | 3 ++- kernel/trace/trace_functions_graph.c | 3 ++- kernel/trace/trace_irqsoff.c | 6 ++++-- kernel/trace/trace_nop.c | 2 +- kernel/trace/trace_sched_wakeup.c | 6 ++++-- 8 files changed, 27 insertions(+), 17 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b418cb0d7242..0d758ca61933 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1427,7 +1427,8 @@ static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) return print_one_line(iter, true); } -static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set) +static int +blk_tracer_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { /* don't output context-info for blk_classic output */ if (bit == TRACE_BLK_OPT_CLASSIC) { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 815c878f409b..d7dfc7efc4bf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -73,7 +73,8 @@ static struct tracer_flags dummy_tracer_flags = { .opts = dummy_tracer_opt }; -static int dummy_set_flag(u32 old_flags, u32 bit, int set) +static int +dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { return 0; } @@ -3339,13 +3340,14 @@ static int tracing_trace_options_show(struct seq_file *m, void *v) return 0; } -static int __set_tracer_option(struct tracer *trace, +static int __set_tracer_option(struct trace_array *tr, struct tracer_flags *tracer_flags, struct tracer_opt *opts, int neg) { + struct tracer *trace = tr->current_trace; int ret; - ret = trace->set_flag(tracer_flags->val, opts->bit, !neg); + ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg); if (ret) return ret; @@ -3357,8 +3359,9 @@ static int __set_tracer_option(struct tracer *trace, } /* Try to assign a tracer specific option */ -static int set_tracer_option(struct tracer *trace, char *cmp, int neg) +static int set_tracer_option(struct trace_array *tr, char *cmp, int neg) { + struct tracer *trace = tr->current_trace; struct tracer_flags *tracer_flags = trace->flags; struct tracer_opt *opts = NULL; int i; @@ -3367,8 +3370,7 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg) opts = &tracer_flags->opts[i]; if (strcmp(cmp, opts->name) == 0) - return __set_tracer_option(trace, trace->flags, - opts, neg); + return __set_tracer_option(tr, trace->flags, opts, neg); } return -EINVAL; @@ -3440,7 +3442,7 @@ static int trace_set_options(struct trace_array *tr, char *option) /* If no option could be set, test the specific tracer options */ if (!trace_options[i]) - ret = set_tracer_option(tr->current_trace, cmp, neg); + ret = set_tracer_option(tr, cmp, neg); mutex_unlock(&trace_types_lock); @@ -5689,7 +5691,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, if (!!(topt->flags->val & topt->opt->bit) != val) { mutex_lock(&trace_types_lock); - ret = __set_tracer_option(topt->tr->current_trace, topt->flags, + ret = __set_tracer_option(topt->tr, topt->flags, topt->opt, !val); 
mutex_unlock(&trace_types_lock); if (ret) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 02b592f2d4b7..649a23d421c1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -355,7 +355,8 @@ struct tracer { void (*print_header)(struct seq_file *m); enum print_line_t (*print_line)(struct trace_iterator *iter); /* If you handled the flag setting, return 0 */ - int (*set_flag)(u32 old_flags, u32 bit, int set); + int (*set_flag)(struct trace_array *tr, + u32 old_flags, u32 bit, int set); /* Return 0 if OK with change, else return non-zero */ int (*flag_changed)(struct tracer *tracer, u32 mask, int set); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 38fe1483c508..85e517e84f50 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -175,7 +175,8 @@ static void tracing_stop_function_trace(void) unregister_ftrace_function(&trace_ops); } -static int func_set_flag(u32 old_flags, u32 bit, int set) +static int +func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { switch (bit) { case TRACE_FUNC_OPT_STACK: diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 0b99120d395c..deff11200261 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -1476,7 +1476,8 @@ void graph_trace_close(struct trace_iterator *iter) } } -static int func_graph_set_flag(u32 old_flags, u32 bit, int set) +static int +func_graph_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { if (bit == TRACE_GRAPH_PRINT_IRQS) ftrace_graph_skip_irqs = !set; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 2aefbee93a6d..fd99b0c183ac 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -160,7 +160,8 @@ static struct ftrace_ops trace_ops __read_mostly = #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) +static int +irqsoff_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { int cpu; @@ -266,7 +267,8 @@ __trace_function(struct trace_array *tr, #else #define __trace_function trace_function -static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) +static int +irqsoff_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { return -EINVAL; } diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index 394f94417e2f..f3984098c0d7 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -62,7 +62,7 @@ static void nop_trace_reset(struct trace_array *tr) * If you don't implement it, then the flag setting will be * automatically accepted. */ -static int nop_set_flag(u32 old_flags, u32 bit, int set) +static int nop_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { /* * Note that you don't need to update nop_flags.val yourself. 
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 6e32635e5e57..f0bbdc261028 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -209,7 +209,8 @@ static void stop_func_tracer(int graph) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static int wakeup_set_flag(u32 old_flags, u32 bit, int set) +static int +wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { if (!(bit & TRACE_DISPLAY_GRAPH)) @@ -311,7 +312,8 @@ __trace_function(struct trace_array *tr, #else #define __trace_function trace_function -static int wakeup_set_flag(u32 old_flags, u32 bit, int set) +static int +wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { return -EINVAL; } -- cgit From bf6065b5c7014ab30383405718c7a6b96d2cbdb2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 10 Jan 2014 17:51:01 -0500 Subject: tracing: Pass trace_array to flag_changed callback As options (flags) may affect instances instead of being global the flag_changed() callbacks need to receive the trace_array descriptor of the instance they will be modifying. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 2 +- kernel/trace/trace_irqsoff.c | 4 +++- kernel/trace/trace_sched_wakeup.c | 4 +++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d7dfc7efc4bf..ee8da93e91e0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3393,7 +3393,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) /* Give the tracer a chance to approve the change */ if (tr->current_trace->flag_changed) - if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled)) + if (tr->current_trace->flag_changed(tr, mask, !!enabled)) return -EINVAL; if (enabled) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 649a23d421c1..36e44732c650 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -358,7 +358,7 @@ struct tracer { int (*set_flag)(struct trace_array *tr, u32 old_flags, u32 bit, int set); /* Return 0 if OK with change, else return non-zero */ - int (*flag_changed)(struct tracer *tracer, + int (*flag_changed)(struct trace_array *tr, u32 mask, int set); struct tracer *next; struct tracer_flags *flags; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index fd99b0c183ac..4bf812f454e6 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -572,8 +572,10 @@ static void irqsoff_function_set(int set) unregister_irqsoff_function(is_graph()); } -static int irqsoff_flag_changed(struct tracer *tracer, u32 mask, int set) +static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set) { + struct tracer *tracer = tr->current_trace; + if (mask & TRACE_ITER_FUNCTION) irqsoff_function_set(set); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index f0bbdc261028..e14da5e97a69 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -179,8 +179,10 @@ static void wakeup_function_set(int set) unregister_wakeup_function(is_graph()); } -static int wakeup_flag_changed(struct tracer *tracer, u32 mask, int set) +static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set) { + struct tracer *tracer = tr->current_trace; + if (mask & TRACE_ITER_FUNCTION) wakeup_function_set(set); -- cgit From 607e2ea167e56db84387f3ab97e59a862e101cab Mon Sep 17 00:00:00 2001 From: "Steven Rostedt 
(Red Hat)" Date: Wed, 6 Nov 2013 22:42:48 -0500 Subject: tracing: Set up infrastructure to allow tracers for instances Currently the tracers (function, function_graph, irqsoff, etc) can only be used by the top level tracing directory (not for instances). This sets up the infrastructure to allow instances to be able to run a separate tracer apart from the what the top level tracing is doing. As tracers need to adapt for being used by instances, the tracers must flag if they can be used by instances or not. Currently only the 'nop' tracer can be used by all instances. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 72 ++++++++++++++++++++++++++++++++++++++---------- kernel/trace/trace.h | 1 + kernel/trace/trace_nop.c | 3 +- 3 files changed, 60 insertions(+), 16 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ee8da93e91e0..944cd021aabf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -119,7 +119,7 @@ enum ftrace_dump_mode ftrace_dump_on_oops; /* When set, tracing will stop when a WARN*() is hit */ int __disable_trace_on_warning; -static int tracing_set_tracer(const char *buf); +static int tracing_set_tracer(struct trace_array *tr, const char *buf); #define MAX_TRACER_SIZE 100 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; @@ -1231,7 +1231,7 @@ int register_tracer(struct tracer *type) printk(KERN_INFO "Starting tracer '%s'\n", type->name); /* Do we want this tracer to start on bootup? */ - tracing_set_tracer(type->name); + tracing_set_tracer(&global_trace, type->name); default_bootup_tracer = NULL; /* disable other selftests, since this will break it. */ tracing_selftest_disabled = true; @@ -3122,27 +3122,52 @@ static int tracing_open(struct inode *inode, struct file *file) return ret; } +/* + * Some tracers are not suitable for instance buffers. + * A tracer is always available for the global array (toplevel) + * or if it explicitly states that it is. 
+ */ +static bool +trace_ok_for_array(struct tracer *t, struct trace_array *tr) +{ + return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances; +} + +/* Find the next tracer that this trace array may use */ +static struct tracer * +get_tracer_for_array(struct trace_array *tr, struct tracer *t) +{ + while (t && !trace_ok_for_array(t, tr)) + t = t->next; + + return t; +} + static void * t_next(struct seq_file *m, void *v, loff_t *pos) { + struct trace_array *tr = m->private; struct tracer *t = v; (*pos)++; if (t) - t = t->next; + t = get_tracer_for_array(tr, t->next); return t; } static void *t_start(struct seq_file *m, loff_t *pos) { + struct trace_array *tr = m->private; struct tracer *t; loff_t l = 0; mutex_lock(&trace_types_lock); - for (t = trace_types; t && l < *pos; t = t_next(m, t, &l)) - ; + + t = get_tracer_for_array(tr, trace_types); + for (; t && l < *pos; t = t_next(m, t, &l)) + ; return t; } @@ -3177,10 +3202,21 @@ static const struct seq_operations show_traces_seq_ops = { static int show_traces_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; + struct seq_file *m; + int ret; + if (tracing_disabled) return -ENODEV; - return seq_open(file, &show_traces_seq_ops); + ret = seq_open(file, &show_traces_seq_ops); + if (ret) + return ret; + + m = file->private_data; + m->private = tr; + + return 0; } static ssize_t @@ -3871,10 +3907,9 @@ create_trace_option_files(struct trace_array *tr, struct tracer *tracer); static void destroy_trace_option_files(struct trace_option_dentry *topts); -static int tracing_set_tracer(const char *buf) +static int tracing_set_tracer(struct trace_array *tr, const char *buf) { static struct trace_option_dentry *topts; - struct trace_array *tr = &global_trace; struct tracer *t; #ifdef CONFIG_TRACER_MAX_TRACE bool had_max_tr; @@ -3902,6 +3937,12 @@ static int tracing_set_tracer(const char *buf) if (t == tr->current_trace) goto out; + /* Some tracers are only allowed for the top level buffer */ + if (!trace_ok_for_array(t, tr)) { + ret = -EINVAL; + goto out; + } + trace_branch_disable(); tr->current_trace->enabled = false; @@ -3958,6 +3999,7 @@ static ssize_t tracing_set_trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { + struct trace_array *tr = filp->private_data; char buf[MAX_TRACER_SIZE+1]; int i; size_t ret; @@ -3977,7 +4019,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) buf[i] = 0; - err = tracing_set_tracer(buf); + err = tracing_set_tracer(tr, buf); if (err) return err; @@ -6193,6 +6235,12 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) { int cpu; + trace_create_file("available_tracers", 0444, d_tracer, + tr, &show_traces_fops); + + trace_create_file("current_tracer", 0644, d_tracer, + tr, &set_tracer_fops); + trace_create_file("tracing_cpumask", 0644, d_tracer, tr, &tracing_cpumask_fops); @@ -6245,12 +6293,6 @@ static __init int tracer_init_debugfs(void) init_tracer_debugfs(&global_trace, d_tracer); - trace_create_file("available_tracers", 0444, d_tracer, - &global_trace, &show_traces_fops); - - trace_create_file("current_tracer", 0644, d_tracer, - &global_trace, &set_tracer_fops); - #ifdef CONFIG_TRACER_MAX_TRACE trace_create_file("tracing_max_latency", 0644, d_tracer, &tracing_max_latency, &tracing_max_lat_fops); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 36e44732c650..ea51bb2004d2 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -364,6 
+364,7 @@ struct tracer { struct tracer_flags *flags; bool print_max; bool enabled; + bool allow_instances; #ifdef CONFIG_TRACER_MAX_TRACE bool use_max_tr; #endif diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index f3984098c0d7..69a5cc94c01a 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -96,6 +96,7 @@ struct tracer nop_trace __read_mostly = .selftest = trace_selftest_startup_nop, #endif .flags = &nop_flags, - .set_flag = nop_set_flag + .set_flag = nop_set_flag, + .allow_instances = true, }; -- cgit From f1b21c9a40704dfdf7b8423c7d2969ea31c9857d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 14 Jan 2014 12:33:33 -0500 Subject: tracing: Only let top level have option files Currently, only the top level instance can have tracing options. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 944cd021aabf..da9543cdbe7a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3968,9 +3968,11 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) free_snapshot(tr); } #endif - destroy_trace_option_files(topts); - - topts = create_trace_option_files(tr, t); + /* Currently, only the top instance has options */ + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) { + destroy_trace_option_files(topts); + topts = create_trace_option_files(tr, t); + } #ifdef CONFIG_TRACER_MAX_TRACE if (t->use_max_tr && !had_max_tr) { -- cgit From b7e00a6c53e9134d5cf7631582acaf027a5ded26 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 7 Nov 2013 09:36:25 -0500 Subject: ftrace: Add private data to ftrace_ops Passing data to the function callback was originally done by adding the ftrace_ops in another structure, and using the container_of() to get the field. But this adds a bit more complexity than it is worth, and adding a simple .private field to ftrace_ops makes things a lot easier. But be warned, the .private data should not be freed once it is used unless the ftrace_ops itself has gone through the necessary freeing routines. A simple synchronize_sched() is not enough as functions can be traced that are called outside the view of RCU and all its concoctions. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f4233b195dab..ef1607ed7044 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -105,11 +105,23 @@ enum { FTRACE_OPS_FL_INITIALIZED = 1 << 8, }; +/* + * Note, ftrace_ops can be referenced outside of RCU protection. + * (Although, for perf, the control ops prevent that). If ftrace_ops is + * allocated and not part of kernel core data, the unregistering of it will + * perform a scheduling on all CPUs to make sure that there are no more users. + * Depending on the load of the system that may take a bit of time. + * + * Any private data added must also take care not to be freed and if private + * data is added to a ftrace_ops that is in core code, the user of the + * ftrace_ops must perform a schedule_on_each_cpu() before freeing it. 
+ */ struct ftrace_ops { ftrace_func_t func; struct ftrace_ops *next; unsigned long flags; int __percpu *disabled; + void *private; #ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_hash *notrace_hash; struct ftrace_hash *filter_hash; -- cgit From e6435e96ec6f31a05690876a19e63e451f7b37e2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 10 Jan 2014 14:31:31 -0500 Subject: ftrace: Copy ops private to global_ops private If the global_ops function is being called directly, instead of the global_ops list function, set the global_ops private to be the same as the private of the ops that is being called directly. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cd7f76d1eb86..98ae4ed965db 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -244,7 +244,11 @@ static void control_ops_free(struct ftrace_ops *ops) static void update_global_ops(void) { - ftrace_func_t func; + ftrace_func_t func = ftrace_global_list_func; + void *private = NULL; + + /* The list has its own recursion protection. */ + global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE; /* * If there's only one function registered, then call that @@ -254,23 +258,17 @@ static void update_global_ops(void) if (ftrace_global_list == &ftrace_list_end || ftrace_global_list->next == &ftrace_list_end) { func = ftrace_global_list->func; + private = ftrace_global_list->private; /* * As we are calling the function directly. * If it does not have recursion protection, * the function_trace_op needs to be updated * accordingly. */ - if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) - global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE; - else + if (!(ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE)) global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE; - } else { - func = ftrace_global_list_func; - /* The list has its own recursion protection. */ - global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE; } - /* If we filter on pids, update to use the pid function */ if (!list_empty(&ftrace_pids)) { set_ftrace_pid_function(func); @@ -278,6 +276,7 @@ static void update_global_ops(void) } global_ops.func = func; + global_ops.private = private; } static void ftrace_sync(struct work_struct *work) -- cgit From 6b450d2533e2c1c71fbe7f1bdce0bb1c9f813030 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 14 Jan 2014 08:43:01 -0500 Subject: tracing: Disable tracers before deletion of instance When an instance is about to be deleted, make sure the tracer is set to nop. If it isn't, reset the tracer and set it to the nop tracer; otherwise memory leaks and bad pointers may result. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index da9543cdbe7a..7d5913bb46e8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3907,6 +3907,23 @@ create_trace_option_files(struct trace_array *tr, struct tracer *tracer); static void destroy_trace_option_files(struct trace_option_dentry *topts); +/* + * Used to clear out the tracer before deletion of an instance. + * Must have trace_types_lock held.
+ */ +static void tracing_set_nop(struct trace_array *tr) +{ + if (tr->current_trace == &nop_trace) + return; + + tr->current_trace->enabled = false; + + if (tr->current_trace->reset) + tr->current_trace->reset(tr); + + tr->current_trace = &nop_trace; +} + static int tracing_set_tracer(struct trace_array *tr, const char *buf) { static struct trace_option_dentry *topts; @@ -6142,6 +6159,7 @@ static int instance_delete(const char *name) list_del(&tr->list); + tracing_set_nop(tr); event_trace_del_tracer(tr); debugfs_remove_recursive(tr->dir); free_percpu(tr->trace_buffer.data); -- cgit From 50512ab576e1ce29953c9259e1f36ce16f350f20 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 14 Jan 2014 08:52:35 -0500 Subject: tracing: Convert tracer->enabled to counter As tracers will soon be used by instances, the tracer enabled field needs to be converted to a counter instead of a boolean. This counter is protected by the trace_types_lock mutex. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 6 +++--- kernel/trace/trace.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7d5913bb46e8..f9f22c435036 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3916,7 +3916,7 @@ static void tracing_set_nop(struct trace_array *tr) if (tr->current_trace == &nop_trace) return; - tr->current_trace->enabled = false; + tr->current_trace->enabled--; if (tr->current_trace->reset) tr->current_trace->reset(tr); @@ -3962,7 +3962,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) trace_branch_disable(); - tr->current_trace->enabled = false; + tr->current_trace->enabled--; if (tr->current_trace->reset) tr->current_trace->reset(tr); @@ -4006,7 +4006,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) } tr->current_trace = t; - tr->current_trace->enabled = true; + tr->current_trace->enabled++; trace_branch_enable(tr); out: mutex_unlock(&trace_types_lock); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ea51bb2004d2..86915b220bbe 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -362,8 +362,8 @@ struct tracer { u32 mask, int set); struct tracer *next; struct tracer_flags *flags; + int enabled; bool print_max; - bool enabled; bool allow_instances; #ifdef CONFIG_TRACER_MAX_TRACE bool use_max_tr; #endif -- cgit From f20a580627f43e73e4e57cb37e3864080ca06088 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 7 Nov 2013 20:08:58 -0500 Subject: ftrace: Allow instances to use function tracing Allow instances (sub-buffers) to enable function tracing. Each instance will have its own function tracing capability. For now, instances will not have function stack tracing, nor will they be able to pick and choose what functions they can trace. Picking and choosing their own functions will come later.
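To make the effect concrete, here is a minimal userspace sketch, assuming debugfs is mounted at /sys/kernel/debug and a kernel with this series applied; the instance name "foo" is arbitrary:

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
    	const char *inst = "/sys/kernel/debug/tracing/instances/foo";
    	char path[4096];
    	int fd;

    	/* mkdir on instances/ creates a new trace_array (sub-buffer) */
    	if (mkdir(inst, 0755) < 0 && errno != EEXIST) {
    		perror("mkdir");
    		return 1;
    	}

    	/* each instance now carries its own current_tracer file */
    	snprintf(path, sizeof(path), "%s/current_tracer", inst);
    	fd = open(path, O_WRONLY);
    	if (fd < 0) {
    		perror("open");
    		return 1;
    	}

    	/* the write fails with EINVAL for tracers without allow_instances */
    	if (write(fd, "function", strlen("function")) < 0)
    		perror("write");

    	close(fd);
    	return 0;
    }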
Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 5 ++ kernel/trace/trace_functions.c | 116 +++++++++++++++++++++++++++-------------- 2 files changed, 81 insertions(+), 40 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 86915b220bbe..35cca055da0f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -210,6 +210,11 @@ struct trace_array { struct list_head events; cpumask_var_t tracing_cpumask; /* only trace on set CPUs */ int ref; +#ifdef CONFIG_FUNCTION_TRACER + struct ftrace_ops *ops; + /* function tracing enabled */ + int function_enabled; +#endif }; enum { diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 85e517e84f50..3f8dc1ce8b9c 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -13,33 +13,83 @@ #include #include #include +#include #include #include "trace.h" -/* function tracing enabled */ -static int ftrace_function_enabled; +static void tracing_start_function_trace(struct trace_array *tr); +static void tracing_stop_function_trace(struct trace_array *tr); +static void +function_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs); +static void +function_stack_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs); +static struct ftrace_ops trace_ops; +static struct ftrace_ops trace_stack_ops; +static struct tracer_flags func_flags; + +/* Our option */ +enum { + TRACE_FUNC_OPT_STACK = 0x1, +}; + +static int allocate_ftrace_ops(struct trace_array *tr) +{ + struct ftrace_ops *ops; -static struct trace_array *func_trace; + ops = kzalloc(sizeof(*ops), GFP_KERNEL); + if (!ops) + return -ENOMEM; -static void tracing_start_function_trace(void); -static void tracing_stop_function_trace(void); + /* Currently only the non stack verision is supported */ + ops->func = function_trace_call; + ops->flags = FTRACE_OPS_FL_RECURSION_SAFE; + + tr->ops = ops; + ops->private = tr; + return 0; +} static int function_trace_init(struct trace_array *tr) { - func_trace = tr; + struct ftrace_ops *ops; + int ret; + + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) { + /* There's only one global tr */ + if (!trace_ops.private) { + trace_ops.private = tr; + trace_stack_ops.private = tr; + } + + if (func_flags.val & TRACE_FUNC_OPT_STACK) + ops = &trace_stack_ops; + else + ops = &trace_ops; + tr->ops = ops; + } else { + ret = allocate_ftrace_ops(tr); + if (ret) + return ret; + } + tr->trace_buffer.cpu = get_cpu(); put_cpu(); tracing_start_cmdline_record(); - tracing_start_function_trace(); + tracing_start_function_trace(tr); return 0; } static void function_trace_reset(struct trace_array *tr) { - tracing_stop_function_trace(); + tracing_stop_function_trace(tr); tracing_stop_cmdline_record(); + if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) + kfree(tr->ops); + tr->ops = NULL; } static void function_trace_start(struct trace_array *tr) @@ -47,25 +97,18 @@ static void function_trace_start(struct trace_array *tr) tracing_reset_online_cpus(&tr->trace_buffer); } -/* Our option */ -enum { - TRACE_FUNC_OPT_STACK = 0x1, -}; - -static struct tracer_flags func_flags; - static void function_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) { - struct trace_array *tr = func_trace; + struct trace_array *tr = op->private; struct trace_array_cpu *data; unsigned long flags; int bit; int cpu; int pc; - if (unlikely(!ftrace_function_enabled)) + if (unlikely(!tr->function_enabled)) return; 
pc = preempt_count(); @@ -91,14 +134,14 @@ static void function_stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) { - struct trace_array *tr = func_trace; + struct trace_array *tr = op->private; struct trace_array_cpu *data; unsigned long flags; long disabled; int cpu; int pc; - if (unlikely(!ftrace_function_enabled)) + if (unlikely(!tr->function_enabled)) return; /* @@ -128,7 +171,6 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, local_irq_restore(flags); } - static struct ftrace_ops trace_ops __read_mostly = { .func = function_trace_call, @@ -153,26 +195,17 @@ static struct tracer_flags func_flags = { .opts = func_opts }; -static void tracing_start_function_trace(void) +static void tracing_start_function_trace(struct trace_array *tr) { - ftrace_function_enabled = 0; - - if (func_flags.val & TRACE_FUNC_OPT_STACK) - register_ftrace_function(&trace_stack_ops); - else - register_ftrace_function(&trace_ops); - - ftrace_function_enabled = 1; + tr->function_enabled = 0; + register_ftrace_function(tr->ops); + tr->function_enabled = 1; } -static void tracing_stop_function_trace(void) +static void tracing_stop_function_trace(struct trace_array *tr) { - ftrace_function_enabled = 0; - - if (func_flags.val & TRACE_FUNC_OPT_STACK) - unregister_ftrace_function(&trace_stack_ops); - else - unregister_ftrace_function(&trace_ops); + tr->function_enabled = 0; + unregister_ftrace_function(tr->ops); } static int @@ -184,12 +217,14 @@ func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK)) break; + unregister_ftrace_function(tr->ops); + if (set) { - unregister_ftrace_function(&trace_ops); - register_ftrace_function(&trace_stack_ops); + tr->ops = &trace_stack_ops; + register_ftrace_function(tr->ops); } else { - unregister_ftrace_function(&trace_stack_ops); - register_ftrace_function(&trace_ops); + tr->ops = &trace_ops; + register_ftrace_function(tr->ops); } break; @@ -209,6 +244,7 @@ static struct tracer function_trace __tracer_data = .wait_pipe = poll_wait_pipe, .flags = &func_flags, .set_flag = func_set_flag, + .allow_instances = true, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_function, #endif -- cgit From e3b3e2e847080e3cc14bee778c6ced3d59bfd76c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 11 Nov 2013 23:07:14 -0500 Subject: ftrace: Pass in global_ops for use with filtering files In preparation for having the function tracing instances be able to filter on functions, the generic filter functions must first be converted to take in the global_ops as a parameter. 
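The conversion leans on a detail worth making explicit: trace_create_file() stores its data argument in inode->i_private, so whatever ftrace_ops is supplied when the file is created is what the open handler reads back. A condensed sketch of the pattern, abbreviated from the diff below:

    /* at file-creation time, hand the ops to the file: */
    trace_create_file("set_ftrace_filter", 0644, d_tracer,
    		  &global_ops, &ftrace_filter_fops);

    /* at open time, recover it instead of hardcoding global_ops: */
    static int ftrace_filter_open(struct inode *inode, struct file *file)
    {
    	struct ftrace_ops *ops = inode->i_private;

    	return ftrace_regex_open(ops,
    				 FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH,
    				 inode, file);
    }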
Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 98ae4ed965db..2b3e23991c8a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2870,7 +2870,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, static int ftrace_filter_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(&global_ops, + struct ftrace_ops *ops = inode->i_private; + + return ftrace_regex_open(ops, FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH, inode, file); } @@ -2878,7 +2880,9 @@ ftrace_filter_open(struct inode *inode, struct file *file) static int ftrace_notrace_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(&global_ops, FTRACE_ITER_NOTRACE, + struct ftrace_ops *ops = inode->i_private; + + return ftrace_regex_open(ops, FTRACE_ITER_NOTRACE, inode, file); } @@ -4118,10 +4122,10 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) d_tracer, NULL, &ftrace_enabled_fops); trace_create_file("set_ftrace_filter", 0644, d_tracer, - NULL, &ftrace_filter_fops); + &global_ops, &ftrace_filter_fops); trace_create_file("set_ftrace_notrace", 0644, d_tracer, - NULL, &ftrace_notrace_fops); + &global_ops, &ftrace_notrace_fops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER trace_create_file("set_graph_function", 0444, d_tracer, -- cgit From 591dffdade9f07692a7dd3ed16830ec24e901ece Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 10 Jan 2014 16:17:45 -0500 Subject: ftrace: Allow for function tracing instance to filter functions Create a "set_ftrace_filter" and "set_ftrace_notrace" files in the instance directories to let users filter of functions to trace for the given instance. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 ++ kernel/trace/ftrace.c | 39 ++++++++++++++++++++++++++++++++++----- kernel/trace/trace.c | 4 ++++ kernel/trace/trace.h | 25 ++++++++++++++++++++++++- kernel/trace/trace_functions.c | 40 ++++++++++++++++++++++++++++++++-------- 5 files changed, 96 insertions(+), 14 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ef1607ed7044..e6141be2fad5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -92,6 +92,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, * STUB - The ftrace_ops is just a place holder. * INITIALIZED - The ftrace_ops has already been initialized (first use time * register_ftrace_function() is called, it will initialized the ops) + * DELETED - The ops are being deleted, do not let them be registered again. 
*/ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -103,6 +104,7 @@ enum { FTRACE_OPS_FL_RECURSION_SAFE = 1 << 6, FTRACE_OPS_FL_STUB = 1 << 7, FTRACE_OPS_FL_INITIALIZED = 1 << 8, + FTRACE_OPS_FL_DELETED = 1 << 9, }; /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2b3e23991c8a..dcee546f21bc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -436,6 +436,9 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list, static int __register_ftrace_function(struct ftrace_ops *ops) { + if (ops->flags & FTRACE_OPS_FL_DELETED) + return -EINVAL; + if (FTRACE_WARN_ON(ops == &global_ops)) return -EINVAL; @@ -4112,6 +4115,36 @@ static const struct file_operations ftrace_graph_notrace_fops = { }; #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +void ftrace_create_filter_files(struct ftrace_ops *ops, + struct dentry *parent) +{ + + trace_create_file("set_ftrace_filter", 0644, parent, + ops, &ftrace_filter_fops); + + trace_create_file("set_ftrace_notrace", 0644, parent, + ops, &ftrace_notrace_fops); +} + +/* + * The name "destroy_filter_files" is really a misnomer. Although + * in the future, it may actualy delete the files, but this is + * really intended to make sure the ops passed in are disabled + * and that when this function returns, the caller is free to + * free the ops. + * + * The "destroy" name is only to match the "create" name that this + * should be paired with. + */ +void ftrace_destroy_filter_files(struct ftrace_ops *ops) +{ + mutex_lock(&ftrace_lock); + if (ops->flags & FTRACE_OPS_FL_ENABLED) + ftrace_shutdown(ops, 0); + ops->flags |= FTRACE_OPS_FL_DELETED; + mutex_unlock(&ftrace_lock); +} + static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { @@ -4121,11 +4154,7 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) trace_create_file("enabled_functions", 0444, d_tracer, NULL, &ftrace_enabled_fops); - trace_create_file("set_ftrace_filter", 0644, d_tracer, - &global_ops, &ftrace_filter_fops); - - trace_create_file("set_ftrace_notrace", 0644, d_tracer, - &global_ops, &ftrace_notrace_fops); + ftrace_create_filter_files(&global_ops, d_tracer); #ifdef CONFIG_FUNCTION_GRAPH_TRACER trace_create_file("set_graph_function", 0444, d_tracer, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f9f22c435036..d95ec2876bbb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6161,6 +6161,7 @@ static int instance_delete(const char *name) tracing_set_nop(tr); event_trace_del_tracer(tr); + ftrace_destroy_function_files(tr); debugfs_remove_recursive(tr->dir); free_percpu(tr->trace_buffer.data); ring_buffer_free(tr->trace_buffer.buffer); @@ -6291,6 +6292,9 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) trace_create_file("tracing_on", 0644, d_tracer, tr, &rb_simple_fops); + if (ftrace_create_function_files(tr, d_tracer)) + WARN(1, "Could not allocate function filter files"); + #ifdef CONFIG_TRACER_SNAPSHOT trace_create_file("snapshot", 0644, d_tracer, tr, &snapshot_fops); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 35cca055da0f..ffc314b7e92b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -819,13 +819,36 @@ static inline int ftrace_trace_task(struct task_struct *task) return test_tsk_trace_trace(task); } extern int ftrace_is_dead(void); +int ftrace_create_function_files(struct trace_array *tr, + struct dentry *parent); +void ftrace_destroy_function_files(struct trace_array *tr); #else static inline int ftrace_trace_task(struct task_struct *task) { return 1; } static inline 
int ftrace_is_dead(void) { return 0; } -#endif +static inline int +ftrace_create_function_files(struct trace_array *tr, + struct dentry *parent) +{ + return 0; +} +static inline void ftrace_destroy_function_files(struct trace_array *tr) { } +#endif /* CONFIG_FUNCTION_TRACER */ + +#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) +void ftrace_create_filter_files(struct ftrace_ops *ops, + struct dentry *parent); +void ftrace_destroy_filter_files(struct ftrace_ops *ops); +#else +/* + * The ops parameter passed in is usually undefined. + * This must be a macro. + */ +#define ftrace_create_filter_files(ops, parent) do { } while (0) +#define ftrace_destroy_filter_files(ops) do { } while (0) +#endif /* CONFIG_FUNCTION_TRACER && CONFIG_DYNAMIC_FTRACE */ int ftrace_event_is_function(struct ftrace_event_call *call); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 3f8dc1ce8b9c..5b781d2be383 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -52,10 +52,34 @@ static int allocate_ftrace_ops(struct trace_array *tr) return 0; } + +int ftrace_create_function_files(struct trace_array *tr, + struct dentry *parent) +{ + int ret; + + /* The top level array uses the "global_ops". */ + if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) { + ret = allocate_ftrace_ops(tr); + if (ret) + return ret; + } + + ftrace_create_filter_files(tr->ops, parent); + + return 0; +} + +void ftrace_destroy_function_files(struct trace_array *tr) +{ + ftrace_destroy_filter_files(tr->ops); + kfree(tr->ops); + tr->ops = NULL; +} + static int function_trace_init(struct trace_array *tr) { struct ftrace_ops *ops; - int ret; if (tr->flags & TRACE_ARRAY_FL_GLOBAL) { /* There's only one global tr */ @@ -69,10 +93,13 @@ static int function_trace_init(struct trace_array *tr) else ops = &trace_ops; tr->ops = ops; - } else { - ret = allocate_ftrace_ops(tr); - if (ret) - return ret; + } else if (!tr->ops) { + /* + * Instance trace_arrays get their ops allocated + * at instance creation. Unless it failed + * the allocation. + */ + return -ENOMEM; } tr->trace_buffer.cpu = get_cpu(); @@ -87,9 +114,6 @@ static void function_trace_reset(struct trace_array *tr) { tracing_stop_function_trace(tr); tracing_stop_cmdline_record(); - if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) - kfree(tr->ops); - tr->ops = NULL; } static void function_trace_start(struct trace_array *tr) -- cgit From a43b97043048eac1686f409af7ad3bb8071b9d83 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 17 Jan 2014 17:08:36 +0900 Subject: tracing/uprobes: Rename uprobe_{trace,perf}_print() functions The uprobe_{trace,perf}_print functions are misnomers since what they do is not printing. There's also a real print function named print_uprobe_event() so they'll only increase confusion IMHO. Rename them with double underscores to follow convention of kprobe. 
Link: http://lkml.kernel.org/r/1389946120-19610-2-git-send-email-namhyung@kernel.org Reviewed-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Srikar Dronamraju Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/trace_uprobe.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 79e52d93860b..c5d2612bf233 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -758,7 +758,7 @@ static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) mutex_unlock(&ucb->mutex); } -static void uprobe_trace_print(struct trace_uprobe *tu, +static void __uprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs) { struct uprobe_trace_entry_head *entry; @@ -807,14 +807,14 @@ out: static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) { if (!is_ret_probe(tu)) - uprobe_trace_print(tu, 0, regs); + __uprobe_trace_func(tu, 0, regs); return 0; } static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs) { - uprobe_trace_print(tu, func, regs); + __uprobe_trace_func(tu, func, regs); } /* Event entry printers */ @@ -1014,7 +1014,7 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc, return ret; } -static void uprobe_perf_print(struct trace_uprobe *tu, +static void __uprobe_perf_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs) { struct ftrace_event_call *call = &tu->tp.call; @@ -1078,14 +1078,14 @@ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) return UPROBE_HANDLER_REMOVE; if (!is_ret_probe(tu)) - uprobe_perf_print(tu, 0, regs); + __uprobe_perf_func(tu, 0, regs); return 0; } static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs) { - uprobe_perf_print(tu, func, regs); + __uprobe_perf_func(tu, func, regs); } #endif /* CONFIG_PERF_EVENTS */ -- cgit From dd9fa555d7bbfcc7dbc63eb744806e9f6cb62e9f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 17 Jan 2014 17:08:37 +0900 Subject: tracing/uprobes: Move argument fetching to uprobe_dispatcher() A single uprobe event might serve different users like ftrace and perf. And this is especially important for upcoming multi-buffer support. But in that case it will fetch the same data from userspace multiple times. So move the fetching to the beginning of the dispatcher function and reuse the data for each user.
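In outline, the dispatcher after this change performs the fetch once and hands the same per-cpu buffer to every consumer; a condensed view of the full diff that follows:

    /* condensed shape of uprobe_dispatcher() after this patch */
    dsize = __get_data_size(&tu->tp, regs);
    esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));

    ucb = uprobe_buffer_get();
    store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);

    if (tu->tp.flags & TP_FLAG_TRACE)
    	ret |= uprobe_trace_func(tu, regs, ucb, dsize);
    if (tu->tp.flags & TP_FLAG_PROFILE)
    	ret |= uprobe_perf_func(tu, regs, ucb, dsize);

    uprobe_buffer_put(ucb);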
Link: http://lkml.kernel.org/r/1389946120-19610-3-git-send-email-namhyung@kernel.org Reviewed-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: zhangwei(Jovi) Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/trace_uprobe.c | 93 +++++++++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 37 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index c5d2612bf233..d83155e0da78 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -759,30 +759,25 @@ static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) } static void __uprobe_trace_func(struct trace_uprobe *tu, - unsigned long func, struct pt_regs *regs) + unsigned long func, struct pt_regs *regs, + struct uprobe_cpu_buffer *ucb, int dsize) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; - struct uprobe_cpu_buffer *ucb; void *data; - int size, dsize, esize; + int size, esize; struct ftrace_event_call *call = &tu->tp.call; - dsize = __get_data_size(&tu->tp, regs); - esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); - - if (WARN_ON_ONCE(!uprobe_cpu_buffer || tu->tp.size + dsize > PAGE_SIZE)) + if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE)) return; - ucb = uprobe_buffer_get(); - store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize); - + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; event = trace_current_buffer_lock_reserve(&buffer, call->event.type, size, 0, 0); if (!event) - goto out; + return; entry = ring_buffer_event_data(event); if (is_ret_probe(tu)) { @@ -798,23 +793,22 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, if (!call_filter_check_discard(call, entry, buffer, event)) trace_buffer_unlock_commit(buffer, event, 0, 0); - -out: - uprobe_buffer_put(ucb); } /* uprobe handler */ -static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) +static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, + struct uprobe_cpu_buffer *ucb, int dsize) { if (!is_ret_probe(tu)) - __uprobe_trace_func(tu, 0, regs); + __uprobe_trace_func(tu, 0, regs, ucb, dsize); return 0; } static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, - struct pt_regs *regs) + struct pt_regs *regs, + struct uprobe_cpu_buffer *ucb, int dsize) { - __uprobe_trace_func(tu, func, regs); + __uprobe_trace_func(tu, func, regs, ucb, dsize); } /* Event entry printers */ @@ -1015,30 +1009,23 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc, } static void __uprobe_perf_func(struct trace_uprobe *tu, - unsigned long func, struct pt_regs *regs) + unsigned long func, struct pt_regs *regs, + struct uprobe_cpu_buffer *ucb, int dsize) { struct ftrace_event_call *call = &tu->tp.call; struct uprobe_trace_entry_head *entry; struct hlist_head *head; - struct uprobe_cpu_buffer *ucb; void *data; - int size, dsize, esize; + int size, esize; int rctx; - dsize = __get_data_size(&tu->tp, regs); esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); - if (WARN_ON_ONCE(!uprobe_cpu_buffer)) - return; - size = esize + tu->tp.size + dsize; size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) return; - ucb = uprobe_buffer_get(); - store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize); - preempt_disable(); head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) @@ -1068,24 +1055,25 @@ static void 
__uprobe_perf_func(struct trace_uprobe *tu, perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); out: preempt_enable(); - uprobe_buffer_put(ucb); } /* uprobe profile handler */ -static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) +static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs, + struct uprobe_cpu_buffer *ucb, int dsize) { if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) return UPROBE_HANDLER_REMOVE; if (!is_ret_probe(tu)) - __uprobe_perf_func(tu, 0, regs); + __uprobe_perf_func(tu, 0, regs, ucb, dsize); return 0; } static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, - struct pt_regs *regs) + struct pt_regs *regs, + struct uprobe_cpu_buffer *ucb, int dsize) { - __uprobe_perf_func(tu, func, regs); + __uprobe_perf_func(tu, func, regs, ucb, dsize); } #endif /* CONFIG_PERF_EVENTS */ @@ -1127,8 +1115,11 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) { struct trace_uprobe *tu; struct uprobe_dispatch_data udd; + struct uprobe_cpu_buffer *ucb; + int dsize, esize; int ret = 0; + tu = container_of(con, struct trace_uprobe, consumer); tu->nhit++; @@ -1137,13 +1128,29 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) current->utask->vaddr = (unsigned long) &udd; +#ifdef CONFIG_PERF_EVENTS + if ((tu->tp.flags & TP_FLAG_TRACE) == 0 && + !uprobe_perf_filter(&tu->consumer, 0, current->mm)) + return UPROBE_HANDLER_REMOVE; +#endif + + if (WARN_ON_ONCE(!uprobe_cpu_buffer)) + return 0; + + dsize = __get_data_size(&tu->tp, regs); + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + + ucb = uprobe_buffer_get(); + store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize); + if (tu->tp.flags & TP_FLAG_TRACE) - ret |= uprobe_trace_func(tu, regs); + ret |= uprobe_trace_func(tu, regs, ucb, dsize); #ifdef CONFIG_PERF_EVENTS if (tu->tp.flags & TP_FLAG_PROFILE) - ret |= uprobe_perf_func(tu, regs); + ret |= uprobe_perf_func(tu, regs, ucb, dsize); #endif + uprobe_buffer_put(ucb); return ret; } @@ -1152,6 +1159,8 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, { struct trace_uprobe *tu; struct uprobe_dispatch_data udd; + struct uprobe_cpu_buffer *ucb; + int dsize, esize; tu = container_of(con, struct trace_uprobe, consumer); @@ -1160,13 +1169,23 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, current->utask->vaddr = (unsigned long) &udd; + if (WARN_ON_ONCE(!uprobe_cpu_buffer)) + return 0; + + dsize = __get_data_size(&tu->tp, regs); + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + + ucb = uprobe_buffer_get(); + store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize); + if (tu->tp.flags & TP_FLAG_TRACE) - uretprobe_trace_func(tu, func, regs); + uretprobe_trace_func(tu, func, regs, ucb, dsize); #ifdef CONFIG_PERF_EVENTS if (tu->tp.flags & TP_FLAG_PROFILE) - uretprobe_perf_func(tu, func, regs); + uretprobe_perf_func(tu, func, regs, ucb, dsize); #endif + uprobe_buffer_put(ucb); return 0; } -- cgit From 70ed91c6ec7f8bf20369634017d887d48ac979d2 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Fri, 17 Jan 2014 17:08:38 +0900 Subject: tracing/uprobes: Support ftrace_event_file base multibuffer Support multi-buffer on uprobe-based dynamic events by using ftrace_event_file. 
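With this in place the same uprobe event can be enabled independently per instance. A minimal userspace sketch, assuming debugfs at /sys/kernel/debug; the event name "myprobe", the target binary and the probe offset are illustrative only:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/stat.h>
    #include <unistd.h>

    static int write_str(const char *path, const char *s)
    {
    	int fd = open(path, O_WRONLY);

    	if (fd < 0)
    		return -1;
    	if (write(fd, s, strlen(s)) < 0) {
    		close(fd);
    		return -1;
    	}
    	return close(fd);
    }

    int main(void)
    {
    	const char *tracing = "/sys/kernel/debug/tracing";
    	char path[4096];

    	/* define the event once, globally (offset is illustrative) */
    	snprintf(path, sizeof(path), "%s/uprobe_events", tracing);
    	if (write_str(path, "p:myprobe /bin/bash:0x4245c0") < 0)
    		perror("uprobe_events");

    	/* create an instance and enable the same event inside it */
    	snprintf(path, sizeof(path), "%s/instances/foo", tracing);
    	if (mkdir(path, 0755) < 0)
    		perror("mkdir");

    	snprintf(path, sizeof(path),
    		 "%s/instances/foo/events/uprobes/myprobe/enable", tracing);
    	if (write_str(path, "1") < 0)
    		perror("enable");

    	return 0;
    }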
This patch is based on the kprobe-based dynamic events multi-buffer support work initially committed by Masami (commit 41a7dd420c), but revised as below: Oleg changed the kprobe-based multi-buffer design from array pointers of ftrace_event_file to a simple list, so this patch also changes to the list design. rcu_read_lock/unlock are added to uprobe_trace_func/uretprobe_trace_func to synchronize with ftrace_event_file list add and delete. Even though we allow multiple uprobe instances now, TP_FLAG_PROFILE/TP_FLAG_TRACE are still mutually exclusive in probe_event_enable currently; this means we cannot have one user using the uprobe tracer while another user uses perf-probe on the same uprobe concurrently. (Perhaps this will be fixed in the future; kprobes don't have this limitation now.) Link: http://lkml.kernel.org/r/1389946120-19610-4-git-send-email-namhyung@kernel.org Reviewed-by: Masami Hiramatsu Reviewed-by: Oleg Nesterov Cc: Ingo Molnar Cc: Frederic Weisbecker Cc: Srikar Dronamraju Signed-off-by: zhangwei(Jovi) Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 17 ------- kernel/trace/trace_probe.h | 17 +++++++ kernel/trace/trace_uprobe.c | 105 +++++++++++++++++++++++++++++++++++--------- 3 files changed, 101 insertions(+), 38 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index bdbae450c13e..d021d21dd150 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -35,11 +35,6 @@ struct trace_kprobe { struct trace_probe tp; }; -struct event_file_link { - struct ftrace_event_file *file; - struct list_head list; -}; - #define SIZEOF_TRACE_KPROBE(n) \ (offsetof(struct trace_kprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) @@ -387,18 +382,6 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file) return ret; } -static struct event_file_link * -find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) -{ - struct event_file_link *link; - - list_for_each_entry(link, &tp->files, list) - if (link->file == file) - return link; - - return NULL; -} - /* * Disable trace_probe * if the file is NULL, disable "perf" handler, or disable "trace" handler.
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index b73574a5f429..fb1ab5dfbd42 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -288,6 +288,11 @@ struct trace_probe { struct probe_arg args[]; }; +struct event_file_link { + struct ftrace_event_file *file; + struct list_head list; +}; + static inline bool trace_probe_is_enabled(struct trace_probe *tp) { return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE)); @@ -316,6 +321,18 @@ static inline int is_good_name(const char *name) return 1; } +static inline struct event_file_link * +find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) +{ + struct event_file_link *link; + + list_for_each_entry(link, &tp->files, list) + if (link->file == file) + return link; + + return NULL; +} + extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size, struct probe_arg *parg, bool is_return, bool is_kprobe); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index d83155e0da78..349c6df9e332 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -260,6 +260,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) goto error; INIT_LIST_HEAD(&tu->list); + INIT_LIST_HEAD(&tu->tp.files); tu->consumer.handler = uprobe_dispatcher; if (is_ret) tu->consumer.ret_handler = uretprobe_dispatcher; @@ -760,7 +761,8 @@ static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) static void __uprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb, int dsize) + struct uprobe_cpu_buffer *ucb, int dsize, + struct ftrace_event_file *ftrace_file) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; @@ -769,13 +771,15 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, int size, esize; struct ftrace_event_call *call = &tu->tp.call; + WARN_ON(call != ftrace_file->event_call); + if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE)) return; esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; - event = trace_current_buffer_lock_reserve(&buffer, call->event.type, - size, 0, 0); + event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, + call->event.type, size, 0, 0); if (!event) return; @@ -799,8 +803,16 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, struct uprobe_cpu_buffer *ucb, int dsize) { - if (!is_ret_probe(tu)) - __uprobe_trace_func(tu, 0, regs, ucb, dsize); + struct event_file_link *link; + + if (is_ret_probe(tu)) + return 0; + + rcu_read_lock(); + list_for_each_entry_rcu(link, &tu->tp.files, list) + __uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file); + rcu_read_unlock(); + return 0; } @@ -808,7 +820,12 @@ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, struct uprobe_cpu_buffer *ucb, int dsize) { - __uprobe_trace_func(tu, func, regs, ucb, dsize); + struct event_file_link *link; + + rcu_read_lock(); + list_for_each_entry_rcu(link, &tu->tp.files, list) + __uprobe_trace_func(tu, func, regs, ucb, dsize, link->file); + rcu_read_unlock(); } /* Event entry printers */ @@ -855,12 +872,31 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self, struct mm_struct *mm); static int -probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter) +probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file, + filter_func_t filter) { - int ret = 
0; + bool enabled = trace_probe_is_enabled(&tu->tp); + struct event_file_link *link = NULL; + int ret; + + if (file) { + if (tu->tp.flags & TP_FLAG_PROFILE) + return -EINTR; - if (trace_probe_is_enabled(&tu->tp)) - return -EINTR; + link = kmalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return -ENOMEM; + + link->file = file; + list_add_tail_rcu(&link->list, &tu->tp.files); + + tu->tp.flags |= TP_FLAG_TRACE; + } else { + if (tu->tp.flags & TP_FLAG_TRACE) + return -EINTR; + + tu->tp.flags |= TP_FLAG_PROFILE; + } ret = uprobe_buffer_enable(); if (ret < 0) @@ -868,24 +904,49 @@ probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter) WARN_ON(!uprobe_filter_is_empty(&tu->filter)); - tu->tp.flags |= flag; + if (enabled) + return 0; + tu->consumer.filter = filter; ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); - if (ret) - tu->tp.flags &= ~flag; + if (ret) { + if (file) { + list_del(&link->list); + kfree(link); + tu->tp.flags &= ~TP_FLAG_TRACE; + } else + tu->tp.flags &= ~TP_FLAG_PROFILE; + } return ret; } -static void probe_event_disable(struct trace_uprobe *tu, int flag) +static void +probe_event_disable(struct trace_uprobe *tu, struct ftrace_event_file *file) { if (!trace_probe_is_enabled(&tu->tp)) return; + if (file) { + struct event_file_link *link; + + link = find_event_file_link(&tu->tp, file); + if (!link) + return; + + list_del_rcu(&link->list); + /* synchronize with u{,ret}probe_trace_func */ + synchronize_sched(); + kfree(link); + + if (!list_empty(&tu->tp.files)) + return; + } + WARN_ON(!uprobe_filter_is_empty(&tu->filter)); uprobe_unregister(tu->inode, tu->offset, &tu->consumer); - tu->tp.flags &= ~flag; + tu->tp.flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE; uprobe_buffer_disable(); } @@ -1077,25 +1138,27 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, } #endif /* CONFIG_PERF_EVENTS */ -static -int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data) +static int +trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, + void *data) { struct trace_uprobe *tu = event->data; + struct ftrace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: - return probe_event_enable(tu, TP_FLAG_TRACE, NULL); + return probe_event_enable(tu, file, NULL); case TRACE_REG_UNREGISTER: - probe_event_disable(tu, TP_FLAG_TRACE); + probe_event_disable(tu, file); return 0; #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: - return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter); + return probe_event_enable(tu, NULL, uprobe_perf_filter); case TRACE_REG_PERF_UNREGISTER: - probe_event_disable(tu, TP_FLAG_PROFILE); + probe_event_disable(tu, NULL); return 0; case TRACE_REG_PERF_OPEN: -- cgit From ca3b162021a421b38a9cd7b66555b9b01568dc9d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 17 Jan 2014 17:08:39 +0900 Subject: tracing/uprobes: Support event triggering Add support for event triggering to uprobes. This is same as kprobes support added by Tom (plus cleanup by Steven). 
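For context, triggers are driven through the event's "trigger" file; a minimal sketch, assuming a uprobe event "myprobe" was already created via uprobe_events:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
    	const char *path =
    		"/sys/kernel/debug/tracing/events/uprobes/myprobe/trigger";
    	int fd = open(path, O_WRONLY);

    	if (fd < 0) {
    		perror("open");
    		return 1;
    	}

    	/* record a kernel stack trace every time the uprobe fires */
    	if (write(fd, "stacktrace", strlen("stacktrace")) < 0)
    		perror("write");

    	close(fd);
    	return 0;
    }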
Link: http://lkml.kernel.org/r/1389946120-19610-5-git-send-email-namhyung@kernel.org Reviewed-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: zhangwei(Jovi) Cc: Tom Zanussi Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/trace_uprobe.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 349c6df9e332..01fcb0db75cb 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -776,6 +776,9 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE)) return; + if (ftrace_trigger_soft_disabled(ftrace_file)) + return; + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, @@ -795,8 +798,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, memcpy(data, ucb->buf, tu->tp.size + dsize); - if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(buffer, event, 0, 0); + event_trigger_unlock_commit(ftrace_file, buffer, event, entry, 0, 0); } /* uprobe handler */ -- cgit From 43fe98913c9f67e3b523615ee3316f9520a623e0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 17 Jan 2014 17:08:40 +0900 Subject: tracing/uprobes: Support mix of ftrace and perf It seems there's no reason to prevent mixed use of ftrace and perf for a single uprobe event. At least kprobes already supports it. Link: http://lkml.kernel.org/r/1389946120-19610-6-git-send-email-namhyung@kernel.org Reviewed-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: zhangwei(Jovi) Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/trace_uprobe.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 01fcb0db75cb..e4473367e7a4 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -882,9 +882,6 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file, int ret; if (file) { - if (tu->tp.flags & TP_FLAG_PROFILE) - return -EINTR; - link = kmalloc(sizeof(*link), GFP_KERNEL); if (!link) return -ENOMEM; @@ -893,12 +890,8 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file, list_add_tail_rcu(&link->list, &tu->tp.files); tu->tp.flags |= TP_FLAG_TRACE; - } else { - if (tu->tp.flags & TP_FLAG_TRACE) - return -EINTR; - + } else tu->tp.flags |= TP_FLAG_PROFILE; - } ret = uprobe_buffer_enable(); if (ret < 0) -- cgit From e1e232ca6b8faa210e5509f17d55519b4392524f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 10 Feb 2014 23:38:46 -0500 Subject: tracing: Add trace_clock= kernel parameter Being able to change the trace clock at boot can be advantageous if you need a better source of when things happen across CPUs. The default trace clock is the fastest, but it uses local clocks which may not be synced across CPUs and it does not let you know when events took place with respect to events on other CPUs. The global trace clock can help in this case, and if you do not care about timings, the counter "clock" is the best, as that is just a simple atomic counter that is incremented for every event. Usage is to add "trace_clock=counter" on the kernel command line.
You can replace counter with "global" or any of the clocks listed in /sys/kernel/debug/tracing/trace_clock. Suggested-by: Thomas Gleixner Tested-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 61 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d95ec2876bbb..c90f55d80f86 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -181,6 +181,17 @@ static int __init set_trace_boot_options(char *str) } __setup("trace_options=", set_trace_boot_options); +static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata; +static char *trace_boot_clock __initdata; + +static int __init set_trace_boot_clock(char *str) +{ + strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); + trace_boot_clock = trace_boot_clock_buf; + return 0; +} +__setup("trace_clock=", set_trace_boot_clock); + unsigned long long ns2usecs(cycle_t nsec) { @@ -4746,25 +4757,10 @@ static int tracing_clock_show(struct seq_file *m, void *v) return 0; } -static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *fpos) +static int tracing_set_clock(struct trace_array *tr, const char *clockstr) { - struct seq_file *m = filp->private_data; - struct trace_array *tr = m->private; - char buf[64]; - const char *clockstr; int i; - if (cnt >= sizeof(buf)) - return -EINVAL; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - clockstr = strstrip(buf); - for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) { if (strcmp(trace_clocks[i].name, clockstr) == 0) break; @@ -4792,6 +4788,32 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, mutex_unlock(&trace_types_lock); + return 0; +} + +static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *fpos) +{ + struct seq_file *m = filp->private_data; + struct trace_array *tr = m->private; + char buf[64]; + const char *clockstr; + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + clockstr = strstrip(buf); + + ret = tracing_set_clock(tr, clockstr); + if (ret) + return ret; + *fpos += cnt; return cnt; @@ -6574,6 +6596,13 @@ __init static int tracer_alloc_buffers(void) trace_init_cmdlines(); + if (trace_boot_clock) { + ret = tracing_set_clock(&global_trace, trace_boot_clock); + if (ret < 0) + pr_warning("Trace clock %s not defined, going back to default\n", + trace_boot_clock); + } + /* * register_tracer() might reference current_trace, so it * needs to be set before we register anything. This is -- cgit From 1fcc155351f183e5044180eeb372a8ff47710855 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 19 Feb 2014 15:12:18 -0500 Subject: ftrace: Have static function trace clear ENABLED flag on unregister The ENABLED flag needs to be cleared when an ftrace_ops is unregistered, otherwise it won't be able to be registered again. This is only for static tracing and does not affect DYNAMIC_FTRACE at all.
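Reduced to its essentials, the static-tracing register/unregister pair now keeps the flag in sync in both directions. A condensed sketch follows; the real code lives in the ftrace_startup()/ftrace_shutdown() macros, and the example_* wrappers are hypothetical names around the actual internal helpers:

int __register_ftrace_function(struct ftrace_ops *ops);
int __unregister_ftrace_function(struct ftrace_ops *ops);

static int example_startup(struct ftrace_ops *ops)
{
	int ret = __register_ftrace_function(ops);

	if (!ret)
		ops->flags |= FTRACE_OPS_FL_ENABLED;
	return ret;
}

static int example_shutdown(struct ftrace_ops *ops)
{
	int ret = __unregister_ftrace_function(ops);

	if (!ret)	/* this clear is what the fix adds */
		ops->flags &= ~FTRACE_OPS_FL_ENABLED;
	return ret;
}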
Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index dcee546f21bc..5313c1100d30 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4463,7 +4463,13 @@ static inline void ftrace_startup_enable(int command) { } (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ ___ret; \ }) -# define ftrace_shutdown(ops, command) __unregister_ftrace_function(ops) +# define ftrace_shutdown(ops, command) \ + ({ \ + int ___ret = __unregister_ftrace_function(ops); \ + if (!___ret) \ + (ops)->flags &= ~FTRACE_OPS_FL_ENABLED; \ + ___ret; \ + }) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) -- cgit From c932c6b7c913a5661e04059045fa1eac762c82fa Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 21 Feb 2014 10:43:12 -0500 Subject: ftrace/x86: Run a sync after fixup on failure If a failure occurs while enabling a trace, it bails out and will remove the tracepoints to be back to what the code originally was. But the fix up had some bugs in it. By injecting a failure in the code, the fix up ran to completion, but shortly afterward the system rebooted. There were two bugs here. The first was that there was no final sync run across the CPUs after the fix up was done, and before the ftrace int3 handler flag was reset. That means that other CPUs could still see the breakpoint and trigger on it long after the flag was cleared, and the int3 handler would think it was a spurious interrupt. Worse yet, the int3 handler could swallow other breakpoints too, because the ftrace int3 handler flag prevented the int3 handler from going any further. Here's a description of the issue: CPU0 CPU1 ---- ---- remove_breakpoint(); modifying_ftrace_code = 0; [still sees breakpoint] [sees modifying_ftrace_code as zero] [no breakpoint handler] [goto failed case] [trap exception - kernel breakpoint, no handler] BUG() The second bug was that the removal of the breakpoints required the "within()" logic to update the address, instead of accessing the ip address directly: the kernel text is mapped read-only when CONFIG_DEBUG_RODATA is set, and the removal of the breakpoint is a modification of the kernel text. ftrace_write() includes the "within()" logic, whereas probe_kernel_write() does not. This prevented the breakpoint from being removed at all. Link: http://lkml.kernel.org/r/1392650573-3390-1-git-send-email-pmladek@suse.cz Reported-by: Petr Mladek Tested-by: Petr Mladek Acked-by: H.
Peter Anvin Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index e6253195a301..6b566c82d82c 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -455,7 +455,7 @@ static int remove_breakpoint(struct dyn_ftrace *rec) } update: - return probe_kernel_write((void *)ip, &nop[0], 1); + return ftrace_write(ip, nop, 1); } static int add_update_code(unsigned long ip, unsigned const char *new) @@ -634,6 +634,7 @@ void ftrace_replace_code(int enable) rec = ftrace_rec_iter_record(iter); remove_breakpoint(rec); } + run_sync(); } static int @@ -664,7 +665,7 @@ ftrace_modify_code(unsigned long ip, unsigned const char *old_code, return ret; fail_update: - probe_kernel_write((void *)ip, &old_code[0], 1); + ftrace_write(ip, old_code, 1); goto out; } -- cgit From 12729f14d8357fb845d75155228b21e76360272d Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 24 Feb 2014 17:12:20 +0100 Subject: ftrace/x86: One more missing sync after fixup of function modification failure If a failure occurs while modifying an ftrace function, it bails out and will remove the tracepoints to be back to what the code originally was. The final sync run across the CPUs, after the fix up is done and before the ftrace int3 handler flag is reset, is missing. Here's the description of the problem: CPU0 CPU1 ---- ---- remove_breakpoint(); modifying_ftrace_code = 0; [still sees breakpoint] [sees modifying_ftrace_code as zero] [no breakpoint handler] [goto failed case] [trap exception - kernel breakpoint, no handler] BUG() Link: http://lkml.kernel.org/r/1393258342-29978-2-git-send-email-pmladek@suse.cz Fixes: 8a4d0a687a5 "ftrace: Use breakpoint method to update ftrace caller" Acked-by: Frederic Weisbecker Acked-by: H. Peter Anvin Signed-off-by: Petr Mladek Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 6b566c82d82c..69885e2f2095 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -660,8 +660,8 @@ ftrace_modify_code(unsigned long ip, unsigned const char *old_code, ret = -EPERM; goto out; } - run_sync(); out: + run_sync(); return ret; fail_update: -- cgit From 7dec935a3aa04412cba2cebe1524ae0d34a30c24 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 26 Feb 2014 10:54:36 -0500 Subject: tracepoint: Do not waste memory on mods with no tracepoints No reason to allocate tp_module structures for modules that have no tracepoints. This just wastes memory. Fixes: b75ef8b44b1c "Tracepoint: Dissociate from module mutex" Acked-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- kernel/tracepoint.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 29f26540e9c9..0d4ef26574ff 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -636,6 +636,9 @@ static int tracepoint_module_coming(struct module *mod) struct tp_module *tp_mod, *iter; int ret = 0; + if (!mod->num_tracepoints) + return 0; + /* * We skip modules that taint the kernel, especially those with different * module headers (for forced load), to make sure we don't cause a crash.
@@ -679,6 +682,9 @@ static int tracepoint_module_going(struct module *mod) { struct tp_module *pos; + if (!mod->num_tracepoints) + return 0; + mutex_lock(&tracepoints_mutex); tracepoint_update_probe_range(mod->tracepoints_ptrs, mod->tracepoints_ptrs + mod->num_tracepoints); -- cgit From 92550405c493a3c2fa14bf37d1d60cd6c7d0f585 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 25 Feb 2014 21:33:59 -0500 Subject: ftrace/x86: Have ftrace_write() return -EPERM and clean up callers Have ftrace_write() return -EPERM on failure, as that's what the callers return; then we can clean up the code a bit. That is, instead of: if (ftrace_write(...)) return -EPERM; return 0; or if (ftrace_write(...)) { ret = -EPERM; goto out; } We can instead have: return ftrace_write(...); or ret = ftrace_write(...); if (ret) goto out; Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 69885e2f2095..8cabf638cb63 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -308,7 +308,10 @@ static int ftrace_write(unsigned long ip, const char *val, int size) if (within(ip, (unsigned long)_text, (unsigned long)_etext)) ip = (unsigned long)__va(__pa_symbol(ip)); - return probe_kernel_write((void *)ip, val, size); + if (probe_kernel_write((void *)ip, val, size)) + return -EPERM; + + return 0; } static int add_break(unsigned long ip, const char *old) @@ -323,10 +326,7 @@ static int add_break(unsigned long ip, const char *old) if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) return -EINVAL; - if (ftrace_write(ip, &brk, 1)) - return -EPERM; - - return 0; + return ftrace_write(ip, &brk, 1); } static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr) @@ -463,9 +463,7 @@ static int add_update_code(unsigned long ip, unsigned const char *new) /* skip breakpoint */ ip++; new++; - if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1)) - return -EPERM; - return 0; + return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1); } static int add_update_call(struct dyn_ftrace *rec, unsigned long addr) @@ -520,10 +518,7 @@ static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr) new = ftrace_call_replace(ip, addr); - if (ftrace_write(ip, new, 1)) - return -EPERM; - - return 0; + return ftrace_write(ip, new, 1); } static int finish_update_nop(struct dyn_ftrace *rec) @@ -533,9 +528,7 @@ static int finish_update_nop(struct dyn_ftrace *rec) new = ftrace_nop_replace(); - if (ftrace_write(ip, new, 1)) - return -EPERM; - return 0; + return ftrace_write(ip, new, 1); } static int finish_update(struct dyn_ftrace *rec, int enable) @@ -656,10 +649,6 @@ ftrace_modify_code(unsigned long ip, unsigned const char *old_code, run_sync(); ret = ftrace_write(ip, new_code, 1); - if (ret) { - ret = -EPERM; - goto out; - } out: run_sync(); return ret; -- cgit From 1d6bae966e90134bcfd7807b8f9488d55198de91 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 9 Aug 2012 19:16:14 -0400 Subject: tracing: Move raw output code from macro to standalone function The code for trace events to format the raw recorded event data into human-readable format in the 'trace' file is repeated for every event in the system. When you have over 500 events, this can add up quite a bit. By making helper functions in the core kernel to do the work instead, we can shrink the size of the kernel down a bit.
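The technique is the classic vararg-wrapper split: each macro-generated print function collapses to one call into a shared helper that forwards to a va_list worker. Before the numbers, a minimal user-space model of the same shape (illustrative names only, not the kernel API):

#include <stdarg.h>
#include <stdio.h>

/* One shared copy in the "core", instead of one printing body per
 * call site; mirrors the ftrace_output_call()/ftrace_output_raw()
 * split in the diff below. */
static int output_raw(FILE *out, const char *name, const char *fmt, va_list ap)
{
	if (fprintf(out, "%s: ", name) < 0)
		return -1;
	return vfprintf(out, fmt, ap) < 0 ? -1 : 0;
}

static int output_call(FILE *out, const char *name, const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = output_raw(out, name, fmt, ap);
	va_end(ap);
	return ret;
}

int main(void)
{
	/* Each "event" now costs one call, not one printing body. */
	return output_call(stdout, "sched_switch", "prev=%d next=%d\n", 1, 2);
}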
With a kernel configured with 502 events, the change in size was: text data bss dec hex filename 12991007 1913568 9785344 24689919 178bcff /tmp/vmlinux.orig 12990946 1913568 9785344 24689858 178bcc2 /tmp/vmlinux.patched Note, this version does not save as much as the version of this patch I had a few years ago. That is because in the meantime, commit f71130de5c7f ("tracing: Add a helper function for event print functions") did a lot of the work my original patch did. But this change helps slightly, and is part of a larger clean up to reduce the size much further. Link: http://lkml.kernel.org/r/20120810034707.378538034@goodmis.org Cc: Li Zefan Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 5 +++++ include/trace/ftrace.h | 10 +--------- kernel/trace/trace_output.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4e4cc28623ad..a91ab93d7250 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -163,6 +163,8 @@ void trace_current_buffer_discard_commit(struct ring_buffer *buffer, void tracing_record_cmdline(struct task_struct *tsk); +int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...); + struct event_filter; enum trace_reg { @@ -197,6 +199,9 @@ struct ftrace_event_class { extern int ftrace_event_reg(struct ftrace_event_call *event, enum trace_reg type, void *data); +int ftrace_output_event(struct trace_iterator *iter, struct ftrace_event_call *event, + char *fmt, ...); + enum { TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_CAP_ANY_BIT, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 1a8b28db3775..3873d6e4697f 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -265,11 +265,9 @@ static notrace enum print_line_t \ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \ struct trace_event *event) \ { \ - struct trace_seq *s = &iter->seq; \ struct ftrace_raw_##template *field; \ struct trace_entry *entry; \ struct trace_seq *p = &iter->tmp_seq; \ - int ret; \ \ entry = iter->ent; \ \ @@ -281,13 +279,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \ field = (typeof(field))entry; \ \ trace_seq_init(p); \ - ret = trace_seq_printf(s, "%s: ", #call); \ - if (ret) \ - ret = trace_seq_printf(s, print); \ - if (!ret) \ - return TRACE_TYPE_PARTIAL_LINE; \ - \ - return TRACE_TYPE_HANDLED; \ + return ftrace_output_call(iter, #call, print); \ } \ static struct trace_event_functions ftrace_event_type_funcs_##call = { \ .trace = ftrace_raw_output_##call, \ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index ed32284fbe32..ca0e79e2abaa 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -439,6 +439,37 @@ int ftrace_raw_output_prep(struct trace_iterator *iter, } EXPORT_SYMBOL(ftrace_raw_output_prep); +static int ftrace_output_raw(struct trace_iterator *iter, char *name, + char *fmt, va_list ap) +{ + struct trace_seq *s = &iter->seq; + int ret; + + ret = trace_seq_printf(s, "%s: ", name); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_vprintf(s, fmt, ap); + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...)
+{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = ftrace_output_raw(iter, name, fmt, ap); + va_end(ap); + + return ret; +} +EXPORT_SYMBOL_GPL(ftrace_output_call); + #ifdef CONFIG_KRETPROBES static inline const char *kretprobed(const char *name) { -- cgit From 35bb4399bd0ef16b8a57fccea0047d98b6b0e7fb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 9 Aug 2012 22:26:46 -0400 Subject: tracing: Move event storage for array from macro to standalone function The code that shows array fields for events is defined for all events. This can add up quite a bit when you have over 500 events. By making helper functions in the core kernel to do the work instead, we can shrink the size of the kernel down a bit. With a kernel configured with 502 events, the change in size was: text data bss dec hex filename 12990946 1913568 9785344 24689858 178bcc2 /tmp/vmlinux 12987390 1913504 9785344 24686238 178ae9e /tmp/vmlinux.patched That's a total of 3556 bytes, which comes down to 7 bytes per event. Although it's not much, this code is just called at initialization of the events. Link: http://lkml.kernel.org/r/20120810034708.084036335@goodmis.org Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 8 ++++---- include/trace/ftrace.h | 12 ++++-------- kernel/trace/trace_events.c | 6 ------ kernel/trace/trace_export.c | 12 ++++-------- kernel/trace/trace_output.c | 21 +++++++++++++++++++++ 5 files changed, 33 insertions(+), 26 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index a91ab93d7250..ffe642eb84fa 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -202,6 +202,10 @@ extern int ftrace_event_reg(struct ftrace_event_call *event, int ftrace_output_event(struct trace_iterator *iter, struct ftrace_event_call *event, char *fmt, ...); +int ftrace_event_define_field(struct ftrace_event_call *call, + char *type, int len, char *item, int offset, + int field_size, int sign, int filter); + enum { TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_CAP_ANY_BIT, @@ -500,10 +504,6 @@ enum { FILTER_TRACE_FN, }; -#define EVENT_STORAGE_SIZE 128 -extern struct mutex event_storage_mutex; -extern char event_storage[EVENT_STORAGE_SIZE]; - extern int trace_event_raw_init(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 3873d6e4697f..54928faf2119 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -302,15 +302,11 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \ #undef __array #define __array(type, item, len) \ do { \ - mutex_lock(&event_storage_mutex); \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - snprintf(event_storage, sizeof(event_storage), \ - "%s[%d]", #type, len); \ - ret = trace_define_field(event_call, event_storage, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item), \ - is_signed_type(type), FILTER_OTHER); \ - mutex_unlock(&event_storage_mutex); \ + ret = ftrace_event_define_field(event_call, #type, len, \ + #item, offsetof(typeof(field), item), \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; \ } while (0); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e71ffd4eccb5..22826c73a9da 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,12 +27,6 @@ DEFINE_MUTEX(event_mutex); -DEFINE_MUTEX(event_storage_mutex); 
-EXPORT_SYMBOL_GPL(event_storage_mutex); - -char event_storage[EVENT_STORAGE_SIZE]; -EXPORT_SYMBOL_GPL(event_storage); - LIST_HEAD(ftrace_events); static LIST_HEAD(ftrace_common_fields); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 7c3e3e72e2b6..39c746c5ae73 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -96,14 +96,10 @@ static void __always_unused ____ftrace_check_##name(void) \ #define __array(type, item, len) \ do { \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - mutex_lock(&event_storage_mutex); \ - snprintf(event_storage, sizeof(event_storage), \ - "%s[%d]", #type, len); \ - ret = trace_define_field(event_call, event_storage, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item), \ - is_signed_type(type), filter_type); \ - mutex_unlock(&event_storage_mutex); \ + ret = ftrace_event_define_field(event_call, #type, len, \ + #item, offsetof(typeof(field), item), \ + sizeof(field.item), \ + is_signed_type(type), filter_type); \ if (ret) \ return ret; \ } while (0); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index ca0e79e2abaa..ee8d74840b88 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -20,6 +20,10 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; static int next_event_type = __TRACE_LAST_TYPE + 1; +#define EVENT_STORAGE_SIZE 128 +static DEFINE_MUTEX(event_storage_mutex); +static char event_storage[EVENT_STORAGE_SIZE]; + int trace_print_seq(struct seq_file *m, struct trace_seq *s) { int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; @@ -470,6 +474,23 @@ int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) } EXPORT_SYMBOL_GPL(ftrace_output_call); +int ftrace_event_define_field(struct ftrace_event_call *call, + char *type, int len, char *item, int offset, + int field_size, int sign, int filter) +{ + int ret; + + mutex_lock(&event_storage_mutex); + snprintf(event_storage, sizeof(event_storage), + "%s[%d]", type, len); + ret = trace_define_field(call, event_storage, item, offset, + field_size, sign, filter); + mutex_unlock(&event_storage_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(ftrace_event_define_field); + #ifdef CONFIG_KRETPROBES static inline const char *kretprobed(const char *name) { -- cgit From 3fd40d1ee6a317523172ab95b6f7ea41ba8fcee3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 9 Aug 2012 22:42:57 -0400 Subject: tracing: Use helper functions in event assignment to shrink macro size The functions that assign the contents for the ftrace events are defined by the TRACE_EVENT() macros. Each event has its own unique way to assign data to its buffer. When you have over 500 events, that means there are 500 functions assigning data uniquely for each event (not really that many, as DECLARE_EVENT_CLASS() and multiple DEFINE_EVENT()s will only need a single function). By making helper functions in the core kernel to do some of the work instead, we can shrink the size of the kernel down a bit. With a kernel configured with 502 events, the change in size was: text data bss dec hex filename 12987390 1913504 9785344 24686238 178ae9e /tmp/vmlinux 12959102 1913504 9785344 24657950 178401e /tmp/vmlinux.patched That's a total of 28288 bytes, which comes down to 56 bytes per event.
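In outline, each generated probe body shrinks to a reserve/assign/commit bracket around a shared ftrace_event_buffer. A sketch of the shape, with a hypothetical event type (my_event_entry) standing in for the macro-generated one; the reserve/commit helpers from the diff below carry the flags/pc/buffer bookkeeping that used to be inlined per event:

struct my_event_entry {
	struct trace_entry ent;
	int value;
};

static void probe_my_event(void *__data, int value)
{
	struct ftrace_event_file *file = __data;
	struct ftrace_event_buffer fbuffer;
	struct my_event_entry *entry;

	entry = ftrace_event_buffer_reserve(&fbuffer, file, sizeof(*entry));
	if (!entry)
		return;

	entry->value = value;	/* the per-event "assign" step */

	ftrace_event_buffer_commit(&fbuffer);
}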
Link: http://lkml.kernel.org/r/20120810034708.370808175@goodmis.org Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 15 +++++++++++++++ include/trace/ftrace.h | 22 ++++++---------------- kernel/trace/trace_events.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 16 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ffe642eb84fa..9d3fe0658398 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -206,6 +206,21 @@ int ftrace_event_define_field(struct ftrace_event_call *call, char *type, int len, char *item, int offset, int field_size, int sign, int filter); +struct ftrace_event_buffer { + struct ring_buffer *buffer; + struct ring_buffer_event *event; + struct ftrace_event_file *ftrace_file; + void *entry; + unsigned long flags; + int pc; +}; + +void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, + struct ftrace_event_file *ftrace_file, + unsigned long len); + +void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer); + enum { TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_CAP_ANY_BIT, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 54928faf2119..1cc2265caa52 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -532,37 +532,27 @@ static notrace void \ ftrace_raw_event_##call(void *__data, proto) \ { \ struct ftrace_event_file *ftrace_file = __data; \ - struct ftrace_event_call *event_call = ftrace_file->event_call; \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - struct ring_buffer_event *event; \ + struct ftrace_event_buffer fbuffer; \ struct ftrace_raw_##call *entry; \ - struct ring_buffer *buffer; \ - unsigned long irq_flags; \ int __data_size; \ - int pc; \ \ if (ftrace_trigger_soft_disabled(ftrace_file)) \ return; \ \ - local_save_flags(irq_flags); \ - pc = preempt_count(); \ - \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ \ - event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, \ - event_call->event.type, \ - sizeof(*entry) + __data_size, \ - irq_flags, pc); \ - if (!event) \ + entry = ftrace_event_buffer_reserve(&fbuffer, ftrace_file, \ + sizeof(*entry) + __data_size); \ + \ + if (!entry) \ return; \ - entry = ring_buffer_event_data(event); \ \ tstruct \ \ { assign; } \ \ - event_trigger_unlock_commit(ftrace_file, buffer, event, entry, \ - irq_flags, pc); \ + ftrace_event_buffer_commit(&fbuffer); \ } /* * The ftrace_test_probe is compiled out, it is only here as a build time check diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 22826c73a9da..b8f73b333a3c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -188,6 +188,36 @@ int trace_event_raw_init(struct ftrace_event_call *call) } EXPORT_SYMBOL_GPL(trace_event_raw_init); +void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, + struct ftrace_event_file *ftrace_file, + unsigned long len) +{ + struct ftrace_event_call *event_call = ftrace_file->event_call; + + local_save_flags(fbuffer->flags); + fbuffer->pc = preempt_count(); + fbuffer->ftrace_file = ftrace_file; + + fbuffer->event = + trace_event_buffer_lock_reserve(&fbuffer->buffer, ftrace_file, + event_call->event.type, len, + fbuffer->flags, fbuffer->pc); + if (!fbuffer->event) + return NULL; + + fbuffer->entry = ring_buffer_event_data(fbuffer->event); + return fbuffer->entry; +} +EXPORT_SYMBOL_GPL(ftrace_event_buffer_reserve); + +void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer) 
+{ + event_trigger_unlock_commit(fbuffer->ftrace_file, fbuffer->buffer, + fbuffer->event, fbuffer->entry, + fbuffer->flags, fbuffer->pc); +} +EXPORT_SYMBOL_GPL(ftrace_event_buffer_commit); + int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type, void *data) { -- cgit From b196e2b9e262be01737dc2bbf9e3c7c87340fa4d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 13 Feb 2014 15:45:07 -0500 Subject: tracing: Warn if a tracepoint is not set via debugfs Tracepoints were made to allow enabling a tracepoint in a module before that module was loaded. When a tracepoint is enabled and it does not exist, the name is stored and the tracepoint will be enabled when it is created. The problem with this approach is that when a tracepoint that is expected to be there is enabled, there is no warning that it does not exist. To rub salt in the wound, if a module is added and sets the FORCED flag, which can happen if it isn't signed properly, the tracepoint code will not enable the tracepoints, but they will be created in the debugfs system! When a user goes to enable the tracepoint, the tracepoint code will not see it existing and will think it is to be enabled later AND WILL NOT GIVE A WARNING. The tracing will look like it succeeded but will actually be doing nothing. This will cause lots of confusion and headaches for developers trying to figure out why they are not seeing their tracepoints. Link: http://lkml.kernel.org/r/20140213154507.4040fb06@gandalf.local.home Reported-by: Mathieu Desnoyers Reported-by: Johannes Berg Signed-off-by: Steven Rostedt --- kernel/tracepoint.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 0d4ef26574ff..0058f33d05c1 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -62,6 +62,7 @@ struct tracepoint_entry { struct hlist_node hlist; struct tracepoint_func *funcs; int refcount; /* Number of times armed. 0 if disarmed. */ + int enabled; /* Tracepoint enabled */ char name[0]; }; @@ -237,6 +238,7 @@ static struct tracepoint_entry *add_tracepoint(const char *name) memcpy(&e->name[0], name, name_len); e->funcs = NULL; e->refcount = 0; + e->enabled = 0; hlist_add_head(&e->hlist, head); return e; } @@ -316,6 +318,7 @@ static void tracepoint_update_probe_range(struct tracepoint * const *begin, if (mark_entry) { set_tracepoint(&mark_entry, *iter, !!mark_entry->refcount); + mark_entry->enabled = !!mark_entry->refcount; } else { disable_tracepoint(*iter); } @@ -380,6 +383,8 @@ tracepoint_add_probe(const char *name, void *probe, void *data) int tracepoint_probe_register(const char *name, void *probe, void *data) { struct tracepoint_func *old; + struct tracepoint_entry *entry; + int ret = 0; mutex_lock(&tracepoints_mutex); old = tracepoint_add_probe(name, probe, data); @@ -388,9 +393,13 @@ int tracepoint_probe_register(const char *name, void *probe, void *data) return PTR_ERR(old); } tracepoint_update_probes(); /* may update entry */ + entry = get_tracepoint(name); + /* Make sure the entry was enabled */ + if (!entry || !entry->enabled) + ret = -ENODEV; mutex_unlock(&tracepoints_mutex); release_probes(old); - return 0; + return ret; } EXPORT_SYMBOL_GPL(tracepoint_probe_register); -- cgit From f479447ad94879299b08b3f0b40f481baaff7eb7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 26 Feb 2014 12:52:13 -0500 Subject: tracing: Fix event header writeback.h to include tracepoint.h The trace event headers are required to include tracepoint.h.
The only reason they worked until now is that module.h included tracepoint.h, and that will soon change. Link: http://lkml.kernel.org/r/20140226190644.442886305@goodmis.org Fixes: 455b2864686d "writeback: Initial tracing support" Cc: Dave Chinner Cc: Jens Axboe Signed-off-by: Steven Rostedt --- include/trace/events/writeback.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index c7bbbe794e65..309a086e2a0b 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -4,6 +4,7 @@ #if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_WRITEBACK_H +#include <linux/tracepoint.h> #include <linux/backing-dev.h> #include <linux/writeback.h> -- cgit From 8643e92b5266a34cb9c66388983fc15ac37171d5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 26 Feb 2014 13:03:32 -0500 Subject: tracing: Fix event header migrate.h to include tracepoint.h The trace event headers are required to include tracepoint.h. The only reason they worked until now is that module.h included tracepoint.h, and that will soon change. Link: http://lkml.kernel.org/r/20140226190644.591040764@goodmis.org Fixes: 7b2a2d4a18ff "mm: migrate: Add a tracepoint for migrate_pages" Acked-by: Mel Gorman Signed-off-by: Steven Rostedt --- include/trace/events/migrate.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index 3075ffbb9a83..4e4f2f8b1ac2 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h @@ -4,6 +4,8 @@ #if !defined(_TRACE_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MIGRATE_H +#include <linux/tracepoint.h> + #define MIGRATE_MODE \ {MIGRATE_ASYNC, "MIGRATE_ASYNC"}, \ {MIGRATE_SYNC_LIGHT, "MIGRATE_SYNC_LIGHT"}, \ -- cgit From b2e285fcb46ab4d91ebbc3a9bd5900f544972a47 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 26 Feb 2014 13:12:06 -0500 Subject: tracing/module: Replace include of tracepoint.h with jump_label.h in module.h There's nothing in the module.h header that requires tracepoint.h to be included, and there may be cases where tracepoint.h needs to include module.h, which will cause recursive header issues. But module.h requires seeing HAVE_JUMP_LABEL, which is set in jump_label.h and which it currently gets only coincidentally, via tracepoint.h. Link: http://lkml.kernel.org/r/20140307084712.5c68641a@gandalf.local.home Acked-by: Rusty Russell Signed-off-by: Steven Rostedt --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/module.h b/include/linux/module.h index eaf60ff9ba94..5a5053975114 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -15,7 +15,7 @@ #include #include #include -#include <linux/tracepoint.h> +#include <linux/jump_label.h> #include #include -- cgit From 5859fa1a146ef5bf79953767f4ceb546fe4214b3 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Fri, 28 Feb 2014 21:32:16 -0800 Subject: tracing: Correctly expand len expressions from __dynamic_array macro This fixes expansion of the len argument in __dynamic_array macros. The previous code from commit 7d536cb3f would not fully evaluate the expression before multiplying its result by the size of the type. This went unnoticed because the length stored in the high 16 bits of the offset (which is the one that was broken here) is only used by filter_pred_strloc, which only acts on strings for which the size of the type is 1.
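The underlying hazard is ordinary macro-argument precedence; in isolation it looks like this (self-contained demo, hypothetical macro names):

#include <stdio.h>

/* Without parentheses around the macro argument, only the last term
 * of the expression is multiplied by the element size. */
#define BYTES_BAD(type, len)	(len * sizeof(type))
#define BYTES_GOOD(type, len)	((len) * sizeof(type))

int main(void)
{
	int n = 3;

	/* Expands to (n + 1 * sizeof(short)): 3 + 2 = 5, not 8. */
	printf("bad:  %zu\n", BYTES_BAD(short, n + 1));
	/* Expands to ((n + 1) * sizeof(short)): 8. */
	printf("good: %zu\n", BYTES_GOOD(short, n + 1));
	return 0;
}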
Link: http://lkml.kernel.org/r/1393651938-16418-2-git-send-email-filbranden@google.com Signed-off-by: Filipe Brandenburger Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 1cc2265caa52..21250056b7da 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -363,7 +363,7 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #define __dynamic_array(type, item, len) \ __data_offsets->item = __data_size + \ offsetof(typeof(*entry), __data); \ - __data_offsets->item |= (len * sizeof(type)) << 16; \ + __data_offsets->item |= ((len) * sizeof(type)) << 16; \ __data_size += (len) * sizeof(type); #undef __string -- cgit From 114e7b52dee69ce47dd55b8e520e0a48ba7cdae3 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Fri, 28 Feb 2014 21:32:17 -0800 Subject: tracing: Evaluate len expression only once in __dynamic_array macro Use a temporary variable to store the expansion of the len expression. If the evaluation is expensive, this commit will ensure it is evaluated only once inside ftrace_get_offsets_<call>(). Link: http://lkml.kernel.org/r/1393651938-16418-3-git-send-email-filbranden@google.com Signed-off-by: Filipe Brandenburger Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 21250056b7da..e15ae4010d51 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -361,10 +361,11 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef __dynamic_array #define __dynamic_array(type, item, len) \ + __item_length = (len) * sizeof(type); \ __data_offsets->item = __data_size + \ offsetof(typeof(*entry), __data); \ - __data_offsets->item |= ((len) * sizeof(type)) << 16; \ + __data_offsets->item |= __item_length << 16; \ + __data_size += __item_length. \ nope

#undef __string #define __string(item, src) __dynamic_array(char, item, \ @@ -376,6 +377,7 @@ static inline notrace int ftrace_get_offsets_##call( \ struct ftrace_data_offsets_##call *__data_offsets, proto) \ { \ int __data_size = 0; \ + int __maybe_unused __item_length; \ struct ftrace_raw_##call __maybe_unused *entry; \ \ tstruct; \ -- cgit From 1dc43cf0be9a94a6a7273db284152db15c526106 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 24 Feb 2014 19:59:56 +0100 Subject: ftrace: Cleanup of global variables ftrace_new_pgs and ftrace_update_cnt Some of them can be local to functions, so make them local and pass them as parameters where needed: * __start_mcount_loc+__stop_mcount_loc are local to ftrace_init * ftrace_new_pgs -> new_pgs/start_pg * ftrace_update_cnt -> local update_cnt in ftrace_update_code Link: http://lkml.kernel.org/r/1393268401-24379-1-git-send-email-jslaby@suse.cz Signed-off-by: Jiri Slaby Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5313c1100d30..3f95bbeb8e8d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1174,8 +1174,6 @@ struct ftrace_page { int size; }; -static struct ftrace_page *ftrace_new_pgs; - #define ENTRY_SIZE sizeof(struct dyn_ftrace) #define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE) @@ -2246,7 +2244,6 @@ static void ftrace_shutdown_sysctl(void) } static cycle_t
ftrace_update_time; -static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; static inline int ops_traces_mod(struct ftrace_ops *ops) @@ -2302,11 +2299,12 @@ static int referenced_filters(struct dyn_ftrace *rec) return cnt; } -static int ftrace_update_code(struct module *mod) +static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) { struct ftrace_page *pg; struct dyn_ftrace *p; cycle_t start, stop; + unsigned long update_cnt = 0; unsigned long ref = 0; bool test = false; int i; @@ -2332,9 +2330,8 @@ static int ftrace_update_code(struct module *mod) } start = ftrace_now(raw_smp_processor_id()); - ftrace_update_cnt = 0; - for (pg = ftrace_new_pgs; pg; pg = pg->next) { + for (pg = new_pgs; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { int cnt = ref; @@ -2355,7 +2352,7 @@ static int ftrace_update_code(struct module *mod) if (!ftrace_code_disable(mod, p)) break; - ftrace_update_cnt++; + update_cnt++; /* * If the tracing is enabled, go ahead and enable the record. @@ -2374,11 +2371,9 @@ static int ftrace_update_code(struct module *mod) } } - ftrace_new_pgs = NULL; - stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; - ftrace_update_tot_cnt += ftrace_update_cnt; + ftrace_update_tot_cnt += update_cnt; return 0; } @@ -4270,9 +4265,6 @@ static int ftrace_process_locs(struct module *mod, /* Assign the last page to ftrace_pages */ ftrace_pages = pg; - /* These new locations need to be initialized */ - ftrace_new_pgs = start_pg; - /* * We only need to disable interrupts on start up * because we are modifying code that an interrupt @@ -4283,7 +4275,7 @@ static int ftrace_process_locs(struct module *mod, */ if (!mod) local_irq_save(flags); - ftrace_update_code(mod); + ftrace_update_code(mod, start_pg); if (!mod) local_irq_restore(flags); ret = 0; @@ -4392,11 +4384,10 @@ struct notifier_block ftrace_module_exit_nb = { .priority = INT_MIN, /* Run after anything that can remove kprobes */ }; -extern unsigned long __start_mcount_loc[]; -extern unsigned long __stop_mcount_loc[]; - void __init ftrace_init(void) { + extern unsigned long __start_mcount_loc[]; + extern unsigned long __stop_mcount_loc[]; unsigned long count, addr, flags; int ret; -- cgit From c867ccd8388d1c1a31bef9c54544b2ef32f0ebca Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 24 Feb 2014 19:59:57 +0100 Subject: ftrace: Inline the code from ftrace_dyn_table_alloc() The function used to do allocations some time ago. This no longer happens and it only checks the count and prints some info. This patch inlines the body to the only caller. 
There are two reasons: * the name of the function was misleading * it's clear what is going on in ftrace_init now Link: http://lkml.kernel.org/r/1393268401-24379-2-git-send-email-jslaby@suse.cz Signed-off-by: Jiri Slaby Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3f95bbeb8e8d..76b6ed29d856 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2465,22 +2465,6 @@ ftrace_allocate_pages(unsigned long num_to_init) return NULL; } -static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) -{ - int cnt; - - if (!num_to_init) { - pr_info("ftrace: No functions to be traced?\n"); - return -1; - } - - cnt = num_to_init / ENTRIES_PER_PAGE; - pr_info("ftrace: allocating %ld entries in %d pages\n", - num_to_init, cnt + 1); - - return 0; -} - #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ struct ftrace_iterator { @@ -4403,10 +4387,13 @@ void __init ftrace_init(void) goto failed; count = __stop_mcount_loc - __start_mcount_loc; - - ret = ftrace_dyn_table_alloc(count); - if (ret) + if (!count) { + pr_info("ftrace: No functions to be traced?\n"); goto failed; + } + + pr_info("ftrace: allocating %ld entries in %ld pages\n", + count, count / ENTRIES_PER_PAGE + 1); last_ftrace_enabled = ftrace_enabled = 1; -- cgit From af64a7cb09db77344c596a0bf3d57d77257e8bf5 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 24 Feb 2014 19:59:58 +0100 Subject: ftrace: Pass retval through return in ftrace_dyn_arch_init() No architecture uses the "data" parameter in ftrace_dyn_arch_init() in any way; it just sets the value to 0. And this is used as a return value in the caller -- ftrace_init, which just checks the retval against zero. Note that there is also "return 0" in every ftrace_dyn_arch_init. So it is enough to check the retval and remove all the indirect sets of data on all archs.
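The calling-convention change in miniature: status used to come back through a pointer that every implementation dutifully zeroed, and now it is simply the return value. An illustrative stand-alone model, not the real arch code:

#include <stdio.h>

static int arch_init_old(void *data)
{
	*(unsigned long *)data = 0;	/* "return" via side effect */
	return 0;			/* ...and also via return */
}

static int arch_init_new(void)
{
	return 0;			/* one obvious status channel */
}

int main(void)
{
	unsigned long status = 1;

	arch_init_old(&status);
	if (status)
		return 1;
	return arch_init_new();
}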
Link: http://lkml.kernel.org/r/1393268401-24379-3-git-send-email-jslaby@suse.cz Cc: linux-arch@vger.kernel.org Signed-off-by: Jiri Slaby Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace-design.txt | 3 --- arch/arm/kernel/ftrace.c | 2 -- arch/blackfin/kernel/ftrace.c | 3 --- arch/ia64/kernel/ftrace.c | 2 -- arch/metag/kernel/ftrace.c | 3 --- arch/microblaze/kernel/ftrace.c | 3 --- arch/mips/kernel/ftrace.c | 3 --- arch/powerpc/kernel/ftrace.c | 5 ----- arch/s390/kernel/ftrace.c | 1 - arch/sh/kernel/ftrace.c | 3 --- arch/sparc/kernel/ftrace.c | 4 ---- arch/tile/kernel/ftrace.c | 2 -- arch/x86/kernel/ftrace.c | 3 --- kernel/trace/ftrace.c | 6 ++---- 14 files changed, 2 insertions(+), 41 deletions(-) diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 79fcafc7fd64..117168884023 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -360,9 +360,6 @@ function below should be sufficient for most people: int __init ftrace_dyn_arch_init(void *data) { - /* return value is done indirectly via data */ - *(unsigned long *)data = 0; - return 0; } diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 34e56647dcee..5cd0d05edf35 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -158,8 +158,6 @@ int ftrace_make_nop(struct module *mod, int __init ftrace_dyn_arch_init(void *data) { - *(unsigned long *)data = 0; - return 0; } #endif /* CONFIG_DYNAMIC_FTRACE */ diff --git a/arch/blackfin/kernel/ftrace.c b/arch/blackfin/kernel/ftrace.c index 9277905b82cf..f74c5ae6a25b 100644 --- a/arch/blackfin/kernel/ftrace.c +++ b/arch/blackfin/kernel/ftrace.c @@ -67,9 +67,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int __init ftrace_dyn_arch_init(void *data) { - /* return value is done indirectly via data */ - *(unsigned long *)data = 0; - return 0; } diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c index 7fc8c961b1f7..cfaa93a8bbdf 100644 --- a/arch/ia64/kernel/ftrace.c +++ b/arch/ia64/kernel/ftrace.c @@ -200,7 +200,5 @@ int ftrace_update_ftrace_func(ftrace_func_t func) /* run from kstop_machine */ int __init ftrace_dyn_arch_init(void *data) { - *(unsigned long *)data = 0; - return 0; } diff --git a/arch/metag/kernel/ftrace.c b/arch/metag/kernel/ftrace.c index a774f321643f..bf593932b353 100644 --- a/arch/metag/kernel/ftrace.c +++ b/arch/metag/kernel/ftrace.c @@ -119,8 +119,5 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) /* run from kstop_machine */ int __init ftrace_dyn_arch_init(void *data) { - /* The return code is returned via data */ - writel(0, data); - return 0; } diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c index e8a5e9cf4ed1..ffa595c7fec2 100644 --- a/arch/microblaze/kernel/ftrace.c +++ b/arch/microblaze/kernel/ftrace.c @@ -173,9 +173,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) int __init ftrace_dyn_arch_init(void *data) { - /* The return code is retured via data */ - *(unsigned long *)data = 0; - return 0; } diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 185ba258361b..013016bec9e1 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -206,9 +206,6 @@ int __init ftrace_dyn_arch_init(void *data) /* Remove "b ftrace_stub" to ensure ftrace_caller() is executed */ ftrace_modify_code(MCOUNT_ADDR, INSN_NOP); - /* The return code is retured via data */ - *(unsigned long *)data = 0; - return 0; } #endif /* CONFIG_DYNAMIC_FTRACE */ diff 
--git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 9b27b293a922..d059664cdf16 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -533,11 +533,6 @@ void arch_ftrace_update_code(int command) int __init ftrace_dyn_arch_init(void *data) { - /* caller expects data to be zero */ - unsigned long *p = data; - - *p = 0; - return 0; } #endif /* CONFIG_DYNAMIC_FTRACE */ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 224db03e9518..77b2f3a1f50a 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -132,7 +132,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int __init ftrace_dyn_arch_init(void *data) { - *(unsigned long *) data = 0; return 0; } diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 30e13196d35b..493997541d2c 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -274,9 +274,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) int __init ftrace_dyn_arch_init(void *data) { - /* The return code is retured via data */ - __raw_writel(0, (unsigned long)data); - return 0; } #endif /* CONFIG_DYNAMIC_FTRACE */ diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c index 03ab022e51c5..ee813b82da49 100644 --- a/arch/sparc/kernel/ftrace.c +++ b/arch/sparc/kernel/ftrace.c @@ -84,10 +84,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int __init ftrace_dyn_arch_init(void *data) { - unsigned long *p = data; - - *p = 0; - return 0; } #endif diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c index f1c452092eeb..34d9ea0bca9f 100644 --- a/arch/tile/kernel/ftrace.c +++ b/arch/tile/kernel/ftrace.c @@ -169,8 +169,6 @@ int ftrace_make_nop(struct module *mod, int __init ftrace_dyn_arch_init(void *data) { - *(unsigned long *)data = 0; - return 0; } #endif /* CONFIG_DYNAMIC_FTRACE */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8cabf638cb63..bbe5a5b88aad 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -670,9 +670,6 @@ void arch_ftrace_update_code(int command) int __init ftrace_dyn_arch_init(void *data) { - /* The return code is retured via data */ - *(unsigned long *)data = 0; - return 0; } #endif diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 76b6ed29d856..083c6d5fce25 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4379,11 +4379,9 @@ void __init ftrace_init(void) addr = (unsigned long)ftrace_stub; local_irq_save(flags); - ftrace_dyn_arch_init(&addr); + ret = ftrace_dyn_arch_init(&addr); local_irq_restore(flags); - - /* ftrace_dyn_arch_init places the return code in addr */ - if (addr) + if (ret) goto failed; count = __stop_mcount_loc - __start_mcount_loc; -- cgit From 3a36cb11ca65cd6804972eaf1000378ba4384ea7 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 24 Feb 2014 19:59:59 +0100 Subject: ftrace: Do not pass data to ftrace_dyn_arch_init As the data parameter is not really used by any ftrace_dyn_arch_init, remove that from ftrace_dyn_arch_init. This also removes the addr local variable from ftrace_init which is now unused. Note the documentation was imprecise as it did not suggest to set (*data) to 0. 
Link: http://lkml.kernel.org/r/1393268401-24379-4-git-send-email-jslaby@suse.cz Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: linux-arch@vger.kernel.org Signed-off-by: Jiri Slaby Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace-design.txt | 2 +- arch/arm/kernel/ftrace.c | 2 +- arch/blackfin/kernel/ftrace.c | 2 +- arch/ia64/kernel/ftrace.c | 2 +- arch/metag/kernel/ftrace.c | 2 +- arch/microblaze/kernel/ftrace.c | 2 +- arch/mips/kernel/ftrace.c | 2 +- arch/powerpc/kernel/ftrace.c | 2 +- arch/s390/kernel/ftrace.c | 2 +- arch/sh/kernel/ftrace.c | 2 +- arch/sparc/kernel/ftrace.c | 2 +- arch/tile/kernel/ftrace.c | 2 +- arch/x86/kernel/ftrace.c | 2 +- include/linux/ftrace.h | 2 +- kernel/trace/ftrace.c | 7 ++----- 15 files changed, 16 insertions(+), 19 deletions(-) diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 117168884023..3f669b9e8852 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -358,7 +358,7 @@ Every arch has an init callback function. If you need to do something early on to initialize some state, this is the time to do that. Otherwise, this simple function below should be sufficient for most people: -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { return 0; } diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 5cd0d05edf35..c108ddcb9ba4 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -156,7 +156,7 @@ int ftrace_make_nop(struct module *mod, return ret; } -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { return 0; } diff --git a/arch/blackfin/kernel/ftrace.c b/arch/blackfin/kernel/ftrace.c index f74c5ae6a25b..095de0fa044d 100644 --- a/arch/blackfin/kernel/ftrace.c +++ b/arch/blackfin/kernel/ftrace.c @@ -65,7 +65,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ftrace_modify_code(ip, call, sizeof(call)); } -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { return 0; } diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c index cfaa93a8bbdf..3b0c2aa07857 100644 --- a/arch/ia64/kernel/ftrace.c +++ b/arch/ia64/kernel/ftrace.c @@ -198,7 +198,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) } /* run from kstop_machine */ -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { return 0; } diff --git a/arch/metag/kernel/ftrace.c b/arch/metag/kernel/ftrace.c index bf593932b353..ed1d685157c2 100644 --- a/arch/metag/kernel/ftrace.c +++ b/arch/metag/kernel/ftrace.c @@ -117,7 +117,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* run from kstop_machine */ -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { return 0; } diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c index ffa595c7fec2..bbcd2533766c 100644 --- a/arch/microblaze/kernel/ftrace.c +++ b/arch/microblaze/kernel/ftrace.c @@ -171,7 +171,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return ret; } -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { return 0; } diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 013016bec9e1..1ba7afe6ab74 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -198,7 +198,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ftrace_modify_code(FTRACE_CALL_IP, new); } -int __init ftrace_dyn_arch_init(void 
*data) +int __init ftrace_dyn_arch_init(void) { /* Encode the instructions when booting */ ftrace_dyn_arch_init_insns(); diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index d059664cdf16..71ce4cbb7e9f 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -531,7 +531,7 @@ void arch_ftrace_update_code(int command) ftrace_disable_ftrace_graph_caller(); } -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { return 0; } diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 77b2f3a1f50a..54d6493c4a56 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -130,7 +130,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return 0; } -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { return 0; } diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 493997541d2c..3c74f53db6db 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -272,7 +272,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return ftrace_modify_code(rec->ip, old, new); } -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { return 0; } diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c index ee813b82da49..0a2d2ddff543 100644 --- a/arch/sparc/kernel/ftrace.c +++ b/arch/sparc/kernel/ftrace.c @@ -82,7 +82,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ftrace_modify_code(ip, old, new); } -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { return 0; } diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c index 34d9ea0bca9f..8d52d83cc516 100644 --- a/arch/tile/kernel/ftrace.c +++ b/arch/tile/kernel/ftrace.c @@ -167,7 +167,7 @@ int ftrace_make_nop(struct module *mod, return ret; } -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { return 0; } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index bbe5a5b88aad..4b66adf17369 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -668,7 +668,7 @@ void arch_ftrace_update_code(int command) atomic_dec(&modifying_ftrace_code); } -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { return 0; } diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e6141be2fad5..1bbb2cd631de 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -423,7 +423,7 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable); /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); -extern int ftrace_dyn_arch_init(void *data); +extern int ftrace_dyn_arch_init(void); extern void ftrace_replace_code(int enable); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 083c6d5fce25..5bd70e8b09b0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4372,14 +4372,11 @@ void __init ftrace_init(void) { extern unsigned long __start_mcount_loc[]; extern unsigned long __stop_mcount_loc[]; - unsigned long count, addr, flags; + unsigned long count, flags; int ret; - /* Keep the ftrace pointer to the stub */ - addr = (unsigned long)ftrace_stub; - local_irq_save(flags); - ret = ftrace_dyn_arch_init(&addr); + ret = ftrace_dyn_arch_init(); local_irq_restore(flags); if (ret) goto failed; -- cgit From a762782d780b341b1836489692190d26be624ed7 Mon Sep 17 00:00:00 
2001 From: Jiri Slaby Date: Mon, 24 Feb 2014 20:00:00 +0100 Subject: ftrace: Remove freelist from struct dyn_ftrace The 'freelist' member was introduced to 'struct dyn_ftrace' in commit ee000b7f9fe429d2470c674ccec8d344f6789e0d (tracing: use union for multi-usages field), but the use of this member was later removed in 3208230983a0ee3d95be22d463257e530c684956 (ftrace: Remove usage of "freed" records). Now remove the 'freelist' member as well. Link: http://lkml.kernel.org/r/1393268401-24379-5-git-send-email-jslaby@suse.cz Signed-off-by: Jiri Slaby Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1bbb2cd631de..20da0561b868 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -330,12 +330,9 @@ enum { #define FTRACE_REF_MAX ((1UL << 29) - 1) struct dyn_ftrace { - union { - unsigned long ip; /* address of mcount call-site */ - struct dyn_ftrace *freelist; - }; + unsigned long ip; /* address of mcount call-site */ unsigned long flags; - struct dyn_arch_ftrace arch; + struct dyn_arch_ftrace arch; }; int ftrace_force_update(void); -- cgit From cd21067f69240041d36e491ff5597e0217615465 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 24 Feb 2014 17:12:21 +0100 Subject: ftrace: Warn on error when modifying ftrace function We should print a warning and kill ftrace functionality when the ftrace function is not set correctly. Otherwise, ftrace might do crazy things without an explanation. The error value has been ignored so far. Note that an error that happens while updating all the traced calls is handled in ftrace_replace_code(). We print more details about the particular failing address via ftrace_bug() there. Link: http://lkml.kernel.org/r/1393258342-29978-3-git-send-email-pmladek@suse.cz Signed-off-by: Petr Mladek Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5bd70e8b09b0..0e48ff4cefa5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1994,6 +1994,7 @@ int __weak ftrace_arch_code_modify_post_process(void) void ftrace_modify_all_code(int command) { int update = command & FTRACE_UPDATE_TRACE_FUNC; + int err = 0; /* * If the ftrace_caller calls a ftrace_ops func directly, @@ -2005,8 +2006,11 @@ void ftrace_modify_all_code(int command) * to make sure the ops are having the right functions * traced.
*/ - if (update) - ftrace_update_ftrace_func(ftrace_ops_list_func); + if (update) { + err = ftrace_update_ftrace_func(ftrace_ops_list_func); + if (FTRACE_WARN_ON(err)) + return; + } if (command & FTRACE_UPDATE_CALLS) ftrace_replace_code(1); @@ -2019,13 +2023,16 @@ /* If irqs are disabled, we are in stop machine */ if (!irqs_disabled()) smp_call_function(ftrace_sync_ipi, NULL, 1); - ftrace_update_ftrace_func(ftrace_trace_function); + err = ftrace_update_ftrace_func(ftrace_trace_function); + if (FTRACE_WARN_ON(err)) + return; } if (command & FTRACE_START_FUNC_RET) - ftrace_enable_ftrace_graph_caller(); + err = ftrace_enable_ftrace_graph_caller(); else if (command & FTRACE_STOP_FUNC_RET) - ftrace_disable_ftrace_graph_caller(); + err = ftrace_disable_ftrace_graph_caller(); + FTRACE_WARN_ON(err); } static int __ftrace_modify_code(void *data) -- cgit From 7f11f5ecf4ae09815dc2de267c5e04d1de01d862 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 24 Feb 2014 17:12:22 +0100 Subject: ftrace/x86: BUG when ftrace recovery fails Ftrace modifies function calls using Int3 breakpoints on x86. The breakpoints are handled only when the patching is in progress. If something goes wrong, there is recovery code that removes the breakpoints. If this fails, the system might get silently rebooted when a remaining breakpoint is not handled or an invalid instruction is processed. We should BUG() when the breakpoint could not be removed. Otherwise, the system silently crashes when the function finishes and the Int3 handler is disabled. Note that we need to modify remove_breakpoint() to return a non-zero value only when there is an error. The return value was ignored before, so it does not cause any trouble. Link: http://lkml.kernel.org/r/1393258342-29978-4-git-send-email-pmladek@suse.cz Signed-off-by: Petr Mladek Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 4b66adf17369..52819e816f87 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -425,7 +425,7 @@ static int remove_breakpoint(struct dyn_ftrace *rec) /* If this does not have a breakpoint, we are done */ if (ins[0] != brk) - return -1; + return 0; nop = ftrace_nop_replace(); @@ -625,7 +625,12 @@ void ftrace_replace_code(int enable) printk(KERN_WARNING "Failed on %s (%d):\n", report, count); for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); - remove_breakpoint(rec); + /* + * Breakpoints are handled only when this function is in + * progress. The system could not work with them. + */ + if (remove_breakpoint(rec)) + BUG(); } run_sync(); } @@ -649,12 +654,19 @@ ftrace_modify_code(unsigned long ip, unsigned const char *old_code, run_sync(); ret = ftrace_write(ip, new_code, 1); + /* + * The breakpoint is handled only when this function is in progress. + * The system could not work if we could not remove it.
+ */ + BUG_ON(ret); out: run_sync(); return ret; fail_update: - ftrace_write(ip, old_code, 1); + /* Also here the system could not work with the breakpoint */ + if (ftrace_write(ip, old_code, 1)) + BUG(); goto out; } -- cgit From db0fbadcbd0c288525ea9f76488b324642a78c7f Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 10 Mar 2014 21:42:11 +0100 Subject: ftrace: Fix compilation warning about control_ops_free With CONFIG_DYNAMIC_FTRACE=n, I see a warning: kernel/trace/ftrace.c:240:13: warning: 'control_ops_free' defined but not used static void control_ops_free(struct ftrace_ops *ops) ^ Move the function into an already existing #ifdef CONFIG_DYNAMIC_FTRACE block, as it is used solely by the dynamic function tracing code. Link: http://lkml.kernel.org/r/1394484131-5107-1-git-send-email-jslaby@suse.cz Signed-off-by: Jiri Slaby Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0e48ff4cefa5..b4531b228180 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -237,11 +237,6 @@ static int control_ops_alloc(struct ftrace_ops *ops) return 0; } -static void control_ops_free(struct ftrace_ops *ops) -{ - free_percpu(ops->disabled); -} - static void update_global_ops(void) { ftrace_func_t func = ftrace_global_list_func; @@ -2100,6 +2095,11 @@ static ftrace_func_t saved_ftrace_func; static int ftrace_start_up; static int global_start_up; +static void control_ops_free(struct ftrace_ops *ops) +{ + free_percpu(ops->disabled); +} + static void ftrace_startup_enable(int command) { if (saved_ftrace_func != ftrace_trace_function) { -- cgit From 4c11628a16506a8a8e030515f601771df07bba97 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 11 Mar 2014 21:32:28 -0400 Subject: tracepoints: API doc update to data argument Describe the @data argument (probe private data). Link: http://lkml.kernel.org/r/1394587948-27878-1-git-send-email-mathieu.desnoyers@efficios.com Fixes: 38516ab59fbc "tracing: Let tracepoints have data passed to tracepoint callbacks" CC: Ingo Molnar CC: Frederic Weisbecker CC: Andrew Morton Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- kernel/tracepoint.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 0058f33d05c1..a4f629da3011 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -376,6 +376,7 @@ tracepoint_add_probe(const char *name, void *probe, void *data) * tracepoint_probe_register - Connect a probe to a tracepoint * @name: tracepoint name * @probe: probe handler + * @data: probe private data * * Returns 0 if ok, error value on error. * The probe address must at least be aligned on the architecture pointer size.
@@ -424,6 +425,7 @@ tracepoint_remove_probe(const char *name, void *probe, void *data) * tracepoint_probe_unregister - Disconnect a probe from a tracepoint * @name: tracepoint name * @probe: probe function pointer + * @data: probe private data * * We do not need to call a synchronize_sched to make sure the probes have * finished running before doing a module unload, because the module unload @@ -464,6 +466,7 @@ static void tracepoint_add_old_probes(void *old) * tracepoint_probe_register_noupdate - register a probe but not connect * @name: tracepoint name * @probe: probe handler + * @data: probe private data * * caller must call tracepoint_probe_update_all() */ @@ -488,6 +491,7 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate); * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect * @name: tracepoint name * @probe: probe function pointer + * @data: probe private data * * caller must call tracepoint_probe_update_all() */ -- cgit From 3bbc8db341773eb6aa5576eaabca4e95170fbe34 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 10 Mar 2014 21:04:58 -0400 Subject: tracepoints: API doc update to tracepoint_probe_register() return value Describe the return values of tracepoint_probe_register(), including -ENODEV added by commit: Author: Steven Rostedt tracing: Warn if a tracepoint is not set via debugfs Link: http://lkml.kernel.org/r/1394499898-1537-2-git-send-email-mathieu.desnoyers@efficios.com CC: Ingo Molnar CC: Frederic Weisbecker CC: Andrew Morton Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- kernel/tracepoint.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index a4f629da3011..e2a58a22b0f4 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -378,7 +378,17 @@ tracepoint_add_probe(const char *name, void *probe, void *data) * @probe: probe handler * @data: probe private data * - * Returns 0 if ok, error value on error. + * Returns: + * - 0 if the probe was successfully registered, and tracepoint + * callsites are currently loaded for that probe, + * - -ENODEV if the probe was successfully registered, but no tracepoint + * callsite is currently loaded for that probe, + * - other negative error value on error. + * + * When tracepoint_probe_register() returns either 0 or -ENODEV, + * parameters @name, @probe, and @data may be used by the tracepoint + * infrastructure until the probe is unregistered. + * * The probe address must at least be aligned on the architecture pointer size. 
*/ int tracepoint_probe_register(const char *name, void *probe, void *data) -- cgit From d88471cb8b17a72b1edf5ab62e1704d78373c066 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 9 Jan 2013 18:09:20 -0500 Subject: ftrace: Constify ftrace_text_reserved Link: http://lkml.kernel.org/r/1357772960-4436-5-git-send-email-sasha.levin@oracle.com Signed-off-by: Sasha Levin Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 4 ++-- kernel/trace/ftrace.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 20da0561b868..9212b017bc72 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -299,7 +299,7 @@ extern void unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops); extern void unregister_ftrace_function_probe_all(char *glob); -extern int ftrace_text_reserved(void *start, void *end); +extern int ftrace_text_reserved(const void *start, const void *end); extern int ftrace_nr_registered_ops(void); @@ -552,7 +552,7 @@ static inline __init int unregister_ftrace_command(char *cmd_name) { return -EINVAL; } -static inline int ftrace_text_reserved(void *start, void *end) +static inline int ftrace_text_reserved(const void *start, const void *end) { return 0; } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b4531b228180..1fd4b9479210 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1555,7 +1555,7 @@ unsigned long ftrace_location(unsigned long ip) * the function tracer. It checks the ftrace internal tables to * determine if the address belongs or not. */ -int ftrace_text_reserved(void *start, void *end) +int ftrace_text_reserved(const void *start, const void *end) { unsigned long ret; -- cgit From bc4c426ee2431d1f717004d3bbaacbd819b544fd Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 21 Mar 2014 08:23:38 -0400 Subject: Revert "tracing: Move event storage for array from macro to standalone function" I originally wrote commit 35bb4399bd0e to shrink the size of the overhead of tracepoints by several kilobytes. Later, I received a patch from Vaibhav Nagarnaik that fixed a bug in the same code that this commit touches. Not only did it fix a bug, it also removed code and shrunk the size of the overhead of trace events even more than this commit did. Since this commit is scheduled for 3.15 and Vaibhav's patch is already in mainline, I need to revert this patch in order to keep it from conflicting with Vaibhav's patch. Not to mention, Vaibhav's patch makes this patch obsolete. 
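To make the restored pattern concrete, here is a standalone userspace sketch (not the kernel code itself; a pthread mutex stands in for the kernel's event_storage_mutex, and define_field() is a hypothetical stand-in for trace_define_field()) of what the __array() macro goes back to doing: formatting the "type[len]" string in one shared static buffer while holding a mutex, instead of delegating to a common helper function.

#include <pthread.h>
#include <stdio.h>

#define EVENT_STORAGE_SIZE 128

static pthread_mutex_t event_storage_mutex = PTHREAD_MUTEX_INITIALIZER;
static char event_storage[EVENT_STORAGE_SIZE];

/* Stand-in for trace_define_field(); just records one field definition. */
static int define_field(const char *type, const char *item)
{
	return printf("field: %s %s\n", type, item) < 0 ? -1 : 0;
}

/* Mirrors the restored __array() body: build "type[len]" in the shared
 * buffer under the mutex, then hand it to the field-definition helper. */
static int define_array_field(const char *type, int len, const char *item)
{
	int ret;

	pthread_mutex_lock(&event_storage_mutex);
	snprintf(event_storage, sizeof(event_storage), "%s[%d]", type, len);
	ret = define_field(event_storage, item);
	pthread_mutex_unlock(&event_storage_mutex);
	return ret;
}

int main(void)
{
	/* e.g. an __array(char, comm, 16) event field */
	return define_array_field("char", 16, "comm");
}

The mutex is what makes the shared scratch buffer safe when several events are registered concurrently; the exported helper that this revert removes traded the per-callsite locking for one function call.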
Link: http://lkml.kernel.org/r/20140320225637.0226041b@gandalf.local.home Cc: Vaibhav Nagarnaik Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 8 ++++++++ include/trace/ftrace.h | 12 ++++++++---- kernel/trace/trace_events.c | 6 ++++++ kernel/trace/trace_export.c | 12 ++++++++---- kernel/trace/trace_output.c | 21 --------------------- 5 files changed, 30 insertions(+), 29 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 9d3fe0658398..cdc975929d15 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -221,6 +221,10 @@ void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer); +int ftrace_event_define_field(struct ftrace_event_call *call, + char *type, int len, char *item, int offset, + int field_size, int sign, int filter); + enum { TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_CAP_ANY_BIT, @@ -519,6 +523,10 @@ enum { FILTER_TRACE_FN, }; +#define EVENT_STORAGE_SIZE 128 +extern struct mutex event_storage_mutex; +extern char event_storage[EVENT_STORAGE_SIZE]; + extern int trace_event_raw_init(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index e15ae4010d51..d1d91875faa5 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -302,11 +302,15 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \ #undef __array #define __array(type, item, len) \ do { \ + mutex_lock(&event_storage_mutex); \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = ftrace_event_define_field(event_call, #type, len, \ - #item, offsetof(typeof(field), item), \ - sizeof(field.item), \ - is_signed_type(type), FILTER_OTHER); \ + snprintf(event_storage, sizeof(event_storage), \ + "%s[%d]", #type, len); \ + ret = trace_define_field(event_call, event_storage, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ + mutex_unlock(&event_storage_mutex); \ if (ret) \ return ret; \ } while (0); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index b8f73b333a3c..2f7b8e31e3a4 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,6 +27,12 @@ DEFINE_MUTEX(event_mutex); +DEFINE_MUTEX(event_storage_mutex); +EXPORT_SYMBOL_GPL(event_storage_mutex); + +char event_storage[EVENT_STORAGE_SIZE]; +EXPORT_SYMBOL_GPL(event_storage); + LIST_HEAD(ftrace_events); static LIST_HEAD(ftrace_common_fields); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 39c746c5ae73..7c3e3e72e2b6 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -96,10 +96,14 @@ static void __always_unused ____ftrace_check_##name(void) \ #define __array(type, item, len) \ do { \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = ftrace_event_define_field(event_call, #type, len, \ - #item, offsetof(typeof(field), item), \ - sizeof(field.item), \ - is_signed_type(type), filter_type); \ + mutex_lock(&event_storage_mutex); \ + snprintf(event_storage, sizeof(event_storage), \ + "%s[%d]", #type, len); \ + ret = trace_define_field(event_call, event_storage, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), \ + is_signed_type(type), filter_type); \ + mutex_unlock(&event_storage_mutex); \ if (ret) \ return ret; \ } while (0); diff --git a/kernel/trace/trace_output.c 
b/kernel/trace/trace_output.c index ee8d74840b88..ca0e79e2abaa 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -20,10 +20,6 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; static int next_event_type = __TRACE_LAST_TYPE + 1; -#define EVENT_STORAGE_SIZE 128 -static DEFINE_MUTEX(event_storage_mutex); -static char event_storage[EVENT_STORAGE_SIZE]; - int trace_print_seq(struct seq_file *m, struct trace_seq *s) { int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; @@ -474,23 +470,6 @@ int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) } EXPORT_SYMBOL_GPL(ftrace_output_call); -int ftrace_event_define_field(struct ftrace_event_call *call, - char *type, int len, char *item, int offset, - int field_size, int sign, int filter) -{ - int ret; - - mutex_lock(&event_storage_mutex); - snprintf(event_storage, sizeof(event_storage), - "%s[%d]", type, len); - ret = trace_define_field(call, event_storage, item, offset, - field_size, sign, filter); - mutex_unlock(&event_storage_mutex); - - return ret; -} -EXPORT_SYMBOL_GPL(ftrace_event_define_field); - #ifdef CONFIG_KRETPROBES static inline const char *kretprobed(const char *name) { -- cgit From 0dea6d52638b2693b18cd2ed8938b236e0789ddb Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 21 Mar 2014 01:19:01 -0400 Subject: tracepoint: Remove unused API functions After the following commit: commit b75ef8b44b1cb95f5a26484b0e2fe37a63b12b44 Author: Mathieu Desnoyers Date: Wed Aug 10 15:18:39 2011 -0400 Tracepoint: Dissociate from module mutex The following functions became unnecessary: - tracepoint_probe_register_noupdate, - tracepoint_probe_unregister_noupdate, - tracepoint_probe_update_all. In fact, neither the in-kernel tracers, LTTng, nor SystemTAP use them. Remove those. Moreover, the functions: - tracepoint_iter_start, - tracepoint_iter_next, - tracepoint_iter_stop, - tracepoint_iter_reset. are unused by the in-kernel tracers, LTTng, and SystemTAP. Remove those too.
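For reference, a hedged sketch of the probe API that survives this cleanup. The register/unregister signatures match the declarations kept above, but the tracepoint name ("sched_switch"), the probe prototype, and the counter payload are illustrative assumptions, not taken from this patch.

/* Hypothetical module fragment; my_probe() and "sched_switch" are
 * assumptions for illustration only. */
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/tracepoint.h>

static unsigned long hits;	/* probe private state, passed as @data */

/* Probes receive their private @data as the first argument, followed
 * by the tracepoint's own arguments. */
static void my_probe(void *data, struct task_struct *prev,
		     struct task_struct *next)
{
	(*(unsigned long *)data)++;
}

static int __init my_init(void)
{
	/* 0 and -ENODEV both mean "registered"; see the return-value
	 * documentation added a few patches earlier. */
	int ret = tracepoint_probe_register("sched_switch",
					    (void *)my_probe, &hits);

	return (ret && ret != -ENODEV) ? ret : 0;
}

static void __exit my_exit(void)
{
	tracepoint_probe_unregister("sched_switch", (void *)my_probe, &hits);
	/* Make sure no probe is still running before the module text goes
	 * away, per the tracepoint_synchronize_unregister() rule above. */
	tracepoint_synchronize_unregister();
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");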
Link: http://lkml.kernel.org/r/1395379142-2118-2-git-send-email-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 18 ---- kernel/tracepoint.c | 222 +-------------------------------------------- 2 files changed, 5 insertions(+), 235 deletions(-) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index accc497f8d72..a3b2837d8dd1 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -48,12 +48,6 @@ extern int tracepoint_probe_register(const char *name, void *probe, void *data); extern int tracepoint_probe_unregister(const char *name, void *probe, void *data); -extern int tracepoint_probe_register_noupdate(const char *name, void *probe, - void *data); -extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe, - void *data); -extern void tracepoint_probe_update_all(void); - #ifdef CONFIG_MODULES struct tp_module { struct list_head list; @@ -62,18 +56,6 @@ struct tp_module { }; #endif /* CONFIG_MODULES */ -struct tracepoint_iter { -#ifdef CONFIG_MODULES - struct tp_module *module; -#endif /* CONFIG_MODULES */ - struct tracepoint * const *tracepoint; -}; - -extern void tracepoint_iter_start(struct tracepoint_iter *iter); -extern void tracepoint_iter_next(struct tracepoint_iter *iter); -extern void tracepoint_iter_stop(struct tracepoint_iter *iter); -extern void tracepoint_iter_reset(struct tracepoint_iter *iter); - /* * tracepoint_synchronize_unregister must be called between the last tracepoint * probe unregistration and the end of module exit to make sure there is no diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index e2a58a22b0f4..65d9f9459a75 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -67,10 +67,7 @@ struct tracepoint_entry { }; struct tp_probes { - union { - struct rcu_head rcu; - struct list_head list; - } u; + struct rcu_head rcu; struct tracepoint_func probes[0]; }; @@ -83,7 +80,7 @@ static inline void *allocate_probes(int count) static void rcu_free_old_probes(struct rcu_head *head) { - kfree(container_of(head, struct tp_probes, u.rcu)); + kfree(container_of(head, struct tp_probes, rcu)); } static inline void release_probes(struct tracepoint_func *old) @@ -91,7 +88,7 @@ static inline void release_probes(struct tracepoint_func *old) if (old) { struct tp_probes *tp_probes = container_of(old, struct tp_probes, probes[0]); - call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes); + call_rcu_sched(&tp_probes->rcu, rcu_free_old_probes); } } @@ -459,204 +456,11 @@ int tracepoint_probe_unregister(const char *name, void *probe, void *data) } EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); -static LIST_HEAD(old_probes); -static int need_update; - -static void tracepoint_add_old_probes(void *old) -{ - need_update = 1; - if (old) { - struct tp_probes *tp_probes = container_of(old, - struct tp_probes, probes[0]); - list_add(&tp_probes->u.list, &old_probes); - } -} - -/** - * tracepoint_probe_register_noupdate - register a probe but not connect - * @name: tracepoint name - * @probe: probe handler - * @data: probe private data - * - * caller must call tracepoint_probe_update_all() - */ -int tracepoint_probe_register_noupdate(const char *name, void *probe, - void *data) -{ - struct tracepoint_func *old; - - mutex_lock(&tracepoints_mutex); - old = tracepoint_add_probe(name, probe, data); - if (IS_ERR(old)) { - mutex_unlock(&tracepoints_mutex); - return PTR_ERR(old); - } - tracepoint_add_old_probes(old); - mutex_unlock(&tracepoints_mutex); - 
return 0; -} -EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate); - -/** - * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect - * @name: tracepoint name - * @probe: probe function pointer - * @data: probe private data - * - * caller must call tracepoint_probe_update_all() - */ -int tracepoint_probe_unregister_noupdate(const char *name, void *probe, - void *data) -{ - struct tracepoint_func *old; - - mutex_lock(&tracepoints_mutex); - old = tracepoint_remove_probe(name, probe, data); - if (IS_ERR(old)) { - mutex_unlock(&tracepoints_mutex); - return PTR_ERR(old); - } - tracepoint_add_old_probes(old); - mutex_unlock(&tracepoints_mutex); - return 0; -} -EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate); - -/** - * tracepoint_probe_update_all - update tracepoints - */ -void tracepoint_probe_update_all(void) -{ - LIST_HEAD(release_probes); - struct tp_probes *pos, *next; - - mutex_lock(&tracepoints_mutex); - if (!need_update) { - mutex_unlock(&tracepoints_mutex); - return; - } - if (!list_empty(&old_probes)) - list_replace_init(&old_probes, &release_probes); - need_update = 0; - tracepoint_update_probes(); - mutex_unlock(&tracepoints_mutex); - list_for_each_entry_safe(pos, next, &release_probes, u.list) { - list_del(&pos->u.list); - call_rcu_sched(&pos->u.rcu, rcu_free_old_probes); - } -} -EXPORT_SYMBOL_GPL(tracepoint_probe_update_all); - -/** - * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. - * @tracepoint: current tracepoints (in), next tracepoint (out) - * @begin: beginning of the range - * @end: end of the range - * - * Returns whether a next tracepoint has been found (1) or not (0). - * Will return the first tracepoint in the range if the input tracepoint is - * NULL. - */ -static int tracepoint_get_iter_range(struct tracepoint * const **tracepoint, - struct tracepoint * const *begin, struct tracepoint * const *end) -{ - if (!*tracepoint && begin != end) { - *tracepoint = begin; - return 1; - } - if (*tracepoint >= begin && *tracepoint < end) - return 1; - return 0; -} - -#ifdef CONFIG_MODULES -static void tracepoint_get_iter(struct tracepoint_iter *iter) -{ - int found = 0; - struct tp_module *iter_mod; - - /* Core kernel tracepoints */ - if (!iter->module) { - found = tracepoint_get_iter_range(&iter->tracepoint, - __start___tracepoints_ptrs, - __stop___tracepoints_ptrs); - if (found) - goto end; - } - /* Tracepoints in modules */ - mutex_lock(&tracepoints_mutex); - list_for_each_entry(iter_mod, &tracepoint_module_list, list) { - /* - * Sorted module list - */ - if (iter_mod < iter->module) - continue; - else if (iter_mod > iter->module) - iter->tracepoint = NULL; - found = tracepoint_get_iter_range(&iter->tracepoint, - iter_mod->tracepoints_ptrs, - iter_mod->tracepoints_ptrs - + iter_mod->num_tracepoints); - if (found) { - iter->module = iter_mod; - break; - } - } - mutex_unlock(&tracepoints_mutex); -end: - if (!found) - tracepoint_iter_reset(iter); -} -#else /* CONFIG_MODULES */ -static void tracepoint_get_iter(struct tracepoint_iter *iter) -{ - int found = 0; - - /* Core kernel tracepoints */ - found = tracepoint_get_iter_range(&iter->tracepoint, - __start___tracepoints_ptrs, - __stop___tracepoints_ptrs); - if (!found) - tracepoint_iter_reset(iter); -} -#endif /* CONFIG_MODULES */ - -void tracepoint_iter_start(struct tracepoint_iter *iter) -{ - tracepoint_get_iter(iter); -} -EXPORT_SYMBOL_GPL(tracepoint_iter_start); - -void tracepoint_iter_next(struct tracepoint_iter *iter) -{ - iter->tracepoint++; - /* - * iter->tracepoint 
may be invalid because we blindly incremented it. - * Make sure it is valid by marshalling on the tracepoints, getting the - * tracepoints from following modules if necessary. - */ - tracepoint_get_iter(iter); -} -EXPORT_SYMBOL_GPL(tracepoint_iter_next); - -void tracepoint_iter_stop(struct tracepoint_iter *iter) -{ -} -EXPORT_SYMBOL_GPL(tracepoint_iter_stop); - -void tracepoint_iter_reset(struct tracepoint_iter *iter) -{ -#ifdef CONFIG_MODULES - iter->module = NULL; -#endif /* CONFIG_MODULES */ - iter->tracepoint = NULL; -} -EXPORT_SYMBOL_GPL(tracepoint_iter_reset); #ifdef CONFIG_MODULES static int tracepoint_module_coming(struct module *mod) { - struct tp_module *tp_mod, *iter; + struct tp_module *tp_mod; int ret = 0; if (!mod->num_tracepoints) @@ -677,23 +481,7 @@ static int tracepoint_module_coming(struct module *mod) } tp_mod->num_tracepoints = mod->num_tracepoints; tp_mod->tracepoints_ptrs = mod->tracepoints_ptrs; - - /* - * tracepoint_module_list is kept sorted by struct module pointer - * address for iteration on tracepoints from a seq_file that can release - * the mutex between calls. - */ - list_for_each_entry_reverse(iter, &tracepoint_module_list, list) { - BUG_ON(iter == tp_mod); /* Should never be in the list twice */ - if (iter < tp_mod) { - /* We belong to the location right after iter. */ - list_add(&tp_mod->list, &iter->list); - goto module_added; - } - } - /* We belong to the beginning of the list */ - list_add(&tp_mod->list, &tracepoint_module_list); -module_added: + list_add_tail(&tp_mod->list, &tracepoint_module_list); tracepoint_update_probe_range(mod->tracepoints_ptrs, mod->tracepoints_ptrs + mod->num_tracepoints); end: -- cgit From 3862807880acc0adaef6749738d210c9f45c3049 Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Mon, 24 Mar 2014 14:03:57 +0000 Subject: tracing: Add BUG_ON when stack end location is overwritten It is difficult to detect a stack overrun when it actually occurs. We have observed that this type of corruption is often silent and can go unnoticed. Once the corrupted region is examined, the outcome is undefined and often results in sporadic system crashes. When the stack tracing feature is enabled, let's check for this condition and take appropriate action. Note: init_task doesn't get its stack end location set to STACK_END_MAGIC. Link: http://lkml.kernel.org/r/1395669837-30209-1-git-send-email-atomlin@redhat.com Signed-off-by: Aaron Tomlin Signed-off-by: Steven Rostedt --- kernel/trace/trace_stack.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index e6be585cf06a..21b320e5d163 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -13,6 +13,7 @@ #include <linux/sysctl.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/magic.h> #include <asm/setup.h> @@ -144,6 +145,8 @@ check_stack(unsigned long ip, unsigned long *stack) i++; } + BUG_ON(current != &init_task && + *(end_of_stack(current)) != STACK_END_MAGIC); out: arch_spin_unlock(&max_stack_lock); local_irq_restore(flags); -- cgit
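As a closing illustration, here is a self-contained userspace sketch of the canary scheme behind that BUG_ON(). The kernel's end_of_stack() helper and the init_task exception are simplified away; STACK_END_MAGIC is the value the kernel actually plants at the stack end.

/*
 * Userspace sketch of the stack-end canary: a known magic word is
 * planted at the far end of the stack area; if it ever changes, the
 * stack has overflowed into it and it is not safe to continue.
 */
#include <assert.h>
#include <stdio.h>

#define STACK_END_MAGIC 0x57AC6E9DUL	/* same value the kernel uses */
#define STACK_WORDS	1024

static unsigned long fake_stack[STACK_WORDS];

/* On a stack that grows down, the canary sits at the lowest address,
 * which an overflowing stack reaches last. */
static unsigned long *end_of_stack(unsigned long *stack)
{
	return &stack[0];
}

int main(void)
{
	*end_of_stack(fake_stack) = STACK_END_MAGIC;

	/* ... code that pushes frames onto fake_stack would run here ... */

	/* Equivalent of the BUG_ON() added to check_stack() above. */
	assert(*end_of_stack(fake_stack) == STACK_END_MAGIC);
	puts("stack end canary intact");
	return 0;
}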