Diffstat (limited to 'kernel/trace/ftrace.c')
| -rw-r--r-- | kernel/trace/ftrace.c | 3899 |
1 file changed, 3107 insertions, 792 deletions
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index aac7847c0214..3ec2033c0774 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -18,6 +18,7 @@ #include <linux/clocksource.h> #include <linux/sched/task.h> #include <linux/kallsyms.h> +#include <linux/security.h> #include <linux/seq_file.h> #include <linux/tracefs.h> #include <linux/hardirq.h> @@ -33,6 +34,7 @@ #include <linux/list.h> #include <linux/hash.h> #include <linux/rcupdate.h> +#include <linux/kprobes.h> #include <trace/events/sched.h> @@ -43,6 +45,12 @@ #include "trace_output.h" #include "trace_stat.h" +/* Flags that do not get reset */ +#define FTRACE_NOCLEAR_FLAGS (FTRACE_FL_DISABLED | FTRACE_FL_TOUCHED | \ + FTRACE_FL_MODIFIED) + +#define FTRACE_INVALID_FUNCTION "__ftrace_invalid_address__" + #define FTRACE_WARN_ON(cond) \ ({ \ int ___r = cond; \ @@ -60,21 +68,16 @@ }) /* hash bits for specific function selection */ -#define FTRACE_HASH_BITS 7 -#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) #define FTRACE_HASH_DEFAULT_BITS 10 #define FTRACE_HASH_MAX_BITS 12 #ifdef CONFIG_DYNAMIC_FTRACE #define INIT_OPS_HASH(opsname) \ .func_hash = &opsname.local_hash, \ - .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), -#define ASSIGN_OPS_HASH(opsname, val) \ - .func_hash = val, \ - .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), + .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), \ + .subop_list = LIST_HEAD_INIT(opsname.subop_list), #else #define INIT_OPS_HASH(opsname) -#define ASSIGN_OPS_HASH(opsname, val) #endif enum { @@ -84,20 +87,20 @@ enum { struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB, + .flags = FTRACE_OPS_FL_STUB, INIT_OPS_HASH(ftrace_list_end) }; /* ftrace_enabled is a method to turn ftrace on or off */ int ftrace_enabled __read_mostly; -static int last_ftrace_enabled; +static int __maybe_unused last_ftrace_enabled; /* Current function tracing op */ struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; /* What to set function_trace_op to */ static struct ftrace_ops *set_function_trace_op; -static bool ftrace_pids_enabled(struct ftrace_ops *ops) +bool ftrace_pids_enabled(struct ftrace_ops *ops) { struct trace_array *tr; @@ -106,7 +109,7 @@ static bool ftrace_pids_enabled(struct ftrace_ops *ops) tr = ops->private; - return tr->function_pids != NULL; + return tr->function_pids != NULL || tr->function_no_pids != NULL; } static void ftrace_update_trampoline(struct ftrace_ops *ops); @@ -119,17 +122,39 @@ static int ftrace_disabled __read_mostly; DEFINE_MUTEX(ftrace_lock); -struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end; +struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = (struct ftrace_ops __rcu *)&ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; struct ftrace_ops global_ops; -#if ARCH_SUPPORTS_FTRACE_OPS -static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs); -#else -/* See comment below, where ftrace_ops_list_func is defined */ -static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); -#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) +/* Defined by vmlinux.lds.h see the comment above arch_ftrace_ops_list_func for details */ +void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); + 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS +/* + * Stub used to invoke the list ops without requiring a separate trampoline. + */ +const struct ftrace_ops ftrace_list_ops = { + .func = ftrace_ops_list_func, + .flags = FTRACE_OPS_FL_STUB, +}; + +static void ftrace_ops_nop_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, + struct ftrace_regs *fregs) +{ + /* do nothing */ +} + +/* + * Stub used when a call site is disabled. May be called transiently by threads + * which have made it into ftrace_caller but haven't yet recovered the ops at + * the point the call site is disabled. + */ +const struct ftrace_ops ftrace_nop_ops = { + .func = ftrace_ops_nop_func, + .flags = FTRACE_OPS_FL_STUB, +}; #endif static inline void ftrace_ops_init(struct ftrace_ops *ops) @@ -137,35 +162,33 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops) #ifdef CONFIG_DYNAMIC_FTRACE if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) { mutex_init(&ops->local_hash.regex_lock); + INIT_LIST_HEAD(&ops->subop_list); ops->func_hash = &ops->local_hash; ops->flags |= FTRACE_OPS_FL_INITIALIZED; } #endif } +/* Call this function for when a callback filters on set_ftrace_pid */ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct trace_array *tr = op->private; + int pid; - if (tr && this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid)) - return; + if (tr) { + pid = this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid); + if (pid == FTRACE_PID_IGNORE) + return; + if (pid != FTRACE_PID_TRACE && + pid != current->pid) + return; + } - op->saved_func(ip, parent_ip, op, regs); + op->saved_func(ip, parent_ip, op, fregs); } -static void ftrace_sync(struct work_struct *work) -{ - /* - * This function is just a stub to implement a hard force - * of synchronize_rcu(). This requires synchronizing - * tasks even in userspace and idle. - * - * Yes, function tracing is rude. - */ -} - -static void ftrace_sync_ipi(void *data) +void ftrace_sync_ipi(void *data) { /* Probably not needed, but do it anyway */ smp_rmb(); @@ -174,7 +197,7 @@ static void ftrace_sync_ipi(void *data) static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) { /* - * If this is a dynamic, RCU, or per CPU ops, or we force list func, + * If this is a dynamic or RCU ops, or we force list func, * then it needs to call the list anyway. */ if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_RCU) || @@ -215,8 +238,6 @@ static void update_ftrace_function(void) func = ftrace_ops_list_func; } - update_function_graph_func(); - /* If there's no change, then do nothing more here */ if (ftrace_trace_function == func) return; @@ -238,7 +259,7 @@ static void update_ftrace_function(void) /* * For static tracing, we need to be a bit more careful. * The function change takes affect immediately. Thus, - * we need to coorditate the setting of the function_trace_ops + * we need to coordinate the setting of the function_trace_ops * with the setting of the ftrace_trace_function. * * Set the function to the list ops, which will call the @@ -250,7 +271,7 @@ static void update_ftrace_function(void) * Make sure all CPUs see this. Yes this is slow, but static * tracing is slow and nasty to have enabled. */ - schedule_on_each_cpu(ftrace_sync); + synchronize_rcu_tasks_rude(); /* Now all cpus are using the list ops. 
*/ function_trace_op = set_function_trace_op; /* Make sure the function_trace_op is visible on all CPUs */ @@ -290,7 +311,7 @@ static int remove_ftrace_ops(struct ftrace_ops __rcu **list, lockdep_is_held(&ftrace_lock)) == ops && rcu_dereference_protected(ops->next, lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) { - *list = &ftrace_list_end; + rcu_assign_pointer(*list, &ftrace_list_end); return 0; } @@ -328,8 +349,10 @@ int __register_ftrace_function(struct ftrace_ops *ops) if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED) ops->flags |= FTRACE_OPS_FL_SAVE_REGS; #endif + if (!ftrace_enabled && (ops->flags & FTRACE_OPS_FL_PERMANENT)) + return -EBUSY; - if (!core_kernel_data((unsigned long)ops)) + if (!is_kernel_core_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; add_ftrace_ops(&ftrace_ops_list, ops); @@ -384,6 +407,8 @@ static void ftrace_update_pid_func(void) } } while_for_each_ftrace_op(op); + fgraph_update_pid_func(); + update_ftrace_function(); } @@ -465,10 +490,10 @@ static void *function_stat_start(struct tracer_stat *trace) #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* function graph compares on total time */ -static int function_stat_cmp(void *p1, void *p2) +static int function_stat_cmp(const void *p1, const void *p2) { - struct ftrace_profile *a = p1; - struct ftrace_profile *b = p2; + const struct ftrace_profile *a = p1; + const struct ftrace_profile *b = p2; if (a->time < b->time) return -1; @@ -479,10 +504,10 @@ static int function_stat_cmp(void *p1, void *p2) } #else /* not function graph compares against hits */ -static int function_stat_cmp(void *p1, void *p2) +static int function_stat_cmp(const void *p1, const void *p2) { - struct ftrace_profile *a = p1; - struct ftrace_profile *b = p2; + const struct ftrace_profile *a = p1; + const struct ftrace_profile *b = p2; if (a->counter < b->counter) return -1; @@ -509,51 +534,69 @@ static int function_stat_headers(struct seq_file *m) static int function_stat_show(struct seq_file *m, void *v) { + struct trace_array *tr = trace_get_global_array(); struct ftrace_profile *rec = v; + const char *refsymbol = NULL; char str[KSYM_SYMBOL_LEN]; - int ret = 0; #ifdef CONFIG_FUNCTION_GRAPH_TRACER static struct trace_seq s; unsigned long long avg; unsigned long long stddev; + unsigned long long stddev_denom; #endif - mutex_lock(&ftrace_profile_lock); + guard(mutex)(&ftrace_profile_lock); /* we raced with function_profile_reset() */ - if (unlikely(rec->counter == 0)) { - ret = -EBUSY; - goto out; - } + if (unlikely(rec->counter == 0)) + return -EBUSY; #ifdef CONFIG_FUNCTION_GRAPH_TRACER - avg = rec->time; - do_div(avg, rec->counter); + avg = div64_ul(rec->time, rec->counter); if (tracing_thresh && (avg < tracing_thresh)) - goto out; + return 0; #endif - kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + if (tr->trace_flags & TRACE_ITER(PROF_TEXT_OFFSET)) { + unsigned long offset; + + if (core_kernel_text(rec->ip)) { + refsymbol = "_text"; + offset = rec->ip - (unsigned long)_text; + } else { + struct module *mod; + + guard(rcu)(); + mod = __module_text_address(rec->ip); + if (mod) { + refsymbol = mod->name; + /* Calculate offset from module's text entry address. 
*/ + offset = rec->ip - (unsigned long)mod->mem[MOD_TEXT].base; + } + } + if (refsymbol) + snprintf(str, sizeof(str), " %s+%#lx", refsymbol, offset); + } + if (!refsymbol) + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + seq_printf(m, " %-30.30s %10lu", str, rec->counter); #ifdef CONFIG_FUNCTION_GRAPH_TRACER seq_puts(m, " "); - /* Sample standard deviation (s^2) */ - if (rec->counter <= 1) - stddev = 0; - else { - /* - * Apply Welford's method: - * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) - */ + /* + * Variance formula: + * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) + * Maybe Welford's method is better here? + * Divide only by 1000 for ns^2 -> us^2 conversion. + * trace_print_graph_duration will divide by 1000 again. + */ + stddev = 0; + stddev_denom = rec->counter * (rec->counter - 1) * 1000; + if (stddev_denom) { stddev = rec->counter * rec->time_squared - rec->time * rec->time; - - /* - * Divide only 1000 for ns^2 -> us^2 conversion. - * trace_print_graph_duration will divide 1000 again. - */ - do_div(stddev, rec->counter * (rec->counter - 1) * 1000); + stddev = div64_ul(stddev, stddev_denom); } trace_seq_init(&s); @@ -565,10 +608,8 @@ static int function_stat_show(struct seq_file *m, void *v) trace_print_seq(m, &s); #endif seq_putc(m, '\n'); -out: - mutex_unlock(&ftrace_profile_lock); - return ret; + return 0; } static void ftrace_profile_reset(struct ftrace_profile_stat *stat) @@ -587,7 +628,7 @@ static void ftrace_profile_reset(struct ftrace_profile_stat *stat) FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); } -int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) +static int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) { struct ftrace_profile_page *pg; int functions; @@ -760,31 +801,28 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) static void function_profile_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct ftrace_profile_stat *stat; struct ftrace_profile *rec; - unsigned long flags; if (!ftrace_profile_enabled) return; - local_irq_save(flags); + guard(preempt_notrace)(); stat = this_cpu_ptr(&ftrace_profile_stats); if (!stat->hash || !ftrace_profile_enabled) - goto out; + return; rec = ftrace_find_profiled_func(stat, ip); if (!rec) { rec = ftrace_profile_alloc(stat, ip); if (!rec) - goto out; + return; } rec->counter++; - out: - local_irq_restore(flags); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -795,9 +833,17 @@ void ftrace_graph_graph_time_control(bool enable) fgraph_graph_time = enable; } -static int profile_graph_entry(struct ftrace_graph_ent *trace) +struct profile_fgraph_data { + unsigned long long calltime; + unsigned long long subtime; + unsigned long long sleeptime; +}; + +static int profile_graph_entry(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops, + struct ftrace_regs *fregs) { - struct ftrace_ret_stack *ret_stack; + struct profile_fgraph_data *profile_data; function_profile_call(trace->func, 0, NULL, NULL); @@ -805,42 +851,59 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace) if (!current->ret_stack) return 0; - ret_stack = ftrace_graph_get_ret_stack(current, 0); - if (ret_stack) - ret_stack->subtime = 0; + profile_data = fgraph_reserve_data(gops->idx, sizeof(*profile_data)); + if (!profile_data) + return 0; + + profile_data->subtime = 0; + profile_data->sleeptime = current->ftrace_sleeptime; + profile_data->calltime = trace_clock_local(); 
return 1; } -static void profile_graph_return(struct ftrace_graph_ret *trace) +bool fprofile_no_sleep_time; + +static void profile_graph_return(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops, + struct ftrace_regs *fregs) { - struct ftrace_ret_stack *ret_stack; + struct profile_fgraph_data *profile_data; struct ftrace_profile_stat *stat; unsigned long long calltime; + unsigned long long rettime = trace_clock_local(); struct ftrace_profile *rec; - unsigned long flags; + int size; + + guard(preempt_notrace)(); - local_irq_save(flags); stat = this_cpu_ptr(&ftrace_profile_stats); if (!stat->hash || !ftrace_profile_enabled) - goto out; + return; + + profile_data = fgraph_retrieve_data(gops->idx, &size); /* If the calltime was zero'd ignore it */ - if (!trace->calltime) - goto out; + if (!profile_data || !profile_data->calltime) + return; + + calltime = rettime - profile_data->calltime; - calltime = trace->rettime - trace->calltime; + if (fprofile_no_sleep_time) { + if (current->ftrace_sleeptime) + calltime -= current->ftrace_sleeptime - profile_data->sleeptime; + } if (!fgraph_graph_time) { + struct profile_fgraph_data *parent_data; /* Append this call time to the parent time to subtract */ - ret_stack = ftrace_graph_get_ret_stack(current, 1); - if (ret_stack) - ret_stack->subtime += calltime; + parent_data = fgraph_retrieve_parent_data(gops->idx, &size, 1); + if (parent_data) + parent_data->subtime += calltime; - ret_stack = ftrace_graph_get_ret_stack(current, 0); - if (ret_stack && ret_stack->subtime < calltime) - calltime -= ret_stack->subtime; + if (profile_data->subtime && profile_data->subtime < calltime) + calltime -= profile_data->subtime; else calltime = 0; } @@ -850,9 +913,6 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) rec->time += calltime; rec->time_squared += calltime * calltime; } - - out: - local_irq_restore(flags); } static struct fgraph_ops fprofiler_ops = { @@ -862,6 +922,7 @@ static struct fgraph_ops fprofiler_ops = { static int register_ftrace_profiler(void) { + ftrace_ops_set_global_filter(&fprofiler_ops.ops); return register_ftrace_graph(&fprofiler_ops); } @@ -872,12 +933,11 @@ static void unregister_ftrace_profiler(void) #else static struct ftrace_ops ftrace_profile_ops __read_mostly = { .func = function_profile_call, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, - INIT_OPS_HASH(ftrace_profile_ops) }; static int register_ftrace_profiler(void) { + ftrace_ops_set_global_filter(&ftrace_profile_ops); return register_ftrace_function(&ftrace_profile_ops); } @@ -900,20 +960,16 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, val = !!val; - mutex_lock(&ftrace_profile_lock); + guard(mutex)(&ftrace_profile_lock); if (ftrace_profile_enabled ^ val) { if (val) { ret = ftrace_profile_init(); - if (ret < 0) { - cnt = ret; - goto out; - } + if (ret < 0) + return ret; ret = register_ftrace_profiler(); - if (ret < 0) { - cnt = ret; - goto out; - } + if (ret < 0) + return ret; ftrace_profile_enabled = 1; } else { ftrace_profile_enabled = 0; @@ -924,8 +980,6 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, unregister_ftrace_profiler(); } } - out: - mutex_unlock(&ftrace_profile_lock); *ppos += cnt; @@ -963,7 +1017,6 @@ static struct tracer_stat function_stats __initdata = { static __init void ftrace_profile_tracefs(struct dentry *d_tracer) { struct ftrace_profile_stat *stat; - struct dentry *entry; char *name; int ret; int cpu; @@ -994,10 +1047,9 @@ static __init void ftrace_profile_tracefs(struct 
dentry *d_tracer) } } - entry = tracefs_create_file("function_profile_enabled", 0644, - d_tracer, NULL, &ftrace_profile_fops); - if (!entry) - pr_warn("Could not create tracefs 'function_profile_enabled' entry\n"); + trace_create_file("function_profile_enabled", + TRACE_MODE_WRITE, d_tracer, NULL, + &ftrace_profile_fops); } #else /* CONFIG_FUNCTION_PROFILER */ @@ -1016,15 +1068,6 @@ static struct ftrace_ops *removed_ops; */ static bool update_all_ops; -#ifndef CONFIG_FTRACE_MCOUNT_RECORD -# error Dynamic ftrace depends on MCOUNT_RECORD -#endif - -struct ftrace_func_entry { - struct hlist_node hlist; - unsigned long ip; -}; - struct ftrace_func_probe { struct ftrace_probe_ops *probe_ops; struct ftrace_ops ops; @@ -1051,13 +1094,12 @@ struct ftrace_ops global_ops = { .local_hash.notrace_hash = EMPTY_HASH, .local_hash.filter_hash = EMPTY_HASH, INIT_OPS_HASH(global_ops) - .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED | + .flags = FTRACE_OPS_FL_INITIALIZED | FTRACE_OPS_FL_PID, }; /* - * Used by the stack undwinder to know about dynamic ftrace trampolines. + * Used by the stack unwinder to know about dynamic ftrace trampolines. */ struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr) { @@ -1102,15 +1144,12 @@ struct ftrace_page { struct ftrace_page *next; struct dyn_ftrace *records; int index; - int size; + int order; }; #define ENTRY_SIZE sizeof(struct dyn_ftrace) #define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE) -/* estimate from running different kernels */ -#define NR_TO_INIT 10000 - static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; @@ -1149,7 +1188,7 @@ __ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) * Search a given @hash to see if a given instruction pointer (@ip) * exists in it. * - * Returns the entry that holds the @ip if found. NULL otherwise. + * Returns: the entry that holds the @ip if found. NULL otherwise. 
*/ struct ftrace_func_entry * ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) @@ -1172,18 +1211,19 @@ static void __add_hash_entry(struct ftrace_hash *hash, hash->count++; } -static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip) +static struct ftrace_func_entry * +add_hash_entry(struct ftrace_hash *hash, unsigned long ip) { struct ftrace_func_entry *entry; entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) - return -ENOMEM; + return NULL; entry->ip = ip; __add_hash_entry(hash, entry); - return 0; + return entry; } static void @@ -1268,12 +1308,21 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) call_rcu(&hash->rcu, __free_ftrace_hash_rcu); } +/** + * ftrace_free_filter - remove all filters for an ftrace_ops + * @ops: the ops to remove the filters from + */ void ftrace_free_filter(struct ftrace_ops *ops) { ftrace_ops_init(ops); + if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) + return; free_ftrace_hash(ops->func_hash->filter_hash); free_ftrace_hash(ops->func_hash->notrace_hash); + ops->func_hash->filter_hash = EMPTY_HASH; + ops->func_hash->notrace_hash = EMPTY_HASH; } +EXPORT_SYMBOL_GPL(ftrace_free_filter); static struct ftrace_hash *alloc_ftrace_hash(int size_bits) { @@ -1297,7 +1346,7 @@ static struct ftrace_hash *alloc_ftrace_hash(int size_bits) return hash; } - +/* Used to save filters on functions for modules not loaded yet */ static int ftrace_add_mod(struct trace_array *tr, const char *func, const char *module, int enable) @@ -1309,6 +1358,7 @@ static int ftrace_add_mod(struct trace_array *tr, if (!ftrace_mod) return -ENOMEM; + INIT_LIST_HEAD(&ftrace_mod->list); ftrace_mod->func = kstrdup(func, GFP_KERNEL); ftrace_mod->module = kstrdup(module, GFP_KERNEL); ftrace_mod->enable = enable; @@ -1332,7 +1382,6 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) struct ftrace_func_entry *entry; struct ftrace_hash *new_hash; int size; - int ret; int i; new_hash = alloc_ftrace_hash(size_bits); @@ -1349,8 +1398,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) size = 1 << hash->size_bits; for (i = 0; i < size; i++) { hlist_for_each_entry(entry, &hash->buckets[i], hlist) { - ret = add_hash_entry(new_hash, entry->ip); - if (ret < 0) + if (add_hash_entry(new_hash, entry->ip) == NULL) goto free_hash; } } @@ -1364,36 +1412,30 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) return NULL; } -static void -ftrace_hash_rec_disable_modify(struct ftrace_ops *ops, int filter_hash); -static void -ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, int filter_hash); +static void ftrace_hash_rec_disable_modify(struct ftrace_ops *ops); +static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops); static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, struct ftrace_hash *new_hash); -static struct ftrace_hash * -__ftrace_hash_move(struct ftrace_hash *src) +/* + * Allocate a new hash and remove entries from @src and move them to the new hash. + * On success, the @src hash will be empty and should be freed. + */ +static struct ftrace_hash *__move_hash(struct ftrace_hash *src, int size) { struct ftrace_func_entry *entry; - struct hlist_node *tn; - struct hlist_head *hhd; struct ftrace_hash *new_hash; - int size = src->count; + struct hlist_head *hhd; + struct hlist_node *tn; int bits = 0; int i; /* - * If the new source is empty, just return the empty_hash. 
- */ - if (ftrace_hash_empty(src)) - return EMPTY_HASH; - - /* - * Make the hash size about 1/2 the # found + * Use around half the size (max bit of it), but + * a minimum of 2 is fine (as size of 0 or 1 both give 1 for bits). */ - for (size /= 2; size; size >>= 1) - bits++; + bits = fls(size / 2); /* Don't allocate too much */ if (bits > FTRACE_HASH_MAX_BITS) @@ -1413,10 +1455,44 @@ __ftrace_hash_move(struct ftrace_hash *src) __add_hash_entry(new_hash, entry); } } - return new_hash; } +/* Move the @src entries to a newly allocated hash */ +static struct ftrace_hash * +__ftrace_hash_move(struct ftrace_hash *src) +{ + int size = src->count; + + /* + * If the new source is empty, just return the empty_hash. + */ + if (ftrace_hash_empty(src)) + return EMPTY_HASH; + + return __move_hash(src, size); +} + +/** + * ftrace_hash_move - move a new hash to a filter and do updates + * @ops: The ops with the hash that @dst points to + * @enable: True if for the filter hash, false for the notrace hash + * @dst: Points to the @ops hash that should be updated + * @src: The hash to update @dst with + * + * This is called when an ftrace_ops hash is being updated and the + * the kernel needs to reflect this. Note, this only updates the kernel + * function callbacks if the @ops is enabled (not to be confused with + * @enable above). If the @ops is enabled, its hash determines what + * callbacks get called. This function gets called when the @ops hash + * is updated and it requires new callbacks. + * + * On success the elements of @src is moved to @dst, and @dst is updated + * properly, as well as the functions determined by the @ops hashes + * are now calling the @ops callback function. + * + * Regardless of return type, @src should be freed with free_ftrace_hash(). + */ static int ftrace_hash_move(struct ftrace_ops *ops, int enable, struct ftrace_hash **dst, struct ftrace_hash *src) @@ -1446,11 +1522,11 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, * Remove the current set, update the hash and add * them back. */ - ftrace_hash_rec_disable_modify(ops, enable); + ftrace_hash_rec_disable_modify(ops); rcu_assign_pointer(*dst, new_hash); - ftrace_hash_rec_enable_modify(ops, enable); + ftrace_hash_rec_enable_modify(ops); return 0; } @@ -1460,7 +1536,7 @@ static bool hash_contains_ip(unsigned long ip, { /* * The function record is a match if it exists in the filter - * hash and not in the notrace hash. Note, an emty hash is + * hash and not in the notrace hash. Note, an empty hash is * considered a match for the filter hash, but an empty * notrace hash is considered not in the notrace hash. */ @@ -1536,53 +1612,80 @@ static int ftrace_cmp_recs(const void *a, const void *b) return 0; } -/** - * ftrace_location_range - return the first address of a traced location - * if it touches the given ip range - * @start: start of range to search. - * @end: end of range to search (inclusive). @end points to the last byte - * to check. - * - * Returns rec->ip if the related ftrace location is a least partly within - * the given address range. That is, the first address of the instruction - * that is either a NOP or call to the function tracer. It checks the ftrace - * internal tables to determine if the address belongs or not. 
- */ -unsigned long ftrace_location_range(unsigned long start, unsigned long end) +static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end) { struct ftrace_page *pg; - struct dyn_ftrace *rec; + struct dyn_ftrace *rec = NULL; struct dyn_ftrace key; key.ip = start; key.flags = end; /* overload flags, as it is unsigned long */ for (pg = ftrace_pages_start; pg; pg = pg->next) { - if (end < pg->records[0].ip || + if (pg->index == 0 || + end < pg->records[0].ip || start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) continue; rec = bsearch(&key, pg->records, pg->index, sizeof(struct dyn_ftrace), ftrace_cmp_recs); if (rec) - return rec->ip; + break; } + return rec; +} - return 0; +/** + * ftrace_location_range - return the first address of a traced location + * if it touches the given ip range + * @start: start of range to search. + * @end: end of range to search (inclusive). @end points to the last byte + * to check. + * + * Returns: rec->ip if the related ftrace location is a least partly within + * the given address range. That is, the first address of the instruction + * that is either a NOP or call to the function tracer. It checks the ftrace + * internal tables to determine if the address belongs or not. + */ +unsigned long ftrace_location_range(unsigned long start, unsigned long end) +{ + struct dyn_ftrace *rec; + unsigned long ip = 0; + + rcu_read_lock(); + rec = lookup_rec(start, end); + if (rec) + ip = rec->ip; + rcu_read_unlock(); + + return ip; } /** - * ftrace_location - return true if the ip giving is a traced location + * ftrace_location - return the ftrace location * @ip: the instruction pointer to check * - * Returns rec->ip if @ip given is a pointer to a ftrace location. - * That is, the instruction that is either a NOP or call to - * the function tracer. It checks the ftrace internal tables to - * determine if the address belongs or not. + * Returns: + * * If @ip matches the ftrace location, return @ip. + * * If @ip matches sym+0, return sym's ftrace location. + * * Otherwise, return 0. */ unsigned long ftrace_location(unsigned long ip) { - return ftrace_location_range(ip, ip); + unsigned long loc; + unsigned long offset; + unsigned long size; + + loc = ftrace_location_range(ip, ip); + if (!loc) { + if (!kallsyms_lookup_size_offset(ip, &size, &offset)) + return 0; + + /* map sym+0 to __fentry__ */ + if (!offset) + loc = ftrace_location_range(ip, ip + size - 1); + } + return loc; } /** @@ -1590,7 +1693,7 @@ unsigned long ftrace_location(unsigned long ip) * @start: start of range to search * @end: end of range to search (inclusive). @end points to the last byte to check. * - * Returns 1 if @start and @end contains a ftrace location. + * Returns: 1 if @start and @end contains a ftrace location. * That is, the instruction that is either a NOP or call to * the function tracer. It checks the ftrace internal tables to * determine if the address belongs or not. @@ -1625,12 +1728,40 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) return keep_regs; } +static struct ftrace_ops * +ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); +static struct ftrace_ops * +ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_exclude); +static struct ftrace_ops * +ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops); + +static bool skip_record(struct dyn_ftrace *rec) +{ + /* + * At boot up, weak functions are set to disable. Function tracing + * can be enabled before they are, and they still need to be disabled now. 
+ * If the record is disabled, still continue if it is marked as already + * enabled (this is needed to keep the accounting working). + */ + return rec->flags & FTRACE_FL_DISABLED && + !(rec->flags & FTRACE_FL_ENABLED); +} + +/* + * This is the main engine to the ftrace updates to the dyn_ftrace records. + * + * It will iterate through all the available ftrace functions + * (the ones that ftrace can have callbacks to) and set the flags + * in the associated dyn_ftrace records. + * + * @inc: If true, the functions associated to @ops are added to + * the dyn_ftrace records, otherwise they are removed. + */ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, - int filter_hash, bool inc) { struct ftrace_hash *hash; - struct ftrace_hash *other_hash; + struct ftrace_hash *notrace_hash; struct ftrace_page *pg; struct dyn_ftrace *rec; bool update = false; @@ -1642,39 +1773,20 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, return false; /* - * In the filter_hash case: * If the count is zero, we update all records. * Otherwise we just update the items in the hash. - * - * In the notrace_hash case: - * We enable the update in the hash. - * As disabling notrace means enabling the tracing, - * and enabling notrace means disabling, the inc variable - * gets inversed. */ - if (filter_hash) { - hash = ops->func_hash->filter_hash; - other_hash = ops->func_hash->notrace_hash; - if (ftrace_hash_empty(hash)) - all = true; - } else { - inc = !inc; - hash = ops->func_hash->notrace_hash; - other_hash = ops->func_hash->filter_hash; - /* - * If the notrace hash has no items, - * then there's nothing to do. - */ - if (ftrace_hash_empty(hash)) - return false; - } + hash = ops->func_hash->filter_hash; + notrace_hash = ops->func_hash->notrace_hash; + if (ftrace_hash_empty(hash)) + all = true; do_for_each_ftrace_rec(pg, rec) { - int in_other_hash = 0; + int in_notrace_hash = 0; int in_hash = 0; int match = 0; - if (rec->flags & FTRACE_FL_DISABLED) + if (skip_record(rec)) continue; if (all) { @@ -1682,26 +1794,17 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, * Only the filter_hash affects all records. * Update if the record is not in the notrace hash. */ - if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip)) + if (!notrace_hash || !ftrace_lookup_ip(notrace_hash, rec->ip)) match = 1; } else { in_hash = !!ftrace_lookup_ip(hash, rec->ip); - in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip); + in_notrace_hash = !!ftrace_lookup_ip(notrace_hash, rec->ip); /* - * If filter_hash is set, we want to match all functions - * that are in the hash but not in the other hash. - * - * If filter_hash is not set, then we are decrementing. - * That means we match anything that is in the hash - * and also in the other_hash. That is, we need to turn - * off functions in the other hash because they are disabled - * by this hash. + * We want to match all functions that are in the hash but + * not in the other hash. 
*/ - if (filter_hash && in_hash && !in_other_hash) - match = 1; - else if (!filter_hash && in_hash && - (in_other_hash || ftrace_hash_empty(other_hash))) + if (in_hash && !in_notrace_hash) match = 1; } if (!match) @@ -1712,6 +1815,9 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, if (FTRACE_WARN_ON(ftrace_rec_count(rec) == FTRACE_REF_MAX)) return false; + if (ops->flags & FTRACE_OPS_FL_DIRECT) + rec->flags |= FTRACE_FL_DIRECT; + /* * If there's only a single callback registered to a * function, and the ops has a trampoline registered @@ -1740,6 +1846,15 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, rec->flags--; /* + * Only the internal direct_ops should have the + * DIRECT flag set. Thus, if it is removing a + * function, then that function should no longer + * be direct. + */ + if (ops->flags & FTRACE_OPS_FL_DIRECT) + rec->flags &= ~FTRACE_FL_DIRECT; + + /* * If the rec had REGS enabled and the ops that is * being removed had REGS set, then see if there is * still any ops for this record that wants regs. @@ -1753,25 +1868,39 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, } /* - * If the rec had TRAMP enabled, then it needs to - * be cleared. As TRAMP can only be enabled iff - * there is only a single ops attached to it. - * In otherwords, always disable it on decrementing. - * In the future, we may set it if rec count is - * decremented to one, and the ops that is left - * has a trampoline. + * The TRAMP needs to be set only if rec count + * is decremented to one, and the ops that is + * left has a trampoline. As TRAMP can only be + * enabled if there is only a single ops attached + * to it. */ - rec->flags &= ~FTRACE_FL_TRAMP; + if (ftrace_rec_count(rec) == 1 && + ftrace_find_tramp_ops_any_other(rec, ops)) + rec->flags |= FTRACE_FL_TRAMP; + else + rec->flags &= ~FTRACE_FL_TRAMP; /* * flags will be cleared in ftrace_check_record() * if rec count is zero. */ } + + /* + * If the rec has a single associated ops, and ops->func can be + * called directly, allow the call site to call via the ops. + */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS) && + ftrace_rec_count(rec) == 1 && + ftrace_ops_get_func(ops) == ops->func) + rec->flags |= FTRACE_FL_CALL_OPS; + else + rec->flags &= ~FTRACE_FL_CALL_OPS; + count++; /* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */ - update |= ftrace_test_record(rec, 1) != FTRACE_UPDATE_IGNORE; + update |= ftrace_test_record(rec, true) != FTRACE_UPDATE_IGNORE; /* Shortcut, if we handled all records, we are done. */ if (!all && count == hash->count) @@ -1781,24 +1910,48 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, return update; } -static bool ftrace_hash_rec_disable(struct ftrace_ops *ops, - int filter_hash) +/* + * This is called when an ops is removed from tracing. It will decrement + * the counters of the dyn_ftrace records for all the functions that + * the @ops attached to. + */ +static bool ftrace_hash_rec_disable(struct ftrace_ops *ops) { - return __ftrace_hash_rec_update(ops, filter_hash, 0); + return __ftrace_hash_rec_update(ops, false); } -static bool ftrace_hash_rec_enable(struct ftrace_ops *ops, - int filter_hash) +/* + * This is called when an ops is added to tracing. It will increment + * the counters of the dyn_ftrace records for all the functions that + * the @ops attached to. 
+ */ +static bool ftrace_hash_rec_enable(struct ftrace_ops *ops) { - return __ftrace_hash_rec_update(ops, filter_hash, 1); + return __ftrace_hash_rec_update(ops, true); } -static void ftrace_hash_rec_update_modify(struct ftrace_ops *ops, - int filter_hash, int inc) +/* + * This function will update what functions @ops traces when its filter + * changes. + * + * The @inc states if the @ops callbacks are going to be added or removed. + * When one of the @ops hashes are updated to a "new_hash" the dyn_ftrace + * records are update via: + * + * ftrace_hash_rec_disable_modify(ops); + * ops->hash = new_hash + * ftrace_hash_rec_enable_modify(ops); + * + * Where the @ops is removed from all the records it is tracing using + * its old hash. The @ops hash is updated to the new hash, and then + * the @ops is added back to the records so that it is tracing all + * the new functions. + */ +static void ftrace_hash_rec_update_modify(struct ftrace_ops *ops, bool inc) { struct ftrace_ops *op; - __ftrace_hash_rec_update(ops, filter_hash, inc); + __ftrace_hash_rec_update(ops, inc); if (ops->func_hash != &global_ops.local_hash) return; @@ -1812,20 +1965,18 @@ static void ftrace_hash_rec_update_modify(struct ftrace_ops *ops, if (op == ops) continue; if (op->func_hash == &global_ops.local_hash) - __ftrace_hash_rec_update(op, filter_hash, inc); + __ftrace_hash_rec_update(op, inc); } while_for_each_ftrace_op(op); } -static void ftrace_hash_rec_disable_modify(struct ftrace_ops *ops, - int filter_hash) +static void ftrace_hash_rec_disable_modify(struct ftrace_ops *ops) { - ftrace_hash_rec_update_modify(ops, filter_hash, 0); + ftrace_hash_rec_update_modify(ops, false); } -static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, - int filter_hash) +static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops) { - ftrace_hash_rec_update_modify(ops, filter_hash, 1); + ftrace_hash_rec_update_modify(ops, true); } /* @@ -1836,25 +1987,42 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, * - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected) * - If the hash is EMPTY_HASH, it hits nothing * - Anything else hits the recs which match the hash entries. + * + * DIRECT ops does not have IPMODIFY flag, but we still need to check it + * against functions with FTRACE_FL_IPMODIFY. If there is any overlap, call + * ops_func(SHARE_IPMODIFY_SELF) to make sure current ops can share with + * IPMODIFY. If ops_func(SHARE_IPMODIFY_SELF) returns non-zero, propagate + * the return value to the caller and eventually to the owner of the DIRECT + * ops. */ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, struct ftrace_hash *old_hash, - struct ftrace_hash *new_hash) + struct ftrace_hash *new_hash, + bool update_target) { struct ftrace_page *pg; struct dyn_ftrace *rec, *end = NULL; int in_old, in_new; + bool is_ipmodify, is_direct; /* Only update if the ops has been registered */ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) return 0; - if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + is_ipmodify = ops->flags & FTRACE_OPS_FL_IPMODIFY; + is_direct = ops->flags & FTRACE_OPS_FL_DIRECT; + + /* neither IPMODIFY nor DIRECT, skip */ + if (!is_ipmodify && !is_direct) + return 0; + + if (WARN_ON_ONCE(is_ipmodify && is_direct)) return 0; /* - * Since the IPMODIFY is a very address sensitive action, we do not - * allow ftrace_ops to set all functions to new hash. 
+ * Since the IPMODIFY and DIRECT are very address sensitive + * actions, we do not allow ftrace_ops to set all functions to new + * hash. */ if (!new_hash || !old_hash) return -EINVAL; @@ -1865,19 +2033,51 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, if (rec->flags & FTRACE_FL_DISABLED) continue; - /* We need to update only differences of filter_hash */ + /* + * Unless we are updating the target of a direct function, + * we only need to update differences of filter_hash + */ in_old = !!ftrace_lookup_ip(old_hash, rec->ip); in_new = !!ftrace_lookup_ip(new_hash, rec->ip); - if (in_old == in_new) + if (!update_target && (in_old == in_new)) continue; if (in_new) { - /* New entries must ensure no others are using it */ - if (rec->flags & FTRACE_FL_IPMODIFY) - goto rollback; - rec->flags |= FTRACE_FL_IPMODIFY; - } else /* Removed entry */ + if (rec->flags & FTRACE_FL_IPMODIFY) { + int ret; + + /* Cannot have two ipmodify on same rec */ + if (is_ipmodify) + goto rollback; + + /* + * If this is called by __modify_ftrace_direct() + * then it is only changing where the direct + * pointer is jumping to, and the record already + * points to a direct trampoline. If it isn't, + * then it is a bug to update ipmodify on a direct + * caller. + */ + FTRACE_WARN_ON(!update_target && + (rec->flags & FTRACE_FL_DIRECT)); + + /* + * Another ops with IPMODIFY is already + * attached. We are now attaching a direct + * ops. Run SHARE_IPMODIFY_SELF, to check + * whether sharing is supported. + */ + if (!ops->ops_func) + return -EBUSY; + ret = ops->ops_func(ops, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF); + if (ret) + return ret; + } else if (is_ipmodify) { + rec->flags |= FTRACE_FL_IPMODIFY; + } + } else if (is_ipmodify) { rec->flags &= ~FTRACE_FL_IPMODIFY; + } } while_for_each_ftrace_rec(); return 0; @@ -1892,7 +2092,7 @@ rollback: continue; if (rec == end) - goto err_out; + return -EBUSY; in_old = !!ftrace_lookup_ip(old_hash, rec->ip); in_new = !!ftrace_lookup_ip(new_hash, rec->ip); @@ -1905,7 +2105,6 @@ rollback: rec->flags |= FTRACE_FL_IPMODIFY; } while_for_each_ftrace_rec(); -err_out: return -EBUSY; } @@ -1916,7 +2115,7 @@ static int ftrace_hash_ipmodify_enable(struct ftrace_ops *ops) if (ftrace_hash_empty(hash)) hash = NULL; - return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash); + return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash, false); } /* Disabling always succeeds */ @@ -1927,7 +2126,7 @@ static void ftrace_hash_ipmodify_disable(struct ftrace_ops *ops) if (ftrace_hash_empty(hash)) hash = NULL; - __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH); + __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH, false); } static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, @@ -1941,24 +2140,22 @@ static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, if (ftrace_hash_empty(new_hash)) new_hash = NULL; - return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash); + return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash, false); } static void print_ip_ins(const char *fmt, const unsigned char *p) { - int i; + char ins[MCOUNT_INSN_SIZE]; - printk(KERN_CONT "%s", fmt); + if (copy_from_kernel_nofault(ins, p, MCOUNT_INSN_SIZE)) { + printk(KERN_CONT "%s[FAULT] %px\n", fmt, p); + return; + } - for (i = 0; i < MCOUNT_INSN_SIZE; i++) - printk(KERN_CONT "%s%02x", i ? 
":" : "", p[i]); + printk(KERN_CONT "%s", fmt); + pr_cont("%*phC", MCOUNT_INSN_SIZE, ins); } -static struct ftrace_ops * -ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); -static struct ftrace_ops * -ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops); - enum ftrace_bug_type ftrace_bug_type; const void *ftrace_expected; @@ -1992,22 +2189,22 @@ static void print_bug_type(void) * modifying the code. @failed should be one of either: * EFAULT - if the problem happens on reading the @ip address * EINVAL - if what is read at @ip is not what was expected - * EPERM - if the problem happens on writting to the @ip address + * EPERM - if the problem happens on writing to the @ip address */ void ftrace_bug(int failed, struct dyn_ftrace *rec) { unsigned long ip = rec ? rec->ip : 0; + pr_info("------------[ ftrace bug ]------------\n"); + switch (failed) { case -EFAULT: - FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on modifying "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); break; case -EINVAL: - FTRACE_WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); print_ip_ins(" actual: ", (unsigned char *)ip); pr_cont("\n"); if (ftrace_expected) { @@ -2016,22 +2213,21 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) } break; case -EPERM: - FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on writing "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); break; default: - FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on unknown error "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); } print_bug_type(); if (rec) { struct ftrace_ops *ops = NULL; pr_info("ftrace record flags: %lx\n", rec->flags); - pr_cont(" (%ld)%s", ftrace_rec_count(rec), - rec->flags & FTRACE_FL_REGS ? " R" : " "); + pr_cont(" (%ld)%s%s", ftrace_rec_count(rec), + rec->flags & FTRACE_FL_REGS ? " R" : " ", + rec->flags & FTRACE_FL_CALL_OPS ? " O" : " "); if (rec->flags & FTRACE_FL_TRAMP_EN) { ops = ftrace_find_tramp_ops_any(rec); if (ops) { @@ -2048,15 +2244,17 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) ip = ftrace_get_addr_curr(rec); pr_cont("\n expected tramp: %lx\n", ip); } + + FTRACE_WARN_ON_ONCE(1); } -static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) +static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) { unsigned long flag = 0UL; ftrace_bug_type = FTRACE_BUG_UNKNOWN; - if (rec->flags & FTRACE_FL_DISABLED) + if (skip_record(rec)) return FTRACE_UPDATE_IGNORE; /* @@ -2077,15 +2275,48 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) * If enabling and the REGS flag does not match the REGS_EN, or * the TRAMP flag doesn't match the TRAMP_EN, then do not ignore * this record. Set flags to fail the compare against ENABLED. + * Same for direct calls. */ if (flag) { - if (!(rec->flags & FTRACE_FL_REGS) != + if (!(rec->flags & FTRACE_FL_REGS) != !(rec->flags & FTRACE_FL_REGS_EN)) flag |= FTRACE_FL_REGS; - if (!(rec->flags & FTRACE_FL_TRAMP) != + if (!(rec->flags & FTRACE_FL_TRAMP) != !(rec->flags & FTRACE_FL_TRAMP_EN)) flag |= FTRACE_FL_TRAMP; + + /* + * Direct calls are special, as count matters. + * We must test the record for direct, if the + * DIRECT and DIRECT_EN do not match, but only + * if the count is 1. That's because, if the + * count is something other than one, we do not + * want the direct enabled (it will be done via the + * direct helper). But if DIRECT_EN is set, and + * the count is not one, we need to clear it. 
+ * + */ + if (ftrace_rec_count(rec) == 1) { + if (!(rec->flags & FTRACE_FL_DIRECT) != + !(rec->flags & FTRACE_FL_DIRECT_EN)) + flag |= FTRACE_FL_DIRECT; + } else if (rec->flags & FTRACE_FL_DIRECT_EN) { + flag |= FTRACE_FL_DIRECT; + } + + /* + * Ops calls are special, as count matters. + * As with direct calls, they must only be enabled when count + * is one, otherwise they'll be handled via the list ops. + */ + if (ftrace_rec_count(rec) == 1) { + if (!(rec->flags & FTRACE_FL_CALL_OPS) != + !(rec->flags & FTRACE_FL_CALL_OPS_EN)) + flag |= FTRACE_FL_CALL_OPS; + } else if (rec->flags & FTRACE_FL_CALL_OPS_EN) { + flag |= FTRACE_FL_CALL_OPS; + } } /* If the state of this record hasn't changed, then do nothing */ @@ -2097,7 +2328,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) flag ^= rec->flags & FTRACE_FL_ENABLED; if (update) { - rec->flags |= FTRACE_FL_ENABLED; + rec->flags |= FTRACE_FL_ENABLED | FTRACE_FL_TOUCHED; if (flag & FTRACE_FL_REGS) { if (rec->flags & FTRACE_FL_REGS) rec->flags |= FTRACE_FL_REGS_EN; @@ -2110,6 +2341,45 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) else rec->flags &= ~FTRACE_FL_TRAMP_EN; } + + /* Keep track of anything that modifies the function */ + if (rec->flags & (FTRACE_FL_DIRECT | FTRACE_FL_IPMODIFY)) + rec->flags |= FTRACE_FL_MODIFIED; + + if (flag & FTRACE_FL_DIRECT) { + /* + * If there's only one user (direct_ops helper) + * then we can call the direct function + * directly (no ftrace trampoline). + */ + if (ftrace_rec_count(rec) == 1) { + if (rec->flags & FTRACE_FL_DIRECT) + rec->flags |= FTRACE_FL_DIRECT_EN; + else + rec->flags &= ~FTRACE_FL_DIRECT_EN; + } else { + /* + * Can only call directly if there's + * only one callback to the function. + */ + rec->flags &= ~FTRACE_FL_DIRECT_EN; + } + } + + if (flag & FTRACE_FL_CALL_OPS) { + if (ftrace_rec_count(rec) == 1) { + if (rec->flags & FTRACE_FL_CALL_OPS) + rec->flags |= FTRACE_FL_CALL_OPS_EN; + else + rec->flags &= ~FTRACE_FL_CALL_OPS_EN; + } else { + /* + * Can only call directly if there's + * only one set of associated ops. + */ + rec->flags &= ~FTRACE_FL_CALL_OPS_EN; + } + } } /* @@ -2132,14 +2402,15 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) if (update) { /* If there's no more users, clear all flags */ if (!ftrace_rec_count(rec)) - rec->flags = 0; + rec->flags &= FTRACE_NOCLEAR_FLAGS; else /* * Just disable the record, but keep the ops TRAMP * and REGS states. The _EN flags must be disabled though. */ rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN | - FTRACE_FL_REGS_EN); + FTRACE_FL_REGS_EN | FTRACE_FL_DIRECT_EN | + FTRACE_FL_CALL_OPS_EN); } ftrace_bug_type = FTRACE_BUG_NOP; @@ -2147,30 +2418,30 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) } /** - * ftrace_update_record, set a record that now is tracing or not + * ftrace_update_record - set a record that now is tracing or not * @rec: the record to update - * @enable: set to 1 if the record is tracing, zero to force disable + * @enable: set to true if the record is tracing, false to force disable * * The records that represent all functions that can be traced need * to be updated when tracing has been enabled. 
*/ -int ftrace_update_record(struct dyn_ftrace *rec, int enable) +int ftrace_update_record(struct dyn_ftrace *rec, bool enable) { - return ftrace_check_record(rec, enable, 1); + return ftrace_check_record(rec, enable, true); } /** - * ftrace_test_record, check if the record has been enabled or not + * ftrace_test_record - check if the record has been enabled or not * @rec: the record to test - * @enable: set to 1 to check if enabled, 0 if it is disabled + * @enable: set to true to check if enabled, false if it is disabled * * The arch code may need to test if a record is already set to * tracing to determine how to modify the function code that it * represents. */ -int ftrace_test_record(struct dyn_ftrace *rec, int enable) +int ftrace_test_record(struct dyn_ftrace *rec, bool enable) { - return ftrace_check_record(rec, enable, 0); + return ftrace_check_record(rec, enable, false); } static struct ftrace_ops * @@ -2192,6 +2463,24 @@ ftrace_find_tramp_ops_any(struct dyn_ftrace *rec) } static struct ftrace_ops * +ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_exclude) +{ + struct ftrace_ops *op; + unsigned long ip = rec->ip; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + + if (op == op_exclude || !op->trampoline) + continue; + + if (hash_contains_ip(ip, op->func_hash)) + return op; + } while_for_each_ftrace_op(op); + + return NULL; +} + +static struct ftrace_ops * ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *op) { @@ -2204,7 +2493,7 @@ ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, if (hash_contains_ip(ip, op->func_hash)) return op; - } + } return NULL; } @@ -2294,19 +2583,79 @@ ftrace_find_tramp_ops_new(struct dyn_ftrace *rec) return NULL; } +struct ftrace_ops * +ftrace_find_unique_ops(struct dyn_ftrace *rec) +{ + struct ftrace_ops *op, *found = NULL; + unsigned long ip = rec->ip; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + + if (hash_contains_ip(ip, op->func_hash)) { + if (found) + return NULL; + found = op; + } + + } while_for_each_ftrace_op(op); + + return found; +} + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* Protected by rcu_tasks for reading, and direct_mutex for writing */ +static struct ftrace_hash __rcu *direct_functions = EMPTY_HASH; +static DEFINE_MUTEX(direct_mutex); + +/* + * Search the direct_functions hash to see if the given instruction pointer + * has a direct caller attached to it. + */ +unsigned long ftrace_find_rec_direct(unsigned long ip) +{ + struct ftrace_func_entry *entry; + + entry = __ftrace_lookup_ip(direct_functions, ip); + if (!entry) + return 0; + + return entry->direct; +} + +static void call_direct_funcs(unsigned long ip, unsigned long pip, + struct ftrace_ops *ops, struct ftrace_regs *fregs) +{ + unsigned long addr = READ_ONCE(ops->direct_call); + + if (!addr) + return; + + arch_ftrace_set_direct_caller(fregs, addr); +} +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + /** * ftrace_get_addr_new - Get the call address to set to * @rec: The ftrace record descriptor * * If the record has the FTRACE_FL_REGS set, that means that it * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS - * is not not set, then it wants to convert to the normal callback. + * is not set, then it wants to convert to the normal callback. 
* - * Returns the address of the trampoline to set to + * Returns: the address of the trampoline to set to */ unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec) { struct ftrace_ops *ops; + unsigned long addr; + + if ((rec->flags & FTRACE_FL_DIRECT) && + (ftrace_rec_count(rec) == 1)) { + addr = ftrace_find_rec_direct(rec->ip); + if (addr) + return addr; + WARN_ON_ONCE(1); + } /* Trampolines take precedence over regs */ if (rec->flags & FTRACE_FL_TRAMP) { @@ -2334,11 +2683,20 @@ unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec) * a function that saves all the regs. Basically the '_EN' version * represents the current state of the function. * - * Returns the address of the trampoline that is currently being called + * Returns: the address of the trampoline that is currently being called */ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) { struct ftrace_ops *ops; + unsigned long addr; + + /* Direct calls take precedence over trampolines */ + if (rec->flags & FTRACE_FL_DIRECT_EN) { + addr = ftrace_find_rec_direct(rec->ip); + if (addr) + return addr; + WARN_ON_ONCE(1); + } /* Trampolines take precedence over regs */ if (rec->flags & FTRACE_FL_TRAMP_EN) { @@ -2359,7 +2717,7 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) } static int -__ftrace_replace_code(struct dyn_ftrace *rec, int enable) +__ftrace_replace_code(struct dyn_ftrace *rec, bool enable) { unsigned long ftrace_old_addr; unsigned long ftrace_addr; @@ -2391,14 +2749,14 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr); } - return -1; /* unknow ftrace bug */ + return -1; /* unknown ftrace bug */ } void __weak ftrace_replace_code(int mod_flags) { struct dyn_ftrace *rec; struct ftrace_page *pg; - int enable = mod_flags & FTRACE_MODIFY_ENABLE_FL; + bool enable = mod_flags & FTRACE_MODIFY_ENABLE_FL; int schedulable = mod_flags & FTRACE_MODIFY_MAY_SLEEP_FL; int failed; @@ -2407,7 +2765,7 @@ void __weak ftrace_replace_code(int mod_flags) do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & FTRACE_FL_DISABLED) + if (skip_record(rec)) continue; failed = __ftrace_replace_code(rec, enable); @@ -2427,9 +2785,9 @@ struct ftrace_rec_iter { }; /** - * ftrace_rec_iter_start, start up iterating over traced functions + * ftrace_rec_iter_start - start up iterating over traced functions * - * Returns an iterator handle that is used to iterate over all + * Returns: an iterator handle that is used to iterate over all * the records that represent address locations where functions * are traced. * @@ -2458,10 +2816,10 @@ struct ftrace_rec_iter *ftrace_rec_iter_start(void) } /** - * ftrace_rec_iter_next, get the next record to process. + * ftrace_rec_iter_next - get the next record to process. * @iter: The handle to the iterator. * - * Returns the next iterator after the given iterator @iter. + * Returns: the next iterator after the given iterator @iter. */ struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter) { @@ -2483,10 +2841,10 @@ struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter) } /** - * ftrace_rec_iter_record, get the record at the iterator location + * ftrace_rec_iter_record - get the record at the iterator location * @iter: The current iterator location * - * Returns the record that the current @iter is at. + * Returns: the record that the current @iter is at. 
*/ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter) { @@ -2494,14 +2852,14 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter) } static int -ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) +ftrace_nop_initialize(struct module *mod, struct dyn_ftrace *rec) { int ret; if (unlikely(ftrace_disabled)) return 0; - ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); + ret = ftrace_init_nop(mod, rec); if (ret) { ftrace_bug_type = FTRACE_BUG_INIT; ftrace_bug(ret, rec); @@ -2514,18 +2872,29 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) * archs can override this function if they must do something * before the modifying code is performed. */ -int __weak ftrace_arch_code_modify_prepare(void) +void __weak ftrace_arch_code_modify_prepare(void) { - return 0; } /* * archs can override this function if they must do something * after the modifying code is performed. */ -int __weak ftrace_arch_code_modify_post_process(void) +void __weak ftrace_arch_code_modify_post_process(void) { - return 0; +} + +static int update_ftrace_func(ftrace_func_t func) +{ + static ftrace_func_t save_func; + + /* Avoid updating if it hasn't changed */ + if (func == save_func) + return 0; + + save_func = func; + + return ftrace_update_ftrace_func(func); } void ftrace_modify_all_code(int command) @@ -2548,7 +2917,7 @@ void ftrace_modify_all_code(int command) * traced. */ if (update) { - err = ftrace_update_ftrace_func(ftrace_ops_list_func); + err = update_ftrace_func(ftrace_ops_list_func); if (FTRACE_WARN_ON(err)) return; } @@ -2564,7 +2933,7 @@ void ftrace_modify_all_code(int command) /* If irqs are disabled, we are in stop machine */ if (!irqs_disabled()) smp_call_function(ftrace_sync_ipi, NULL, 1); - err = ftrace_update_ftrace_func(ftrace_trace_function); + err = update_ftrace_func(ftrace_trace_function); if (FTRACE_WARN_ON(err)) return; } @@ -2586,7 +2955,7 @@ static int __ftrace_modify_code(void *data) } /** - * ftrace_run_stop_machine, go back to the stop machine method + * ftrace_run_stop_machine - go back to the stop machine method * @command: The command to tell ftrace what to do * * If an arch needs to fall back to the stop machine method, the @@ -2598,7 +2967,7 @@ void ftrace_run_stop_machine(int command) } /** - * arch_ftrace_update_code, modify the code to trace or not trace + * arch_ftrace_update_code - modify the code to trace or not trace * @command: The command that needs to be done * * Archs can override this function if it does not need to @@ -2611,12 +2980,7 @@ void __weak arch_ftrace_update_code(int command) static void ftrace_run_update_code(int command) { - int ret; - - ret = ftrace_arch_code_modify_prepare(); - FTRACE_WARN_ON(ret); - if (ret) - return; + ftrace_arch_code_modify_prepare(); /* * By default we use stop_machine() to modify the code. 
@@ -2626,8 +2990,7 @@ static void ftrace_run_update_code(int command) */ arch_ftrace_update_code(command); - ret = ftrace_arch_code_modify_post_process(); - FTRACE_WARN_ON(ret); + ftrace_arch_code_modify_post_process(); } static void ftrace_run_modify_code(struct ftrace_ops *ops, int command, @@ -2649,6 +3012,51 @@ void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops) { } +/* List of trace_ops that have allocated trampolines */ +static LIST_HEAD(ftrace_ops_trampoline_list); + +static void ftrace_add_trampoline_to_kallsyms(struct ftrace_ops *ops) +{ + lockdep_assert_held(&ftrace_lock); + list_add_rcu(&ops->list, &ftrace_ops_trampoline_list); +} + +static void ftrace_remove_trampoline_from_kallsyms(struct ftrace_ops *ops) +{ + lockdep_assert_held(&ftrace_lock); + list_del_rcu(&ops->list); + synchronize_rcu(); +} + +/* + * "__builtin__ftrace" is used as a module name in /proc/kallsyms for symbols + * for pages allocated for ftrace purposes, even though "__builtin__ftrace" is + * not a module. + */ +#define FTRACE_TRAMPOLINE_MOD "__builtin__ftrace" +#define FTRACE_TRAMPOLINE_SYM "ftrace_trampoline" + +static void ftrace_trampoline_free(struct ftrace_ops *ops) +{ + if (ops && (ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP) && + ops->trampoline) { + /* + * Record the text poke event before the ksymbol unregister + * event. + */ + perf_event_text_poke((void *)ops->trampoline, + (void *)ops->trampoline, + ops->trampoline_size, NULL, 0); + perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, + ops->trampoline, ops->trampoline_size, + true, FTRACE_TRAMPOLINE_SYM); + /* Remove from kallsyms after the perf events */ + ftrace_remove_trampoline_from_kallsyms(ops); + } + + arch_ftrace_trampoline_free(ops); +} + static void ftrace_startup_enable(int command) { if (saved_ftrace_func != ftrace_trace_function) { @@ -2698,14 +3106,26 @@ int ftrace_startup(struct ftrace_ops *ops, int command) __unregister_ftrace_function(ops); ftrace_start_up--; ops->flags &= ~FTRACE_OPS_FL_ENABLED; + if (ops->flags & FTRACE_OPS_FL_DYNAMIC) + ftrace_trampoline_free(ops); return ret; } - if (ftrace_hash_rec_enable(ops, 1)) + if (ftrace_hash_rec_enable(ops)) command |= FTRACE_UPDATE_CALLS; ftrace_startup_enable(command); + /* + * If ftrace is in an undefined state, we just remove ops from list + * to prevent the NULL pointer, instead of totally rolling it back and + * free trampoline, because those actions could cause further damage. + */ + if (unlikely(ftrace_disabled)) { + __unregister_ftrace_function(ops); + return -ENODEV; + } + ops->flags &= ~FTRACE_OPS_FL_ADDING; return 0; @@ -2733,7 +3153,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) /* Disabling ipmodify never fails */ ftrace_hash_ipmodify_disable(ops); - if (ftrace_hash_rec_disable(ops, 1)) + if (ftrace_hash_rec_disable(ops)) command |= FTRACE_UPDATE_CALLS; ops->flags &= ~FTRACE_OPS_FL_ENABLED; @@ -2743,18 +3163,8 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) command |= FTRACE_UPDATE_TRACE_FUNC; } - if (!command || !ftrace_enabled) { - /* - * If these are dynamic or per_cpu ops, they still - * need their data freed. Since, function tracing is - * not currently active, we can just free them - * without synchronizing all CPUs. 
- */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC) - goto free_ops; - - return 0; - } + if (!command || !ftrace_enabled) + goto out; /* * If the ops uses a trampoline, then it needs to be @@ -2779,7 +3189,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) struct dyn_ftrace *rec; do_for_each_ftrace_rec(pg, rec) { - if (FTRACE_WARN_ON_ONCE(rec->flags & ~FTRACE_FL_DISABLED)) + if (FTRACE_WARN_ON_ONCE(rec->flags & ~FTRACE_NOCLEAR_FLAGS)) pr_warn(" %pS flags:%lx\n", (void *)rec->ip, rec->flags); } while_for_each_ftrace_rec(); @@ -2791,11 +3201,10 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) removed_ops = NULL; ops->flags &= ~FTRACE_OPS_FL_REMOVING; +out: /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. - * The same goes for freeing the per_cpu data of the per_cpu - * ops. */ if (ops->flags & FTRACE_OPS_FL_DYNAMIC) { /* @@ -2806,107 +3215,572 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) * infrastructure to do the synchronization, thus we must do it * ourselves. */ - schedule_on_each_cpu(ftrace_sync); + synchronize_rcu_tasks_rude(); /* - * When the kernel is preeptive, tasks can be preempted + * When the kernel is preemptive, tasks can be preempted * while on a ftrace trampoline. Just scheduling a task on * a CPU is not good enough to flush them. Calling - * synchornize_rcu_tasks() will wait for those tasks to + * synchronize_rcu_tasks() will wait for those tasks to * execute and either schedule voluntarily or enter user space. */ - if (IS_ENABLED(CONFIG_PREEMPT)) - synchronize_rcu_tasks(); + synchronize_rcu_tasks(); - free_ops: - arch_ftrace_trampoline_free(ops); + ftrace_trampoline_free(ops); } return 0; } -static void ftrace_startup_sysctl(void) +/* Simply make a copy of @src and return it */ +static struct ftrace_hash *copy_hash(struct ftrace_hash *src) { - int command; + if (ftrace_hash_empty(src)) + return EMPTY_HASH; - if (unlikely(ftrace_disabled)) - return; + return alloc_and_copy_ftrace_hash(src->size_bits, src); +} - /* Force update next time */ - saved_ftrace_func = NULL; - /* ftrace_start_up is true if we want ftrace running */ - if (ftrace_start_up) { - command = FTRACE_UPDATE_CALLS; - if (ftrace_graph_active) - command |= FTRACE_START_FUNC_RET; - ftrace_startup_enable(command); +/* + * Append @new_hash entries to @hash: + * + * If @hash is the EMPTY_HASH then it traces all functions and nothing + * needs to be done. + * + * If @new_hash is the EMPTY_HASH, then make *hash the EMPTY_HASH so + * that it traces everything. + * + * Otherwise, go through all of @new_hash and add anything that @hash + * doesn't already have, to @hash. + * + * The filter_hash update uses just the append_hash() function + * and the notrace_hash update does not.
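+ *
+ * A worked example (illustrative only, editor's note, not part of this
+ * patch): appending a @new_hash of {bar, baz} to a @hash of {foo, bar}
+ * leaves @hash as {foo, bar, baz}, while appending an EMPTY_HASH turns
+ * @hash into EMPTY_HASH, since an empty filter hash means "trace
+ * everything".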
+ */ +static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash, + int size_bits) +{ + struct ftrace_func_entry *entry; + int size; + int i; + + if (*hash) { + /* An empty hash does everything */ + if (ftrace_hash_empty(*hash)) + return 0; + } else { + *hash = alloc_ftrace_hash(size_bits); + if (!*hash) + return -ENOMEM; + } + + /* If new_hash has everything make hash have everything */ + if (ftrace_hash_empty(new_hash)) { + free_ftrace_hash(*hash); + *hash = EMPTY_HASH; + return 0; } + + size = 1 << new_hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &new_hash->buckets[i], hlist) { + /* Only add if not already in hash */ + if (!__ftrace_lookup_ip(*hash, entry->ip) && + add_hash_entry(*hash, entry->ip) == NULL) + return -ENOMEM; + } + } + return 0; } -static void ftrace_shutdown_sysctl(void) +/* + * Remove functions from @hash that are in @notrace_hash + */ +static void remove_hash(struct ftrace_hash *hash, struct ftrace_hash *notrace_hash) { - int command; + struct ftrace_func_entry *entry; + struct hlist_node *tmp; + int size; + int i; - if (unlikely(ftrace_disabled)) + /* If the notrace hash is empty, there's nothing to do */ + if (ftrace_hash_empty(notrace_hash)) return; - /* ftrace_start_up is true if ftrace is running */ - if (ftrace_start_up) { - command = FTRACE_DISABLE_CALLS; - if (ftrace_graph_active) - command |= FTRACE_STOP_FUNC_RET; - ftrace_run_update_code(command); + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry_safe(entry, tmp, &hash->buckets[i], hlist) { + if (!__ftrace_lookup_ip(notrace_hash, entry->ip)) + continue; + remove_hash_entry(hash, entry); + kfree(entry); + } } } -static u64 ftrace_update_time; -unsigned long ftrace_update_tot_cnt; +/* + * Add to @hash only those that are in both @new_hash1 and @new_hash2 + * + * The notrace_hash update uses just the intersect_hash() function + * and the filter_hash update does not. + */ +static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash1, + struct ftrace_hash *new_hash2) +{ + struct ftrace_func_entry *entry; + int size; + int i; -static inline int ops_traces_mod(struct ftrace_ops *ops) + /* + * If new_hash1 or new_hash2 is the EMPTY_HASH, then make the hash + * empty as well: an empty notrace hash means none are notraced.
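+ *
+ * A worked example of the general case (illustrative only, editor's
+ * note, not part of this patch): intersecting {foo, bar} with
+ * {bar, baz} leaves only {bar}; a function stays notraced only if
+ * both hashes mark it notrace.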
+ */ + if (ftrace_hash_empty(new_hash1) || ftrace_hash_empty(new_hash2)) { + free_ftrace_hash(*hash); + *hash = EMPTY_HASH; + return 0; + } + + size = 1 << new_hash1->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &new_hash1->buckets[i], hlist) { + /* Only add if in both @new_hash1 and @new_hash2 */ + if (__ftrace_lookup_ip(new_hash2, entry->ip) && + add_hash_entry(*hash, entry->ip) == NULL) + return -ENOMEM; + } + } + /* If nothing intersects, make it the empty set */ + if (ftrace_hash_empty(*hash)) { + free_ftrace_hash(*hash); + *hash = EMPTY_HASH; + } + return 0; +} + +static bool ops_equal(struct ftrace_hash *A, struct ftrace_hash *B) { + struct ftrace_func_entry *entry; + int size; + int i; + + if (ftrace_hash_empty(A)) + return ftrace_hash_empty(B); + + if (ftrace_hash_empty(B)) + return ftrace_hash_empty(A); + + if (A->count != B->count) + return false; + + size = 1 << A->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &A->buckets[i], hlist) { + if (!__ftrace_lookup_ip(B, entry->ip)) + return false; + } + } + + return true; +} + +static void ftrace_ops_update_code(struct ftrace_ops *ops, + struct ftrace_ops_hash *old_hash); + +static int __ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, + struct ftrace_hash **orig_hash, + struct ftrace_hash *hash, + int enable) +{ + struct ftrace_ops_hash old_hash_ops; + struct ftrace_hash *old_hash; + int ret; + + old_hash = *orig_hash; + old_hash_ops.filter_hash = ops->func_hash->filter_hash; + old_hash_ops.notrace_hash = ops->func_hash->notrace_hash; + ret = ftrace_hash_move(ops, enable, orig_hash, hash); + if (!ret) { + ftrace_ops_update_code(ops, &old_hash_ops); + free_ftrace_hash_rcu(old_hash); + } + return ret; +} + +static int ftrace_update_ops(struct ftrace_ops *ops, struct ftrace_hash *filter_hash, + struct ftrace_hash *notrace_hash) +{ + int ret; + + if (!ops_equal(filter_hash, ops->func_hash->filter_hash)) { + ret = __ftrace_hash_move_and_update_ops(ops, &ops->func_hash->filter_hash, + filter_hash, 1); + if (ret < 0) + return ret; + } + + if (!ops_equal(notrace_hash, ops->func_hash->notrace_hash)) { + ret = __ftrace_hash_move_and_update_ops(ops, &ops->func_hash->notrace_hash, + notrace_hash, 0); + if (ret < 0) + return ret; + } + + return 0; +} + +static int add_first_hash(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash, + struct ftrace_ops_hash *func_hash) +{ + /* If the filter hash is not empty, simply remove the nohash from it */ + if (!ftrace_hash_empty(func_hash->filter_hash)) { + *filter_hash = copy_hash(func_hash->filter_hash); + if (!*filter_hash) + return -ENOMEM; + remove_hash(*filter_hash, func_hash->notrace_hash); + *notrace_hash = EMPTY_HASH; + + } else { + *notrace_hash = copy_hash(func_hash->notrace_hash); + if (!*notrace_hash) + return -ENOMEM; + *filter_hash = EMPTY_HASH; + } + return 0; +} + +static int add_next_hash(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash, + struct ftrace_ops_hash *ops_hash, struct ftrace_ops_hash *subops_hash) +{ + int size_bits; + int ret; + + /* If the subops trace all functions so must the main ops */ + if (ftrace_hash_empty(ops_hash->filter_hash) || + ftrace_hash_empty(subops_hash->filter_hash)) { + *filter_hash = EMPTY_HASH; + } else { + /* + * The main ops filter hash is not empty, so its + * notrace_hash had better be, as the notrace hash + * is only used for empty main filter hashes. 
+ */ + WARN_ON_ONCE(!ftrace_hash_empty(ops_hash->notrace_hash)); + + size_bits = max(ops_hash->filter_hash->size_bits, + subops_hash->filter_hash->size_bits); + + /* Copy the subops hash */ + *filter_hash = alloc_and_copy_ftrace_hash(size_bits, subops_hash->filter_hash); + if (!*filter_hash) + return -ENOMEM; + /* Remove any notrace functions from the copy */ + remove_hash(*filter_hash, subops_hash->notrace_hash); + + ret = append_hash(filter_hash, ops_hash->filter_hash, + size_bits); + if (ret < 0) { + free_ftrace_hash(*filter_hash); + *filter_hash = EMPTY_HASH; + return ret; + } + } + /* - * Filter_hash being empty will default to trace module. - * But notrace hash requires a test of individual module functions. + * Only process notrace hashes if the main filter hash is empty + * (tracing all functions), otherwise the filter hash will just + * remove the notrace hash functions, and the notrace hash is + * not needed. */ - return ftrace_hash_empty(ops->func_hash->filter_hash) && - ftrace_hash_empty(ops->func_hash->notrace_hash); + if (ftrace_hash_empty(*filter_hash)) { + /* + * Intersect the notrace functions. That is, if two + * subops are not tracing a set of functions, the + * main ops will only not trace the functions that are + * in both subops, but has to trace the functions that + * are only notrace in one of the subops, for the other + * subops to be able to trace them. + */ + size_bits = max(ops_hash->notrace_hash->size_bits, + subops_hash->notrace_hash->size_bits); + *notrace_hash = alloc_ftrace_hash(size_bits); + if (!*notrace_hash) + return -ENOMEM; + + ret = intersect_hash(notrace_hash, ops_hash->notrace_hash, + subops_hash->notrace_hash); + if (ret < 0) { + free_ftrace_hash(*notrace_hash); + *notrace_hash = EMPTY_HASH; + return ret; + } + } + return 0; } -/* - * Check if the current ops references the record. +/** + * ftrace_startup_subops - enable tracing for subops of an ops + * @ops: Manager ops (used to pick all the functions of its subops) + * @subops: A new ops to add to @ops + * @command: Extra commands to use to enable tracing * - * If the ops traces all functions, then it was already accounted for. - * If the ops does not trace the current record function, skip it. - * If the ops ignores the function via notrace filter, skip it. + * The @ops is a manager @ops that has the filter that includes all the functions + * that its list of subops are tracing. Adding a new @subops will add the + * functions of @subops to @ops. 
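 *
 * A worked example (illustrative only, editor's note, not part of this
 * patch): if one subops traces {foo, bar} and a newly added subops traces
 * {bar, baz}, the manager @ops ends up filtering on {foo, bar, baz}, while
 * each subops callback still only fires for the functions in its own hash.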
*/ -static inline bool -ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) +int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command) { - /* If ops isn't enabled, ignore it */ - if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) - return false; + struct ftrace_hash *filter_hash = EMPTY_HASH; + struct ftrace_hash *notrace_hash = EMPTY_HASH; + struct ftrace_hash *save_filter_hash; + struct ftrace_hash *save_notrace_hash; + int ret; - /* If ops traces all then it includes this function */ - if (ops_traces_mod(ops)) - return true; + if (unlikely(ftrace_disabled)) + return -ENODEV; - /* The function must be in the filter */ - if (!ftrace_hash_empty(ops->func_hash->filter_hash) && - !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip)) - return false; + ftrace_ops_init(ops); + ftrace_ops_init(subops); - /* If in notrace hash, we ignore it too */ - if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) - return false; + if (WARN_ON_ONCE(subops->flags & FTRACE_OPS_FL_ENABLED)) + return -EBUSY; - return true; + /* Make everything canonical (Just in case!) */ + if (!ops->func_hash->filter_hash) + ops->func_hash->filter_hash = EMPTY_HASH; + if (!ops->func_hash->notrace_hash) + ops->func_hash->notrace_hash = EMPTY_HASH; + if (!subops->func_hash->filter_hash) + subops->func_hash->filter_hash = EMPTY_HASH; + if (!subops->func_hash->notrace_hash) + subops->func_hash->notrace_hash = EMPTY_HASH; + + /* For the first subops to ops just enable it normally */ + if (list_empty(&ops->subop_list)) { + + /* The ops was empty, should have empty hashes */ + WARN_ON_ONCE(!ftrace_hash_empty(ops->func_hash->filter_hash)); + WARN_ON_ONCE(!ftrace_hash_empty(ops->func_hash->notrace_hash)); + + ret = add_first_hash(&filter_hash, ¬race_hash, subops->func_hash); + if (ret < 0) + return ret; + + save_filter_hash = ops->func_hash->filter_hash; + save_notrace_hash = ops->func_hash->notrace_hash; + + ops->func_hash->filter_hash = filter_hash; + ops->func_hash->notrace_hash = notrace_hash; + list_add(&subops->list, &ops->subop_list); + ret = ftrace_startup(ops, command); + if (ret < 0) { + list_del(&subops->list); + ops->func_hash->filter_hash = save_filter_hash; + ops->func_hash->notrace_hash = save_notrace_hash; + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + } else { + free_ftrace_hash(save_filter_hash); + free_ftrace_hash(save_notrace_hash); + subops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP; + subops->managed = ops; + } + return ret; + } + + /* + * Here there's already something attached. Here are the rules: + * If the new subops and main ops filter hashes are not empty: + * o Make a copy of the subops filter hash + * o Remove all functions in the nohash from it. 
+ * o Add in the main hash filter functions + * o Remove any of these functions from the main notrace hash + */ + + ret = add_next_hash(&filter_hash, ¬race_hash, ops->func_hash, subops->func_hash); + if (ret < 0) + return ret; + + list_add(&subops->list, &ops->subop_list); + + ret = ftrace_update_ops(ops, filter_hash, notrace_hash); + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + if (ret < 0) { + list_del(&subops->list); + } else { + subops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP; + subops->managed = ops; + } + return ret; +} + +static int rebuild_hashes(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash, + struct ftrace_ops *ops) +{ + struct ftrace_ops_hash temp_hash; + struct ftrace_ops *subops; + bool first = true; + int ret; + + temp_hash.filter_hash = EMPTY_HASH; + temp_hash.notrace_hash = EMPTY_HASH; + + list_for_each_entry(subops, &ops->subop_list, list) { + *filter_hash = EMPTY_HASH; + *notrace_hash = EMPTY_HASH; + + if (first) { + ret = add_first_hash(filter_hash, notrace_hash, subops->func_hash); + if (ret < 0) + return ret; + first = false; + } else { + ret = add_next_hash(filter_hash, notrace_hash, + &temp_hash, subops->func_hash); + if (ret < 0) { + free_ftrace_hash(temp_hash.filter_hash); + free_ftrace_hash(temp_hash.notrace_hash); + return ret; + } + } + + free_ftrace_hash(temp_hash.filter_hash); + free_ftrace_hash(temp_hash.notrace_hash); + + temp_hash.filter_hash = *filter_hash; + temp_hash.notrace_hash = *notrace_hash; + } + return 0; +} + +/** + * ftrace_shutdown_subops - Remove a subops from a manager ops + * @ops: A manager ops to remove @subops from + * @subops: The subops to remove from @ops + * @command: Any extra command flags to add to modifying the text + * + * Removes the functions being traced by the @subops from @ops. Note, it + * will not affect functions that are being traced by other subops that + * still exist in @ops. + * + * If the last subops is removed from @ops, then @ops is shutdown normally. 
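+ *
+ * A worked example (illustrative only, editor's note, not part of this
+ * patch): with subops A tracing {foo} and subops B tracing {bar},
+ * removing B rebuilds the manager hashes so that @ops is left tracing
+ * only {foo}.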
+ */ +int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command) +{ + struct ftrace_hash *filter_hash = EMPTY_HASH; + struct ftrace_hash *notrace_hash = EMPTY_HASH; + int ret; + + if (unlikely(ftrace_disabled)) + return -ENODEV; + + if (WARN_ON_ONCE(!(subops->flags & FTRACE_OPS_FL_ENABLED))) + return -EINVAL; + + list_del(&subops->list); + + if (list_empty(&ops->subop_list)) { + /* Last one, just disable the current ops */ + + ret = ftrace_shutdown(ops, command); + if (ret < 0) { + list_add(&subops->list, &ops->subop_list); + return ret; + } + + subops->flags &= ~FTRACE_OPS_FL_ENABLED; + + free_ftrace_hash(ops->func_hash->filter_hash); + free_ftrace_hash(ops->func_hash->notrace_hash); + ops->func_hash->filter_hash = EMPTY_HASH; + ops->func_hash->notrace_hash = EMPTY_HASH; + subops->flags &= ~(FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP); + subops->managed = NULL; + + return 0; + } + + /* Rebuild the hashes without subops */ + ret = rebuild_hashes(&filter_hash, ¬race_hash, ops); + if (ret < 0) + return ret; + + ret = ftrace_update_ops(ops, filter_hash, notrace_hash); + if (ret < 0) { + list_add(&subops->list, &ops->subop_list); + } else { + subops->flags &= ~(FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP); + subops->managed = NULL; + } + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + return ret; +} + +static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops, + struct ftrace_hash **orig_subhash, + struct ftrace_hash *hash) +{ + struct ftrace_ops *ops = subops->managed; + struct ftrace_hash *notrace_hash; + struct ftrace_hash *filter_hash; + struct ftrace_hash *save_hash; + struct ftrace_hash *new_hash; + int ret; + + /* Manager ops can not be subops (yet) */ + if (WARN_ON_ONCE(!ops || ops->flags & FTRACE_OPS_FL_SUBOP)) + return -EINVAL; + + /* Move the new hash over to the subops hash */ + save_hash = *orig_subhash; + *orig_subhash = __ftrace_hash_move(hash); + if (!*orig_subhash) { + *orig_subhash = save_hash; + return -ENOMEM; + } + + ret = rebuild_hashes(&filter_hash, ¬race_hash, ops); + if (!ret) { + ret = ftrace_update_ops(ops, filter_hash, notrace_hash); + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + } + + if (ret) { + /* Put back the original hash */ + new_hash = *orig_subhash; + *orig_subhash = save_hash; + free_ftrace_hash_rcu(new_hash); + } else { + free_ftrace_hash_rcu(save_hash); + } + return ret; +} + + +u64 ftrace_update_time; +u64 ftrace_total_mod_time; +unsigned long ftrace_update_tot_cnt; +unsigned long ftrace_number_of_pages; +unsigned long ftrace_number_of_groups; + +static inline int ops_traces_mod(struct ftrace_ops *ops) +{ + /* + * Filter_hash being empty will default to trace module. + * But notrace hash requires a test of individual module functions. + */ + return ftrace_hash_empty(ops->func_hash->filter_hash) && + ftrace_hash_empty(ops->func_hash->notrace_hash); } static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) { + bool init_nop = ftrace_need_init_nop(); struct ftrace_page *pg; struct dyn_ftrace *p; - u64 start, stop; + u64 start, stop, update_time; unsigned long update_cnt = 0; unsigned long rec_flags = 0; int i; @@ -2938,21 +3812,23 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) p = &pg->records[i]; p->flags = rec_flags; -#ifndef CC_USING_NOP_MCOUNT /* * Do the initial record conversion from mcount jump * to the NOP instructions. 
*/ - if (!ftrace_code_disable(mod, p)) + if (init_nop && !ftrace_nop_initialize(mod, p)) break; -#endif update_cnt++; } } stop = ftrace_now(raw_smp_processor_id()); - ftrace_update_time = stop - start; + update_time = stop - start; + if (mod) + ftrace_total_mod_time += update_time; + else + ftrace_update_time = update_time; ftrace_update_tot_cnt += update_cnt; return 0; @@ -2961,19 +3837,15 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) static int ftrace_allocate_records(struct ftrace_page *pg, int count) { int order; + int pages; int cnt; if (WARN_ON(!count)) return -EINVAL; - order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); - - /* - * We want to fill as much as possible. No more than a page - * may be empty. - */ - while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE) - order--; + /* We want to fill as much as possible, with no empty pages */ + pages = DIV_ROUND_UP(count, ENTRIES_PER_PAGE); + order = fls(pages) - 1; again: pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); @@ -2982,12 +3854,15 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) /* if we can't allocate this size, try something smaller */ if (!order) return -ENOMEM; - order >>= 1; + order--; goto again; } + ftrace_number_of_pages += 1 << order; + ftrace_number_of_groups++; + cnt = (PAGE_SIZE << order) / ENTRY_SIZE; - pg->size = cnt; + pg->order = order; if (cnt > count) cnt = count; @@ -2995,16 +3870,31 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) return cnt; } +static void ftrace_free_pages(struct ftrace_page *pages) +{ + struct ftrace_page *pg = pages; + + while (pg) { + if (pg->records) { + free_pages((unsigned long)pg->records, pg->order); + ftrace_number_of_pages -= 1 << pg->order; + } + pages = pg->next; + kfree(pg); + pg = pages; + ftrace_number_of_groups--; + } +} + static struct ftrace_page * ftrace_allocate_pages(unsigned long num_to_init) { struct ftrace_page *start_pg; struct ftrace_page *pg; - int order; int cnt; if (!num_to_init) - return 0; + return NULL; start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL); if (!pg) @@ -3034,14 +3924,7 @@ ftrace_allocate_pages(unsigned long num_to_init) return start_pg; free_pages: - pg = start_pg; - while (pg) { - order = get_count_order(pg->size / ENTRIES_PER_PAGE); - free_pages((unsigned long)pg->records, order); - start_pg = pg->next; - kfree(pg); - pg = start_pg; - } + ftrace_free_pages(start_pg); pr_info("ftrace: FAILED to allocate memory for functions\n"); return NULL; } @@ -3097,6 +3980,14 @@ t_probe_next(struct seq_file *m, loff_t *pos) hnd = &iter->probe_entry->hlist; hash = iter->probe->ops.func_hash->filter_hash; + + /* + * A probe being registered may temporarily have an empty hash + * and it's at the end of the func_probes list. + */ + if (!hash || hash == EMPTY_HASH) + return NULL; + size = 1 << hash->size_bits; retry: @@ -3286,7 +4177,10 @@ t_func_next(struct seq_file *m, loff_t *pos) !ftrace_lookup_ip(iter->hash, rec->ip)) || ((iter->flags & FTRACE_ITER_ENABLED) && - !(rec->flags & FTRACE_FL_ENABLED))) { + !(rec->flags & FTRACE_FL_ENABLED)) || + + ((iter->flags & FTRACE_ITER_TOUCHED) && + !(rec->flags & FTRACE_FL_TOUCHED))) { rec = NULL; goto retry; @@ -3415,6 +4309,141 @@ static void add_trampoline_func(struct seq_file *m, struct ftrace_ops *ops, seq_printf(m, " ->%pS", ptr); } +#ifdef FTRACE_MCOUNT_MAX_OFFSET +/* + * Weak functions can still have an mcount/fentry that is saved in + * the __mcount_loc section. 
These can be detected by having a + * symbol offset of greater than FTRACE_MCOUNT_MAX_OFFSET, as the + * symbol found by kallsyms is not the function that the mcount/fentry + * is part of. The offset is much greater in these cases. + * + * Test the record to make sure that the ip points to a valid kallsyms + * and if not, mark it disabled. + */ +static int test_for_valid_rec(struct dyn_ftrace *rec) +{ + char str[KSYM_SYMBOL_LEN]; + unsigned long offset; + const char *ret; + + ret = kallsyms_lookup(rec->ip, NULL, &offset, NULL, str); + + /* Weak functions can cause invalid addresses */ + if (!ret || offset > FTRACE_MCOUNT_MAX_OFFSET) { + rec->flags |= FTRACE_FL_DISABLED; + return 0; + } + return 1; +} + +static struct workqueue_struct *ftrace_check_wq __initdata; +static struct work_struct ftrace_check_work __initdata; + +/* + * Scan all the mcount/fentry entries to make sure they are valid. + */ +static __init void ftrace_check_work_func(struct work_struct *work) +{ + struct ftrace_page *pg; + struct dyn_ftrace *rec; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_rec(pg, rec) { + test_for_valid_rec(rec); + } while_for_each_ftrace_rec(); + mutex_unlock(&ftrace_lock); +} + +static int __init ftrace_check_for_weak_functions(void) +{ + INIT_WORK(&ftrace_check_work, ftrace_check_work_func); + + ftrace_check_wq = alloc_workqueue("ftrace_check_wq", WQ_UNBOUND, 0); + + queue_work(ftrace_check_wq, &ftrace_check_work); + return 0; +} + +static int __init ftrace_check_sync(void) +{ + /* Make sure the ftrace_check updates are finished */ + if (ftrace_check_wq) + destroy_workqueue(ftrace_check_wq); + return 0; +} + +late_initcall_sync(ftrace_check_sync); +subsys_initcall(ftrace_check_for_weak_functions); + +static int print_rec(struct seq_file *m, unsigned long ip) +{ + unsigned long offset; + char str[KSYM_SYMBOL_LEN]; + char *modname; + const char *ret; + + ret = kallsyms_lookup(ip, NULL, &offset, &modname, str); + /* Weak functions can cause invalid addresses */ + if (!ret || offset > FTRACE_MCOUNT_MAX_OFFSET) { + snprintf(str, KSYM_SYMBOL_LEN, "%s_%ld", + FTRACE_INVALID_FUNCTION, offset); + ret = NULL; + } + + seq_puts(m, str); + if (modname) + seq_printf(m, " [%s]", modname); + return ret == NULL ? 
-1 : 0; +} +#else +static inline int test_for_valid_rec(struct dyn_ftrace *rec) +{ + return 1; +} + +static inline int print_rec(struct seq_file *m, unsigned long ip) +{ + seq_printf(m, "%ps", (void *)ip); + return 0; +} +#endif + +static void print_subops(struct seq_file *m, struct ftrace_ops *ops, struct dyn_ftrace *rec) +{ + struct ftrace_ops *subops; + bool first = true; + + list_for_each_entry(subops, &ops->subop_list, list) { + if (!((subops->flags & FTRACE_OPS_FL_ENABLED) && + hash_contains_ip(rec->ip, subops->func_hash))) + continue; + if (first) { + seq_printf(m, "\tsubops:"); + first = false; + } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if (subops->flags & FTRACE_OPS_FL_GRAPH) { + struct fgraph_ops *gops; + + gops = container_of(subops, struct fgraph_ops, ops); + seq_printf(m, " {ent:%pS ret:%pS}", + (void *)gops->entryfunc, + (void *)gops->retfunc); + continue; + } +#endif + if (subops->trampoline) { + seq_printf(m, " {%pS (%pS)}", + (void *)subops->trampoline, + (void *)subops->func); + add_trampoline_func(m, subops, rec); + } else { + seq_printf(m, " {%pS}", + (void *)subops->func); + } + } +} + static int t_show(struct seq_file *m, void *v) { struct ftrace_iterator *iter = m->private; @@ -3439,14 +4468,26 @@ static int t_show(struct seq_file *m, void *v) if (!rec) return 0; - seq_printf(m, "%ps", (void *)rec->ip); - if (iter->flags & FTRACE_ITER_ENABLED) { + if (iter->flags & FTRACE_ITER_ADDRS) + seq_printf(m, "%lx ", rec->ip); + + if (print_rec(m, rec->ip)) { + /* This should only happen when a rec is disabled */ + WARN_ON_ONCE(!(rec->flags & FTRACE_FL_DISABLED)); + seq_putc(m, '\n'); + return 0; + } + + if (iter->flags & (FTRACE_ITER_ENABLED | FTRACE_ITER_TOUCHED)) { struct ftrace_ops *ops; - seq_printf(m, " (%ld)%s%s", + seq_printf(m, " (%ld)%s%s%s%s%s", ftrace_rec_count(rec), rec->flags & FTRACE_FL_REGS ? " R" : " ", - rec->flags & FTRACE_FL_IPMODIFY ? " I" : " "); + rec->flags & FTRACE_FL_IPMODIFY ? " I" : " ", + rec->flags & FTRACE_FL_DIRECT ? " D" : " ", + rec->flags & FTRACE_FL_CALL_OPS ? " O" : " ", + rec->flags & FTRACE_FL_MODIFIED ? " M " : " "); if (rec->flags & FTRACE_FL_TRAMP_EN) { ops = ftrace_find_tramp_ops_any(rec); if (ops) { @@ -3455,6 +4496,7 @@ static int t_show(struct seq_file *m, void *v) (void *)ops->trampoline, (void *)ops->func); add_trampoline_func(m, ops, rec); + print_subops(m, ops, rec); ops = ftrace_find_tramp_ops_next(rec, ops); } while (ops); } else @@ -3462,7 +4504,24 @@ static int t_show(struct seq_file *m, void *v) } else { add_trampoline_func(m, NULL, rec); } - } + if (rec->flags & FTRACE_FL_CALL_OPS_EN) { + ops = ftrace_find_unique_ops(rec); + if (ops) { + seq_printf(m, "\tops: %pS (%pS)", + ops, ops->func); + print_subops(m, ops, rec); + } else { + seq_puts(m, "\tops: ERROR!"); + } + } + if (rec->flags & FTRACE_FL_DIRECT) { + unsigned long direct; + + direct = ftrace_find_rec_direct(rec->ip); + if (direct) + seq_printf(m, "\n\tdirect-->%pS", (void *)direct); + } + } seq_putc(m, '\n'); @@ -3480,6 +4539,11 @@ static int ftrace_avail_open(struct inode *inode, struct file *file) { struct ftrace_iterator *iter; + int ret; + + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; if (unlikely(ftrace_disabled)) return -ENODEV; @@ -3499,6 +4563,15 @@ ftrace_enabled_open(struct inode *inode, struct file *file) { struct ftrace_iterator *iter; + /* + * This shows us what functions are currently being + * traced and by what. Not sure if we want lockdown + * to hide such critical information for an admin. 
+ * Although, perhaps it can show information we don't + * want people to see, but if something is tracing + * something, we probably want to know about it. + */ + iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); if (!iter) return -ENOMEM; @@ -3510,6 +4583,55 @@ ftrace_enabled_open(struct inode *inode, struct file *file) return 0; } +static int +ftrace_touched_open(struct inode *inode, struct file *file) +{ + struct ftrace_iterator *iter; + + /* + * This shows us what functions have ever been enabled + * (traced, direct, patched, etc). Not sure if we want lockdown + * to hide such critical information for an admin. + * Although, perhaps it can show information we don't + * want people to see, but if something had traced + * something, we probably want to know about it. + */ + + iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); + if (!iter) + return -ENOMEM; + + iter->pg = ftrace_pages_start; + iter->flags = FTRACE_ITER_TOUCHED; + iter->ops = &global_ops; + + return 0; +} + +static int +ftrace_avail_addrs_open(struct inode *inode, struct file *file) +{ + struct ftrace_iterator *iter; + int ret; + + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + + if (unlikely(ftrace_disabled)) + return -ENODEV; + + iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); + if (!iter) + return -ENOMEM; + + iter->pg = ftrace_pages_start; + iter->flags = FTRACE_ITER_ADDRS; + iter->ops = &global_ops; + + return 0; +} + /** * ftrace_regex_open - initialize function tracer filter files * @ops: The ftrace_ops that hold the hash filters @@ -3525,6 +4647,8 @@ ftrace_enabled_open(struct inode *inode, struct file *file) * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set. * tracing_lseek() should be used as the lseek routine, and * release must call ftrace_regex_release(). 
+ * + * Returns: 0 on success or a negative errno value on failure */ int ftrace_regex_open(struct ftrace_ops *ops, int flag, @@ -3534,21 +4658,22 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, struct ftrace_hash *hash; struct list_head *mod_head; struct trace_array *tr = ops->private; - int ret = 0; + int ret = -ENOMEM; ftrace_ops_init(ops); if (unlikely(ftrace_disabled)) return -ENODEV; + if (tracing_check_open_get_tr(tr)) + return -ENODEV; + iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) - return -ENOMEM; + goto out; - if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) { - kfree(iter); - return -ENOMEM; - } + if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) + goto out; iter->ops = ops; iter->flags = flag; @@ -3575,15 +4700,19 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, } else { iter->hash = alloc_and_copy_ftrace_hash(size_bits, hash); } + } else { + if (hash) + iter->hash = alloc_and_copy_ftrace_hash(hash->size_bits, hash); + else + iter->hash = EMPTY_HASH; + } - if (!iter->hash) { - trace_parser_put(&iter->parser); - kfree(iter); - ret = -ENOMEM; - goto out_unlock; - } - } else - iter->hash = hash; + if (!iter->hash) { + trace_parser_put(&iter->parser); + goto out_unlock; + } + + ret = 0; if (file->f_mode & FMODE_READ) { iter->pg = ftrace_pages_start; @@ -3596,7 +4725,6 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, /* Failed */ free_ftrace_hash(iter->hash); trace_parser_put(&iter->parser); - kfree(iter); } } else file->private_data = iter; @@ -3604,6 +4732,13 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, out_unlock: mutex_unlock(&ops->func_hash->regex_lock); + out: + if (ret) { + kfree(iter); + if (tr) + trace_array_put(tr); + } + return ret; } @@ -3612,6 +4747,7 @@ ftrace_filter_open(struct inode *inode, struct file *file) { struct ftrace_ops *ops = inode->i_private; + /* Checks for tracefs lockdown */ return ftrace_regex_open(ops, FTRACE_ITER_FILTER | FTRACE_ITER_DO_PROBES, inode, file); @@ -3622,6 +4758,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file) { struct ftrace_ops *ops = inode->i_private; + /* Checks for tracefs lockdown */ return ftrace_regex_open(ops, FTRACE_ITER_NOTRACE, inode, file); } @@ -3695,20 +4832,68 @@ enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int clear_filter) /* Do nothing if it exists */ if (entry) return 0; - - ret = add_hash_entry(hash, rec->ip); + if (add_hash_entry(hash, rec->ip) == NULL) + ret = -ENOMEM; } return ret; } static int +add_rec_by_index(struct ftrace_hash *hash, struct ftrace_glob *func_g, + int clear_filter) +{ + long index; + struct ftrace_page *pg; + struct dyn_ftrace *rec; + + /* The index starts at 1 */ + if (kstrtoul(func_g->search, 0, &index) || --index < 0) + return 0; + + do_for_each_ftrace_rec(pg, rec) { + if (pg->index <= index) { + index -= pg->index; + /* this is a double loop, break goes to the next page */ + break; + } + rec = &pg->records[index]; + enter_record(hash, rec, clear_filter); + return 1; + } while_for_each_ftrace_rec(); + return 0; +} + +#ifdef FTRACE_MCOUNT_MAX_OFFSET +static int lookup_ip(unsigned long ip, char **modname, char *str) +{ + unsigned long offset; + + kallsyms_lookup(ip, NULL, &offset, modname, str); + if (offset > FTRACE_MCOUNT_MAX_OFFSET) + return -1; + return 0; +} +#else +static int lookup_ip(unsigned long ip, char **modname, char *str) +{ + kallsyms_lookup(ip, NULL, NULL, modname, str); + return 0; +} +#endif + +static int ftrace_match_record(struct dyn_ftrace *rec, struct ftrace_glob *func_g, struct 
ftrace_glob *mod_g, int exclude_mod) { char str[KSYM_SYMBOL_LEN]; char *modname; - kallsyms_lookup(rec->ip, NULL, NULL, &modname, str); + if (lookup_ip(rec->ip, &modname, str)) { + /* This should only happen when a rec is disabled */ + WARN_ON_ONCE(system_state == SYSTEM_RUNNING && + !(rec->flags & FTRACE_FL_DISABLED)); + return 0; + } if (mod_g) { int mod_matches = (modname) ? ftrace_match(modname, mod_g) : 0; @@ -3764,10 +4949,13 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) mod_g.len = strlen(mod_g.search); } - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); if (unlikely(ftrace_disabled)) - goto out_unlock; + return 0; + + if (func_g.type == MATCH_INDEX) + return add_rec_by_index(hash, &func_g, clear_filter); do_for_each_ftrace_rec(pg, rec) { @@ -3776,15 +4964,12 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) if (ftrace_match_record(rec, &func_g, mod_match, exclude_mod)) { ret = enter_record(hash, rec, clear_filter); - if (ret < 0) { - found = ret; - goto out_unlock; - } + if (ret < 0) + return ret; found = 1; } + cond_resched(); } while_for_each_ftrace_rec(); - out_unlock: - mutex_unlock(&ftrace_lock); return found; } @@ -3831,36 +5016,33 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, struct ftrace_hash *hash, int enable) { - struct ftrace_ops_hash old_hash_ops; - struct ftrace_hash *old_hash; - int ret; - - old_hash = *orig_hash; - old_hash_ops.filter_hash = ops->func_hash->filter_hash; - old_hash_ops.notrace_hash = ops->func_hash->notrace_hash; - ret = ftrace_hash_move(ops, enable, orig_hash, hash); - if (!ret) { - ftrace_ops_update_code(ops, &old_hash_ops); - free_ftrace_hash_rcu(old_hash); - } - return ret; -} + if (ops->flags & FTRACE_OPS_FL_SUBOP) + return ftrace_hash_move_and_update_subops(ops, orig_hash, hash); -static bool module_exists(const char *module) -{ - /* All modules have the symbol __this_module */ - const char this_mod[] = "__this_module"; - char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2]; - unsigned long val; - int n; + /* + * If this ops is not enabled, it could be sharing its filters + * with a subop. If that's the case, update the subop instead of + * this ops. Shared filters are only allowed to have one ops set + * at a time, and if we update the ops that is not enabled, + * it will not affect subops that share it. + */ + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) { + struct ftrace_ops *op; - n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod); + /* Check if any other manager subops maps to this hash */ + do_for_each_ftrace_op(op, ftrace_ops_list) { + struct ftrace_ops *subops; - if (n > sizeof(modname) - 1) - return false; + list_for_each_entry(subops, &op->subop_list, list) { + if ((subops->flags & FTRACE_OPS_FL_ENABLED) && + subops->func_hash == ops->func_hash) { + return ftrace_hash_move_and_update_subops(subops, orig_hash, hash); + } + } + } while_for_each_ftrace_op(op); + } - val = module_kallsyms_lookup_name(modname); - return val != 0; + return __ftrace_hash_move_and_update_ops(ops, orig_hash, hash, enable); } static int cache_mod(struct trace_array *tr, @@ -3868,14 +5050,14 @@ static int cache_mod(struct trace_array *tr, { struct ftrace_mod_load *ftrace_mod, *n; struct list_head *head = enable ? 
&tr->mod_trace : &tr->mod_notrace; - int ret; - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); /* We do not cache inverse filters */ if (func[0] == '!') { + int ret = -EINVAL; + func++; - ret = -EINVAL; /* Look to remove this hash */ list_for_each_entry_safe(ftrace_mod, n, head, list) { @@ -3891,26 +5073,17 @@ static int cache_mod(struct trace_array *tr, continue; } } - goto out; + return ret; } - ret = -EINVAL; /* We only care about modules that have not been loaded yet */ if (module_exists(module)) - goto out; + return -EINVAL; /* Save this string off, and execute it when the module is loaded */ - ret = ftrace_add_mod(tr, func, module, enable); - out: - mutex_unlock(&ftrace_lock); - - return ret; + return ftrace_add_mod(tr, func, module, enable); } -static int -ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, - int reset, int enable); - #ifdef CONFIG_MODULES static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, char *mod, bool enable) @@ -3919,7 +5092,6 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, struct ftrace_hash **orig_hash, *new_hash; LIST_HEAD(process_mods); char *func; - int ret; mutex_lock(&ops->func_hash->regex_lock); @@ -3948,8 +5120,7 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, if (!func) /* warn? */ continue; - list_del(&ftrace_mod->list); - list_add(&ftrace_mod->list, &process_mods); + list_move(&ftrace_mod->list, &process_mods); /* Use the newly allocated func, as it may be "*" */ kfree(ftrace_mod->func); @@ -3972,7 +5143,7 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, mutex_lock(&ftrace_lock); - ret = ftrace_hash_move_and_update_ops(ops, orig_hash, + ftrace_hash_move_and_update_ops(ops, orig_hash, new_hash, enable); mutex_unlock(&ftrace_lock); @@ -4016,6 +5187,9 @@ ftrace_mod_callback(struct trace_array *tr, struct ftrace_hash *hash, char *func; int ret; + if (!tr) + return -ENODEV; + /* match_records() modifies func, and we need the original */ func = kstrdup(func_orig, GFP_KERNEL); if (!func) @@ -4050,7 +5224,7 @@ static int __init ftrace_mod_cmd_init(void) core_initcall(ftrace_mod_cmd_init); static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct ftrace_probe_ops *probe_ops; struct ftrace_func_probe *probe; @@ -4073,14 +5247,18 @@ struct ftrace_func_map { void *data; }; +/* + * Note, ftrace_func_mapper is freed by free_ftrace_hash(&mapper->hash). + * The hash field must be the first field. + */ struct ftrace_func_mapper { - struct ftrace_hash hash; + struct ftrace_hash hash; /* Must be first! */ }; /** * allocate_ftrace_func_mapper - allocate a new ftrace_func_mapper * - * Returns a ftrace_func_mapper descriptor that can be used to map ips to data. + * Returns: a ftrace_func_mapper descriptor that can be used to map ips to data. */ struct ftrace_func_mapper *allocate_ftrace_func_mapper(void) { @@ -4100,7 +5278,7 @@ struct ftrace_func_mapper *allocate_ftrace_func_mapper(void) * @mapper: The mapper that has the ip maps * @ip: the instruction pointer to find the data for * - * Returns the data mapped to @ip if found otherwise NULL. The return + * Returns: the data mapped to @ip if found otherwise NULL. The return * is actually the address of the mapper data pointer. 
The address is * returned for use cases where the data is no bigger than a long, and * the user can use the data pointer as its data instead of having to @@ -4126,7 +5304,7 @@ void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper, * @ip: The instruction pointer address to map @data to * @data: The data to map to @ip * - * Returns 0 on succes otherwise an error. + * Returns: 0 on success otherwise an error. */ int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, unsigned long ip, void *data) @@ -4155,8 +5333,8 @@ int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, * @mapper: The mapper that has the ip maps * @ip: The instruction pointer address to remove the data from * - * Returns the data if it is found, otherwise NULL. - * Note, if the data pointer is used as the data itself, (see + * Returns: the data if it is found, otherwise NULL. + * Note, if the data pointer is used as the data itself, (see * ftrace_func_mapper_find_ip(), then the return value may be meaningless, * if the data pointer was set to zero. */ @@ -4194,10 +5372,13 @@ void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper, struct ftrace_func_entry *entry; struct ftrace_func_map *map; struct hlist_head *hhd; - int size = 1 << mapper->hash.size_bits; - int i; + int size, i; + + if (!mapper) + return; if (free_func && mapper->hash.count) { + size = 1 << mapper->hash.size_bits; for (i = 0; i < size; i++) { hhd = &mapper->hash.buckets[i]; hlist_for_each_entry(entry, hhd, hlist) { @@ -4206,6 +5387,7 @@ void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper, } } } + /* This also frees the mapper itself */ free_ftrace_hash(&mapper->hash); } @@ -4213,7 +5395,7 @@ static void release_probe(struct ftrace_func_probe *probe) { struct ftrace_probe_ops *probe_ops; - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); WARN_ON(probe->ref <= 0); @@ -4231,7 +5413,6 @@ static void release_probe(struct ftrace_func_probe *probe) list_del(&probe->list); kfree(probe); } - mutex_unlock(&ftrace_lock); } static void acquire_probe_locked(struct ftrace_func_probe *probe) @@ -4248,8 +5429,8 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr, struct ftrace_probe_ops *probe_ops, void *data) { + struct ftrace_func_probe *probe = NULL, *iter; struct ftrace_func_entry *entry; - struct ftrace_func_probe *probe; struct ftrace_hash **orig_hash; struct ftrace_hash *old_hash; struct ftrace_hash *hash; @@ -4268,11 +5449,13 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr, mutex_lock(&ftrace_lock); /* Check if the probe_ops is already registered */ - list_for_each_entry(probe, &tr->func_probes, list) { - if (probe->probe_ops == probe_ops) + list_for_each_entry(iter, &tr->func_probes, list) { + if (iter->probe_ops == probe_ops) { + probe = iter; break; + } } - if (&probe->list == &tr->func_probes) { + if (!probe) { probe = kzalloc(sizeof(*probe), GFP_KERNEL); if (!probe) { mutex_unlock(&ftrace_lock); @@ -4289,12 +5472,21 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr, mutex_unlock(&ftrace_lock); + /* + * Note, there's a small window here that the func_hash->filter_hash + * may be NULL or empty. Need to be careful when reading the loop. + */ mutex_lock(&probe->ops.func_hash->regex_lock); orig_hash = &probe->ops.func_hash->filter_hash; old_hash = *orig_hash; hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash); + if (!hash) { + ret = -ENOMEM; + goto out; + } + ret = ftrace_match_records(hash, glob, strlen(glob)); /* Nothing found? 
*/ @@ -4381,9 +5573,9 @@ int unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, struct ftrace_probe_ops *probe_ops) { + struct ftrace_func_probe *probe = NULL, *iter; struct ftrace_ops_hash old_hash_ops; struct ftrace_func_entry *entry; - struct ftrace_func_probe *probe; struct ftrace_glob func_g; struct ftrace_hash **orig_hash; struct ftrace_hash *old_hash; @@ -4411,11 +5603,13 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, mutex_lock(&ftrace_lock); /* Check if the probe_ops is already registered */ - list_for_each_entry(probe, &tr->func_probes, list) { - if (probe->probe_ops == probe_ops) + list_for_each_entry(iter, &tr->func_probes, list) { + if (iter->probe_ops == probe_ops) { + probe = iter; break; + } } - if (&probe->list == &tr->func_probes) + if (!probe) goto err_unlock_ftrace; ret = -EINVAL; @@ -4524,20 +5718,15 @@ static DEFINE_MUTEX(ftrace_cmd_mutex); __init int register_ftrace_command(struct ftrace_func_command *cmd) { struct ftrace_func_command *p; - int ret = 0; - mutex_lock(&ftrace_cmd_mutex); + guard(mutex)(&ftrace_cmd_mutex); list_for_each_entry(p, &ftrace_commands, list) { - if (strcmp(cmd->name, p->name) == 0) { - ret = -EBUSY; - goto out_unlock; - } + if (strcmp(cmd->name, p->name) == 0) + return -EBUSY; } list_add(&cmd->list, &ftrace_commands); - out_unlock: - mutex_unlock(&ftrace_cmd_mutex); - return ret; + return 0; } /* @@ -4547,20 +5736,17 @@ __init int register_ftrace_command(struct ftrace_func_command *cmd) __init int unregister_ftrace_command(struct ftrace_func_command *cmd) { struct ftrace_func_command *p, *n; - int ret = -ENODEV; - mutex_lock(&ftrace_cmd_mutex); + guard(mutex)(&ftrace_cmd_mutex); + list_for_each_entry_safe(p, n, &ftrace_commands, list) { if (strcmp(cmd->name, p->name) == 0) { - ret = 0; list_del_init(&p->list); - goto out_unlock; + return 0; } } - out_unlock: - mutex_unlock(&ftrace_cmd_mutex); - return ret; + return -ENODEV; } static int ftrace_process_regex(struct ftrace_iterator *iter, @@ -4570,7 +5756,7 @@ static int ftrace_process_regex(struct ftrace_iterator *iter, struct trace_array *tr = iter->ops->private; char *func, *command, *next = buff; struct ftrace_func_command *p; - int ret = -EINVAL; + int ret; func = strsep(&next, ":"); @@ -4587,17 +5773,14 @@ static int ftrace_process_regex(struct ftrace_iterator *iter, command = strsep(&next, ":"); - mutex_lock(&ftrace_cmd_mutex); + guard(mutex)(&ftrace_cmd_mutex); + list_for_each_entry(p, &ftrace_commands, list) { - if (strcmp(p->name, command) == 0) { - ret = p->func(tr, hash, func, command, next, enable); - goto out_unlock; - } + if (strcmp(p->name, command) == 0) + return p->func(tr, hash, func, command, next, enable); } - out_unlock: - mutex_unlock(&ftrace_cmd_mutex); - return ret; + return -EINVAL; } static ssize_t @@ -4631,12 +5814,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, parser->idx, enable); trace_parser_clear(parser); if (ret < 0) - goto out; + return ret; } - ret = read; - out: - return ret; + return read; } ssize_t @@ -4654,11 +5835,12 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf, } static int -ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) +__ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) { struct ftrace_func_entry *entry; - if (!ftrace_location(ip)) + ip = ftrace_location(ip); + if (!ip) return -EINVAL; if (remove) { @@ -4667,14 +5849,39 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) return 
-ENOENT; free_hash_entry(hash, entry); return 0; + } else if (__ftrace_lookup_ip(hash, ip) != NULL) { + /* Already exists */ + return 0; } - return add_hash_entry(hash, ip); + entry = add_hash_entry(hash, ip); + return entry ? 0 : -ENOMEM; +} + +static int +ftrace_match_addr(struct ftrace_hash *hash, unsigned long *ips, + unsigned int cnt, int remove) +{ + unsigned int i; + int err; + + for (i = 0; i < cnt; i++) { + err = __ftrace_match_addr(hash, ips[i], remove); + if (err) { + /* + * This expects the @hash is a temporary hash and if this + * fails the caller must free the @hash. + */ + return err; + } + } + return 0; } static int ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, - unsigned long ip, int remove, int reset, int enable) + unsigned long *ips, unsigned int cnt, + int remove, int reset, int enable, char *mod) { struct ftrace_hash **orig_hash; struct ftrace_hash *hash; @@ -4700,12 +5907,20 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, goto out_regex_unlock; } - if (buf && !ftrace_match_records(hash, buf, len)) { + if (buf && !match_records(hash, buf, len, mod)) { + /* If this was for a module and nothing was enabled, flag it */ + if (mod) + (*orig_hash)->flags |= FTRACE_HASH_FL_MOD; + + /* + * Even if it is a mod, return error to let caller know + * nothing was added + */ ret = -EINVAL; goto out_regex_unlock; } - if (ip) { - ret = ftrace_match_addr(hash, ip, remove); + if (ips) { + ret = ftrace_match_addr(hash, ips, cnt, remove); if (ret < 0) goto out_regex_unlock; } @@ -4722,33 +5937,402 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, } static int -ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, - int reset, int enable) +ftrace_set_addr(struct ftrace_ops *ops, unsigned long *ips, unsigned int cnt, + int remove, int reset, int enable) +{ + return ftrace_set_hash(ops, NULL, 0, ips, cnt, remove, reset, enable, NULL); +} + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + +static int register_ftrace_function_nolock(struct ftrace_ops *ops); + +/* + * If there are multiple ftrace_ops, use SAVE_REGS by default, so that direct + * call will be jumped from ftrace_regs_caller. Only if the architecture does + * not support ftrace_regs_caller but direct_call, use SAVE_ARGS so that it + * jumps from ftrace_caller for multiple ftrace_ops. 
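+ *
+ * (Illustrative note by the editor, not part of this patch: an
+ * architecture that selects CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS, such
+ * as x86_64, gets the SAVE_REGS variant of MULTI_FLAGS below.)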
+ */ +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS +#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_ARGS) +#else +#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS) +#endif + +static int check_direct_multi(struct ftrace_ops *ops) +{ + if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) + return -EINVAL; + if ((ops->flags & MULTI_FLAGS) != MULTI_FLAGS) + return -EINVAL; + return 0; +} + +static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long addr) +{ + struct ftrace_func_entry *entry, *del; + int size, i; + + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + del = __ftrace_lookup_ip(direct_functions, entry->ip); + if (del && ftrace_jmp_get(del->direct) == + ftrace_jmp_get(addr)) { + remove_hash_entry(direct_functions, del); + kfree(del); + } + } + } +} + +static void register_ftrace_direct_cb(struct rcu_head *rhp) { - return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable); + struct ftrace_hash *fhp = container_of(rhp, struct ftrace_hash, rcu); + + free_ftrace_hash(fhp); +} + +static void reset_direct(struct ftrace_ops *ops, unsigned long addr) +{ + struct ftrace_hash *hash = ops->func_hash->filter_hash; + + remove_direct_functions_hash(hash, addr); + + /* cleanup for possible another register call */ + ops->func = NULL; + ops->trampoline = 0; } /** + * register_ftrace_direct - Call a custom trampoline directly + * for multiple functions registered in @ops + * @ops: The address of the struct ftrace_ops object + * @addr: The address of the trampoline to call at @ops functions + * + * This is used to connect direct calls to @addr from the nop locations + * of the functions registered in @ops (set by the ftrace_set_filter_ip + * function). + * + * The location that it calls (@addr) must be able to handle a direct call, + * and save the parameters of the function being traced, and restore them + * (or inject new ones if needed), before returning. + * + * Returns: + * 0 on success + * -EINVAL - The @ops object was already registered with this call or + * when there are no functions in @ops object. + * -EBUSY - Another direct function is already attached (there can be only one) + * -ENODEV - @ip does not point to a ftrace nop location (or not supported) + * -ENOMEM - There was an allocation failure. + */ +int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) +{ + struct ftrace_hash *hash, *new_hash = NULL, *free_hash = NULL; + struct ftrace_func_entry *entry, *new; + int err = -EBUSY, size, i; + + if (ops->func || ops->trampoline) + return -EINVAL; + if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) + return -EINVAL; + if (ops->flags & FTRACE_OPS_FL_ENABLED) + return -EINVAL; + + hash = ops->func_hash->filter_hash; + if (ftrace_hash_empty(hash)) + return -EINVAL; + + /* This is a "raw" address, and this should never happen. */ + if (WARN_ON_ONCE(ftrace_is_jmp(addr))) + return -EINVAL; + + mutex_lock(&direct_mutex); + + if (ops->flags & FTRACE_OPS_FL_JMP) + addr = ftrace_jmp_set(addr); + + /* Make sure requested entries are not already registered..
*/ + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + if (ftrace_find_rec_direct(entry->ip)) + goto out_unlock; + } + } + + err = -ENOMEM; + + /* Make a copy hash to place the new and the old entries in */ + size = hash->count + direct_functions->count; + size = fls(size); + if (size > FTRACE_HASH_MAX_BITS) + size = FTRACE_HASH_MAX_BITS; + new_hash = alloc_ftrace_hash(size); + if (!new_hash) + goto out_unlock; + + /* Now copy over the existing direct entries */ + size = 1 << direct_functions->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &direct_functions->buckets[i], hlist) { + new = add_hash_entry(new_hash, entry->ip); + if (!new) + goto out_unlock; + new->direct = entry->direct; + } + } + + /* ... and add the new entries */ + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + new = add_hash_entry(new_hash, entry->ip); + if (!new) + goto out_unlock; + /* Update both the copy and the hash entry */ + new->direct = addr; + entry->direct = addr; + } + } + + free_hash = direct_functions; + rcu_assign_pointer(direct_functions, new_hash); + new_hash = NULL; + + ops->func = call_direct_funcs; + ops->flags |= MULTI_FLAGS; + ops->trampoline = FTRACE_REGS_ADDR; + ops->direct_call = addr; + + err = register_ftrace_function_nolock(ops); + if (err) + reset_direct(ops, addr); + + out_unlock: + mutex_unlock(&direct_mutex); + + if (free_hash && free_hash != EMPTY_HASH) + call_rcu_tasks(&free_hash->rcu, register_ftrace_direct_cb); + + if (new_hash) + free_ftrace_hash(new_hash); + + return err; +} +EXPORT_SYMBOL_GPL(register_ftrace_direct); + +/** + * unregister_ftrace_direct - Remove calls to custom trampoline + * previously registered by register_ftrace_direct for @ops object. + * @ops: The address of the struct ftrace_ops object + * @addr: The address of the direct function that is called by the @ops functions + * @free_filters: Set to true to remove all filters for the ftrace_ops, false otherwise + * + * This is used to remove the direct calls to @addr from the nop locations + * of the functions registered in @ops (set by the ftrace_set_filter_ip + * function). + * + * Returns: + * 0 on success + * -EINVAL - The @ops object was not properly registered. + */ +int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr, + bool free_filters) +{ + int err; + + if (check_direct_multi(ops)) + return -EINVAL; + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return -EINVAL; + + mutex_lock(&direct_mutex); + err = unregister_ftrace_function(ops); + reset_direct(ops, addr); + mutex_unlock(&direct_mutex); + + if (free_filters) + ftrace_free_filter(ops); + return err; +} +EXPORT_SYMBOL_GPL(unregister_ftrace_direct); + +static int +__modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) +{ + struct ftrace_hash *hash = ops->func_hash->filter_hash; + struct ftrace_func_entry *entry, *iter; + static struct ftrace_ops tmp_ops = { + .func = ftrace_stub, + .flags = FTRACE_OPS_FL_STUB, + }; + int i, size; + int err; + + lockdep_assert_held_once(&direct_mutex); + + /* This is a "raw" address, and this should never happen.
+	if (WARN_ON_ONCE(ftrace_is_jmp(addr)))
+		return -EINVAL;
+
+	if (ops->flags & FTRACE_OPS_FL_JMP)
+		addr = ftrace_jmp_set(addr);
+
+	/* Enable the tmp_ops to have the same functions as the direct ops */
+	ftrace_ops_init(&tmp_ops);
+	tmp_ops.func_hash = ops->func_hash;
+	tmp_ops.direct_call = addr;
+
+	err = register_ftrace_function_nolock(&tmp_ops);
+	if (err)
+		return err;
+
+	/*
+	 * Call __ftrace_hash_update_ipmodify() here, so that we can call
+	 * ops->ops_func for the ops. This is needed because the above
+	 * register_ftrace_function_nolock() worked on tmp_ops.
+	 */
+	err = __ftrace_hash_update_ipmodify(ops, hash, hash, true);
+	if (err)
+		goto out;
+
+	/*
+	 * Now the ftrace_ops_list_func() is called to handle the direct callers.
+	 * We can safely change the direct functions attached to each entry.
+	 */
+	mutex_lock(&ftrace_lock);
+
+	size = 1 << hash->size_bits;
+	for (i = 0; i < size; i++) {
+		hlist_for_each_entry(iter, &hash->buckets[i], hlist) {
+			entry = __ftrace_lookup_ip(direct_functions, iter->ip);
+			if (!entry)
+				continue;
+			entry->direct = addr;
+		}
+	}
+	/* Prevent store tearing if a trampoline concurrently accesses the value */
+	WRITE_ONCE(ops->direct_call, addr);
+
+	mutex_unlock(&ftrace_lock);
+
+out:
+	/* Removing the tmp_ops will add the updated direct callers to the functions */
+	unregister_ftrace_function(&tmp_ops);
+
+	return err;
+}
+
+/**
+ * modify_ftrace_direct_nolock - Modify an existing direct 'multi' call
+ * to call something else
+ * @ops: The address of the struct ftrace_ops object
+ * @addr: The address of the new trampoline to call at @ops functions
+ *
+ * This is used to unregister the currently registered direct caller and
+ * register a new one, @addr, on the functions registered in the @ops object.
+ *
+ * Note there's a window between the ftrace_shutdown and ftrace_startup calls
+ * where there will be no callbacks called.
+ *
+ * Caller should already have direct_mutex locked, so we don't lock
+ * direct_mutex here.
+ *
+ * Returns: zero on success. Non-zero on error, which includes:
+ *  -EINVAL - The @ops object was not properly registered.
+ */
+int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned long addr)
+{
+	if (check_direct_multi(ops))
+		return -EINVAL;
+	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+		return -EINVAL;
+
+	return __modify_ftrace_direct(ops, addr);
+}
+EXPORT_SYMBOL_GPL(modify_ftrace_direct_nolock);
+
+/**
+ * modify_ftrace_direct - Modify an existing direct 'multi' call
+ * to call something else
+ * @ops: The address of the struct ftrace_ops object
+ * @addr: The address of the new trampoline to call at @ops functions
+ *
+ * This is used to unregister the currently registered direct caller and
+ * register a new one, @addr, on the functions registered in the @ops object.
+ *
+ * Note there's a window between the ftrace_shutdown and ftrace_startup calls
+ * where there will be no callbacks called.
+ *
+ * Returns: zero on success. Non-zero on error, which includes:
+ *  -EINVAL - The @ops object was not properly registered.
+ */
+int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
+{
+	int err;
+
+	if (check_direct_multi(ops))
+		return -EINVAL;
+	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+		return -EINVAL;
+
+	mutex_lock(&direct_mutex);
+	err = __modify_ftrace_direct(ops, addr);
+	mutex_unlock(&direct_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(modify_ftrace_direct);
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
+/**
  * ftrace_set_filter_ip - set a function to filter on in ftrace by address
- * @ops - the ops to set the filter with
- * @ip - the address to add to or remove from the filter.
- * @remove - non zero to remove the ip from the filter
- * @reset - non zero to reset all filters before applying this filter.
+ * @ops: the ops to set the filter with
+ * @ip: the address to add to or remove from the filter.
+ * @remove: non-zero to remove the ip from the filter
+ * @reset: non-zero to reset all filters before applying this filter.
  *
  * Filters denote which functions should be enabled when tracing is enabled.
- * If @ip is NULL, it failes to update filter.
+ * If @ip is NULL, it fails to update the filter.
+ *
+ * This can allocate memory which must be freed before @ops can be freed,
+ * either by removing each filtered addr or by using
+ * ftrace_free_filter(@ops).
  */
 int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
 			 int remove, int reset)
 {
 	ftrace_ops_init(ops);
-	return ftrace_set_addr(ops, ip, remove, reset, 1);
+	return ftrace_set_addr(ops, &ip, 1, remove, reset, 1);
 }
 EXPORT_SYMBOL_GPL(ftrace_set_filter_ip);
 
 /**
+ * ftrace_set_filter_ips - set functions to filter on in ftrace by addresses
+ * @ops: the ops to set the filter with
+ * @ips: the array of addresses to add to or remove from the filter.
+ * @cnt: the number of addresses in @ips
+ * @remove: non-zero to remove ips from the filter
+ * @reset: non-zero to reset all filters before applying this filter.
+ *
+ * Filters denote which functions should be enabled when tracing is enabled.
+ * If the @ips array or any ip within it is NULL, it fails to update the filter.
+ *
+ * This can allocate memory which must be freed before @ops can be freed,
+ * either by removing each filtered addr or by using
+ * ftrace_free_filter(@ops).
+ */
+int ftrace_set_filter_ips(struct ftrace_ops *ops, unsigned long *ips,
+			  unsigned int cnt, int remove, int reset)
+{
+	ftrace_ops_init(ops);
+	return ftrace_set_addr(ops, ips, cnt, remove, reset, 1);
+}
+EXPORT_SYMBOL_GPL(ftrace_set_filter_ips);
+
+/**
  * ftrace_ops_set_global_filter - setup ops to use global filters
- * @ops - the ops which will use the global filters
+ * @ops: the ops which will use the global filters
  *
  * ftrace users who need global function trace filtering should call this.
  * It can set the global filter only if ops were not initialized before.
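
The hunks above introduce the ops-based direct-call API. As a point of reference, here is a minimal module sketch of how it is meant to be used, loosely modeled on the kernel's samples/ftrace/ftrace-direct.c; my_tramp stands in for a hypothetical arch-specific assembly trampoline and is not part of this patch:

/*
 * Usage sketch only, not part of the diff. Assumes my_tramp is an
 * arch asm trampoline that saves/restores the traced function's
 * arguments around its own work, as the kernel-doc above requires.
 */
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/sched.h>

extern void my_tramp(void);	/* assumed: asm trampoline, not shown */

static struct ftrace_ops direct;

static int __init direct_example_init(void)
{
	int ret;

	/* Pick the nop site(s): filter on wake_up_process() by address. */
	ret = ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0);
	if (ret)
		return ret;

	/* Point every filtered site directly at the trampoline. */
	return register_ftrace_direct(&direct, (unsigned long) my_tramp);
}

static void __exit direct_example_exit(void)
{
	/* free_filters == true also frees the filter hash set up above. */
	unregister_ftrace_direct(&direct, (unsigned long) my_tramp, true);
}

module_init(direct_example_init);
module_exit(direct_example_exit);
MODULE_LICENSE("GPL");

Note the ordering the API enforces: the filter hash must be populated before register_ftrace_direct(), which refuses an empty hash or an already-enabled ops.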
@@ -4767,18 +6351,53 @@ static int ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, int reset, int enable) { - return ftrace_set_hash(ops, buf, len, 0, 0, reset, enable); + char *mod = NULL, *func, *command, *next = buf; + char *tmp __free(kfree) = NULL; + struct trace_array *tr = ops->private; + int ret; + + func = strsep(&next, ":"); + + /* This can also handle :mod: parsing */ + if (next) { + if (!tr) + return -EINVAL; + + command = strsep(&next, ":"); + if (strcmp(command, "mod") != 0) + return -EINVAL; + + mod = next; + len = command - func; + /* Save the original func as ftrace_set_hash() can modify it */ + tmp = kstrdup(func, GFP_KERNEL); + } + + ret = ftrace_set_hash(ops, func, len, NULL, 0, 0, reset, enable, mod); + + if (tr && mod && ret < 0) { + /* Did tmp fail to allocate? */ + if (!tmp) + return -ENOMEM; + ret = cache_mod(tr, tmp, mod, enable); + } + + return ret; } /** * ftrace_set_filter - set a function to filter on in ftrace - * @ops - the ops to set the filter with - * @buf - the string that holds the function filter text. - * @len - the length of the string. - * @reset - non zero to reset all filters before applying this filter. + * @ops: the ops to set the filter with + * @buf: the string that holds the function filter text. + * @len: the length of the string. + * @reset: non-zero to reset all filters before applying this filter. * * Filters denote which functions should be enabled when tracing is enabled. * If @buf is NULL and reset is set, all functions will be enabled for tracing. + * + * This can allocate memory which must be freed before @ops can be freed, + * either by removing each filtered addr or by using + * ftrace_free_filter(@ops). */ int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset) @@ -4790,14 +6409,18 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter); /** * ftrace_set_notrace - set a function to not trace in ftrace - * @ops - the ops to set the notrace filter with - * @buf - the string that holds the function notrace text. - * @len - the length of the string. - * @reset - non zero to reset all filters before applying this filter. + * @ops: the ops to set the notrace filter with + * @buf: the string that holds the function notrace text. + * @len: the length of the string. + * @reset: non-zero to reset all filters before applying this filter. * * Notrace Filters denote which functions should not be enabled when tracing * is enabled. If @buf is NULL and reset is set, all functions will be enabled * for tracing. + * + * This can allocate memory which must be freed before @ops can be freed, + * either by removing each filtered addr or by using + * ftrace_free_filter(@ops). */ int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, int len, int reset) @@ -4808,9 +6431,9 @@ int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, EXPORT_SYMBOL_GPL(ftrace_set_notrace); /** * ftrace_set_global_filter - set a function to filter on with global tracers - * @buf - the string that holds the function filter text. - * @len - the length of the string. - * @reset - non zero to reset all filters before applying this filter. + * @buf: the string that holds the function filter text. + * @len: the length of the string. + * @reset: non-zero to reset all filters before applying this filter. * * Filters denote which functions should be enabled when tracing is enabled. * If @buf is NULL and reset is set, all functions will be enabled for tracing. 
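
For the glob-based filter API documented in this hunk, a minimal sketch under stated assumptions: my_callback and both glob patterns are illustrative, and the callback signature assumes the ftrace_regs variant this patch converts to:

/* Sketch only, not part of the diff. */
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/string.h>

static void my_callback(unsigned long ip, unsigned long parent_ip,
			struct ftrace_ops *op, struct ftrace_regs *fregs)
{
	/* illustrative; keep the body minimal and tracing-safe */
}

static struct ftrace_ops my_ops = {
	.func	= my_callback,
	/* let the core wrap the callback with recursion protection */
	.flags	= FTRACE_OPS_FL_RECURSION,
};

static char filter_glob[] = "kmalloc*";
static char exclude_glob[] = "*_rcu";

static int __init filter_example_init(void)
{
	/* reset=1 clears any previous filter before applying this one */
	ftrace_set_filter(&my_ops, filter_glob, strlen(filter_glob), 1);
	ftrace_set_notrace(&my_ops, exclude_glob, strlen(exclude_glob), 0);

	return register_ftrace_function(&my_ops);
}

static void __exit filter_example_exit(void)
{
	unregister_ftrace_function(&my_ops);
	/* the filters allocated memory; free it before my_ops can go away */
	ftrace_free_filter(&my_ops);
}

module_init(filter_example_init);
module_exit(filter_example_exit);
MODULE_LICENSE("GPL");
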
@@ -4823,9 +6446,9 @@ EXPORT_SYMBOL_GPL(ftrace_set_global_filter); /** * ftrace_set_global_notrace - set a function to not trace with global tracers - * @buf - the string that holds the function notrace text. - * @len - the length of the string. - * @reset - non zero to reset all filters before applying this filter. + * @buf: the string that holds the function notrace text. + * @len: the length of the string. + * @reset: non-zero to reset all filters before applying this filter. * * Notrace Filters denote which functions should not be enabled when tracing * is enabled. If @buf is NULL and reset is set, all functions will be enabled @@ -4850,7 +6473,7 @@ bool ftrace_filter_param __initdata; static int __init set_ftrace_notrace(char *str) { ftrace_filter_param = true; - strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); + strscpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); return 1; } __setup("ftrace_notrace=", set_ftrace_notrace); @@ -4858,7 +6481,7 @@ __setup("ftrace_notrace=", set_ftrace_notrace); static int __init set_ftrace_filter(char *str) { ftrace_filter_param = true; - strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); + strscpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); return 1; } __setup("ftrace_filter=", set_ftrace_filter); @@ -4870,23 +6493,22 @@ static int ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer); static int __init set_graph_function(char *str) { - strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); + strscpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); return 1; } __setup("ftrace_graph_filter=", set_graph_function); static int __init set_graph_notrace_function(char *str) { - strlcpy(ftrace_graph_notrace_buf, str, FTRACE_FILTER_SIZE); + strscpy(ftrace_graph_notrace_buf, str, FTRACE_FILTER_SIZE); return 1; } __setup("ftrace_graph_notrace=", set_graph_notrace_function); static int __init set_graph_max_depth_function(char *str) { - if (!str) + if (!str || kstrtouint(str, 0, &fgraph_max_depth)) return 0; - fgraph_max_depth = simple_strtoul(str, NULL, 0); return 1; } __setup("ftrace_graph_max_depth=", set_graph_max_depth_function); @@ -4898,7 +6520,7 @@ static void __init set_ftrace_early_graph(char *buf, int enable) struct ftrace_hash *hash; hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); - if (WARN_ON(!hash)) + if (MEM_FAIL(!hash, "Failed to allocate hash\n")) return; while (buf) { @@ -4924,6 +6546,14 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable) ftrace_ops_init(ops); + /* The trace_array is needed for caching module function filters */ + if (!ops->private) { + struct trace_array *tr = trace_get_global_array(); + + ops->private = tr; + ftrace_init_trace_array(tr); + } + while (buf) { func = strsep(&buf, ","); ftrace_set_regex(ops, func, strlen(func), 0, enable); @@ -4951,7 +6581,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file) struct ftrace_hash **orig_hash; struct trace_parser *parser; int filter_hash; - int ret; if (file->f_mode & FMODE_READ) { iter = m->private; @@ -4961,7 +6590,10 @@ int ftrace_regex_release(struct inode *inode, struct file *file) parser = &iter->parser; if (trace_parser_loaded(parser)) { - ftrace_match_records(iter->hash, parser->buffer, parser->idx); + int enable = !(iter->flags & FTRACE_ITER_NOTRACE); + + ftrace_process_regex(iter, parser->buffer, + parser->idx, enable); } trace_parser_put(parser); @@ -4973,22 +6605,25 @@ int ftrace_regex_release(struct inode *inode, struct file *file) if (filter_hash) { orig_hash = &iter->ops->func_hash->filter_hash; - if (iter->tr && 
!list_empty(&iter->tr->mod_trace)) - iter->hash->flags |= FTRACE_HASH_FL_MOD; + if (iter->tr) { + if (list_empty(&iter->tr->mod_trace)) + iter->hash->flags &= ~FTRACE_HASH_FL_MOD; + else + iter->hash->flags |= FTRACE_HASH_FL_MOD; + } } else orig_hash = &iter->ops->func_hash->notrace_hash; mutex_lock(&ftrace_lock); - ret = ftrace_hash_move_and_update_ops(iter->ops, orig_hash, + ftrace_hash_move_and_update_ops(iter->ops, orig_hash, iter->hash, filter_hash); mutex_unlock(&ftrace_lock); - } else { - /* For read only, the hash is the ops hash */ - iter->hash = NULL; } mutex_unlock(&iter->ops->func_hash->regex_lock); free_ftrace_hash(iter->hash); + if (iter->tr) + trace_array_put(iter->tr); kfree(iter); return 0; @@ -5008,6 +6643,20 @@ static const struct file_operations ftrace_enabled_fops = { .release = seq_release_private, }; +static const struct file_operations ftrace_touched_fops = { + .open = ftrace_touched_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static const struct file_operations ftrace_avail_addrs_fops = { + .open = ftrace_avail_addrs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + static const struct file_operations ftrace_filter_fops = { .open = ftrace_filter_open, .read = seq_read, @@ -5028,8 +6677,8 @@ static const struct file_operations ftrace_notrace_fops = { static DEFINE_MUTEX(graph_lock); -struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH; -struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_notrace_hash = EMPTY_HASH; enum graph_filter_type { GRAPH_FILTER_NOTRACE = 0, @@ -5146,9 +6795,13 @@ static int __ftrace_graph_open(struct inode *inode, struct file *file, struct ftrace_graph_data *fgd) { - int ret = 0; + int ret; struct ftrace_hash *new_hash = NULL; + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + if (file->f_mode & FMODE_WRITE) { const int size_bits = FTRACE_HASH_DEFAULT_BITS; @@ -5300,8 +6953,16 @@ ftrace_graph_release(struct inode *inode, struct file *file) mutex_unlock(&graph_lock); - /* Wait till all users are no longer using the old hash */ - synchronize_rcu(); + /* + * We need to do a hard force of sched synchronization. + * This is because we use preempt_disable() to do RCU, but + * the function tracers can be called where RCU is not watching + * (like before user_exit()). We can not rely on the RCU + * infrastructure to do the synchronization, thus we must do it + * ourselves. + */ + if (old_hash != EMPTY_HASH) + synchronize_rcu_tasks_rude(); free_ftrace_hash(old_hash); } @@ -5329,12 +6990,10 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) func_g.len = strlen(func_g.search); - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); - if (unlikely(ftrace_disabled)) { - mutex_unlock(&ftrace_lock); + if (unlikely(ftrace_disabled)) return -ENODEV; - } do_for_each_ftrace_rec(pg, rec) { @@ -5349,8 +7008,8 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) if (entry) continue; - if (add_hash_entry(hash, rec->ip) < 0) - goto out; + if (add_hash_entry(hash, rec->ip) == NULL) + return 0; } else { if (entry) { free_hash_entry(hash, entry); @@ -5358,14 +7017,10 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) } } } + cond_resched(); } while_for_each_ftrace_rec(); -out: - mutex_unlock(&ftrace_lock); - - if (fail) - return -EINVAL; - return 0; + return fail ? 
-EINVAL : 0;
 }
 
 static ssize_t
@@ -5424,16 +7079,16 @@ void ftrace_create_filter_files(struct ftrace_ops *ops,
 				struct dentry *parent)
 {
-	trace_create_file("set_ftrace_filter", 0644, parent,
+	trace_create_file("set_ftrace_filter", TRACE_MODE_WRITE, parent,
 			  ops, &ftrace_filter_fops);
 
-	trace_create_file("set_ftrace_notrace", 0644, parent,
+	trace_create_file("set_ftrace_notrace", TRACE_MODE_WRITE, parent,
 			  ops, &ftrace_notrace_fops);
 }
 
 /*
  * The name "destroy_filter_files" is really a misnomer. Although
- * in the future, it may actualy delete the files, but this is
+ * in the future, it may actually delete the files, but this is
  * really intended to make sure the ops passed in are disabled
  * and that when this function returns, the caller is free to
  * free the ops.
@@ -5454,19 +7109,25 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops)
 
 static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer)
 {
-	trace_create_file("available_filter_functions", 0444,
+	trace_create_file("available_filter_functions", TRACE_MODE_READ,
 			d_tracer, NULL, &ftrace_avail_fops);
 
-	trace_create_file("enabled_functions", 0444,
+	trace_create_file("available_filter_functions_addrs", TRACE_MODE_READ,
+			d_tracer, NULL, &ftrace_avail_addrs_fops);
+
+	trace_create_file("enabled_functions", TRACE_MODE_READ,
 			d_tracer, NULL, &ftrace_enabled_fops);
 
+	trace_create_file("touched_functions", TRACE_MODE_READ,
+			d_tracer, NULL, &ftrace_touched_fops);
+
 	ftrace_create_filter_files(&global_ops, d_tracer);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	trace_create_file("set_graph_function", 0644, d_tracer,
+	trace_create_file("set_graph_function", TRACE_MODE_WRITE, d_tracer,
 			  NULL, &ftrace_graph_fops);
-	trace_create_file("set_graph_notrace", 0644, d_tracer,
+	trace_create_file("set_graph_notrace", TRACE_MODE_WRITE, d_tracer,
 			  NULL, &ftrace_graph_notrace_fops);
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
@@ -5486,17 +7147,41 @@ static int ftrace_cmp_ips(const void *a, const void *b)
 	return 0;
 }
 
+#ifdef CONFIG_FTRACE_SORT_STARTUP_TEST
+static void test_is_sorted(unsigned long *start, unsigned long count)
+{
+	int i;
+
+	for (i = 1; i < count; i++) {
+		if (WARN(start[i - 1] > start[i],
+			 "[%d] %pS at %lx is not sorted with %pS at %lx\n", i,
+			 (void *)start[i - 1], start[i - 1],
+			 (void *)start[i], start[i]))
+			break;
+	}
+	if (i == count)
+		pr_info("ftrace section at %px sorted properly\n", start);
+}
+#else
+static void test_is_sorted(unsigned long *start, unsigned long count)
+{
+}
+#endif
+
 static int ftrace_process_locs(struct module *mod,
 			       unsigned long *start,
 			       unsigned long *end)
 {
+	struct ftrace_page *pg_unuse = NULL;
 	struct ftrace_page *start_pg;
 	struct ftrace_page *pg;
 	struct dyn_ftrace *rec;
+	unsigned long skipped = 0;
 	unsigned long count;
 	unsigned long *p;
 	unsigned long addr;
 	unsigned long flags = 0; /* Shut up gcc */
+	unsigned long pages;
 	int ret = -ENOMEM;
 
 	count = end - start;
@@ -5504,8 +7189,19 @@ static int ftrace_process_locs(struct module *mod,
 	if (!count)
 		return 0;
 
-	sort(start, count, sizeof(*start),
-	     ftrace_cmp_ips, NULL);
+	pages = DIV_ROUND_UP(count, ENTRIES_PER_PAGE);
+
+	/*
+	 * Sorting mcount in vmlinux at build time depends on
+	 * CONFIG_BUILDTIME_MCOUNT_SORT, while mcount loc in
+	 * modules cannot be sorted at build time.
+ */ + if (!IS_ENABLED(CONFIG_BUILDTIME_MCOUNT_SORT) || mod) { + sort(start, count, sizeof(*start), + ftrace_cmp_ips, NULL); + } else { + test_is_sorted(start, count); + } start_pg = ftrace_allocate_pages(count); if (!start_pg) @@ -5538,17 +7234,36 @@ static int ftrace_process_locs(struct module *mod, p = start; pg = start_pg; while (p < end) { - addr = ftrace_call_adjust(*p++); + unsigned long end_offset; + + addr = *p++; + /* * Some architecture linkers will pad between * the different mcount_loc sections of different * object files to satisfy alignments. * Skip any NULL pointers. */ - if (!addr) + if (!addr) { + skipped++; continue; + } - if (pg->index == pg->size) { + /* + * If this is core kernel, make sure the address is in core + * or inittext, as weak functions get zeroed and KASLR can + * move them to something other than zero. It just will not + * move it to an area where kernel text is. + */ + if (!mod && !(is_kernel_text(addr) || is_kernel_inittext(addr))) { + skipped++; + continue; + } + + addr = ftrace_call_adjust(addr); + + end_offset = (pg->index+1) * sizeof(pg->records[0]); + if (end_offset > PAGE_SIZE << pg->order) { /* We should have allocated enough */ if (WARN_ON(!pg->next)) break; @@ -5559,8 +7274,10 @@ static int ftrace_process_locs(struct module *mod, rec->ip = addr; } - /* We should have used all pages */ - WARN_ON(pg->next); + if (pg->next) { + pg_unuse = pg->next; + pg->next = NULL; + } /* Assign the last page to ftrace_pages */ ftrace_pages = pg; @@ -5582,6 +7299,43 @@ static int ftrace_process_locs(struct module *mod, out: mutex_unlock(&ftrace_lock); + /* We should have used all pages unless we skipped some */ + if (pg_unuse) { + unsigned long pg_remaining, remaining = 0; + unsigned long skip; + + /* Count the number of entries unused and compare it to skipped. */ + pg_remaining = (ENTRIES_PER_PAGE << pg->order) - pg->index; + + if (!WARN(skipped < pg_remaining, "Extra allocated pages for ftrace")) { + + skip = skipped - pg_remaining; + + for (pg = pg_unuse; pg; pg = pg->next) + remaining += 1 << pg->order; + + pages -= remaining; + + skip = DIV_ROUND_UP(skip, ENTRIES_PER_PAGE); + + /* + * Check to see if the number of pages remaining would + * just fit the number of entries skipped. + */ + WARN(skip != remaining, "Extra allocated pages for ftrace: %lu with %lu skipped", + remaining, skipped); + } + /* Need to synchronize with ftrace_location_range() */ + synchronize_rcu(); + ftrace_free_pages(pg_unuse); + } + + if (!mod) { + count -= skipped; + pr_info("ftrace: allocating %ld entries in %ld pages\n", + count, pages); + } + return ret; } @@ -5602,6 +7356,59 @@ struct ftrace_mod_map { unsigned int num_funcs; }; +static int ftrace_get_trampoline_kallsym(unsigned int symnum, + unsigned long *value, char *type, + char *name, char *module_name, + int *exported) +{ + struct ftrace_ops *op; + + list_for_each_entry_rcu(op, &ftrace_ops_trampoline_list, list) { + if (!op->trampoline || symnum--) + continue; + *value = op->trampoline; + *type = 't'; + strscpy(name, FTRACE_TRAMPOLINE_SYM, KSYM_NAME_LEN); + strscpy(module_name, FTRACE_TRAMPOLINE_MOD, MODULE_NAME_LEN); + *exported = 0; + return 0; + } + + return -ERANGE; +} + +#if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) || defined(CONFIG_MODULES) +/* + * Check if the current ops references the given ip. + * + * If the ops traces all functions, then it was already accounted for. + * If the ops does not trace the current record function, skip it. + * If the ops ignores the function via notrace filter, skip it. 
+ */ +static bool +ops_references_ip(struct ftrace_ops *ops, unsigned long ip) +{ + /* If ops isn't enabled, ignore it */ + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return false; + + /* If ops traces all then it includes this function */ + if (ops_traces_mod(ops)) + return true; + + /* The function must be in the filter */ + if (!ftrace_hash_empty(ops->func_hash->filter_hash) && + !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip)) + return false; + + /* If in notrace hash, we ignore it too */ + if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip)) + return false; + + return true; +} +#endif + #ifdef CONFIG_MODULES #define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) @@ -5614,8 +7421,19 @@ static int referenced_filters(struct dyn_ftrace *rec) int cnt = 0; for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { - if (ops_references_rec(ops, rec)) - cnt++; + if (ops_references_ip(ops, rec->ip)) { + if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_DIRECT)) + continue; + if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + continue; + cnt++; + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) + rec->flags |= FTRACE_FL_REGS; + if (cnt == 1 && ops->trampoline) + rec->flags |= FTRACE_FL_TRAMP; + else + rec->flags &= ~FTRACE_FL_TRAMP; + } } return cnt; @@ -5644,7 +7462,7 @@ clear_mod_from_hash(struct ftrace_page *pg, struct ftrace_hash *hash) } } -/* Clear any records from hashs */ +/* Clear any records from hashes */ static void clear_mod_from_hashes(struct ftrace_page *pg) { struct trace_array *tr; @@ -5685,13 +7503,13 @@ void ftrace_release_mod(struct module *mod) struct ftrace_page **last_pg; struct ftrace_page *tmp_page = NULL; struct ftrace_page *pg; - int order; mutex_lock(&ftrace_lock); - if (ftrace_disabled) - goto out_unlock; - + /* + * To avoid the UAF problem after the module is unloaded, the + * 'mod_map' resource needs to be released unconditionally. + */ list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) { if (mod_map->mod == mod) { list_del_rcu(&mod_map->list); @@ -5700,6 +7518,9 @@ void ftrace_release_mod(struct module *mod) } } + if (ftrace_disabled) + goto out_unlock; + /* * Each module has its own ftrace_pages, remove * them from the list. @@ -5707,8 +7528,7 @@ void ftrace_release_mod(struct module *mod) last_pg = &ftrace_pages_start; for (pg = ftrace_pages_start; pg; pg = *last_pg) { rec = &pg->records[0]; - if (within_module_core(rec->ip, mod) || - within_module_init(rec->ip, mod)) { + if (within_module(rec->ip, mod)) { /* * As core pages are first, the first * page should never be a module page. @@ -5731,15 +7551,21 @@ void ftrace_release_mod(struct module *mod) out_unlock: mutex_unlock(&ftrace_lock); + /* Need to synchronize with ftrace_location_range() */ + if (tmp_page) + synchronize_rcu(); for (pg = tmp_page; pg; pg = tmp_page) { /* Needs to be called outside of ftrace_lock */ clear_mod_from_hashes(pg); - order = get_count_order(pg->size / ENTRIES_PER_PAGE); - free_pages((unsigned long)pg->records, order); + if (pg->records) { + free_pages((unsigned long)pg->records, pg->order); + ftrace_number_of_pages -= 1 << pg->order; + } tmp_page = pg->next; kfree(pg); + ftrace_number_of_groups--; } } @@ -5756,7 +7582,7 @@ void ftrace_module_enable(struct module *mod) /* * If the tracing is enabled, go ahead and enable the record. * - * The reason not to enable the record immediatelly is the + * The reason not to enable the record immediately is the * inherent check of ftrace_make_nop/ftrace_make_call for * correct previous instructions. 
Making first the NOP * conversion puts the module to the correct state, thus @@ -5777,10 +7603,18 @@ void ftrace_module_enable(struct module *mod) * not part of this module, then skip this pg, * which the "break" will do. */ - if (!within_module_core(rec->ip, mod) && - !within_module_init(rec->ip, mod)) + if (!within_module(rec->ip, mod)) break; + cond_resched(); + + /* Weak functions should still be ignored */ + if (!test_for_valid_rec(rec)) { + /* Clear all other flags. Should not be enabled anyway */ + rec->flags = FTRACE_FL_DISABLED; + continue; + } + cnt = 0; /* @@ -5792,8 +7626,8 @@ void ftrace_module_enable(struct module *mod) if (ftrace_start_up) cnt += referenced_filters(rec); - /* This clears FTRACE_FL_DISABLED */ - rec->flags = cnt; + rec->flags &= ~FTRACE_FL_DISABLED; + rec->flags += cnt; if (ftrace_start_up && cnt) { int failed = __ftrace_replace_code(rec, 1); @@ -5817,11 +7651,16 @@ void ftrace_module_enable(struct module *mod) void ftrace_module_init(struct module *mod) { + int ret; + if (ftrace_disabled || !mod->num_ftrace_callsites) return; - ftrace_process_locs(mod, mod->ftrace_callsites, - mod->ftrace_callsites + mod->num_ftrace_callsites); + ret = ftrace_process_locs(mod, mod->ftrace_callsites, + mod->ftrace_callsites + mod->num_ftrace_callsites); + if (ret) + pr_warn("ftrace: failed to allocate entries for module '%s' functions\n", + mod->name); } static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, @@ -5862,6 +7701,9 @@ allocate_ftrace_mod_map(struct module *mod, { struct ftrace_mod_map *mod_map; + if (ftrace_disabled) + return NULL; + mod_map = kmalloc(sizeof(*mod_map), GFP_KERNEL); if (!mod_map) return NULL; @@ -5878,7 +7720,7 @@ allocate_ftrace_mod_map(struct module *mod, return mod_map; } -static const char * +static int ftrace_func_address_lookup(struct ftrace_mod_map *mod_map, unsigned long addr, unsigned long *size, unsigned long *off, char *sym) @@ -5899,21 +7741,18 @@ ftrace_func_address_lookup(struct ftrace_mod_map *mod_map, *size = found_func->size; if (off) *off = addr - found_func->ip; - if (sym) - strlcpy(sym, found_func->name, KSYM_NAME_LEN); - - return found_func->name; + return strscpy(sym, found_func->name, KSYM_NAME_LEN); } - return NULL; + return 0; } -const char * +int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { struct ftrace_mod_map *mod_map; - const char *ret = NULL; + int ret = 0; /* mod_map is freed via call_rcu() */ preempt_disable(); @@ -5936,6 +7775,7 @@ int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, { struct ftrace_mod_map *mod_map; struct ftrace_mod_func *mod_func; + int ret; preempt_disable(); list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { @@ -5953,8 +7793,8 @@ int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, *value = mod_func->ip; *type = 'T'; - strlcpy(name, mod_func->name, KSYM_NAME_LEN); - strlcpy(module_name, mod_map->mod->name, MODULE_NAME_LEN); + strscpy(name, mod_func->name, KSYM_NAME_LEN); + strscpy(module_name, mod_map->mod->name, MODULE_NAME_LEN); *exported = 1; preempt_enable(); return 0; @@ -5962,8 +7802,10 @@ int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, WARN_ON(1); break; } + ret = ftrace_get_trampoline_kallsym(symnum, value, type, name, + module_name, exported); preempt_enable(); - return -ERANGE; + return ret; } #else @@ -5975,6 +7817,18 @@ allocate_ftrace_mod_map(struct module *mod, { return NULL; } +int ftrace_mod_get_kallsym(unsigned int symnum, unsigned 
long *value, + char *type, char *name, char *module_name, + int *exported) +{ + int ret; + + preempt_disable(); + ret = ftrace_get_trampoline_kallsym(symnum, value, type, name, + module_name, exported); + preempt_enable(); + return ret; +} #endif /* CONFIG_MODULES */ struct ftrace_init_func { @@ -5988,11 +7842,7 @@ clear_func_from_hash(struct ftrace_init_func *func, struct ftrace_hash *hash) { struct ftrace_func_entry *entry; - if (ftrace_hash_empty(hash)) - return; - - entry = __ftrace_lookup_ip(hash, func->ip); - + entry = ftrace_lookup_ip(hash, func->ip); /* * Do not allow this rec to match again. * Yeah, it may waste some memory, but will be removed @@ -6026,7 +7876,7 @@ static void add_to_clear_hash_list(struct list_head *clear_list, func = kmalloc(sizeof(*func), GFP_KERNEL); if (!func) { - WARN_ONCE(1, "alloc failure, ftrace filter could be stale\n"); + MEM_FAIL(1, "alloc failure, ftrace filter could be stale\n"); return; } @@ -6039,15 +7889,13 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) unsigned long start = (unsigned long)(start_ptr); unsigned long end = (unsigned long)(end_ptr); struct ftrace_page **last_pg = &ftrace_pages_start; + struct ftrace_page *tmp_page = NULL; struct ftrace_page *pg; struct dyn_ftrace *rec; struct dyn_ftrace key; struct ftrace_mod_map *mod_map = NULL; struct ftrace_init_func *func, *func_next; - struct list_head clear_hash; - int order; - - INIT_LIST_HEAD(&clear_hash); + LIST_HEAD(clear_hash); key.ip = start; key.flags = end; /* overload flags, as it is unsigned long */ @@ -6083,9 +7931,8 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) ftrace_update_tot_cnt--; if (!pg->index) { *last_pg = pg->next; - order = get_count_order(pg->size / ENTRIES_PER_PAGE); - free_pages((unsigned long)pg->records, order); - kfree(pg); + pg->next = tmp_page; + tmp_page = pg; pg = container_of(last_pg, struct ftrace_page, next); if (!(*last_pg)) ftrace_pages = pg; @@ -6102,6 +7949,11 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) clear_func_from_hashes(func); kfree(func); } + /* Need to synchronize with ftrace_location_range() */ + if (tmp_page) { + synchronize_rcu(); + ftrace_free_pages(tmp_page); + } } void __init ftrace_free_init_mem(void) @@ -6109,9 +7961,16 @@ void __init ftrace_free_init_mem(void) void *start = (void *)(&__init_begin); void *end = (void *)(&__init_end); + ftrace_boot_snapshot(); + ftrace_free_mem(NULL, start, end); } +int __init __weak ftrace_dyn_arch_init(void) +{ + return 0; +} + void __init ftrace_init(void) { extern unsigned long __start_mcount_loc[]; @@ -6131,14 +7990,18 @@ void __init ftrace_init(void) goto failed; } - pr_info("ftrace: allocating %ld entries in %ld pages\n", - count, count / ENTRIES_PER_PAGE + 1); - - last_ftrace_enabled = ftrace_enabled = 1; - ret = ftrace_process_locs(NULL, __start_mcount_loc, __stop_mcount_loc); + if (ret) { + pr_warn("ftrace: failed to allocate entries for functions\n"); + goto failed; + } + + pr_info("ftrace: allocated %ld pages with %ld groups\n", + ftrace_number_of_pages, ftrace_number_of_groups); + + last_ftrace_enabled = ftrace_enabled = 1; set_ftrace_early_filters(); @@ -6154,21 +8017,42 @@ void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops) static void ftrace_update_trampoline(struct ftrace_ops *ops) { + unsigned long trampoline = ops->trampoline; + arch_ftrace_update_trampoline(ops); + if (ops->trampoline && ops->trampoline != trampoline && + (ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) { + /* Add to 
kallsyms before the perf events */ + ftrace_add_trampoline_to_kallsyms(ops); + perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, + ops->trampoline, ops->trampoline_size, false, + FTRACE_TRAMPOLINE_SYM); + /* + * Record the perf text poke event after the ksymbol register + * event. + */ + perf_event_text_poke((void *)ops->trampoline, NULL, 0, + (void *)ops->trampoline, + ops->trampoline_size); + } } void ftrace_init_trace_array(struct trace_array *tr) { + if (tr->flags & TRACE_ARRAY_FL_MOD_INIT) + return; + INIT_LIST_HEAD(&tr->func_probes); INIT_LIST_HEAD(&tr->mod_trace); INIT_LIST_HEAD(&tr->mod_notrace); + + tr->flags |= TRACE_ARRAY_FL_MOD_INIT; } #else struct ftrace_ops global_ops = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED | + .flags = FTRACE_OPS_FL_INITIALIZED | FTRACE_OPS_FL_PID, }; @@ -6180,12 +8064,8 @@ static int __init ftrace_nodyn_init(void) core_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; } -static inline void ftrace_startup_enable(int command) { } static inline void ftrace_startup_all(int command) { } -# define ftrace_startup_sysctl() do { } while (0) -# define ftrace_shutdown_sysctl() do { } while (0) - static void ftrace_update_trampoline(struct ftrace_ops *ops) { } @@ -6195,8 +8075,10 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops) __init void ftrace_init_global_array_ops(struct trace_array *tr) { tr->ops = &global_ops; - tr->ops->private = tr; + if (!global_ops.private) + global_ops.private = tr; ftrace_init_trace_array(tr); + init_array_fgraph_ops(tr, tr->ops); } void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) @@ -6216,29 +8098,30 @@ void ftrace_reset_array_ops(struct trace_array *tr) tr->ops->func = ftrace_stub; } -static inline void +static nokprobe_inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ignored, struct pt_regs *regs) + struct ftrace_ops *ignored, struct ftrace_regs *fregs) { + struct pt_regs *regs = ftrace_get_regs(fregs); struct ftrace_ops *op; int bit; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); - if (bit < 0) - return; - /* - * Some of the ops may be dynamically allocated, - * they must be freed after a synchronize_rcu(). + * The ftrace_test_and_set_recursion() will disable preemption, + * which is required since some of the ops may be dynamically + * allocated, they must be freed after a synchronize_rcu(). */ - preempt_disable_notrace(); + bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START); + if (bit < 0) + return; do_for_each_ftrace_op(op, ftrace_ops_list) { + /* Stub functions don't need to be called nor tested */ + if (op->flags & FTRACE_OPS_FL_STUB) + continue; /* * Check the following for each ops before calling their func: * if RCU flag is set, then rcu_is_watching() must be true - * if PER_CPU is set, then ftrace_function_local_disable() - * must be false * Otherwise test if the ip matches the ops filter * * If any of the above fails then the op->func() is not executed. @@ -6249,11 +8132,10 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, pr_warn("op=%p %pS\n", op, op); goto out; } - op->func(ip, parent_ip, op, regs); + op->func(ip, parent_ip, op, fregs); } } while_for_each_ftrace_op(op); out: - preempt_enable_notrace(); trace_clear_recursion(bit); } @@ -6269,44 +8151,45 @@ out: * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved. 
 * An architecture can pass partial regs with ftrace_ops and still
 * set the ARCH_SUPPORTS_FTRACE_OPS.
+ *
+ * In vmlinux.lds.h, ftrace_ops_list_func() is defined to be
+ * arch_ftrace_ops_list_func.
 */
 #if ARCH_SUPPORTS_FTRACE_OPS
-static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
-				 struct ftrace_ops *op, struct pt_regs *regs)
+void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
+			       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
-	__ftrace_ops_list_func(ip, parent_ip, NULL, regs);
+	kmsan_unpoison_memory(fregs, ftrace_regs_size());
+	__ftrace_ops_list_func(ip, parent_ip, NULL, fregs);
 }
 #else
-static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
+void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
 {
 	__ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
 }
 #endif
+NOKPROBE_SYMBOL(arch_ftrace_ops_list_func);
 
 /*
  * If there's only one function registered but it does not support
- * recursion, needs RCU protection and/or requires per cpu handling, then
- * this function will be called by the mcount trampoline.
+ * recursion or needs RCU protection, then this function will be called
+ * by the mcount trampoline.
  */
 static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
-				   struct ftrace_ops *op, struct pt_regs *regs)
+				   struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
 	int bit;
 
-	if ((op->flags & FTRACE_OPS_FL_RCU) && !rcu_is_watching())
-		return;
-
-	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+	bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START);
 	if (bit < 0)
 		return;
 
-	preempt_disable_notrace();
-
-	op->func(ip, parent_ip, op, regs);
+	if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching())
+		op->func(ip, parent_ip, op, fregs);
 
-	preempt_enable_notrace();
 	trace_clear_recursion(bit);
 }
+NOKPROBE_SYMBOL(ftrace_ops_assist_func);
 
 /**
  * ftrace_ops_get_func - get the function a trampoline should call
@@ -6317,16 +8200,16 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
  * have its own recursion protection, then it should call the
  * ftrace_ops_assist_func() instead.
  *
- * Returns the function that the trampoline should call for @ops.
+ * Returns: the function that the trampoline should call for @ops.
  */
 ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
 {
 	/*
-	 * If the function does not handle recursion, needs to be RCU safe,
-	 * or does per cpu logic, then we need to call the assist handler.
+	 * If the function does not handle recursion or needs to be RCU safe,
+	 * then we need to call the assist handler.
*/ - if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE) || - ops->flags & FTRACE_OPS_FL_RCU) + if (ops->flags & (FTRACE_OPS_FL_RECURSION | + FTRACE_OPS_FL_RCU)) return ftrace_ops_assist_func; return ops->func; @@ -6334,15 +8217,23 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) static void ftrace_filter_pid_sched_switch_probe(void *data, bool preempt, - struct task_struct *prev, struct task_struct *next) + struct task_struct *prev, + struct task_struct *next, + unsigned int prev_state) { struct trace_array *tr = data; struct trace_pid_list *pid_list; + struct trace_pid_list *no_pid_list; pid_list = rcu_dereference_sched(tr->function_pids); + no_pid_list = rcu_dereference_sched(tr->function_no_pids); - this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid, - trace_ignore_this_task(pid_list, next)); + if (trace_ignore_this_task(pid_list, no_pid_list, next)) + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, + FTRACE_PID_IGNORE); + else + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, + next->pid); } static void @@ -6355,6 +8246,9 @@ ftrace_pid_follow_sched_process_fork(void *data, pid_list = rcu_dereference_sched(tr->function_pids); trace_filter_add_remove_task(pid_list, self, task); + + pid_list = rcu_dereference_sched(tr->function_no_pids); + trace_filter_add_remove_task(pid_list, self, task); } static void @@ -6365,6 +8259,9 @@ ftrace_pid_follow_sched_process_exit(void *data, struct task_struct *task) pid_list = rcu_dereference_sched(tr->function_pids); trace_filter_add_remove_task(pid_list, NULL, task); + + pid_list = rcu_dereference_sched(tr->function_no_pids); + trace_filter_add_remove_task(pid_list, NULL, task); } void ftrace_pid_follow_fork(struct trace_array *tr, bool enable) @@ -6372,52 +8269,67 @@ void ftrace_pid_follow_fork(struct trace_array *tr, bool enable) if (enable) { register_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork, tr); - register_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit, + register_trace_sched_process_free(ftrace_pid_follow_sched_process_exit, tr); } else { unregister_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork, tr); - unregister_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit, + unregister_trace_sched_process_free(ftrace_pid_follow_sched_process_exit, tr); } } -static void clear_ftrace_pids(struct trace_array *tr) +static void clear_ftrace_pids(struct trace_array *tr, int type) { struct trace_pid_list *pid_list; + struct trace_pid_list *no_pid_list; int cpu; pid_list = rcu_dereference_protected(tr->function_pids, lockdep_is_held(&ftrace_lock)); - if (!pid_list) + no_pid_list = rcu_dereference_protected(tr->function_no_pids, + lockdep_is_held(&ftrace_lock)); + + /* Make sure there's something to do */ + if (!pid_type_enabled(type, pid_list, no_pid_list)) return; - unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); + /* See if the pids still need to be checked after this */ + if (!still_need_pid_events(type, pid_list, no_pid_list)) { + unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); + for_each_possible_cpu(cpu) + per_cpu_ptr(tr->array_buffer.data, cpu)->ftrace_ignore_pid = FTRACE_PID_TRACE; + } - for_each_possible_cpu(cpu) - per_cpu_ptr(tr->trace_buffer.data, cpu)->ftrace_ignore_pid = false; + if (type & TRACE_PIDS) + rcu_assign_pointer(tr->function_pids, NULL); - rcu_assign_pointer(tr->function_pids, NULL); + if (type & TRACE_NO_PIDS) + rcu_assign_pointer(tr->function_no_pids, NULL); /* Wait till all users are no longer 
using pid filtering */ synchronize_rcu(); - trace_free_pid_list(pid_list); + if ((type & TRACE_PIDS) && pid_list) + trace_pid_list_free(pid_list); + + if ((type & TRACE_NO_PIDS) && no_pid_list) + trace_pid_list_free(no_pid_list); } void ftrace_clear_pids(struct trace_array *tr) { mutex_lock(&ftrace_lock); - clear_ftrace_pids(tr); + clear_ftrace_pids(tr, TRACE_PIDS | TRACE_NO_PIDS); mutex_unlock(&ftrace_lock); } -static void ftrace_pid_reset(struct trace_array *tr) +static void ftrace_pid_reset(struct trace_array *tr, int type) { mutex_lock(&ftrace_lock); - clear_ftrace_pids(tr); + clear_ftrace_pids(tr, type); ftrace_update_pid_func(); ftrace_startup_all(0); @@ -6450,9 +8362,10 @@ static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) struct trace_array *tr = m->private; struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids); - if (v == FTRACE_NO_PIDS) + if (v == FTRACE_NO_PIDS) { + (*pos)++; return NULL; - + } return trace_pid_next(pid_list, v, pos); } @@ -6480,21 +8393,71 @@ static const struct seq_operations ftrace_pid_sops = { .show = fpid_show, }; -static int -ftrace_pid_open(struct inode *inode, struct file *file) +static void *fnpid_start(struct seq_file *m, loff_t *pos) + __acquires(RCU) +{ + struct trace_pid_list *pid_list; + struct trace_array *tr = m->private; + + mutex_lock(&ftrace_lock); + rcu_read_lock_sched(); + + pid_list = rcu_dereference_sched(tr->function_no_pids); + + if (!pid_list) + return !(*pos) ? FTRACE_NO_PIDS : NULL; + + return trace_pid_start(pid_list, pos); +} + +static void *fnpid_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct trace_array *tr = m->private; + struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_no_pids); + + if (v == FTRACE_NO_PIDS) { + (*pos)++; + return NULL; + } + return trace_pid_next(pid_list, v, pos); +} + +static const struct seq_operations ftrace_no_pid_sops = { + .start = fnpid_start, + .next = fnpid_next, + .stop = fpid_stop, + .show = fpid_show, +}; + +static int pid_open(struct inode *inode, struct file *file, int type) { + const struct seq_operations *seq_ops; struct trace_array *tr = inode->i_private; struct seq_file *m; int ret = 0; - if (trace_array_get(tr) < 0) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) - ftrace_pid_reset(tr); + ftrace_pid_reset(tr, type); + + switch (type) { + case TRACE_PIDS: + seq_ops = &ftrace_pid_sops; + break; + case TRACE_NO_PIDS: + seq_ops = &ftrace_no_pid_sops; + break; + default: + trace_array_put(tr); + WARN_ON_ONCE(1); + return -EINVAL; + } - ret = seq_open(file, &ftrace_pid_sops); + ret = seq_open(file, seq_ops); if (ret < 0) { trace_array_put(tr); } else { @@ -6506,10 +8469,23 @@ ftrace_pid_open(struct inode *inode, struct file *file) return ret; } +static int +ftrace_pid_open(struct inode *inode, struct file *file) +{ + return pid_open(inode, file, TRACE_PIDS); +} + +static int +ftrace_no_pid_open(struct inode *inode, struct file *file) +{ + return pid_open(inode, file, TRACE_NO_PIDS); +} + static void ignore_task_cpu(void *data) { struct trace_array *tr = data; struct trace_pid_list *pid_list; + struct trace_pid_list *no_pid_list; /* * This function is called by on_each_cpu() while the @@ -6517,39 +8493,69 @@ static void ignore_task_cpu(void *data) */ pid_list = rcu_dereference_protected(tr->function_pids, mutex_is_locked(&ftrace_lock)); + no_pid_list = rcu_dereference_protected(tr->function_no_pids, + mutex_is_locked(&ftrace_lock)); - 
this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid, - trace_ignore_this_task(pid_list, current)); + if (trace_ignore_this_task(pid_list, no_pid_list, current)) + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, + FTRACE_PID_IGNORE); + else + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, + current->pid); } static ssize_t -ftrace_pid_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, int type) { struct seq_file *m = filp->private_data; struct trace_array *tr = m->private; - struct trace_pid_list *filtered_pids = NULL; + struct trace_pid_list *filtered_pids; + struct trace_pid_list *other_pids; struct trace_pid_list *pid_list; ssize_t ret; if (!cnt) return 0; - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); - filtered_pids = rcu_dereference_protected(tr->function_pids, + switch (type) { + case TRACE_PIDS: + filtered_pids = rcu_dereference_protected(tr->function_pids, + lockdep_is_held(&ftrace_lock)); + other_pids = rcu_dereference_protected(tr->function_no_pids, + lockdep_is_held(&ftrace_lock)); + break; + case TRACE_NO_PIDS: + filtered_pids = rcu_dereference_protected(tr->function_no_pids, + lockdep_is_held(&ftrace_lock)); + other_pids = rcu_dereference_protected(tr->function_pids, lockdep_is_held(&ftrace_lock)); + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt); if (ret < 0) - goto out; + return ret; + + switch (type) { + case TRACE_PIDS: + rcu_assign_pointer(tr->function_pids, pid_list); + break; + case TRACE_NO_PIDS: + rcu_assign_pointer(tr->function_no_pids, pid_list); + break; + } - rcu_assign_pointer(tr->function_pids, pid_list); if (filtered_pids) { synchronize_rcu(); - trace_free_pid_list(filtered_pids); - } else if (pid_list) { + trace_pid_list_free(filtered_pids); + } else if (pid_list && !other_pids) { /* Register a probe to set whether to ignore the tracing of a task */ register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); } @@ -6563,15 +8569,26 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf, ftrace_update_pid_func(); ftrace_startup_all(0); - out: - mutex_unlock(&ftrace_lock); - if (ret > 0) - *ppos += ret; + *ppos += ret; return ret; } +static ssize_t +ftrace_pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS); +} + +static ssize_t +ftrace_no_pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS); +} + static int ftrace_pid_release(struct inode *inode, struct file *file) { @@ -6590,10 +8607,20 @@ static const struct file_operations ftrace_pid_fops = { .release = ftrace_pid_release, }; +static const struct file_operations ftrace_no_pid_fops = { + .open = ftrace_no_pid_open, + .write = ftrace_no_pid_write, + .read = seq_read, + .llseek = tracing_lseek, + .release = ftrace_pid_release, +}; + void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer) { - trace_create_file("set_ftrace_pid", 0644, d_tracer, + trace_create_file("set_ftrace_pid", TRACE_MODE_WRITE, d_tracer, tr, &ftrace_pid_fops); + trace_create_file("set_ftrace_notrace_pid", TRACE_MODE_WRITE, + d_tracer, tr, &ftrace_no_pid_fops); } void __init ftrace_init_tracefs_toplevel(struct trace_array *tr, @@ -6618,19 +8645,159 @@ void ftrace_kill(void) ftrace_disabled = 1; ftrace_enabled = 0; 
ftrace_trace_function = ftrace_stub; + kprobe_ftrace_kill(); } /** - * Test if ftrace is dead or not. + * ftrace_is_dead - Test if ftrace is dead or not. + * + * Returns: 1 if ftrace is "dead", zero otherwise. */ int ftrace_is_dead(void) { return ftrace_disabled; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* + * When registering ftrace_ops with IPMODIFY, it is necessary to make sure + * it doesn't conflict with any direct ftrace_ops. If there is existing + * direct ftrace_ops on a kernel function being patched, call + * FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER on it to enable sharing. + * + * @ops: ftrace_ops being registered. + * + * Returns: + * 0 on success; + * Negative on failure. + */ +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops) +{ + struct ftrace_func_entry *entry; + struct ftrace_hash *hash; + struct ftrace_ops *op; + int size, i, ret; + + lockdep_assert_held_once(&direct_mutex); + + if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + return 0; + + hash = ops->func_hash->filter_hash; + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + unsigned long ip = entry->ip; + bool found_op = false; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (!(op->flags & FTRACE_OPS_FL_DIRECT)) + continue; + if (ops_references_ip(op, ip)) { + found_op = true; + break; + } + } while_for_each_ftrace_op(op); + mutex_unlock(&ftrace_lock); + + if (found_op) { + if (!op->ops_func) + return -EBUSY; + + ret = op->ops_func(op, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER); + if (ret) + return ret; + } + } + } + + return 0; +} + +/* + * Similar to prepare_direct_functions_for_ipmodify, clean up after ops + * with IPMODIFY is unregistered. The cleanup is optional for most DIRECT + * ops. + */ +static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops) +{ + struct ftrace_func_entry *entry; + struct ftrace_hash *hash; + struct ftrace_ops *op; + int size, i; + + if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + return; + + mutex_lock(&direct_mutex); + + hash = ops->func_hash->filter_hash; + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + unsigned long ip = entry->ip; + bool found_op = false; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (!(op->flags & FTRACE_OPS_FL_DIRECT)) + continue; + if (ops_references_ip(op, ip)) { + found_op = true; + break; + } + } while_for_each_ftrace_op(op); + mutex_unlock(&ftrace_lock); + + /* The cleanup is optional, ignore any errors */ + if (found_op && op->ops_func) + op->ops_func(op, FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER); + } + } + mutex_unlock(&direct_mutex); +} + +#define lock_direct_mutex() mutex_lock(&direct_mutex) +#define unlock_direct_mutex() mutex_unlock(&direct_mutex) + +#else /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops) +{ + return 0; +} + +static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops) +{ +} + +#define lock_direct_mutex() do { } while (0) +#define unlock_direct_mutex() do { } while (0) + +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + +/* + * Similar to register_ftrace_function, except we don't lock direct_mutex. 
+ */
+static int register_ftrace_function_nolock(struct ftrace_ops *ops)
+{
+	int ret;
+
+	ftrace_ops_init(ops);
+
+	mutex_lock(&ftrace_lock);
+
+	ret = ftrace_startup(ops, 0);
+
+	mutex_unlock(&ftrace_lock);
+
+	return ret;
+}
+
 /**
  * register_ftrace_function - register a function for profiling
- * @ops - ops structure that holds the function for profiling.
+ * @ops: ops structure that holds the function for profiling.
  *
  * Register a function to be called by all functions in the
  * kernel.
@@ -6641,23 +8808,24 @@ int ftrace_is_dead(void)
  */
 int register_ftrace_function(struct ftrace_ops *ops)
 {
-	int ret = -1;
-
-	ftrace_ops_init(ops);
-
-	mutex_lock(&ftrace_lock);
-
-	ret = ftrace_startup(ops, 0);
+	int ret;
 
-	mutex_unlock(&ftrace_lock);
+	lock_direct_mutex();
+	ret = prepare_direct_functions_for_ipmodify(ops);
+	if (ret < 0)
+		goto out_unlock;
 
+	ret = register_ftrace_function_nolock(ops);
+out_unlock:
+	unlock_direct_mutex();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(register_ftrace_function);
 
 /**
  * unregister_ftrace_function - unregister a function for profiling.
- * @ops - ops structure that holds the function to unregister
+ * @ops: ops structure that holds the function to unregister
  *
  * Unregister a function that was added to be called by ftrace profiling.
  */
@@ -6669,28 +8837,153 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
 	ret = ftrace_shutdown(ops, 0);
 	mutex_unlock(&ftrace_lock);
 
+	cleanup_direct_functions_after_ipmodify(ops);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(unregister_ftrace_function);
 
-int
-ftrace_enable_sysctl(struct ctl_table *table, int write,
-		     void __user *buffer, size_t *lenp,
-		     loff_t *ppos)
+static int symbols_cmp(const void *a, const void *b)
 {
-	int ret = -ENODEV;
+	const char **str_a = (const char **) a;
+	const char **str_b = (const char **) b;
 
-	mutex_lock(&ftrace_lock);
+	return strcmp(*str_a, *str_b);
+}
+
+struct kallsyms_data {
+	unsigned long *addrs;
+	const char **syms;
+	size_t cnt;
+	size_t found;
+};
+
+/*
+ * This function gets called for all kernel and module symbols
+ * and returns 1 in case we resolved all the requested symbols,
+ * 0 otherwise.
+ */
+static int kallsyms_callback(void *data, const char *name, unsigned long addr)
+{
+	struct kallsyms_data *args = data;
+	const char **sym;
+	int idx;
+
+	sym = bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp);
+	if (!sym)
+		return 0;
+
+	idx = sym - args->syms;
+	if (args->addrs[idx])
+		return 0;
+
+	if (!ftrace_location(addr))
+		return 0;
+
+	args->addrs[idx] = addr;
+	args->found++;
+	return args->found == args->cnt ? 1 : 0;
+}
+
+/**
+ * ftrace_lookup_symbols - Lookup addresses for array of symbols
+ *
+ * @sorted_syms: array of symbol pointers to resolve,
+ *               must be alphabetically sorted
+ * @cnt: number of symbols/addresses in @sorted_syms/@addrs arrays
+ * @addrs: array for storing resulting addresses
+ *
+ * This function looks up addresses for the array of symbols provided in
+ * the @sorted_syms array (must be alphabetically sorted) and stores them
+ * in the @addrs array, which needs to be big enough to store at least
+ * @cnt addresses.
+ *
+ * Returns: 0 if all provided symbols are found, -ESRCH otherwise.
+ */
+int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *addrs)
+{
+	struct kallsyms_data args;
+	int found_all;
+
+	memset(addrs, 0, sizeof(*addrs) * cnt);
+	args.addrs = addrs;
+	args.syms = sorted_syms;
+	args.cnt = cnt;
+	args.found = 0;
+
+	found_all = kallsyms_on_each_symbol(kallsyms_callback, &args);
+	if (found_all)
+		return 0;
+	found_all = module_kallsyms_on_each_symbol(NULL, kallsyms_callback, &args);
+	return found_all ? 0 : -ESRCH;
+}
+
+#ifdef CONFIG_SYSCTL
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static void ftrace_startup_sysctl(void)
+{
+	int command;
 
 	if (unlikely(ftrace_disabled))
-		goto out;
+		return;
+
+	/* Force update next time */
+	saved_ftrace_func = NULL;
+	/* ftrace_start_up is true if we want ftrace running */
+	if (ftrace_start_up) {
+		command = FTRACE_UPDATE_CALLS;
+		if (ftrace_graph_active)
+			command |= FTRACE_START_FUNC_RET;
+		ftrace_startup_enable(command);
+	}
+}
+
+static void ftrace_shutdown_sysctl(void)
+{
+	int command;
+
+	if (unlikely(ftrace_disabled))
+		return;
+
+	/* ftrace_start_up is true if ftrace is running */
+	if (ftrace_start_up) {
+		command = FTRACE_DISABLE_CALLS;
+		if (ftrace_graph_active)
+			command |= FTRACE_STOP_FUNC_RET;
+		ftrace_run_update_code(command);
+	}
+}
+#else
+# define ftrace_startup_sysctl()	do { } while (0)
+# define ftrace_shutdown_sysctl()	do { } while (0)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+static bool is_permanent_ops_registered(void)
+{
+	struct ftrace_ops *op;
+
+	do_for_each_ftrace_op(op, ftrace_ops_list) {
+		if (op->flags & FTRACE_OPS_FL_PERMANENT)
+			return true;
+	} while_for_each_ftrace_op(op);
+
+	return false;
+}
+
+static int
+ftrace_enable_sysctl(const struct ctl_table *table, int write,
+		     void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	guard(mutex)(&ftrace_lock);
+
+	if (unlikely(ftrace_disabled))
+		return -ENODEV;
 
 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
 	if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
-		goto out;
-
-	last_ftrace_enabled = !!ftrace_enabled;
+		return ret;
 
 	if (ftrace_enabled) {
@@ -6702,13 +8995,35 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 		ftrace_startup_sysctl();
 
 	} else {
+		if (is_permanent_ops_registered()) {
+			ftrace_enabled = true;
+			return -EBUSY;
+		}
+
 		/* stopping ftrace calls (just send to ftrace_stub) */
 		ftrace_trace_function = ftrace_stub;
 
 		ftrace_shutdown_sysctl();
 	}
 
- out:
-	mutex_unlock(&ftrace_lock);
-	return ret;
+	last_ftrace_enabled = !!ftrace_enabled;
+	return 0;
 }
+
+static const struct ctl_table ftrace_sysctls[] = {
+	{
+		.procname	= "ftrace_enabled",
+		.data		= &ftrace_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= ftrace_enable_sysctl,
+	},
+};
+
+static int __init ftrace_sysctl_init(void)
+{
+	register_sysctl_init("kernel", ftrace_sysctls);
+	return 0;
+}
+late_initcall(ftrace_sysctl_init);
+#endif
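
Tying together ftrace_lookup_symbols() and ftrace_set_filter_ips() from the hunks above, a hypothetical helper sketch; the symbol list is illustrative and, as the kernel-doc requires, must stay alphabetically sorted:

/* Sketch only, not part of the diff. */
#include <linux/ftrace.h>
#include <linux/kernel.h>

/* Sorted alphabetically, as ftrace_lookup_symbols() requires. */
static const char *syms[] = { "schedule", "wake_up_process" };

static int resolve_and_filter(struct ftrace_ops *ops)
{
	unsigned long addrs[ARRAY_SIZE(syms)];
	int ret;

	/* Returns 0 only when every symbol resolved to an ftrace location. */
	ret = ftrace_lookup_symbols(syms, ARRAY_SIZE(syms), addrs);
	if (ret)
		return ret;

	/* Install all addresses in one call; reset=1 drops previous filters. */
	return ftrace_set_filter_ips(ops, addrs, ARRAY_SIZE(syms), 0, 1);
}

This batch path avoids taking the filter locks once per address, which is the motivation the multi-ip API states for replacing repeated ftrace_set_filter_ip() calls.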
