Diffstat (limited to 'kernel/tracepoint.c')
-rw-r--r--  kernel/tracepoint.c | 407
1 file changed, 285 insertions, 122 deletions
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 7261fa0f5e3c..1848ce7e2976 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -15,11 +15,49 @@
 #include <linux/sched/task.h>
 #include <linux/static_key.h>
 
+enum tp_func_state {
+	TP_FUNC_0,
+	TP_FUNC_1,
+	TP_FUNC_2,
+	TP_FUNC_N,
+};
+
 extern tracepoint_ptr_t __start___tracepoints_ptrs[];
 extern tracepoint_ptr_t __stop___tracepoints_ptrs[];
 
-DEFINE_SRCU(tracepoint_srcu);
-EXPORT_SYMBOL_GPL(tracepoint_srcu);
+enum tp_transition_sync {
+	TP_TRANSITION_SYNC_1_0_1,
+	TP_TRANSITION_SYNC_N_2_1,
+
+	_NR_TP_TRANSITION_SYNC,
+};
+
+struct tp_transition_snapshot {
+	unsigned long rcu;
+	bool ongoing;
+};
+
+/* Protected by tracepoints_mutex */
+static struct tp_transition_snapshot tp_transition_snapshot[_NR_TP_TRANSITION_SYNC];
+
+static void tp_rcu_get_state(enum tp_transition_sync sync)
+{
+	struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];
+
+	/* Keep the latest get_state snapshot. */
+	snapshot->rcu = get_state_synchronize_rcu();
+	snapshot->ongoing = true;
+}
+
+static void tp_rcu_cond_sync(enum tp_transition_sync sync)
+{
+	struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];
+
+	if (!snapshot->ongoing)
+		return;
+	cond_synchronize_rcu(snapshot->rcu);
+	snapshot->ongoing = false;
+}
 
 /* Set to 1 to enable tracepoint debug output */
 static const int tracepoint_debug;
@@ -40,9 +78,6 @@ static LIST_HEAD(tracepoint_module_list);
  */
 static DEFINE_MUTEX(tracepoints_mutex);
 
-static struct rcu_head *early_probes;
-static bool ok_to_free_tracepoints;
-
 /*
  * Note about RCU :
  * It is used to delay the free of multiple probes array until a quiescent
@@ -53,6 +88,12 @@ struct tp_probes {
 	struct tracepoint_func probes[];
 };
 
+/* Called in removal of a func but failed to allocate a new tp_funcs */
+static void tp_stub_func(void)
+{
+	return;
+}
+
 static inline void *allocate_probes(int count)
 {
 	struct tp_probes *p = kmalloc(struct_size(p, probes, count),
@@ -60,57 +101,21 @@ static inline void *allocate_probes(int count)
 	return p == NULL ? NULL : p->probes;
 }
 
-static void srcu_free_old_probes(struct rcu_head *head)
-{
-	kfree(container_of(head, struct tp_probes, rcu));
-}
-
 static void rcu_free_old_probes(struct rcu_head *head)
 {
-	call_srcu(&tracepoint_srcu, head, srcu_free_old_probes);
-}
-
-static __init int release_early_probes(void)
-{
-	struct rcu_head *tmp;
-
-	ok_to_free_tracepoints = true;
-
-	while (early_probes) {
-		tmp = early_probes;
-		early_probes = tmp->next;
-		call_rcu(tmp, rcu_free_old_probes);
-	}
-
-	return 0;
+	kfree(container_of(head, struct tp_probes, rcu));
 }
 
-/* SRCU is initialized at core_initcall */
-postcore_initcall(release_early_probes);
-
-static inline void release_probes(struct tracepoint_func *old)
+static inline void release_probes(struct tracepoint *tp, struct tracepoint_func *old)
 {
 	if (old) {
 		struct tp_probes *tp_probes = container_of(old,
			struct tp_probes, probes[0]);
 
-		/*
-		 * We can't free probes if SRCU is not initialized yet.
-		 * Postpone the freeing till after SRCU is initialized.
-		 */
-		if (unlikely(!ok_to_free_tracepoints)) {
-			tp_probes->rcu.next = early_probes;
-			early_probes = &tp_probes->rcu;
-			return;
-		}
-
-		/*
-		 * Tracepoint probes are protected by both sched RCU and SRCU,
-		 * by calling the SRCU callback in the sched RCU callback we
-		 * cover both cases. So let us chain the SRCU and sched RCU
-		 * callbacks to wait for both grace periods.
-		 */
-		call_rcu(&tp_probes->rcu, rcu_free_old_probes);
+		if (tracepoint_is_faultable(tp))
+			call_rcu_tasks_trace(&tp_probes->rcu, rcu_free_old_probes);
+		else
+			call_rcu(&tp_probes->rcu, rcu_free_old_probes);
 	}
 }
 
@@ -130,8 +135,9 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func,
 	 int prio)
 {
 	struct tracepoint_func *old, *new;
-	int nr_probes = 0;
-	int pos = -1;
+	int iter_probes;	/* Iterate over old probe array. */
+	int nr_probes = 0;	/* Counter for probes */
+	int pos = -1;		/* Insertion position into new array */
 
 	if (WARN_ON(!tp_func->func))
 		return ERR_PTR(-EINVAL);
@@ -140,13 +146,13 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func,
 	old = *funcs;
 	if (old) {
 		/* (N -> N+1), (N != 0, 1) probes */
-		for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
-			/* Insert before probes of lower priority */
-			if (pos < 0 && old[nr_probes].prio < prio)
-				pos = nr_probes;
-			if (old[nr_probes].func == tp_func->func &&
-			    old[nr_probes].data == tp_func->data)
+		for (iter_probes = 0; old[iter_probes].func; iter_probes++) {
+			if (old[iter_probes].func == tp_stub_func)
+				continue;	/* Skip stub functions. */
+			if (old[iter_probes].func == tp_func->func &&
+			    old[iter_probes].data == tp_func->data)
 				return ERR_PTR(-EEXIST);
+			nr_probes++;
 		}
 	}
 	/* + 2 : one for new probe, one for NULL func */
@@ -154,20 +160,24 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func,
 	if (new == NULL)
 		return ERR_PTR(-ENOMEM);
 	if (old) {
-		if (pos < 0) {
-			pos = nr_probes;
-			memcpy(new, old, nr_probes * sizeof(struct tracepoint_func));
-		} else {
-			/* Copy higher priority probes ahead of the new probe */
-			memcpy(new, old, pos * sizeof(struct tracepoint_func));
-			/* Copy the rest after it. */
-			memcpy(new + pos + 1, old + pos,
-			       (nr_probes - pos) * sizeof(struct tracepoint_func));
+		nr_probes = 0;
+		for (iter_probes = 0; old[iter_probes].func; iter_probes++) {
+			if (old[iter_probes].func == tp_stub_func)
+				continue;
+			/* Insert before probes of lower priority */
+			if (pos < 0 && old[iter_probes].prio < prio)
+				pos = nr_probes++;
+			new[nr_probes++] = old[iter_probes];
 		}
-	} else
+		if (pos < 0)
+			pos = nr_probes++;
+		/* nr_probes now points to the end of the new array */
+	} else {
 		pos = 0;
+		nr_probes = 1; /* must point at end of array */
+	}
 	new[pos] = *tp_func;
-	new[nr_probes + 1].func = NULL;
+	new[nr_probes].func = NULL;
 	*funcs = new;
 	debug_print_probes(*funcs);
 	return old;
@@ -188,8 +198,9 @@ static void *func_remove(struct tracepoint_func **funcs,
 	/* (N -> M), (N > 1, M >= 0) probes */
 	if (tp_func->func) {
 		for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
-			if (old[nr_probes].func == tp_func->func &&
-			     old[nr_probes].data == tp_func->data)
+			if ((old[nr_probes].func == tp_func->func &&
+			     old[nr_probes].data == tp_func->data) ||
+			    old[nr_probes].func == tp_stub_func)
 				nr_del++;
 		}
 	}
@@ -208,39 +219,55 @@ static void *func_remove(struct tracepoint_func **funcs,
 		/* N -> M, (N > 1, M > 0) */
 		/* + 1 for NULL */
 		new = allocate_probes(nr_probes - nr_del + 1);
-		if (new == NULL)
-			return ERR_PTR(-ENOMEM);
-		for (i = 0; old[i].func; i++)
-			if (old[i].func != tp_func->func
-					|| old[i].data != tp_func->data)
-				new[j++] = old[i];
-		new[nr_probes - nr_del].func = NULL;
-		*funcs = new;
+		if (new) {
+			for (i = 0; old[i].func; i++) {
+				if ((old[i].func != tp_func->func ||
+				     old[i].data != tp_func->data) &&
+				    old[i].func != tp_stub_func)
+					new[j++] = old[i];
+			}
+			new[nr_probes - nr_del].func = NULL;
+			*funcs = new;
+		} else {
+			/*
+			 * Failed to allocate, replace the old function
+			 * with calls to tp_stub_func.
+			 */
+			for (i = 0; old[i].func; i++) {
+				if (old[i].func == tp_func->func &&
+				    old[i].data == tp_func->data)
+					WRITE_ONCE(old[i].func, tp_stub_func);
+			}
+			*funcs = old;
+		}
 	}
 	debug_print_probes(*funcs);
 	return old;
 }
 
-static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs, bool sync)
+/*
+ * Count the number of functions (enum tp_func_state) in a tp_funcs array.
+ */
+static enum tp_func_state nr_func_state(const struct tracepoint_func *tp_funcs)
+{
+	if (!tp_funcs)
+		return TP_FUNC_0;
+	if (!tp_funcs[1].func)
+		return TP_FUNC_1;
+	if (!tp_funcs[2].func)
+		return TP_FUNC_2;
+	return TP_FUNC_N;	/* 3 or more */
+}
+
+static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs)
 {
 	void *func = tp->iterator;
 
 	/* Synthetic events do not have static call sites */
 	if (!tp->static_call_key)
 		return;
-
-	if (!tp_funcs[1].func) {
+	if (nr_func_state(tp_funcs) == TP_FUNC_1)
 		func = tp_funcs[0].func;
-		/*
-		 * If going from the iterator back to a single caller,
-		 * we need to synchronize with __DO_TRACE to make sure
-		 * that the data passed to the callback is the one that
-		 * belongs to that callback.
-		 */
-		if (sync)
-			tracepoint_synchronize_unregister();
-	}
-
 	__static_call_update(tp->static_call_key, tp->static_call_tramp, func);
 }
 
@@ -248,13 +275,14 @@ static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func
  * Add the probe function to a tracepoint.
  */
 static int tracepoint_add_func(struct tracepoint *tp,
-			       struct tracepoint_func *func, int prio)
+			       struct tracepoint_func *func, int prio,
+			       bool warn)
 {
 	struct tracepoint_func *old, *tp_funcs;
 	int ret;
 
-	if (tp->regfunc && !static_key_enabled(&tp->key)) {
-		ret = tp->regfunc();
+	if (tp->ext && tp->ext->regfunc && !static_key_enabled(&tp->key)) {
+		ret = tp->ext->regfunc();
 		if (ret < 0)
 			return ret;
 	}
@@ -263,7 +291,7 @@ static int tracepoint_add_func(struct tracepoint *tp,
 			lockdep_is_held(&tracepoints_mutex));
 	old = func_add(&tp_funcs, func, prio);
 	if (IS_ERR(old)) {
-		WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);
+		WARN_ON_ONCE(warn && PTR_ERR(old) != -ENOMEM);
 		return PTR_ERR(old);
 	}
 
@@ -273,11 +301,43 @@ static int tracepoint_add_func(struct tracepoint *tp,
 	 * a pointer to it. This array is referenced by __DO_TRACE from
 	 * include/linux/tracepoint.h using rcu_dereference_sched().
 	 */
-	rcu_assign_pointer(tp->funcs, tp_funcs);
-	tracepoint_update_call(tp, tp_funcs, false);
-	static_key_enable(&tp->key);
+	switch (nr_func_state(tp_funcs)) {
+	case TP_FUNC_1:		/* 0->1 */
+		/*
+		 * Make sure new static func never uses old data after a
+		 * 1->0->1 transition sequence.
+		 */
+		tp_rcu_cond_sync(TP_TRANSITION_SYNC_1_0_1);
+		/* Set static call to first function */
+		tracepoint_update_call(tp, tp_funcs);
+		/* Both iterator and static call handle NULL tp->funcs */
+		rcu_assign_pointer(tp->funcs, tp_funcs);
+		static_branch_enable(&tp->key);
+		break;
+	case TP_FUNC_2:		/* 1->2 */
+		/* Set iterator static call */
+		tracepoint_update_call(tp, tp_funcs);
+		/*
+		 * Iterator callback installed before updating tp->funcs.
+		 * Requires ordering between RCU assign/dereference and
+		 * static call update/call.
+		 */
+		fallthrough;
+	case TP_FUNC_N:		/* N->N+1 (N>1) */
+		rcu_assign_pointer(tp->funcs, tp_funcs);
+		/*
+		 * Make sure static func never uses incorrect data after a
+		 * N->...->2->1 (N>1) transition sequence.
+		 */
+		if (tp_funcs[0].data != old[0].data)
+			tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
 
-	release_probes(old);
+	release_probes(tp, old);
 	return 0;
 }
 
@@ -295,28 +355,90 @@ static int tracepoint_remove_func(struct tracepoint *tp,
 	tp_funcs = rcu_dereference_protected(tp->funcs,
 			lockdep_is_held(&tracepoints_mutex));
 	old = func_remove(&tp_funcs, func);
-	if (IS_ERR(old)) {
-		WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);
+	if (WARN_ON_ONCE(IS_ERR(old)))
 		return PTR_ERR(old);
-	}
 
-	if (!tp_funcs) {
-		/* Removed last function */
-		if (tp->unregfunc && static_key_enabled(&tp->key))
-			tp->unregfunc();
+	if (tp_funcs == old)
+		/* Failed allocating new tp_funcs, replaced func with stub */
+		return 0;
 
-		static_key_disable(&tp->key);
+	switch (nr_func_state(tp_funcs)) {
+	case TP_FUNC_0:		/* 1->0 */
+		/* Removed last function */
+		if (tp->ext && tp->ext->unregfunc && static_key_enabled(&tp->key))
+			tp->ext->unregfunc();
+		static_branch_disable(&tp->key);
+		/* Set iterator static call */
+		tracepoint_update_call(tp, tp_funcs);
+		/* Both iterator and static call handle NULL tp->funcs */
+		rcu_assign_pointer(tp->funcs, NULL);
+		/*
+		 * Make sure new static func never uses old data after a
+		 * 1->0->1 transition sequence.
+		 */
+		tp_rcu_get_state(TP_TRANSITION_SYNC_1_0_1);
+		break;
+	case TP_FUNC_1:		/* 2->1 */
 		rcu_assign_pointer(tp->funcs, tp_funcs);
-	} else {
+		/*
+		 * Make sure static func never uses incorrect data after a
+		 * N->...->2->1 (N>2) transition sequence. If the first
+		 * element's data has changed, then force the synchronization
+		 * to prevent current readers that have loaded the old data
+		 * from calling the new function.
+		 */
+		if (tp_funcs[0].data != old[0].data)
+			tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
+		tp_rcu_cond_sync(TP_TRANSITION_SYNC_N_2_1);
+		/* Set static call to first function */
+		tracepoint_update_call(tp, tp_funcs);
+		break;
+	case TP_FUNC_2:		/* N->N-1 (N>2) */
+		fallthrough;
+	case TP_FUNC_N:
 		rcu_assign_pointer(tp->funcs, tp_funcs);
-		tracepoint_update_call(tp, tp_funcs,
-				       tp_funcs[0].func != old[0].func);
+		/*
+		 * Make sure static func never uses incorrect data after a
+		 * N->...->2->1 (N>2) transition sequence.
+		 */
+		if (tp_funcs[0].data != old[0].data)
+			tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
 	}
 
-	release_probes(old);
+	release_probes(tp, old);
 	return 0;
 }
 
 /**
+ * tracepoint_probe_register_prio_may_exist - Connect a probe to a tracepoint with priority
+ * @tp: tracepoint
+ * @probe: probe handler
+ * @data: tracepoint data
+ * @prio: priority of this function over other registered functions
+ *
+ * Same as tracepoint_probe_register_prio() except that it will not warn
+ * if the tracepoint is already registered.
+ */
+int tracepoint_probe_register_prio_may_exist(struct tracepoint *tp, void *probe,
+					     void *data, int prio)
+{
+	struct tracepoint_func tp_func;
+	int ret;
+
+	mutex_lock(&tracepoints_mutex);
+	tp_func.func = probe;
+	tp_func.data = data;
+	tp_func.prio = prio;
+	ret = tracepoint_add_func(tp, &tp_func, prio, false);
+	mutex_unlock(&tracepoints_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio_may_exist);
+
+/**
  * tracepoint_probe_register_prio - Connect a probe to a tracepoint with priority
  * @tp: tracepoint
  * @probe: probe handler
@@ -339,7 +461,7 @@ int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe,
 	tp_func.func = probe;
 	tp_func.data = data;
 	tp_func.prio = prio;
-	ret = tracepoint_add_func(tp, &tp_func, prio);
+	ret = tracepoint_add_func(tp, &tp_func, prio, true);
 	mutex_unlock(&tracepoints_mutex);
 	return ret;
 }
@@ -402,13 +524,14 @@ static void for_each_tracepoint_range(
 bool trace_module_has_bad_taint(struct module *mod)
 {
 	return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP) |
-				(1 << TAINT_UNSIGNED_MODULE));
+			       (1 << TAINT_UNSIGNED_MODULE) | (1 << TAINT_TEST) |
+			       (1 << TAINT_LIVEPATCH));
 }
 
 static BLOCKING_NOTIFIER_HEAD(tracepoint_notify_list);
 
 /**
- * register_tracepoint_notifier - register tracepoint coming/going notifier
+ * register_tracepoint_module_notifier - register tracepoint coming/going notifier
  * @nb: notifier block
  *
  * Notifiers registered with this function are called on module
@@ -434,7 +557,7 @@ end:
 EXPORT_SYMBOL_GPL(register_tracepoint_module_notifier);
 
 /**
- * unregister_tracepoint_notifier - unregister tracepoint coming/going notifier
+ * unregister_tracepoint_module_notifier - unregister tracepoint coming/going notifier
  * @nb: notifier block
  *
  * The notifier block callback should expect a "struct tp_module" data
@@ -470,7 +593,6 @@ static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv)
 static int tracepoint_module_coming(struct module *mod)
 {
 	struct tp_module *tp_mod;
-	int ret = 0;
 
 	if (!mod->num_tracepoints)
 		return 0;
@@ -478,23 +600,22 @@ static int tracepoint_module_coming(struct module *mod)
 	/*
 	 * We skip modules that taint the kernel, especially those with different
	 * module headers (for forced load), to make sure we don't cause a crash.
-	 * Staging, out-of-tree, and unsigned GPL modules are fine.
+	 * Staging, out-of-tree, unsigned GPL, and test modules are fine.
 	 */
 	if (trace_module_has_bad_taint(mod))
 		return 0;
-	mutex_lock(&tracepoint_module_list_mutex);
+
 	tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL);
-	if (!tp_mod) {
-		ret = -ENOMEM;
-		goto end;
-	}
+	if (!tp_mod)
+		return -ENOMEM;
 	tp_mod->mod = mod;
+
+	mutex_lock(&tracepoint_module_list_mutex);
 	list_add_tail(&tp_mod->list, &tracepoint_module_list);
 	blocking_notifier_call_chain(&tracepoint_notify_list,
 			MODULE_STATE_COMING, tp_mod);
-end:
 	mutex_unlock(&tracepoint_module_list_mutex);
-	return ret;
+	return 0;
 }
 
 static void tracepoint_module_going(struct module *mod)
@@ -567,6 +688,48 @@ static __init int init_tracepoints(void)
 	return ret;
 }
 __initcall(init_tracepoints);
+
+/**
+ * for_each_tracepoint_in_module - iteration on all tracepoints in a module
+ * @mod: module
+ * @fct: callback
+ * @priv: private data
+ */
+void for_each_tracepoint_in_module(struct module *mod,
+				   void (*fct)(struct tracepoint *tp,
+					       struct module *mod, void *priv),
+				   void *priv)
+{
+	tracepoint_ptr_t *begin, *end, *iter;
+
+	lockdep_assert_held(&tracepoint_module_list_mutex);
+
+	if (!mod)
+		return;
+
+	begin = mod->tracepoints_ptrs;
+	end = mod->tracepoints_ptrs + mod->num_tracepoints;
+
+	for (iter = begin; iter < end; iter++)
+		fct(tracepoint_ptr_deref(iter), mod, priv);
+}
+
+/**
+ * for_each_module_tracepoint - iteration on all tracepoints in all modules
+ * @fct: callback
+ * @priv: private data
+ */
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *tp,
+					    struct module *mod, void *priv),
+				void *priv)
+{
+	struct tp_module *tp_mod;
+
+	mutex_lock(&tracepoint_module_list_mutex);
+	list_for_each_entry(tp_mod, &tracepoint_module_list, list)
+		for_each_tracepoint_in_module(tp_mod->mod, fct, priv);
+	mutex_unlock(&tracepoint_module_list_mutex);
+}
 #endif /* CONFIG_MODULES */
 
 /**
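The transition handling above hinges on `nr_func_state()` classifying the probe array by occupied slots, while `func_add()`/`func_remove()` count only live (non-stub) entries. The distinction matters on the allocation-failure path: a removed probe is overwritten in place with `tp_stub_func`, so the array keeps its length and `tracepoint_remove_func()` returns early rather than re-deriving the state. A minimal user-space sketch of that classification follows (simplified stand-in types; `count_live`, `probe_a`, and the demo array are hypothetical illustrations, not kernel code):

```c
/* tp_funcs_demo.c — build with: cc -Wall -o tp_funcs_demo tp_funcs_demo.c */
#include <stdio.h>

/* Simplified stand-in for the kernel's struct tracepoint_func. */
struct tracepoint_func {
	void *func;
	void *data;
	int prio;
};

enum tp_func_state { TP_FUNC_0, TP_FUNC_1, TP_FUNC_2, TP_FUNC_N };

static void tp_stub_func(void) { }
static void probe_a(void) { }

/* Mirrors nr_func_state(): classifies by occupied slots, stubs included. */
static enum tp_func_state nr_func_state(const struct tracepoint_func *tp_funcs)
{
	if (!tp_funcs)
		return TP_FUNC_0;
	if (!tp_funcs[1].func)
		return TP_FUNC_1;
	if (!tp_funcs[2].func)
		return TP_FUNC_2;
	return TP_FUNC_N;	/* 3 or more */
}

/* Counts the way func_add() does: tp_stub_func entries are skipped. */
static int count_live(const struct tracepoint_func *funcs)
{
	int n = 0;

	for (int i = 0; funcs[i].func; i++)
		if (funcs[i].func != (void *)tp_stub_func)
			n++;
	return n;
}

int main(void)
{
	/*
	 * Array as it could look after func_remove() failed to allocate:
	 * the removed probe was replaced in place by tp_stub_func.
	 */
	struct tracepoint_func funcs[] = {
		{ .func = (void *)probe_a, .prio = 10 },
		{ .func = (void *)tp_stub_func },
		{ .func = NULL },
	};

	/* Prints "state=2 live=1": two slots occupied, one live probe. */
	printf("state=%d live=%d\n", nr_func_state(funcs), count_live(funcs));
	return 0;
}
```

The mismatch the demo prints is exactly why the stub path short-circuits in `tracepoint_remove_func()`: a stubbed array must not drive a state transition, only a later successful add or remove may compact it.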
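The `warn` flag threaded into `tracepoint_add_func()` exists so the new `tracepoint_probe_register_prio_may_exist()` can treat a duplicate `(func, data)` registration as an ordinary `-EEXIST` instead of tripping `WARN_ON_ONCE()`. A hedged kernel-side sketch of the intended call pattern (`attach_probe` and `my_probe` are hypothetical; `TRACEPOINT_DEFAULT_PRIO` comes from include/linux/tracepoint.h):

```c
/* Sketch only: my_probe's trailing arguments depend on the tracepoint. */
static void my_probe(void *data /* , tracepoint-specific args */)
{
}

static int attach_probe(struct tracepoint *tp, void *data)
{
	int ret;

	/*
	 * Unlike tracepoint_probe_register_prio(), a duplicate
	 * registration returns -EEXIST without a warning, so callers
	 * that can race on the same (func, data) pair may tolerate it.
	 */
	ret = tracepoint_probe_register_prio_may_exist(tp, my_probe, data,
						       TRACEPOINT_DEFAULT_PRIO);
	if (ret == -EEXIST)
		ret = 0;	/* already attached: treat as success */
	return ret;
}
```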