diff options
Diffstat (limited to 'kernel/trace/trace.h')
| -rw-r--r-- | kernel/trace/trace.h | 583 |
1 file changed, 430 insertions, 153 deletions
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4eb6d6b97a9f..b6d42fe06115 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -21,11 +21,13 @@ #include <linux/workqueue.h> #include <linux/ctype.h> #include <linux/once_lite.h> +#include <linux/ftrace_regs.h> +#include <linux/llist.h> #include "pid_list.h" #ifdef CONFIG_FTRACE_SYSCALLS -#include <asm/unistd.h> /* For NR_SYSCALLS */ +#include <asm/unistd.h> /* For NR_syscalls */ #include <asm/syscall.h> /* some archs define it here */ #endif @@ -46,6 +48,7 @@ enum trace_type { TRACE_BRANCH, TRACE_GRAPH_RET, TRACE_GRAPH_ENT, + TRACE_GRAPH_RETADDR_ENT, TRACE_USER_STACK, TRACE_BLK, TRACE_BPUTS, @@ -77,6 +80,16 @@ enum trace_type { #undef __array #define __array(type, item, size) type item[size]; +/* + * For backward compatibility, older user space expects to see the + * kernel_stack event with a fixed size caller field. But today the fixed + * size is ignored by the kernel, and the real structure is dynamic. + * Expose to user space: "unsigned long caller[8];" but the real structure + * will be "unsigned long caller[] __counted_by(size)" + */ +#undef __stack_array +#define __stack_array(type, item, size, field) type item[] __counted_by(field); + #undef __array_desc #define __array_desc(type, container, item, size) @@ -113,6 +126,14 @@ enum trace_type { #define MEM_FAIL(condition, fmt, ...) 
\ DO_ONCE_LITE_IF(condition, pr_err, "ERROR: " fmt, ##__VA_ARGS__) +#define FAULT_STRING "(fault)" + +#define HIST_STACKTRACE_DEPTH 16 +#define HIST_STACKTRACE_SIZE (HIST_STACKTRACE_DEPTH * sizeof(unsigned long)) +#define HIST_STACKTRACE_SKIP 5 + +#define SYSCALL_FAULT_USER_MAX 165 + /* * syscalls are special, and need special handling, this is why * they are not included in trace_entries.h @@ -144,6 +165,17 @@ struct kretprobe_trace_entry_head { unsigned long ret_ip; }; +struct fentry_trace_entry_head { + struct trace_entry ent; + unsigned long ip; +}; + +struct fexit_trace_entry_head { + struct trace_entry ent; + unsigned long func; + unsigned long ret_ip; +}; + #define TRACE_BUF_SIZE 1024 struct trace_array; @@ -154,8 +186,7 @@ struct trace_array; * the trace, etc.) */ struct trace_array_cpu { - atomic_t disabled; - void *buffer_page; /* ring buffer spare */ + local_t disabled; unsigned long entries; unsigned long saved_latency; @@ -188,7 +219,7 @@ struct array_buffer { int cpu; }; -#define TRACE_FLAGS_MAX_SIZE 32 +#define TRACE_FLAGS_MAX_SIZE 64 struct trace_options { struct tracer *tracer; @@ -284,6 +315,11 @@ struct trace_func_repeats { u64 ts_last_call; }; +struct trace_module_delta { + struct rcu_head rcu; + long delta[]; +}; + /* * The trace array - an array of per-CPU trace arrays. This is the * highest level data structure that individual tracers deal with. 
@@ -307,8 +343,8 @@ struct trace_array { */ struct array_buffer max_buffer; bool allocated_snapshot; -#endif -#ifdef CONFIG_TRACER_MAX_TRACE + spinlock_t snapshot_trigger_lock; + unsigned int snapshot; unsigned long max_latency; #ifdef CONFIG_FSNOTIFY struct dentry *d_max_latency; @@ -316,6 +352,18 @@ struct trace_array { struct irq_work fsnotify_irqwork; #endif #endif + /* The below is for memory mapped ring buffer */ + unsigned int mapped; + unsigned long range_addr_start; + unsigned long range_addr_size; + char *range_name; + long text_delta; + struct trace_module_delta *module_delta; + void *scratch; /* pointer in persistent memory */ + int scratch_size; + + int buffer_disabled; + struct trace_pid_list __rcu *filtered_pids; struct trace_pid_list __rcu *filtered_no_pids; /* @@ -332,12 +380,11 @@ struct trace_array { * CONFIG_TRACER_MAX_TRACE. */ arch_spinlock_t max_lock; - int buffer_disabled; #ifdef CONFIG_FTRACE_SYSCALLS int sys_refcount_enter; int sys_refcount_exit; - struct trace_event_file __rcu *enter_syscall_files[NR_syscalls]; - struct trace_event_file __rcu *exit_syscall_files[NR_syscalls]; + struct trace_event_file *enter_syscall_files[NR_syscalls]; + struct trace_event_file *exit_syscall_files[NR_syscalls]; #endif int stop_count; int clock_id; @@ -346,26 +393,38 @@ struct trace_array { int buffer_percent; unsigned int n_err_log_entries; struct tracer *current_trace; - unsigned int trace_flags; + struct tracer_flags *current_trace_flags; + u64 trace_flags; unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE]; unsigned int flags; raw_spinlock_t start_lock; + const char *system_names; struct list_head err_log; struct dentry *dir; struct dentry *options; struct dentry *percpu_dir; - struct dentry *event_dir; + struct eventfs_inode *event_dir; struct trace_options *topts; struct list_head systems; struct list_head events; + struct list_head marker_list; + struct list_head tracers; struct trace_event_file *trace_marker_file; cpumask_var_t tracing_cpumask; 
/* only trace on set CPUs */ + /* one per_cpu trace_pipe can be opened by only one user */ + cpumask_var_t pipe_cpumask; int ref; int trace_ref; +#ifdef CONFIG_MODULES + struct list_head mod_events; +#endif #ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops *ops; struct trace_pid_list __rcu *function_pids; struct trace_pid_list __rcu *function_no_pids; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + struct fgraph_ops *gops; +#endif #ifdef CONFIG_DYNAMIC_FTRACE /* All of these are protected by the ftrace_lock */ struct list_head func_probes; @@ -376,17 +435,37 @@ struct trace_array { int function_enabled; #endif int no_filter_buffering_ref; + unsigned int syscall_buf_sz; struct list_head hist_vars; #ifdef CONFIG_TRACER_SNAPSHOT struct cond_snapshot *cond_snapshot; #endif struct trace_func_repeats __percpu *last_func_repeats; + /* + * On boot up, the ring buffer is set to the minimum size, so that + * we do not waste memory on systems that are not using tracing. + */ + bool ring_buffer_expanded; }; enum { - TRACE_ARRAY_FL_GLOBAL = (1 << 0) + TRACE_ARRAY_FL_GLOBAL = BIT(0), + TRACE_ARRAY_FL_BOOT = BIT(1), + TRACE_ARRAY_FL_LAST_BOOT = BIT(2), + TRACE_ARRAY_FL_MOD_INIT = BIT(3), + TRACE_ARRAY_FL_MEMMAP = BIT(4), + TRACE_ARRAY_FL_VMALLOC = BIT(5), }; +#ifdef CONFIG_MODULES +bool module_exists(const char *module); +#else +static inline bool module_exists(const char *module) +{ + return false; +} +#endif + extern struct list_head ftrace_trace_arrays; extern struct mutex trace_types_lock; @@ -402,6 +481,8 @@ extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); extern bool trace_clock_in_ns(struct trace_array *tr); +extern unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr); + /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. 
@@ -466,6 +547,8 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ TRACE_GRAPH_ENT); \ + IF_ASSIGN(var, ent, struct fgraph_retaddr_ent_entry,\ + TRACE_GRAPH_RETADDR_ENT); \ IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct func_repeats_entry, \ @@ -555,9 +638,10 @@ struct tracer { u32 old_flags, u32 bit, int set); /* Return 0 if OK with change, else return non-zero */ int (*flag_changed)(struct trace_array *tr, - u32 mask, int set); + u64 mask, int set); struct tracer *next; struct tracer_flags *flags; + struct tracer_flags *default_flags; int enabled; bool print_max; bool allow_instances; @@ -577,21 +661,41 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void tracing_reset_online_cpus(struct array_buffer *buf); -void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); void tracing_reset_all_online_cpus_unlocked(void); int tracing_open_generic(struct inode *inode, struct file *filp); int tracing_open_generic_tr(struct inode *inode, struct file *filp); +int tracing_release_generic_tr(struct inode *inode, struct file *file); +int tracing_open_file_tr(struct inode *inode, struct file *filp); +int tracing_release_file_tr(struct inode *inode, struct file *filp); +int tracing_single_release_file_tr(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); bool tracer_tracing_is_on(struct trace_array *tr); void tracer_tracing_on(struct trace_array *tr); void tracer_tracing_off(struct trace_array *tr); +void tracer_tracing_disable(struct trace_array *tr); +void tracer_tracing_enable(struct trace_array *tr); struct dentry *trace_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); + +/** + * tracer_tracing_is_on_cpu - show real 
state of whether the ring buffer is enabled for a cpu + * @tr : the trace array whose ring buffer is checked + * @cpu: The cpu buffer to check if enabled + * + * Shows the real state of the per CPU buffer, i.e. whether it is enabled or not. + */ +static inline bool tracer_tracing_is_on_cpu(struct trace_array *tr, int cpu) +{ + if (tr->array_buffer.buffer) + return ring_buffer_record_is_on_cpu(tr->array_buffer.buffer, cpu); + return false; +} + int tracing_init_dentry(void); struct ring_buffer_event; @@ -602,6 +706,8 @@ trace_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long len, unsigned int trace_ctx); +int ring_buffer_meta_seq_init(struct file *file, struct trace_buffer *buffer, int cpu); + struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); @@ -613,8 +719,8 @@ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, bool trace_is_tracepoint_string(const char *str); const char *trace_event_format(struct trace_iterator *iter, const char *fmt); -void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, - va_list ap) __printf(2, 0); +char *trace_iter_expand_format(struct trace_iterator *iter); +bool ignore_event(struct trace_iterator *iter); int trace_empty(struct trace_iterator *iter); @@ -630,7 +736,8 @@ unsigned long trace_total_entries(struct trace_array *tr); void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned int trace_ctx); + unsigned int trace_ctx, + struct ftrace_regs *regs); void trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, @@ -639,9 +746,10 @@ void trace_latency_header(struct seq_file *m); void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); -void trace_graph_return(struct ftrace_graph_ret *trace); -int trace_graph_entry(struct ftrace_graph_ent *trace); -void set_graph_array(struct trace_array *tr); +void trace_graph_return(struct 
ftrace_graph_ret *trace, struct fgraph_ops *gops, + struct ftrace_regs *fregs); +int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops, + struct ftrace_regs *fregs); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); @@ -664,8 +772,6 @@ extern unsigned long tracing_thresh; /* PID filtering */ -extern int pid_max; - bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid); bool trace_ignore_this_task(struct trace_pid_list *filtered_pids, @@ -677,7 +783,6 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list, void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos); void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos); int trace_pid_show(struct seq_file *m, void *v); -void trace_free_pid_list(struct trace_pid_list *pid_list); int trace_pid_write(struct trace_pid_list *filtered_pids, struct trace_pid_list **new_pid_list, const char __user *ubuf, size_t cnt); @@ -718,10 +823,14 @@ extern void trace_find_cmdline(int pid, char comm[]); extern int trace_find_tgid(int pid); extern void trace_event_follow_fork(struct trace_array *tr, bool enable); +extern int trace_events_enabled(struct trace_array *tr, const char *system); + #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; extern unsigned long ftrace_number_of_pages; extern unsigned long ftrace_number_of_groups; +extern u64 ftrace_update_time; +extern u64 ftrace_total_mod_time; void ftrace_init_trace_array(struct trace_array *tr); #else static inline void ftrace_init_trace_array(struct trace_array *tr) { } @@ -731,7 +840,7 @@ extern int DYN_FTRACE_TEST_NAME(void); #define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2 extern int DYN_FTRACE_TEST_NAME2(void); -extern bool ring_buffer_expanded; +extern void trace_set_ring_buffer_expanded(struct trace_array *tr); extern bool tracing_selftest_disabled; #ifdef CONFIG_FTRACE_STARTUP_TEST @@ -769,13 +878,15 @@ static 
inline void __init disable_tracing_selftest(const char *reason) extern void *head_page(struct trace_array_cpu *data); extern unsigned long long ns2usecs(u64 nsec); -extern int -trace_vbprintk(unsigned long ip, const char *fmt, va_list args); -extern int -trace_vprintk(unsigned long ip, const char *fmt, va_list args); -extern int -trace_array_vprintk(struct trace_array *tr, - unsigned long ip, const char *fmt, va_list args); + +__printf(2, 0) +int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); +__printf(2, 0) +int trace_vprintk(unsigned long ip, const char *fmt, va_list args); +__printf(3, 0) +int trace_array_vprintk(struct trace_array *tr, + unsigned long ip, const char *fmt, va_list args); +__printf(3, 4) int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...); void trace_printk_seq(struct trace_seq *s); @@ -827,11 +938,13 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash) #define TRACE_GRAPH_PRINT_TAIL 0x100 #define TRACE_GRAPH_SLEEP_TIME 0x200 #define TRACE_GRAPH_GRAPH_TIME 0x400 +#define TRACE_GRAPH_PRINT_RETVAL 0x800 +#define TRACE_GRAPH_PRINT_RETVAL_HEX 0x1000 +#define TRACE_GRAPH_PRINT_RETADDR 0x2000 +#define TRACE_GRAPH_ARGS 0x4000 #define TRACE_GRAPH_PRINT_FILL_SHIFT 28 #define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT) -extern void ftrace_graph_sleep_time_control(bool enable); - #ifdef CONFIG_FUNCTION_PROFILER extern void ftrace_graph_graph_time_control(bool enable); #else @@ -848,15 +961,69 @@ extern void graph_trace_close(struct trace_iterator *iter); extern int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned int trace_ctx); +extern int __trace_graph_retaddr_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned int trace_ctx, + unsigned long retaddr, + struct ftrace_regs *fregs); extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, - unsigned int trace_ctx); 
+ unsigned int trace_ctx, + u64 calltime, u64 rettime); + +extern void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops); +extern int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops); +extern void free_fgraph_ops(struct trace_array *tr); + +enum { + TRACE_GRAPH_FL = 1, + + /* + * In the very unlikely case that an interrupt came in + * at a start of graph tracing, and we want to trace + * the function in that interrupt, the depth can be greater + * than zero, because of the preempted start of a previous + * trace. In an even more unlikely case, depth could be 2 + * if a softirq interrupted the start of graph tracing, + * followed by an interrupt preempting a start of graph + * tracing in the softirq, and depth can even be 3 + * if an NMI came in at the start of an interrupt function + * that preempted a softirq start of a function that + * preempted normal context!!!! Luckily, it can't be + * greater than 3, so the next two bits are a mask + * of what the depth is when we set TRACE_GRAPH_FL + */ + + TRACE_GRAPH_DEPTH_START_BIT, + TRACE_GRAPH_DEPTH_END_BIT, + + /* + * To implement set_graph_notrace, if this bit is set, we ignore + * function graph tracing of called functions, until the return + * function is called to clear it. 
+ */ + TRACE_GRAPH_NOTRACE_BIT, +}; + +#define TRACE_GRAPH_NOTRACE (1 << TRACE_GRAPH_NOTRACE_BIT) + +static inline unsigned long ftrace_graph_depth(unsigned long *task_var) +{ + return (*task_var >> TRACE_GRAPH_DEPTH_START_BIT) & 3; +} + +static inline void ftrace_graph_set_depth(unsigned long *task_var, int depth) +{ + *task_var &= ~(3 << TRACE_GRAPH_DEPTH_START_BIT); + *task_var |= (depth & 3) << TRACE_GRAPH_DEPTH_START_BIT; +} #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; -static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) +static inline int +ftrace_graph_addr(unsigned long *task_var, struct ftrace_graph_ent *trace) { unsigned long addr = trace->func; int ret = 0; @@ -878,13 +1045,12 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) } if (ftrace_lookup_ip(hash, addr)) { - /* * This needs to be cleared on the return functions * when the depth is zero. */ - trace_recursion_set(TRACE_GRAPH_BIT); - trace_recursion_set_depth(trace->depth); + *task_var |= TRACE_GRAPH_FL; + ftrace_graph_set_depth(task_var, trace->depth); /* * If no irqs are to be traced, but a set_graph_function @@ -903,11 +1069,14 @@ out: return ret; } -static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) +static inline void +ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftrace_graph_ret *trace) { - if (trace_recursion_test(TRACE_GRAPH_BIT) && - trace->depth == trace_recursion_depth()) - trace_recursion_clear(TRACE_GRAPH_BIT); + unsigned long *task_var = fgraph_get_task_var(gops); + + if ((*task_var & TRACE_GRAPH_FL) && + trace->depth == ftrace_graph_depth(task_var)) + *task_var &= ~TRACE_GRAPH_FL; } static inline int ftrace_graph_notrace_addr(unsigned long addr) @@ -933,7 +1102,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr) return ret; } #else -static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) 
+static inline int ftrace_graph_addr(unsigned long *task_var, struct ftrace_graph_ent *trace) { return 1; } @@ -942,27 +1111,39 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr) { return 0; } -static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) +static inline void ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftrace_graph_ret *trace) { } #endif /* CONFIG_DYNAMIC_FTRACE */ extern unsigned int fgraph_max_depth; +extern int fgraph_no_sleep_time; +extern bool fprofile_no_sleep_time; -static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace) +static inline bool +ftrace_graph_ignore_func(struct fgraph_ops *gops, struct ftrace_graph_ent *trace) { + unsigned long *task_var = fgraph_get_task_var(gops); + /* trace it when it is-nested-in or is a function enabled. */ - return !(trace_recursion_test(TRACE_GRAPH_BIT) || - ftrace_graph_addr(trace)) || + return !((*task_var & TRACE_GRAPH_FL) || + ftrace_graph_addr(task_var, trace)) || (trace->depth < 0) || (fgraph_max_depth && trace->depth >= fgraph_max_depth); } +void fgraph_init_ops(struct ftrace_ops *dst_ops, + struct ftrace_ops *src_ops); + #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags) { return TRACE_TYPE_UNHANDLED; } +static inline void free_fgraph_ops(struct trace_array *tr) { } +/* ftrace_ops may not be defined */ +#define init_array_fgraph_ops(tr, ops) do { } while (0) +#define allocate_fgraph_ops(tr, ops) ({ 0; }) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ extern struct list_head ftrace_pids; @@ -981,11 +1162,6 @@ struct ftrace_func_command { char *params, int enable); }; extern bool ftrace_filter_param __initdata; -static inline int ftrace_trace_task(struct trace_array *tr) -{ - return this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid) != - FTRACE_PID_IGNORE; -} extern int ftrace_is_dead(void); int ftrace_create_function_files(struct trace_array *tr, 
struct dentry *parent); @@ -993,6 +1169,7 @@ void ftrace_destroy_function_files(struct trace_array *tr); int ftrace_allocate_ftrace_ops(struct trace_array *tr); void ftrace_free_ftrace_ops(struct trace_array *tr); void ftrace_init_global_array_ops(struct trace_array *tr); +struct trace_array *trace_get_global_array(void); void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func); void ftrace_reset_array_ops(struct trace_array *tr); void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer); @@ -1002,10 +1179,6 @@ void ftrace_clear_pids(struct trace_array *tr); int init_function_trace(void); void ftrace_pid_follow_fork(struct trace_array *tr, bool enable); #else -static inline int ftrace_trace_task(struct trace_array *tr) -{ - return 1; -} static inline int ftrace_is_dead(void) { return 0; } static inline int ftrace_create_function_files(struct trace_array *tr, @@ -1118,6 +1291,7 @@ bool ftrace_event_is_function(struct trace_event_call *call); */ struct trace_parser { bool cont; + bool fail; char *buffer; unsigned idx; unsigned size; @@ -1125,7 +1299,7 @@ struct trace_parser { static inline bool trace_parser_loaded(struct trace_parser *parser) { - return (parser->idx != 0); + return !parser->fail && parser->idx != 0; } static inline bool trace_parser_cont(struct trace_parser *parser) @@ -1139,6 +1313,11 @@ static inline void trace_parser_clear(struct trace_parser *parser) parser->idx = 0; } +static inline void trace_parser_fail(struct trace_parser *parser) +{ + parser->fail = true; +} + extern int trace_parser_get_init(struct trace_parser *parser, int size); extern void trace_parser_put(struct trace_parser *parser); extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, @@ -1165,11 +1344,11 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, # define FUNCTION_FLAGS \ C(FUNCTION, "function-trace"), \ C(FUNC_FORK, "function-fork"), -# define FUNCTION_DEFAULT_FLAGS TRACE_ITER_FUNCTION 
+# define FUNCTION_DEFAULT_FLAGS TRACE_ITER(FUNCTION) #else # define FUNCTION_FLAGS # define FUNCTION_DEFAULT_FLAGS 0UL -# define TRACE_ITER_FUNC_FORK 0UL +# define TRACE_ITER_FUNC_FORK_BIT -1 #endif #ifdef CONFIG_STACKTRACE @@ -1179,6 +1358,24 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, # define STACK_FLAGS #endif +#ifdef CONFIG_FUNCTION_PROFILER +# define PROFILER_FLAGS \ + C(PROF_TEXT_OFFSET, "prof-text-offset"), +# ifdef CONFIG_FUNCTION_GRAPH_TRACER +# define FPROFILE_FLAGS \ + C(GRAPH_TIME, "graph-time"), +# define FPROFILE_DEFAULT_FLAGS TRACE_ITER(GRAPH_TIME) +# else +# define FPROFILE_FLAGS +# define FPROFILE_DEFAULT_FLAGS 0UL +# endif +#else +# define PROFILER_FLAGS +# define FPROFILE_FLAGS +# define FPROFILE_DEFAULT_FLAGS 0UL +# define TRACE_ITER_PROF_TEXT_OFFSET_BIT -1 +#endif + /* * trace_iterator_flags is an enumeration that defines bit * positions into trace_flags that controls the output. @@ -1195,6 +1392,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, C(HEX, "hex"), \ C(BIN, "bin"), \ C(BLOCK, "block"), \ + C(FIELDS, "fields"), \ C(PRINTK, "trace_printk"), \ C(ANNOTATE, "annotate"), \ C(USERSTACKTRACE, "userstacktrace"), \ @@ -1209,12 +1407,16 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, C(IRQ_INFO, "irq-info"), \ C(MARKERS, "markers"), \ C(EVENT_FORK, "event-fork"), \ + C(TRACE_PRINTK, "trace_printk_dest"), \ + C(COPY_MARKER, "copy_trace_marker"), \ C(PAUSE_ON_TRACE, "pause-on-trace"), \ C(HASH_PTR, "hash-ptr"), /* Print hashed pointer */ \ FUNCTION_FLAGS \ FGRAPH_FLAGS \ STACK_FLAGS \ - BRANCH_FLAGS + BRANCH_FLAGS \ + PROFILER_FLAGS \ + FPROFILE_FLAGS /* * By defining C, we can make TRACE_FLAGS a list of bit names @@ -1230,20 +1432,17 @@ enum trace_iterator_bits { }; /* - * By redefining C, we can make TRACE_FLAGS a list of masks that - * use the bits as defined above. + * And use TRACE_ITER(flag) to define the bit masks. 
*/ -#undef C -#define C(a, b) TRACE_ITER_##a = (1 << TRACE_ITER_##a##_BIT) - -enum trace_iterator_flags { TRACE_FLAGS }; +#define TRACE_ITER(flag) \ + (TRACE_ITER_##flag##_BIT < 0 ? 0 : 1ULL << (TRACE_ITER_##flag##_BIT)) /* * TRACE_ITER_SYM_MASK masks the options in trace_flags that * control the output of kernel symbols. */ #define TRACE_ITER_SYM_MASK \ - (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) + (TRACE_ITER(PRINT_PARENT)|TRACE_ITER(SYM_OFFSET)|TRACE_ITER(SYM_ADDR)) extern struct tracer nop_trace; @@ -1252,7 +1451,7 @@ extern int enable_branch_tracing(struct trace_array *tr); extern void disable_branch_tracing(void); static inline int trace_branch_enable(struct trace_array *tr) { - if (tr->trace_flags & TRACE_ITER_BRANCH) + if (tr->trace_flags & TRACE_ITER(BRANCH)) return enable_branch_tracing(tr); return 0; } @@ -1272,7 +1471,15 @@ static inline void trace_branch_disable(void) #endif /* CONFIG_BRANCH_TRACER */ /* set ring buffers to default size if not already done so */ -int tracing_update_buffers(void); +int tracing_update_buffers(struct trace_array *tr); + +union trace_synth_field { + u8 as_u8; + u16 as_u16; + u32 as_u32; + u64 as_u64; + struct trace_dynamic_info as_dynamic; +}; struct ftrace_event_field { struct list_head link; @@ -1281,7 +1488,9 @@ struct ftrace_event_field { int filter_type; int offset; int size; - int is_signed; + unsigned int is_signed:1; + unsigned int needs_test:1; + int len; }; struct prog_entry; @@ -1302,15 +1511,11 @@ struct trace_subsystem_dir { struct list_head list; struct event_subsystem *subsystem; struct trace_array *tr; - struct dentry *entry; + struct eventfs_inode *ei; int ref_count; int nr_events; }; -extern int call_filter_check_discard(struct trace_event_call *call, void *rec, - struct trace_buffer *buffer, - struct ring_buffer_event *event); - void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, @@ -1325,11 +1530,40 @@ 
static inline void trace_buffer_unlock_commit(struct trace_array *tr, trace_buffer_unlock_commit_regs(tr, buffer, event, trace_ctx, NULL); } +DECLARE_PER_CPU(bool, trace_taskinfo_save); +int trace_save_cmdline(struct task_struct *tsk); +int trace_create_savedcmd(void); +int trace_alloc_tgid_map(void); +void trace_free_saved_cmdlines_buffer(void); + +extern const struct file_operations tracing_saved_cmdlines_fops; +extern const struct file_operations tracing_saved_tgids_fops; +extern const struct file_operations tracing_saved_cmdlines_size_fops; + DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); DECLARE_PER_CPU(int, trace_buffered_event_cnt); void trace_buffered_event_disable(void); void trace_buffered_event_enable(void); +void early_enable_events(struct trace_array *tr, char *buf, bool disable_first); + +struct trace_user_buf; +struct trace_user_buf_info { + struct trace_user_buf __percpu *tbuf; + size_t size; + int ref; +}; + +typedef int (*trace_user_buf_copy)(char *dst, const char __user *src, + size_t size, void *data); +int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size); +int trace_user_fault_get(struct trace_user_buf_info *tinfo); +int trace_user_fault_put(struct trace_user_buf_info *tinfo); +void trace_user_fault_destroy(struct trace_user_buf_info *tinfo); +char *trace_user_fault_read(struct trace_user_buf_info *tinfo, + const char __user *ptr, size_t size, + trace_user_buf_copy copy_func, void *data); + static inline void __trace_event_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) @@ -1510,6 +1744,29 @@ static inline void *event_file_data(struct file *filp) extern struct mutex event_mutex; extern struct list_head ftrace_events; +/* + * When the trace_event_file is the filp->i_private pointer, + * it must be taken under the event_mutex lock, and then checked + * if the EVENT_FILE_FL_FREED flag is set. If it is, then the + * data pointed to by the trace_event_file can not be trusted. 
+ * + * Use the event_file_file() to access the trace_event_file from + * the filp the first time under the event_mutex and check for + * NULL. If it is needed to be retrieved again and the event_mutex + * is still held, then the event_file_data() can be used and it + * is guaranteed to be valid. + */ +static inline struct trace_event_file *event_file_file(struct file *filp) +{ + struct trace_event_file *file; + + lockdep_assert_held(&event_mutex); + file = READ_ONCE(file_inode(filp)->i_private); + if (!file || file->flags & EVENT_FILE_FL_FREED) + return NULL; + return file; +} + extern const struct file_operations event_trigger_fops; extern const struct file_operations event_hist_fops; extern const struct file_operations event_hist_debug_fops; @@ -1528,13 +1785,13 @@ extern void clear_event_triggers(struct trace_array *tr); enum { EVENT_TRIGGER_FL_PROBE = BIT(0), + EVENT_TRIGGER_FL_COUNT = BIT(1), }; struct event_trigger_data { unsigned long count; int ref; int flags; - struct event_trigger_ops *ops; struct event_command *cmd_ops; struct event_filter __rcu *filter; char *filter_str; @@ -1545,6 +1802,7 @@ struct event_trigger_data { char *name; struct list_head named_list; struct event_trigger_data *named_data; + struct llist_node llist; }; /* Avoid typos */ @@ -1559,6 +1817,10 @@ struct enable_trigger_data { bool hist; }; +bool event_trigger_count(struct event_trigger_data *data, + struct trace_buffer *buffer, void *rec, + struct ring_buffer_event *event); + extern int event_enable_trigger_print(struct seq_file *m, struct event_trigger_data *data); extern void event_enable_trigger_free(struct event_trigger_data *data); @@ -1572,6 +1834,9 @@ extern int event_enable_register_trigger(char *glob, extern void event_enable_unregister_trigger(char *glob, struct event_trigger_data *test, struct trace_event_file *file); +extern struct event_trigger_data * +trigger_data_alloc(struct event_command *cmd_ops, char *cmd, char *param, + void *private_data); extern void 
trigger_data_free(struct event_trigger_data *data); extern int event_trigger_init(struct event_trigger_data *data); extern int trace_event_trigger_enable_disable(struct trace_event_file *file, @@ -1598,11 +1863,6 @@ extern bool event_trigger_check_remove(const char *glob); extern bool event_trigger_empty_param(const char *param); extern int event_trigger_separate_filter(char *param_and_filter, char **param, char **filter, bool param_required); -extern struct event_trigger_data * -event_trigger_alloc(struct event_command *cmd_ops, - char *cmd, - char *param, - void *private_data); extern int event_trigger_parse_num(char *trigger, struct event_trigger_data *trigger_data); extern int event_trigger_set_filter(struct event_command *cmd_ops, @@ -1620,63 +1880,8 @@ extern void event_trigger_unregister(struct event_command *cmd_ops, char *glob, struct event_trigger_data *trigger_data); -/** - * struct event_trigger_ops - callbacks for trace event triggers - * - * The methods in this structure provide per-event trigger hooks for - * various trigger operations. - * - * The @init and @free methods are used during trigger setup and - * teardown, typically called from an event_command's @parse() - * function implementation. - * - * The @print method is used to print the trigger spec. - * - * The @trigger method is the function that actually implements the - * trigger and is called in the context of the triggering event - * whenever that event occurs. - * - * All the methods below, except for @init() and @free(), must be - * implemented. - * - * @trigger: The trigger 'probe' function called when the triggering - * event occurs. The data passed into this callback is the data - * that was supplied to the event_command @reg() function that - * registered the trigger (see struct event_command) along with - * the trace record, rec. - * - * @init: An optional initialization function called for the trigger - * when the trigger is registered (via the event_command reg() - * function). 
This can be used to perform per-trigger - * initialization such as incrementing a per-trigger reference - * count, for instance. This is usually implemented by the - * generic utility function @event_trigger_init() (see - * trace_event_triggers.c). - * - * @free: An optional de-initialization function called for the - * trigger when the trigger is unregistered (via the - * event_command @reg() function). This can be used to perform - * per-trigger de-initialization such as decrementing a - * per-trigger reference count and freeing corresponding trigger - * data, for instance. This is usually implemented by the - * generic utility function @event_trigger_free() (see - * trace_event_triggers.c). - * - * @print: The callback function invoked to have the trigger print - * itself. This is usually implemented by a wrapper function - * that calls the generic utility function @event_trigger_print() - * (see trace_event_triggers.c). - */ -struct event_trigger_ops { - void (*trigger)(struct event_trigger_data *data, - struct trace_buffer *buffer, - void *rec, - struct ring_buffer_event *rbe); - int (*init)(struct event_trigger_data *data); - void (*free)(struct event_trigger_data *data); - int (*print)(struct seq_file *m, - struct event_trigger_data *data); -}; +extern void event_file_get(struct trace_event_file *file); +extern void event_file_put(struct trace_event_file *file); /** * struct event_command - callbacks and data members for event commands @@ -1727,7 +1932,7 @@ struct event_trigger_ops { * * @reg: Adds the trigger to the list of triggers associated with the * event, and enables the event trigger itself, after - * initializing it (via the event_trigger_ops @init() function). + * initializing it (via the event_command @init() function). * This is also where commands can use the @trigger_type value to * make the decision as to whether or not multiple instances of * the trigger should be allowed. 
This is usually implemented by @@ -1736,7 +1941,7 @@ struct event_trigger_ops { * * @unreg: Removes the trigger from the list of triggers associated * with the event, and disables the event trigger itself, after - * initializing it (via the event_trigger_ops @free() function). + * initializing it (via the event_command @free() function). * This is usually implemented by the generic utility function * @unregister_trigger() (see trace_event_triggers.c). * @@ -1750,12 +1955,41 @@ struct event_trigger_ops { * ignored. This is usually implemented by the generic utility * function @set_trigger_filter() (see trace_event_triggers.c). * - * @get_trigger_ops: The callback function invoked to retrieve the - * event_trigger_ops implementation associated with the command. - * This callback function allows a single event_command to - * support multiple trigger implementations via different sets of - * event_trigger_ops, depending on the value of the @param - * string. + * All the methods below, except for @init() and @free(), must be + * implemented. + * + * @trigger: The trigger 'probe' function called when the triggering + * event occurs. The data passed into this callback is the data + * that was supplied to the event_command @reg() function that + * registered the trigger (see struct event_command) along with + * the trace record, rec. + * + * @count_func: If defined and a numeric parameter is passed to the + * trigger, then this function will be called before @trigger + * is called. If this function returns false, then @trigger is not + * executed. + * + * @init: An optional initialization function called for the trigger + * when the trigger is registered (via the event_command reg() + * function). This can be used to perform per-trigger + * initialization such as incrementing a per-trigger reference + * count, for instance. This is usually implemented by the + * generic utility function @event_trigger_init() (see + * trace_event_triggers.c). 
+ * + * @free: An optional de-initialization function called for the + * trigger when the trigger is unregistered (via the + * event_command @reg() function). This can be used to perform + * per-trigger de-initialization such as decrementing a + * per-trigger reference count and freeing corresponding trigger + * data, for instance. This is usually implemented by the + * generic utility function @event_trigger_free() (see + * trace_event_triggers.c). + * + * @print: The callback function invoked to have the trigger print + * itself. This is usually implemented by a wrapper function + * that calls the generic utility function @event_trigger_print() + * (see trace_event_triggers.c). */ struct event_command { struct list_head list; @@ -1776,7 +2010,18 @@ struct event_command { int (*set_filter)(char *filter_str, struct event_trigger_data *data, struct trace_event_file *file); - struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param); + void (*trigger)(struct event_trigger_data *data, + struct trace_buffer *buffer, + void *rec, + struct ring_buffer_event *rbe); + bool (*count_func)(struct event_trigger_data *data, + struct trace_buffer *buffer, + void *rec, + struct ring_buffer_event *rbe); + int (*init)(struct event_trigger_data *data); + void (*free)(struct event_trigger_data *data); + int (*print)(struct seq_file *m, + struct event_trigger_data *data); }; /** @@ -1797,7 +2042,7 @@ struct event_command { * either committed or discarded. At that point, if any commands * have deferred their triggers, those commands are finally * invoked following the close of the current event. In other - * words, if the event_trigger_ops @func() probe implementation + * words, if the event_command @func() probe implementation * itself logs to the trace buffer, this flag should be set, * otherwise it can be left unspecified. 
* @@ -1839,8 +2084,8 @@ extern const char *__stop___tracepoint_str[]; void trace_printk_control(bool enabled); void trace_printk_start_comm(void); -int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); -int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); +int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set); +int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled); /* Used from boot time tracer */ extern int trace_set_options(struct trace_array *tr, char *option); @@ -1906,24 +2151,28 @@ static inline const char *get_syscall_name(int syscall) #ifdef CONFIG_EVENT_TRACING void trace_event_init(void); -void trace_event_eval_update(struct trace_eval_map **map, int len); +void trace_event_update_all(struct trace_eval_map **map, int len); /* Used from boot time tracer */ extern int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); extern int trigger_process_regex(struct trace_event_file *file, char *buff); #else static inline void __init trace_event_init(void) { } -static inline void trace_event_eval_update(struct trace_eval_map **map, int len) { } +static inline void trace_event_update_all(struct trace_eval_map **map, int len) { } #endif #ifdef CONFIG_TRACER_SNAPSHOT void tracing_snapshot_instance(struct trace_array *tr); int tracing_alloc_snapshot_instance(struct trace_array *tr); +int tracing_arm_snapshot(struct trace_array *tr); +void tracing_disarm_snapshot(struct trace_array *tr); #else static inline void tracing_snapshot_instance(struct trace_array *tr) { } static inline int tracing_alloc_snapshot_instance(struct trace_array *tr) { return 0; } +static inline int tracing_arm_snapshot(struct trace_array *tr) { return 0; } +static inline void tracing_disarm_snapshot(struct trace_array *tr) { } #endif #ifdef CONFIG_PREEMPT_TRACER @@ -1981,7 +2230,7 @@ static inline bool is_good_system_name(const char *name) static inline void sanitize_event_name(char *name) { while (*name++ != '\0') - if 
(*name == ':' || *name == '.') + if (*name == ':' || *name == '.' || *name == '*') *name = '_'; } @@ -2012,4 +2261,32 @@ static inline int rv_init_interface(void) } #endif +/* + * This is used only to distinguish + * function address from trampoline code. + * So this value has no meaning. + */ +#define FTRACE_TRAMPOLINE_MARKER ((unsigned long) INT_MAX) + +/* + * This is used to get the address of the args array based on + * the type of the entry. + */ +#define FGRAPH_ENTRY_ARGS(e) \ + ({ \ + unsigned long *_args; \ + struct ftrace_graph_ent_entry *_e = e; \ + \ + if (IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) && \ + e->ent.type == TRACE_GRAPH_RETADDR_ENT) { \ + struct fgraph_retaddr_ent_entry *_re; \ + \ + _re = (typeof(_re))_e; \ + _args = _re->args; \ + } else { \ + _args = _e->args; \ + } \ + _args; \ + }) + #endif /* _LINUX_KERNEL_TRACE_H */ |
