 tools/perf/Documentation/perf-ftrace.txt    |   6
 tools/perf/builtin-ftrace.c                 |  50
 tools/perf/util/bpf_ftrace.c                |  75
 tools/perf/util/bpf_skel/func_latency.bpf.c | 148
 tools/perf/util/ftrace.h                    |   1
 5 files changed, 205 insertions(+), 75 deletions(-)
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index b77f58c4d2fd..914457853bcf 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -139,6 +139,12 @@ OPTIONS for 'perf ftrace latency'
 	Set the function name to get the histogram.  Unlike perf ftrace trace,
 	it only allows single function to calculate the histogram.
 
+-e::
+--events=::
+	Set the pair of events to get the histogram.  The histogram is calculated
+	by the time difference between the two events from the same thread.  This
+	requires -b/--use-bpf option.
+
 -b::
 --use-bpf::
 	Use BPF to measure function latency instead of using the ftrace (it
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 3a253a1b9f45..e1f2f3fb1b08 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -1549,6 +1549,33 @@ static void delete_filter_func(struct list_head *head)
 	}
 }
 
+static int parse_filter_event(const struct option *opt, const char *str,
+			      int unset __maybe_unused)
+{
+	struct list_head *head = opt->value;
+	struct filter_entry *entry;
+	char *s, *p;
+	int ret = -ENOMEM;
+
+	s = strdup(str);
+	if (s == NULL)
+		return -ENOMEM;
+
+	while ((p = strsep(&s, ",")) != NULL) {
+		entry = malloc(sizeof(*entry) + strlen(p) + 1);
+		if (entry == NULL)
+			goto out;
+
+		strcpy(entry->name, p);
+		list_add_tail(&entry->list, head);
+	}
+	ret = 0;
+
+out:
+	free(s);
+	return ret;
+}
+
 static int parse_buffer_size(const struct option *opt,
			      const char *str, int unset)
 {
@@ -1711,6 +1738,8 @@ int cmd_ftrace(int argc, const char **argv)
 	const struct option latency_options[] = {
 	OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
 		     "Show latency of given function", parse_filter_func),
+	OPT_CALLBACK('e', "events", &ftrace.event_pair, "event1,event2",
+		     "Show latency between the two events", parse_filter_event),
 #ifdef HAVE_BPF_SKEL
 	OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
 		    "Use BPF to measure function latency"),
@@ -1763,6 +1792,7 @@ int cmd_ftrace(int argc, const char **argv)
 	INIT_LIST_HEAD(&ftrace.notrace);
 	INIT_LIST_HEAD(&ftrace.graph_funcs);
 	INIT_LIST_HEAD(&ftrace.nograph_funcs);
+	INIT_LIST_HEAD(&ftrace.event_pair);
 
 	signal(SIGINT, sig_handler);
 	signal(SIGUSR1, sig_handler);
@@ -1817,9 +1847,24 @@ int cmd_ftrace(int argc, const char **argv)
 		cmd_func = __cmd_ftrace;
 		break;
 	case PERF_FTRACE_LATENCY:
-		if (list_empty(&ftrace.filters)) {
-			pr_err("Should provide a function to measure\n");
+		if (list_empty(&ftrace.filters) && list_empty(&ftrace.event_pair)) {
+			pr_err("Should provide a function or events to measure\n");
 			parse_options_usage(ftrace_usage, options, "T", 1);
+			parse_options_usage(NULL, options, "e", 1);
+			ret = -EINVAL;
+			goto out_delete_filters;
+		}
+		if (!list_empty(&ftrace.filters) && !list_empty(&ftrace.event_pair)) {
+			pr_err("Please specify either of function or events\n");
+			parse_options_usage(ftrace_usage, options, "T", 1);
+			parse_options_usage(NULL, options, "e", 1);
+			ret = -EINVAL;
+			goto out_delete_filters;
+		}
+		if (!list_empty(&ftrace.event_pair) && !ftrace.target.use_bpf) {
+			pr_err("Event processing needs BPF\n");
+			parse_options_usage(ftrace_usage, options, "b", 1);
+			parse_options_usage(NULL, options, "e", 1);
 			ret = -EINVAL;
 			goto out_delete_filters;
 		}
@@ -1910,6 +1955,7 @@ out_delete_filters:
 	delete_filter_func(&ftrace.notrace);
 	delete_filter_func(&ftrace.graph_funcs);
 	delete_filter_func(&ftrace.nograph_funcs);
+	delete_filter_func(&ftrace.event_pair);
 
 	return ret;
 }
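With the option wiring above in place, 'perf ftrace latency' accepts two comma-separated event names, and only in BPF mode. A hypothetical invocation (the tracepoint pair is illustrative, not taken from this patch; any two events that fire on the same thread can be paired):

	# perf ftrace latency -b -e mm_vmscan_direct_reclaim_begin,mm_vmscan_direct_reclaim_end -- sleep 1

Note also that parse_filter_event() reuses the existing filter_entry machinery: each entry is allocated as one block of sizeof(*entry) + strlen(p) + 1 bytes, so the list node and the copied name share a single allocation and a single free.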
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 7324668cc83e..0cb02412043c 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -21,15 +21,26 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
 {
 	int fd, err;
 	int i, ncpus = 1, ntasks = 1;
-	struct filter_entry *func;
+	struct filter_entry *func = NULL;
 
-	if (!list_is_singular(&ftrace->filters)) {
-		pr_err("ERROR: %s target function(s).\n",
-		       list_empty(&ftrace->filters) ? "No" : "Too many");
-		return -1;
-	}
+	if (!list_empty(&ftrace->filters)) {
+		if (!list_is_singular(&ftrace->filters)) {
+			pr_err("ERROR: Too many target functions.\n");
+			return -1;
+		}
+		func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+	} else {
+		int count = 0;
+		struct list_head *pos;
 
-	func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+		list_for_each(pos, &ftrace->event_pair)
+			count++;
+
+		if (count != 2) {
+			pr_err("ERROR: Needs two target events.\n");
+			return -1;
+		}
+	}
 
 	skel = func_latency_bpf__open();
 	if (!skel) {
@@ -93,20 +104,44 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
 
 	skel->bss->min = INT64_MAX;
 
-	skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
-							    false, func->name);
-	if (IS_ERR(skel->links.func_begin)) {
-		pr_err("Failed to attach fentry program\n");
-		err = PTR_ERR(skel->links.func_begin);
-		goto out;
-	}
+	if (func) {
+		skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
+								    false, func->name);
+		if (IS_ERR(skel->links.func_begin)) {
+			pr_err("Failed to attach fentry program\n");
+			err = PTR_ERR(skel->links.func_begin);
+			goto out;
+		}
 
-	skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
-							  true, func->name);
-	if (IS_ERR(skel->links.func_end)) {
-		pr_err("Failed to attach fexit program\n");
-		err = PTR_ERR(skel->links.func_end);
-		goto out;
+		skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
+								  true, func->name);
+		if (IS_ERR(skel->links.func_end)) {
+			pr_err("Failed to attach fexit program\n");
+			err = PTR_ERR(skel->links.func_end);
+			goto out;
+		}
+	} else {
+		struct filter_entry *event;
+
+		event = list_first_entry(&ftrace->event_pair, struct filter_entry, list);
+
+		skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin,
+									      event->name);
+		if (IS_ERR(skel->links.event_begin)) {
+			pr_err("Failed to attach first tracepoint program\n");
+			err = PTR_ERR(skel->links.event_begin);
+			goto out;
+		}
+
+		event = list_next_entry(event, list);
+
+		skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end,
+									    event->name);
+		if (IS_ERR(skel->links.event_end)) {
+			pr_err("Failed to attach second tracepoint program\n");
+			err = PTR_ERR(skel->links.event_end);
+			goto out;
+		}
 	}
 
 	/* XXX: we don't actually use this fd - just for poll() */
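One detail of the attach calls above: bpf_program__attach_raw_tracepoint() takes the bare tracepoint name (e.g. "sched_switch"), not the "subsys/name" form used in tracefs. A minimal sketch of the pattern, with an assumed program handle and event name (neither is from this patch), mirroring the IS_ERR() error convention that the perf code uses with its libbpf build:

	#include <bpf/libbpf.h>
	#include <linux/err.h>

	/* Attach a SEC("raw_tp") program to a raw tracepoint by bare name. */
	static int attach_raw_tp(struct bpf_program *prog, const char *tp_name)
	{
		struct bpf_link *link;

		/* tp_name is e.g. "mm_vmscan_direct_reclaim_begin" */
		link = bpf_program__attach_raw_tracepoint(prog, tp_name);
		if (IS_ERR(link))
			return PTR_ERR(link);

		return 0;
	}

Raw tracepoints are a good fit here because they bypass the tracefs argument-formatting path, keeping per-event overhead low.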
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index e731a79a753a..621e2022c8bc 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -52,34 +52,89 @@ const volatile unsigned int min_latency;
 const volatile unsigned int max_latency;
 const volatile unsigned int bucket_num = NUM_BUCKET;
 
-SEC("kprobe/func")
-int BPF_PROG(func_begin)
+static bool can_record(void)
 {
-	__u64 key, now;
-
-	if (!enabled)
-		return 0;
-
-	key = bpf_get_current_pid_tgid();
-
 	if (has_cpu) {
 		__u32 cpu = bpf_get_smp_processor_id();
 		__u8 *ok;
 
 		ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
 		if (!ok)
-			return 0;
+			return false;
 	}
 
 	if (has_task) {
-		__u32 pid = key & 0xffffffff;
+		__u32 pid = bpf_get_current_pid_tgid();
 		__u8 *ok;
 
 		ok = bpf_map_lookup_elem(&task_filter, &pid);
 		if (!ok)
-			return 0;
+			return false;
 	}
+	return true;
+}
+
+static void update_latency(__s64 delta)
+{
+	__u64 val = delta;
+	__u32 key = 0;
+	__u64 *hist;
+	__u64 cmp_base = use_nsec ? 1 : 1000;
+
+	if (delta < 0)
+		return;
+	if (bucket_range != 0) {
+		val = delta / cmp_base;
+
+		if (min_latency > 0) {
+			if (val > min_latency)
+				val -= min_latency;
+			else
+				goto do_lookup;
+		}
+
+		// Less than 1 unit (ms or ns), or, in the future,
+		// than the min latency desired.
+		if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
+			key = val / bucket_range + 1;
+			if (key >= bucket_num)
+				key = bucket_num - 1;
+		}
+
+		goto do_lookup;
+	}
+	// calculate index using delta
+	for (key = 0; key < (bucket_num - 1); key++) {
+		if (delta < (cmp_base << key))
+			break;
+	}
+
+do_lookup:
+	hist = bpf_map_lookup_elem(&latency, &key);
+	if (!hist)
+		return;
+
+	__sync_fetch_and_add(hist, 1);
+
+	__sync_fetch_and_add(&total, delta); // always in nsec
+	__sync_fetch_and_add(&count, 1);
+
+	if (delta > max)
+		max = delta;
+	if (delta < min)
+		min = delta;
+}
+
+SEC("kprobe/func")
+int BPF_PROG(func_begin)
+{
+	__u64 key, now;
+
+	if (!enabled || !can_record())
+		return 0;
+
+	key = bpf_get_current_pid_tgid();
 	now = bpf_ktime_get_ns();
 
 	// overwrite timestamp for nested functions
@@ -92,7 +147,6 @@ int BPF_PROG(func_end)
 {
 	__u64 tid;
 	__u64 *start;
-	__u64 cmp_base = use_nsec ? 1 : 1000;
 
 	if (!enabled)
 		return 0;
@@ -101,56 +155,44 @@ int BPF_PROG(func_end)
 
 	start = bpf_map_lookup_elem(&functime, &tid);
 	if (start) {
-		__s64 delta = bpf_ktime_get_ns() - *start;
-		__u64 val = delta;
-		__u32 key = 0;
-		__u64 *hist;
-
+		update_latency(bpf_ktime_get_ns() - *start);
 		bpf_map_delete_elem(&functime, &tid);
+	}
 
-		if (delta < 0)
-			return 0;
+	return 0;
+}
 
-		if (bucket_range != 0) {
-			val = delta / cmp_base;
+SEC("raw_tp")
+int BPF_PROG(event_begin)
+{
+	__u64 key, now;
 
-			if (min_latency > 0) {
-				if (val > min_latency)
-					val -= min_latency;
-				else
-					goto do_lookup;
-			}
+	if (!enabled || !can_record())
+		return 0;
 
-			// Less than 1 unit (ms or ns), or, in the future,
-			// than the min latency desired.
-			if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
-				key = val / bucket_range + 1;
-				if (key >= bucket_num)
-					key = bucket_num - 1;
-			}
+	key = bpf_get_current_pid_tgid();
+	now = bpf_ktime_get_ns();
 
-			goto do_lookup;
-		}
-		// calculate index using delta
-		for (key = 0; key < (bucket_num - 1); key++) {
-			if (delta < (cmp_base << key))
-				break;
-		}
+	// overwrite timestamp for nested events
+	bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
+	return 0;
+}
 
-do_lookup:
-		hist = bpf_map_lookup_elem(&latency, &key);
-		if (!hist)
-			return 0;
+SEC("raw_tp")
+int BPF_PROG(event_end)
+{
+	__u64 tid;
+	__u64 *start;
 
-		__sync_fetch_and_add(hist, 1);
+	if (!enabled)
+		return 0;
 
-		__sync_fetch_and_add(&total, delta); // always in nsec
-		__sync_fetch_and_add(&count, 1);
+	tid = bpf_get_current_pid_tgid();
 
-		if (delta > max)
-			max = delta;
-		if (delta < min)
-			min = delta;
+	start = bpf_map_lookup_elem(&functime, &tid);
+	if (start) {
+		update_latency(bpf_ktime_get_ns() - *start);
+		bpf_map_delete_elem(&functime, &tid);
 	}
 
 	return 0;
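To make the refactored update_latency() concrete, the sketch below reproduces its bucket-index math as self-contained user-space C (NUM_BUCKET and the parameter defaults are assumptions mirroring the BPF globals). With bucket_range == 0 it uses log2 buckets: bucket 0 holds deltas under one unit (ns or us), and bucket k >= 1 covers [cmp_base << (k-1), cmp_base << k) nanoseconds.

	#include <stdio.h>
	#include <stdint.h>

	#define NUM_BUCKET 22	/* assumed to match the skeleton's histogram size */

	/* User-space rendition of the index math in update_latency().
	 * delta is in nanoseconds and must be >= 0 (the BPF code filters
	 * negative deltas before bucketing). */
	static uint32_t bucket_index(int64_t delta, unsigned int bucket_range,
				     unsigned int min_latency,
				     unsigned int bucket_num, int use_nsec)
	{
		uint64_t cmp_base = use_nsec ? 1 : 1000;
		uint64_t val = delta / cmp_base;
		uint32_t key = 0;

		if (bucket_range != 0) {	/* linear buckets */
			if (min_latency > 0) {
				if (val > min_latency)
					val -= min_latency;
				else
					return 0;	/* below the requested minimum */
			}
			if (val > 0) {
				key = val / bucket_range + 1;
				if (key >= bucket_num)
					key = bucket_num - 1;
			}
			return key;
		}

		/* log2 buckets */
		for (key = 0; key < bucket_num - 1; key++) {
			if ((uint64_t)delta < (cmp_base << key))
				break;
		}
		return key;
	}

	int main(void)
	{
		/* 3500 ns with usec buckets (use_nsec == 0): 3500 < 1000 << 2,
		 * so this prints 2 -- the [2us, 4us) histogram slot. */
		printf("%u\n", bucket_index(3500, 0, 0, NUM_BUCKET, 0));
		return 0;
	}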
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index a9bc47da83a5..3f5094ac5908 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -17,6 +17,7 @@ struct perf_ftrace {
 	struct list_head	notrace;
 	struct list_head	graph_funcs;
 	struct list_head	nograph_funcs;
+	struct list_head	event_pair;
 	struct hashmap		*profile_hash;
 	unsigned long		percpu_buffer_size;
 	bool			inherit;