From bcc8b3e88d6fa1a3a3662bfebd26214b62b61c81 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Sat, 9 Jul 2022 09:50:29 +0800 Subject: perf kwork: Implement perf kwork timehist Implements framework of perf kwork timehist, to provide an analysis of kernel work events. Test cases: # perf kwork tim Runtime start Runtime end Cpu Kwork name Runtime Delaytime (TYPE)NAME:NUM (msec) (msec) ----------------- ----------------- ------ ------------------------------ ---------- ---------- 91576.060290 91576.060344 [0000] (s)RCU:9 0.055 0.111 91576.061470 91576.061547 [0000] (s)SCHED:7 0.077 0.073 91576.062604 91576.062697 [0001] (s)RCU:9 0.094 0.409 91576.064443 91576.064517 [0002] (s)RCU:9 0.074 0.114 91576.065144 91576.065211 [0000] (s)SCHED:7 0.067 0.058 91576.066564 91576.066609 [0003] (s)RCU:9 0.045 0.110 91576.068495 91576.068559 [0000] (s)SCHED:7 0.064 0.059 91576.068900 91576.068996 [0004] (s)RCU:9 0.096 0.726 91576.069364 91576.069420 [0002] (s)RCU:9 0.056 0.082 91576.069649 91576.069701 [0004] (s)RCU:9 0.052 0.111 91576.070147 91576.070206 [0000] (s)SCHED:7 0.060 0.057 91576.073147 91576.073202 [0000] (s)SCHED:7 0.054 0.060 # perf kwork tim --max-stack 2 -g Runtime start Runtime end Cpu Kwork name Runtime Delaytime (TYPE)NAME:NUM (msec) (msec) ----------------- ----------------- ------ ------------------------------ ---------- ---------- 91576.060290 91576.060344 [0000] (s)RCU:9 0.055 0.111 irq_exit_rcu <- sysvec_apic_timer_interrupt 91576.061470 91576.061547 [0000] (s)SCHED:7 0.077 0.073 irq_exit_rcu <- sysvec_call_function_single 91576.062604 91576.062697 [0001] (s)RCU:9 0.094 0.409 irq_exit_rcu <- sysvec_apic_timer_interrupt 91576.064443 91576.064517 [0002] (s)RCU:9 0.074 0.114 irq_exit_rcu <- sysvec_apic_timer_interrupt 91576.065144 91576.065211 [0000] (s)SCHED:7 0.067 0.058 irq_exit_rcu <- sysvec_call_function_single 91576.066564 91576.066609 [0003] (s)RCU:9 0.045 0.110 irq_exit_rcu <- sysvec_apic_timer_interrupt 91576.068495 91576.068559 [0000] (s)SCHED:7 0.064 0.059 irq_exit_rcu <- sysvec_call_function_single 91576.068900 91576.068996 [0004] (s)RCU:9 0.096 0.726 irq_exit_rcu <- sysvec_apic_timer_interrupt 91576.069364 91576.069420 [0002] (s)RCU:9 0.056 0.082 irq_exit_rcu <- sysvec_apic_timer_interrupt 91576.069649 91576.069701 [0004] (s)RCU:9 0.052 0.111 irq_exit_rcu <- sysvec_apic_timer_interrupt Committer testing: # perf kwork -k workqueue timehist | head -40 Runtime start Runtime end Cpu Kwork name Runtime Delaytime (TYPE)NAME:NUM (msec) (msec) ----------------- ----------------- ------ ------------------------------ ---------- ---------- 26520.211825 26520.211832 [0019] (w)free_work 0.007 0.004 26520.212929 26520.212934 [0020] (w)free_work 0.005 0.004 26520.213226 26520.213228 [0014] (w)kfree_rcu_work 0.002 0.004 26520.214057 26520.214061 [0021] (w)free_work 0.004 0.004 26520.221239 26520.221241 [0007] (w)kfree_rcu_work 0.002 0.009 26520.223232 26520.223238 [0013] (w)psi_avgs_work 0.005 0.006 26520.230057 26520.230060 [0020] (w)free_work 0.003 0.003 26520.270428 26520.270434 [0015] (w)free_work 0.006 0.004 26520.270546 26520.270550 [0014] (w)free_work 0.004 0.003 26520.281626 26520.281629 [0015] (w)free_work 0.003 0.002 26520.287225 26520.287230 [0012] (w)psi_avgs_work 0.005 0.008 26520.287231 26520.287235 [0001] (w)psi_avgs_work 0.004 0.011 26520.287236 26520.287239 [0001] (w)psi_avgs_work 0.003 0.012 26520.329488 26520.329492 [0024] (w)free_work 0.004 0.004 26520.330600 26520.330605 [0007] (w)free_work 0.005 0.004 26520.334218 26520.334218 [0007] (w)kfree_rcu_monitor 0.001 0.002 26520.335220 26520.335221 [0005] (w)kfree_rcu_monitor 0.001 0.004 26520.343980 26520.343985 [0007] (w)free_work 0.005 0.002 26520.345093 26520.345097 [0006] (w)free_work 0.004 0.003 26520.351233 26520.351238 [0027] (w)psi_avgs_work 0.005 0.008 26520.353228 26520.353229 [0007] (w)kfree_rcu_work 0.001 0.002 26520.353229 26520.353231 [0005] (w)kfree_rcu_work 0.001 0.006 26520.382381 26520.382383 [0006] (w)free_work 0.003 0.002 26520.386547 26520.386548 [0006] (w)free_work 0.002 0.001 26520.391243 26520.391245 [0015] (w)console_callback 0.002 0.016 26520.415369 26520.415621 [0027] (w)btrfs_work_helper 0.252 26520.415351 26520.416174 [0002] (w)btrfs_work_helper 0.823 0.037 26520.415343 26520.416304 [0031] (w)btrfs_work_helper 0.961 26520.415335 26520.417078 [0001] (w)btrfs_work_helper 1.743 26520.415250 26520.417564 [0002] (w)wb_workfn 2.314 26520.424777 26520.424787 [0002] (w)btrfs_work_helper 0.010 26520.424788 26520.424798 [0002] (w)btrfs_work_helper 0.010 26520.424790 26520.424805 [0001] (w)btrfs_work_helper 0.016 0.016 26520.424801 26520.424807 [0002] (w)btrfs_work_helper 0.006 26520.424809 26520.424831 [0002] (w)btrfs_work_helper 0.022 0.030 26520.424824 26520.424835 [0027] (w)btrfs_work_helper 0.011 26520.424809 26520.424867 [0001] (w)btrfs_work_helper 0.059 0.032 # Signed-off-by: Yang Jihong Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220709015033.38326-14-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kwork.c | 299 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 298 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-kwork.c') diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index ac7f93110df4..f1d773947627 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -35,10 +35,12 @@ #define PRINT_TIMESTAMP_WIDTH 17 #define PRINT_KWORK_NAME_WIDTH 30 #define RPINT_DECIMAL_WIDTH 3 +#define PRINT_BRACKETPAIR_WIDTH 2 #define PRINT_TIME_UNIT_SEC_WIDTH 2 #define PRINT_TIME_UNIT_MESC_WIDTH 3 #define PRINT_RUNTIME_HEADER_WIDTH (PRINT_RUNTIME_WIDTH + PRINT_TIME_UNIT_MESC_WIDTH) #define PRINT_LATENCY_HEADER_WIDTH (PRINT_LATENCY_WIDTH + PRINT_TIME_UNIT_MESC_WIDTH) +#define PRINT_TIMEHIST_CPU_WIDTH (PRINT_CPU_WIDTH + PRINT_BRACKETPAIR_WIDTH) #define PRINT_TIMESTAMP_HEADER_WIDTH (PRINT_TIMESTAMP_WIDTH + PRINT_TIME_UNIT_SEC_WIDTH) struct sort_dimension { @@ -574,6 +576,185 @@ static int latency_entry_event(struct perf_kwork *kwork, return 0; } +static void timehist_save_callchain(struct perf_kwork *kwork, + struct perf_sample *sample, + struct evsel *evsel, + struct machine *machine) +{ + struct symbol *sym; + struct thread *thread; + struct callchain_cursor_node *node; + struct callchain_cursor *cursor = &callchain_cursor; + + if (!kwork->show_callchain || sample->callchain == NULL) + return; + + /* want main thread for process - has maps */ + thread = machine__findnew_thread(machine, sample->pid, sample->pid); + if (thread == NULL) { + pr_debug("Failed to get thread for pid %d\n", sample->pid); + return; + } + + if (thread__resolve_callchain(thread, cursor, evsel, sample, + NULL, NULL, kwork->max_stack + 2) != 0) { + pr_debug("Failed to resolve callchain, skipping\n"); + goto out_put; + } + + callchain_cursor_commit(cursor); + + while (true) { + node = callchain_cursor_current(cursor); + if (node == NULL) + break; + + sym = node->ms.sym; + if (sym) { + if (!strcmp(sym->name, "__softirqentry_text_start") || + !strcmp(sym->name, "__do_softirq")) + sym->ignore = 1; + } + + callchain_cursor_advance(cursor); + } + +out_put: + thread__put(thread); +} + +static void timehist_print_event(struct perf_kwork *kwork, + struct kwork_work *work, + struct kwork_atom *atom, + struct perf_sample *sample, + struct addr_location *al) +{ + char entrytime[32], exittime[32]; + char kwork_name[PRINT_KWORK_NAME_WIDTH]; + + /* + * runtime start + */ + timestamp__scnprintf_usec(atom->time, + entrytime, sizeof(entrytime)); + printf(" %*s ", PRINT_TIMESTAMP_WIDTH, entrytime); + + /* + * runtime end + */ + timestamp__scnprintf_usec(sample->time, + exittime, sizeof(exittime)); + printf(" %*s ", PRINT_TIMESTAMP_WIDTH, exittime); + + /* + * cpu + */ + printf(" [%0*d] ", PRINT_CPU_WIDTH, work->cpu); + + /* + * kwork name + */ + if (work->class && work->class->work_name) { + work->class->work_name(work, kwork_name, + PRINT_KWORK_NAME_WIDTH); + printf(" %-*s ", PRINT_KWORK_NAME_WIDTH, kwork_name); + } else + printf(" %-*s ", PRINT_KWORK_NAME_WIDTH, ""); + + /* + *runtime + */ + printf(" %*.*f ", + PRINT_RUNTIME_WIDTH, RPINT_DECIMAL_WIDTH, + (double)(sample->time - atom->time) / NSEC_PER_MSEC); + + /* + * delaytime + */ + if (atom->prev != NULL) + printf(" %*.*f ", PRINT_LATENCY_WIDTH, RPINT_DECIMAL_WIDTH, + (double)(atom->time - atom->prev->time) / NSEC_PER_MSEC); + else + printf(" %*s ", PRINT_LATENCY_WIDTH, " "); + + /* + * callchain + */ + if (kwork->show_callchain) { + printf(" "); + sample__fprintf_sym(sample, al, 0, + EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE | + EVSEL__PRINT_CALLCHAIN_ARROW | + EVSEL__PRINT_SKIP_IGNORED, + &callchain_cursor, symbol_conf.bt_stop_list, + stdout); + } + + printf("\n"); +} + +static int timehist_raise_event(struct perf_kwork *kwork, + struct kwork_class *class, + struct evsel *evsel, + struct perf_sample *sample, + struct machine *machine) +{ + return work_push_atom(kwork, class, KWORK_TRACE_RAISE, + KWORK_TRACE_MAX, evsel, sample, + machine, NULL); +} + +static int timehist_entry_event(struct perf_kwork *kwork, + struct kwork_class *class, + struct evsel *evsel, + struct perf_sample *sample, + struct machine *machine) +{ + int ret; + struct kwork_work *work = NULL; + + ret = work_push_atom(kwork, class, KWORK_TRACE_ENTRY, + KWORK_TRACE_RAISE, evsel, sample, + machine, &work); + if (ret) + return ret; + + if (work != NULL) + timehist_save_callchain(kwork, sample, evsel, machine); + + return 0; +} + +static int timehist_exit_event(struct perf_kwork *kwork, + struct kwork_class *class, + struct evsel *evsel, + struct perf_sample *sample, + struct machine *machine) +{ + struct kwork_atom *atom = NULL; + struct kwork_work *work = NULL; + struct addr_location al; + + if (machine__resolve(machine, &al, sample) < 0) { + pr_debug("Problem processing event, skipping it\n"); + return -1; + } + + atom = work_pop_atom(kwork, class, KWORK_TRACE_EXIT, + KWORK_TRACE_ENTRY, evsel, sample, + machine, &work); + if (work == NULL) + return -1; + + if (atom != NULL) { + work->nr_atoms++; + timehist_print_event(kwork, work, atom, sample, &al); + atom_del(atom); + } + + return 0; +} + static struct kwork_class kwork_irq; static int process_irq_handler_entry_event(struct perf_tool *tool, struct evsel *evsel, @@ -991,6 +1172,42 @@ static int report_print_header(struct perf_kwork *kwork) return ret; } +static void timehist_print_header(void) +{ + /* + * header row + */ + printf(" %-*s %-*s %-*s %-*s %-*s %-*s\n", + PRINT_TIMESTAMP_WIDTH, "Runtime start", + PRINT_TIMESTAMP_WIDTH, "Runtime end", + PRINT_TIMEHIST_CPU_WIDTH, "Cpu", + PRINT_KWORK_NAME_WIDTH, "Kwork name", + PRINT_RUNTIME_WIDTH, "Runtime", + PRINT_RUNTIME_WIDTH, "Delaytime"); + + /* + * units row + */ + printf(" %-*s %-*s %-*s %-*s %-*s %-*s\n", + PRINT_TIMESTAMP_WIDTH, "", + PRINT_TIMESTAMP_WIDTH, "", + PRINT_TIMEHIST_CPU_WIDTH, "", + PRINT_KWORK_NAME_WIDTH, "(TYPE)NAME:NUM", + PRINT_RUNTIME_WIDTH, "(msec)", + PRINT_RUNTIME_WIDTH, "(msec)"); + + /* + * separator + */ + printf(" %.*s %.*s %.*s %.*s %.*s %.*s\n", + PRINT_TIMESTAMP_WIDTH, graph_dotted_line, + PRINT_TIMESTAMP_WIDTH, graph_dotted_line, + PRINT_TIMEHIST_CPU_WIDTH, graph_dotted_line, + PRINT_KWORK_NAME_WIDTH, graph_dotted_line, + PRINT_RUNTIME_WIDTH, graph_dotted_line, + PRINT_RUNTIME_WIDTH, graph_dotted_line); +} + static void print_summary(struct perf_kwork *kwork) { u64 time = kwork->timeend - kwork->timestart; @@ -1085,6 +1302,7 @@ static int perf_kwork__check_config(struct perf_kwork *kwork, struct perf_session *session) { int ret; + struct evsel *evsel; struct kwork_class *class; static struct trace_kwork_handler report_ops = { @@ -1095,6 +1313,11 @@ static int perf_kwork__check_config(struct perf_kwork *kwork, .raise_event = latency_raise_event, .entry_event = latency_entry_event, }; + static struct trace_kwork_handler timehist_ops = { + .raise_event = timehist_raise_event, + .entry_event = timehist_entry_event, + .exit_event = timehist_exit_event, + }; switch (kwork->report) { case KWORK_REPORT_RUNTIME: @@ -1103,6 +1326,9 @@ static int perf_kwork__check_config(struct perf_kwork *kwork, case KWORK_REPORT_LATENCY: kwork->tp_handler = &latency_ops; break; + case KWORK_REPORT_TIMEHIST: + kwork->tp_handler = &timehist_ops; + break; default: pr_debug("Invalid report type %d\n", kwork->report); return -1; @@ -1131,6 +1357,14 @@ static int perf_kwork__check_config(struct perf_kwork *kwork, } } + list_for_each_entry(evsel, &session->evlist->core.entries, core.node) { + if (kwork->show_callchain && !evsel__has_callchain(evsel)) { + pr_debug("Samples do not have callchains\n"); + kwork->show_callchain = 0; + symbol_conf.use_callchain = 0; + } + } + return 0; } @@ -1164,6 +1398,9 @@ static int perf_kwork__read_events(struct perf_kwork *kwork) goto out_delete; } + if (kwork->report == KWORK_REPORT_TIMEHIST) + timehist_print_header(); + ret = perf_session__process_events(session); if (ret) { pr_debug("Failed to process events, error %d\n", ret); @@ -1257,6 +1494,31 @@ static int perf_kwork__process_tracepoint_sample(struct perf_tool *tool, return err; } +static int perf_kwork__timehist(struct perf_kwork *kwork) +{ + /* + * event handlers for timehist option + */ + kwork->tool.comm = perf_event__process_comm; + kwork->tool.exit = perf_event__process_exit; + kwork->tool.fork = perf_event__process_fork; + kwork->tool.attr = perf_event__process_attr; + kwork->tool.tracing_data = perf_event__process_tracing_data; + kwork->tool.build_id = perf_event__process_build_id; + kwork->tool.ordered_events = true; + kwork->tool.ordering_requires_timestamps = true; + symbol_conf.use_callchain = kwork->show_callchain; + + if (symbol__validate_sym_arguments()) { + pr_err("Failed to validate sym arguments\n"); + return -1; + } + + setup_pager(); + + return perf_kwork__read_events(kwork); +} + static void setup_event_list(struct perf_kwork *kwork, const struct option *options, const char * const usage_msg[]) @@ -1370,6 +1632,8 @@ int cmd_kwork(int argc, const char **argv) .event_list_str = NULL, .summary = false, .sort_order = NULL, + .show_callchain = false, + .max_stack = 5, .timestart = 0, .timeend = 0, .nr_events = 0, @@ -1419,6 +1683,27 @@ int cmd_kwork(int argc, const char **argv) "input file name"), OPT_PARENT(kwork_options) }; + const struct option timehist_options[] = { + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), + OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, + "file", "kallsyms pathname"), + OPT_BOOLEAN('g', "call-graph", &kwork.show_callchain, + "Display call chains if present"), + OPT_UINTEGER(0, "max-stack", &kwork.max_stack, + "Maximum number of functions to display backtrace."), + OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", + "Look for files with symbols relative to this directory"), + OPT_STRING(0, "time", &kwork.time_str, "str", + "Time span for analysis (start,stop)"), + OPT_STRING('C', "cpu", &kwork.cpu_list, "cpu", + "list of cpus to profile"), + OPT_STRING('n', "name", &kwork.profile_name, "name", + "event name to profile"), + OPT_STRING('i', "input", &input_name, "file", + "input file name"), + OPT_PARENT(kwork_options) + }; const char *kwork_usage[] = { NULL, NULL @@ -1431,8 +1716,12 @@ int cmd_kwork(int argc, const char **argv) "perf kwork latency []", NULL }; + const char * const timehist_usage[] = { + "perf kwork timehist []", + NULL + }; const char *const kwork_subcommands[] = { - "record", "report", "latency", NULL + "record", "report", "latency", "timehist", NULL }; argc = parse_options_subcommand(argc, argv, kwork_options, @@ -1466,6 +1755,14 @@ int cmd_kwork(int argc, const char **argv) kwork.report = KWORK_REPORT_LATENCY; setup_sorting(&kwork, latency_options, latency_usage); return perf_kwork__report(&kwork); + } else if (strlen(argv[0]) > 2 && strstarts("timehist", argv[0])) { + if (argc > 1) { + argc = parse_options(argc, argv, timehist_options, timehist_usage, 0); + if (argc) + usage_with_options(timehist_usage, timehist_options); + } + kwork.report = KWORK_REPORT_TIMEHIST; + return perf_kwork__timehist(&kwork); } else usage_with_options(kwork_usage, kwork_options); -- cgit