From 800d3f561659b5436f8c57e7c26dd1f6928b5615 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 11 Oct 2019 10:21:22 +0800 Subject: perf report: Add warning when libunwind not compiled in We received a user report that call-graph DWARF mode was enabled in 'perf record' but 'perf report' didn't unwind the callstack correctly. The reason was, libunwind was not compiled in. We can use 'perf -vv' to check the compiled libraries but it would be valuable to report a warning to user directly (especially valuable for a perf newbie). The warning is: Warning: Please install libunwind development packages during the perf build. Both TUI and stdio are supported. Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191011022122.26369-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index aae0e57c60fb..7accaf8ef689 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -399,6 +399,13 @@ static int report__setup_sample_type(struct report *rep) PERF_SAMPLE_BRANCH_ANY)) rep->nonany_branch_mode = true; +#ifndef HAVE_LIBUNWIND_SUPPORT + if (dwarf_callchain_users) { + ui__warning("Please install libunwind development packages " + "during the perf build.\n"); + } +#endif + return 0; } -- cgit From 8efc4f05685dae2da1d21973eba5e59e7863c77f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Oct 2019 11:31:38 -0300 Subject: perf maps: Add for_each_entry()/_safe() iterators To reduce boilerplate, provide a more compact form using an idiom present in other trees of data structures. Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-59gmq4kg1r68ou1wknyjl78x@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7accaf8ef689..3bbad039abf2 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -727,11 +727,9 @@ static struct task *tasks_list(struct task *task, struct machine *machine) static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) { size_t printed = 0; - struct rb_node *nd; - - for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { - struct map *map = rb_entry(nd, struct map, rb_node); + struct map *map; + maps__for_each_entry(maps, map) { printed += fprintf(fp, "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n", indent, "", map->start, map->end, map->prot & PROT_READ ? 'r' : '-', -- cgit From 7841f40aed933dd3838f8d9f2dfcf286c352b7ee Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 7 Nov 2019 15:47:15 +0800 Subject: perf hist: Count the total cycles of all samples We can get the per sample cycles by hist__account_cycles(). It's also useful to know the total cycles of all samples in order to get the cycles coverage for a single program block in further. For example: coverage = per block sampled cycles / total sampled cycles This patch creates a new argument 'total_cycles' in hist__account_cycles(), which will be added with the cycles of each sample. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191107074719.26139-4-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3bbad039abf2..bc15b9dcccd6 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -292,7 +292,7 @@ static int process_sample_event(struct perf_tool *tool, if (ui__has_annotation() || rep->symbol_ipc) { hist__account_cycles(sample->branch_stack, &al, sample, - rep->nonany_branch_mode); + rep->nonany_branch_mode, NULL); } ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep); -- cgit From 6f7164fa231a5f360e576593c547bea7dc56ddbc Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 7 Nov 2019 15:47:17 +0800 Subject: perf report: Sort by sampled cycles percent per block for stdio It would be useful to support sorting for all blocks by the sampled cycles percent per block. This is useful to concentrate on the globally hottest blocks. This patch implements a new option "--total-cycles" which sorts all blocks by 'Sampled Cycles%'. The 'Sampled Cycles%' is the percent: percent = block sampled cycles aggregation / total sampled cycles Note that, this patch only supports "--stdio" mode. For example, # perf record -b ./div # perf report --total-cycles --stdio # To display the perf.data header info, please use --header/--header-only options. # # Total Lost Samples: 0 # # Samples: 2M of event 'cycles' # Event count (approx.): 2753248 # # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object # ............... .............. ........... .......... ................................................ ................. # 26.04% 2.8M 0.40% 18 [div.c:42 -> div.c:39] div 15.17% 1.2M 0.16% 7 [random_r.c:357 -> random_r.c:380] libc-2.27.so 5.11% 402.0K 0.04% 2 [div.c:27 -> div.c:28] div 4.87% 381.6K 0.04% 2 [random.c:288 -> random.c:291] libc-2.27.so 4.53% 381.0K 0.04% 2 [div.c:40 -> div.c:40] div 3.85% 300.9K 0.02% 1 [div.c:22 -> div.c:25] div 3.08% 241.1K 0.02% 1 [rand.c:26 -> rand.c:27] libc-2.27.so 3.06% 240.0K 0.02% 1 [random.c:291 -> random.c:291] libc-2.27.so 2.78% 215.7K 0.02% 1 [random.c:298 -> random.c:298] libc-2.27.so 2.52% 198.3K 0.02% 1 [random.c:293 -> random.c:293] libc-2.27.so 2.36% 184.8K 0.02% 1 [rand.c:28 -> rand.c:28] libc-2.27.so 2.33% 180.5K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so 2.28% 176.7K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so 2.20% 168.8K 0.02% 1 [rand@plt+0 -> rand@plt+0] div 1.98% 158.2K 0.02% 1 [random_r.c:388 -> random_r.c:388] libc-2.27.so 1.57% 123.3K 0.02% 1 [div.c:42 -> div.c:44] div 1.44% 116.0K 0.42% 19 [random_r.c:357 -> random_r.c:394] libc-2.27.so 0.25% 182.5K 0.02% 1 [random_r.c:388 -> random_r.c:391] libc-2.27.so 0.00% 48 1.07% 48 [x86_pmu_enable+284 -> x86_pmu_enable+298] [kernel.kallsyms] 0.00% 74 1.64% 74 [vm_mmap_pgoff+0 -> vm_mmap_pgoff+92] [kernel.kallsyms] 0.00% 73 1.62% 73 [vm_mmap+0 -> vm_mmap+48] [kernel.kallsyms] 0.00% 63 0.69% 31 [up_write+0 -> up_write+34] [kernel.kallsyms] 0.00% 13 0.29% 13 [setup_arg_pages+396 -> setup_arg_pages+413] [kernel.kallsyms] 0.00% 3 0.07% 3 [setup_arg_pages+418 -> setup_arg_pages+450] [kernel.kallsyms] 0.00% 616 6.84% 308 [security_mmap_file+0 -> security_mmap_file+72] [kernel.kallsyms] 0.00% 23 0.51% 23 [security_mmap_file+77 -> security_mmap_file+87] [kernel.kallsyms] 0.00% 4 0.02% 1 [sched_clock+0 -> sched_clock+4] [kernel.kallsyms] 0.00% 4 0.02% 1 [sched_clock+9 -> sched_clock+12] [kernel.kallsyms] 0.00% 1 0.02% 1 [rcu_nmi_exit+0 -> rcu_nmi_exit+9] [kernel.kallsyms] Committer testing: This should provide material for hours of endless joy, both from looking for suspicious things in the implementation of this patch, such as the top one: # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object 2.17% 1.7M 0.08% 607 [compiler.h:199 -> common.c:221] [kernel.vmlinux] As well from things that look legit: # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object 0.16% 123.0K 0.60% 4.7K [nospec-branch.h:265 -> nospec-branch.h:278] [kernel.vmlinux] :-) Very short system wide taken branches session: # perf record -h -b Usage: perf record [] [] or: perf record [] -- [] -b, --branch-any sample any taken branches # # perf record -b ^C[ perf record: Woken up 595 times to write data ] [ perf record: Captured and wrote 156.672 MB perf.data (196873 samples) ] # # perf evlist -v cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: ANY # # perf report --total-cycles --stdio # To display the perf.data header info, please use --header/--header-only options. # # Total Lost Samples: 0 # # Samples: 6M of event 'cycles' # Event count (approx.): 6299936 # # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object # ............... .............. ........... .......... ...................................................................... .................... # 2.17% 1.7M 0.08% 607 [compiler.h:199 -> common.c:221] [kernel.vmlinux] 1.75% 1.3M 8.34% 65.5K [memset-vec-unaligned-erms.S:147 -> memset-vec-unaligned-erms.S:151] libc-2.29.so 0.72% 544.5K 0.03% 230 [entry_64.S:657 -> entry_64.S:662] [kernel.vmlinux] 0.56% 541.8K 0.09% 672 [compiler.h:199 -> common.c:300] [kernel.vmlinux] 0.39% 293.2K 0.01% 104 [list_debug.c:43 -> list_debug.c:61] [kernel.vmlinux] 0.36% 278.6K 0.03% 272 [entry_64.S:1289 -> entry_64.S:1308] [kernel.vmlinux] 0.30% 260.8K 0.07% 564 [clear_page_64.S:47 -> clear_page_64.S:50] [kernel.vmlinux] 0.28% 215.3K 0.05% 369 [traps.c:623 -> traps.c:628] [kernel.vmlinux] 0.23% 178.1K 0.04% 278 [entry_64.S:271 -> entry_64.S:275] [kernel.vmlinux] 0.20% 152.6K 0.09% 706 [paravirt.c:177 -> paravirt.c:179] [kernel.vmlinux] 0.20% 155.8K 0.05% 373 [entry_64.S:153 -> entry_64.S:175] [kernel.vmlinux] 0.18% 136.6K 0.03% 222 [msr.h:105 -> msr.h:166] [kernel.vmlinux] 0.16% 123.0K 0.60% 4.7K [nospec-branch.h:265 -> nospec-branch.h:278] [kernel.vmlinux] 0.16% 118.3K 0.01% 44 [entry_64.S:632 -> entry_64.S:657] [kernel.vmlinux] 0.14% 104.5K 0.00% 28 [rwsem.c:1541 -> rwsem.c:1544] [kernel.vmlinux] 0.13% 99.2K 0.01% 53 [spinlock.c:150 -> spinlock.c:152] [kernel.vmlinux] 0.13% 95.5K 0.00% 35 [swap.c:456 -> swap.c:471] [kernel.vmlinux] 0.12% 96.2K 0.05% 407 [copy_user_64.S:175 -> copy_user_64.S:209] [kernel.vmlinux] 0.11% 85.9K 0.00% 31 [swap.c:400 -> page-flags.h:188] [kernel.vmlinux] 0.10% 73.0K 0.01% 52 [paravirt.h:763 -> list.h:131] [kernel.vmlinux] 0.07% 56.2K 0.03% 214 [filemap.c:1524 -> filemap.c:1557] [kernel.vmlinux] 0.07% 54.2K 0.02% 145 [memory.c:1032 -> memory.c:1049] [kernel.vmlinux] 0.07% 50.3K 0.00% 39 [mmzone.c:49 -> mmzone.c:69] [kernel.vmlinux] 0.06% 48.3K 0.01% 40 [paravirt.h:768 -> page_alloc.c:3304] [kernel.vmlinux] 0.06% 46.7K 0.02% 155 [memory.c:1032 -> memory.c:1056] [kernel.vmlinux] 0.06% 46.9K 0.01% 103 [swap.c:867 -> swap.c:902] [kernel.vmlinux] 0.06% 47.8K 0.00% 34 [entry_64.S:1201 -> entry_64.S:1202] [kernel.vmlinux] ----------------------------------------------------------- v7: --- Use use_browser in report__browse_block_hists for supporting stdio and potential tui mode. v6: --- Create report__browse_block_hists in block-info.c (codes are moved from builtin-report.c). It's called from perf_evlist__tty_browse_hists. v5: --- 1. Move all block functions to block-info.c 2. Move the code of setting ms in block hist_entry to other patch. v4: --- 1. Use new option '--total-cycles' to replace '-s total_cycles' in v3. 2. Move block info collection out of block info printing. v3: --- 1. Use common function block_info__process_sym to process the blocks per symbol. 2. Remove the nasty hack for skipping calculation of column length 3. Some minor cleanup Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191107074719.26139-6-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index bc15b9dcccd6..992b18bdd723 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -51,6 +51,7 @@ #include "util/util.h" // perf_tip() #include "ui/ui.h" #include "ui/progress.h" +#include "util/block-info.h" #include #include @@ -96,10 +97,13 @@ struct report { float min_percent; u64 nr_entries; u64 queue_size; + u64 total_cycles; int socket_filter; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); struct branch_type_stat brtype_stat; bool symbol_ipc; + bool total_cycles_mode; + struct block_report *block_reports; }; static int report__config(const char *var, const char *value, void *cb) @@ -290,9 +294,10 @@ static int process_sample_event(struct perf_tool *tool, if (al.map != NULL) al.map->dso->hit = 1; - if (ui__has_annotation() || rep->symbol_ipc) { + if (ui__has_annotation() || rep->symbol_ipc || rep->total_cycles_mode) { hist__account_cycles(sample->branch_stack, &al, sample, - rep->nonany_branch_mode, NULL); + rep->nonany_branch_mode, + &rep->total_cycles); } ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep); @@ -485,6 +490,7 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist, const char *help) { struct evsel *pos; + int i = 0; if (!quiet) { fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n", @@ -500,6 +506,13 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist, continue; hists__fprintf_nr_sample_events(hists, rep, evname, stdout); + + if (rep->total_cycles_mode) { + report__browse_block_hists(&rep->block_reports[i++].hist, + 0, pos); + continue; + } + hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout, !(symbol_conf.use_callchain || symbol_conf.show_branchflag_count)); @@ -925,6 +938,13 @@ static int __cmd_report(struct report *rep) report__output_resort(rep); + if (rep->total_cycles_mode) { + rep->block_reports = block_info__create_report(session->evlist, + rep->total_cycles); + if (!rep->block_reports) + return -1; + } + return report__browse_hists(rep); } @@ -1209,6 +1229,8 @@ int cmd_report(int argc, const char **argv) "Set time quantum for time sort key (default 100ms)", parse_time_quantum), OPTS_EVSWITCH(&report.evswitch), + OPT_BOOLEAN(0, "total-cycles", &report.total_cycles_mode, + "Sort all blocks by 'Sampled Cycles%'"), OPT_END() }; struct perf_data data = { @@ -1371,6 +1393,17 @@ repeat: goto error; } + if (report.total_cycles_mode) { + if (sort__mode != SORT_MODE__BRANCH) + report.total_cycles_mode = false; + else if (!report.use_stdio) { + pr_err("Error: --total-cycles can be only used together with --stdio\n"); + goto error; + } else { + sort_order = "sym"; + } + } + if (strcmp(input_name, "-") != 0) setup_browser(true); else @@ -1421,7 +1454,8 @@ repeat: * so don't allocate extra space that won't be used in the stdio * implementation. */ - if (ui__has_annotation() || report.symbol_ipc) { + if (ui__has_annotation() || report.symbol_ipc || + report.total_cycles_mode) { ret = symbol__annotation_init(); if (ret < 0) goto error; @@ -1482,6 +1516,10 @@ error: itrace_synth_opts__clear_time_range(&itrace_synth_opts); zfree(&report.ptime_range); } + + if (report.block_reports) + zfree(&report.block_reports); + zstd_fini(&(session->zstd_data)); perf_session__delete(session); return ret; -- cgit From 0b49f83657d5fb9a35c592faab7c5ea16e387539 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 7 Nov 2019 15:47:18 +0800 Subject: perf report: Support --percent-limit for --total-cycles We have already supported the '--total-cycles' option in previous patch. It's also useful to show entries only above a threshold percent. This patch enables '--percent-limit' for not showing entries under that percent. For example: perf report --total-cycles --stdio --percent-limit 1 # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 2M of event 'cycles' # Event count (approx.): 2753248 # # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object # ............... .............. ........... .......... ................................................................. .................... # 26.04% 2.8M 0.40% 18 [div.c:42 -> div.c:39] div 15.17% 1.2M 0.16% 7 [random_r.c:357 -> random_r.c:380] libc-2.27.so 5.11% 402.0K 0.04% 2 [div.c:27 -> div.c:28] div 4.87% 381.6K 0.04% 2 [random.c:288 -> random.c:291] libc-2.27.so 4.53% 381.0K 0.04% 2 [div.c:40 -> div.c:40] div 3.85% 300.9K 0.02% 1 [div.c:22 -> div.c:25] div 3.08% 241.1K 0.02% 1 [rand.c:26 -> rand.c:27] libc-2.27.so 3.06% 240.0K 0.02% 1 [random.c:291 -> random.c:291] libc-2.27.so 2.78% 215.7K 0.02% 1 [random.c:298 -> random.c:298] libc-2.27.so 2.52% 198.3K 0.02% 1 [random.c:293 -> random.c:293] libc-2.27.so 2.36% 184.8K 0.02% 1 [rand.c:28 -> rand.c:28] libc-2.27.so 2.33% 180.5K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so 2.28% 176.7K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so 2.20% 168.8K 0.02% 1 [rand@plt+0 -> rand@plt+0] div 1.98% 158.2K 0.02% 1 [random_r.c:388 -> random_r.c:388] libc-2.27.so 1.57% 123.3K 0.02% 1 [div.c:42 -> div.c:44] div 1.44% 116.0K 0.42% 19 [random_r.c:357 -> random_r.c:394] libc-2.27.so Committer testing: From second exapmple onwards slightly edited for brevity: # perf report --total-cycles --percent-limit 2 --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 6M of event 'cycles' # Event count (approx.): 6299936 # # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object # ............... .............. ........... .......... ...................................................................... .................... # 2.17% 1.7M 0.08% 607 [compiler.h:199 -> common.c:221] [kernel.vmlinux] # # (Tip: Create an archive with symtabs to analyse on other machine: perf archive) # # perf report --total-cycles --percent-limit 1 --stdio # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object 2.17% 1.7M 0.08% 607 [compiler.h:199 -> common.c:221] [kernel.vmlinux] 1.75% 1.3M 8.34% 65.5K [memset-vec-unaligned-erms.S:147 -> memset-vec-unaligned-erms.S:151] libc-2.29.so # # perf report --total-cycles --percent-limit 0.7 --stdio # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object 2.17% 1.7M 0.08% 607 [compiler.h:199 -> common.c:221] [kernel.vmlinux] 1.75% 1.3M 8.34% 65.5K [memset-vec-unaligned-erms.S:147 -> memset-vec-unaligned-erms.S:151] libc-2.29.so 0.72% 544.5K 0.03% 230 [entry_64.S:657 -> entry_64.S:662] [kernel.vmlinux] # ------------------------------------------- It only shows the entries which 'Sampled Cycles%' > 1%. v7: --- No functional change. Only fix the conflict issue because previous patches are changed. v6: --- No functional change. Only fix the conflict issue because previous patches are changed. v5: --- No functional change. Only fix the conflict issue because previous patches are changed. v4: --- No functional change. Only fix the build issue because previous patches are changed. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191107074719.26139-7-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 992b18bdd723..ca41187525ed 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -509,7 +509,7 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist, if (rep->total_cycles_mode) { report__browse_block_hists(&rep->block_reports[i++].hist, - 0, pos); + rep->min_percent, pos); continue; } -- cgit From 7fa46cbf20d327d78114b1c8c7e69fabe7c57794 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 7 Nov 2019 15:47:19 +0800 Subject: perf report: Sort by sampled cycles percent per block for tui Previous patch has implemented a new option "--total-cycles". But only stdio mode is supported. This patch supports the tui mode and support '--percent-limit'. For example, perf record -b ./div perf report --total-cycles --percent-limit 1 # Samples: 2753248 of event 'cycles' Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object 26.04% 2.8M 0.40% 18 [div.c:42 -> div.c:39] div 15.17% 1.2M 0.16% 7 [random_r.c:357 -> random_r.c:380] libc-2.27.so 5.11% 402.0K 0.04% 2 [div.c:27 -> div.c:28] div 4.87% 381.6K 0.04% 2 [random.c:288 -> random.c:291] libc-2.27.so 4.53% 381.0K 0.04% 2 [div.c:40 -> div.c:40] div 3.85% 300.9K 0.02% 1 [div.c:22 -> div.c:25] div 3.08% 241.1K 0.02% 1 [rand.c:26 -> rand.c:27] libc-2.27.so 3.06% 240.0K 0.02% 1 [random.c:291 -> random.c:291] libc-2.27.so 2.78% 215.7K 0.02% 1 [random.c:298 -> random.c:298] libc-2.27.so 2.52% 198.3K 0.02% 1 [random.c:293 -> random.c:293] libc-2.27.so 2.36% 184.8K 0.02% 1 [rand.c:28 -> rand.c:28] libc-2.27.so 2.33% 180.5K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so 2.28% 176.7K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so 2.20% 168.8K 0.02% 1 [rand@plt+0 -> rand@plt+0] div 1.98% 158.2K 0.02% 1 [random_r.c:388 -> random_r.c:388] libc-2.27.so 1.57% 123.3K 0.02% 1 [div.c:42 -> div.c:44] div 1.44% 116.0K 0.42% 19 [random_r.c:357 -> random_r.c:394] libc-2.27.so -------------------------------------------------- v7: --- 1. Since we have used use_browser in report__browse_block_hists to support stdio mode, now we also add supporting for tui. 2. Move block tui browser code from ui/browsers/hists.c to block-info.c. v6: --- Create report__tui_browse_block_hists in block-info.c (codes are moved from builtin-report.c). v5: --- Fix a crash issue when running perf report without '--total-cycles'. The issue is because the internal flag is renamed from 'total_cycles' to 'total_cycles_mode' in previous patch but this patch still uses 'total_cycles' to check if the '--total-cycles' option is enabled, which causes the code to be inconsistent. v4: --- Since the block collection is moved out of printing in previous patch, this patch is updated accordingly for tui supporting. v3: --- Minor change since the function name is changed: block_total_cycles_percent -> block_info__total_cycles_percent Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191107074719.26139-8-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ca41187525ed..1e81985b7d56 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -485,6 +485,22 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report return ret + fprintf(fp, "\n#\n"); } +static int perf_evlist__tui_block_hists_browse(struct evlist *evlist, + struct report *rep) +{ + struct evsel *pos; + int i = 0, ret; + + evlist__for_each_entry(evlist, pos) { + ret = report__browse_block_hists(&rep->block_reports[i++].hist, + rep->min_percent, pos); + if (ret != 0) + return ret; + } + + return 0; +} + static int perf_evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, const char *help) @@ -595,6 +611,11 @@ static int report__browse_hists(struct report *rep) switch (use_browser) { case 1: + if (rep->total_cycles_mode) { + ret = perf_evlist__tui_block_hists_browse(evlist, rep); + break; + } + ret = perf_evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent, &session->header.env, @@ -1396,12 +1417,8 @@ repeat: if (report.total_cycles_mode) { if (sort__mode != SORT_MODE__BRANCH) report.total_cycles_mode = false; - else if (!report.use_stdio) { - pr_err("Error: --total-cycles can be only used together with --stdio\n"); - goto error; - } else { + else sort_order = "sym"; - } } if (strcmp(input_name, "-") != 0) -- cgit From 2975489458c59ce2e348b1b3aef5d8d2acb5cc8d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 4 Nov 2019 11:10:00 -0300 Subject: perf annotate: Pass a 'map_symbol' in places receiving a pair of 'map' and 'symbol' pointers We are already passing things like: symbol__annotate(ms->sym, ms->map, ...) So shorten the signature of such functions to receive the 'map_symbol' pointer. This also paves the way to having the 'struct map_groups' pointer in the 'struct map_symbol' so that we can get rid of 'struct map'->groups. Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-23yx8v1t41nzpkpi7rdrozww@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1e81985b7d56..585805f51f15 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -680,7 +680,7 @@ static int hists__resort_cb(struct hist_entry *he, void *arg) if (rep->symbol_ipc && sym && !sym->annotate2) { struct evsel *evsel = hists_to_evsel(he->hists); - symbol__annotate2(sym, he->ms.map, evsel, + symbol__annotate2(&he->ms, evsel, &annotation__default_options, NULL); } -- cgit From 99459a84d5870a88274b4f10bc85c3e39e1d642c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 19 Nov 2019 12:26:19 -0300 Subject: perf map: Move maj/min/ino/ino_generation to separate struct And this patch highlights where these fields are being used: in the sort order where it uses it to compare maps and classify samples taking into account not just the DSO, but those DSO id fields. I think these should be used to differentiate DSOs with the same name but different 'struct dso_id' fields, i.e. these fields should move to 'struct dso' and then be used as part of the key when doing lookups for DSOs, in addition to the DSO name. Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-8v5isitqy0dup47nnwkpc80f@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 585805f51f15..04c197d3beea 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -771,7 +771,7 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) map->prot & PROT_EXEC ? 'x' : '-', map->flags & MAP_SHARED ? 's' : 'p', map->pgoff, - map->ino, map->dso->name); + map->dso_id.ino, map->dso->name); } return printed; -- cgit From 0e3149f86b99ddabde8c5029eea0a9267e34f1a0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 19 Nov 2019 18:44:22 -0300 Subject: perf dso: Move dso_id from 'struct map' to 'struct dso' And take it into account when looking up DSOs when we have the dso_id fields obtained from somewhere, like from PERF_RECORD_MMAP2 records. Instances of struct map pointing to the same DSO pathname but with anything in dso_id different are in fact different DSOs, so better have different 'struct dso' instances to reflect that. At some point we may want to get copies of the contents of the different objects if we want to do correct annotation or other analysis. With this we get 'struct map' 24 bytes leaner: $ pahole -C map ~/bin/perf struct map { union { struct rb_node rb_node __attribute__((__aligned__(8))); /* 0 24 */ struct list_head node; /* 0 16 */ } __attribute__((__aligned__(8))); /* 0 24 */ u64 start; /* 24 8 */ u64 end; /* 32 8 */ _Bool erange_warned:1; /* 40: 0 1 */ _Bool priv:1; /* 40: 1 1 */ /* XXX 6 bits hole, try to pack */ /* XXX 3 bytes hole, try to pack */ u32 prot; /* 44 4 */ u64 pgoff; /* 48 8 */ u64 reloc; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ u64 (*map_ip)(struct map *, u64); /* 64 8 */ u64 (*unmap_ip)(struct map *, u64); /* 72 8 */ struct dso * dso; /* 80 8 */ refcount_t refcnt; /* 88 4 */ u32 flags; /* 92 4 */ /* size: 96, cachelines: 2, members: 13 */ /* sum members: 92, holes: 1, sum holes: 3 */ /* sum bitfield members: 2 bits, bit holes: 1, sum bit holes: 6 bits */ /* forced alignments: 1 */ /* last cacheline: 32 bytes */ } __attribute__((__aligned__(8))); $ Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-g4hxxmraplo7wfjmk384mfsb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 04c197d3beea..0b6157c02c88 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -771,7 +771,7 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) map->prot & PROT_EXEC ? 'x' : '-', map->flags & MAP_SHARED ? 's' : 'p', map->pgoff, - map->dso_id.ino, map->dso->name); + map->dso->id.ino, map->dso->name); } return printed; -- cgit From 848a5e507e26176902e328bd8ae4a5e9c7d2bafe Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 18 Nov 2019 22:08:49 +0800 Subject: perf report: Jump to symbol source view from total cycles view This patch supports jumping from tui total cycles view to symbol source view. For example, perf record -b ./div perf report --total-cycles In total cycles view, we can select one entry and press 'a' or press ENTER key to jump to symbol source view. This patch also sets sort_order to NULL in cmd_report() which will use the default branch sort order. The percent value in new annotate view will be consistent with the percent in annotate view switched from perf report (we observed the original percent gap with previous patches). v2: --- Fix the 'make NO_SLANG=1' error. (set __maybe_unused to annotation_opts in block_hists_tui_browse()). Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191118140849.20714-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0b6157c02c88..ab0f6e516b03 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -493,7 +493,9 @@ static int perf_evlist__tui_block_hists_browse(struct evlist *evlist, evlist__for_each_entry(evlist, pos) { ret = report__browse_block_hists(&rep->block_reports[i++].hist, - rep->min_percent, pos); + rep->min_percent, pos, + &rep->session->header.env, + &rep->annotation_opts); if (ret != 0) return ret; } @@ -525,7 +527,8 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist, if (rep->total_cycles_mode) { report__browse_block_hists(&rep->block_reports[i++].hist, - rep->min_percent, pos); + rep->min_percent, pos, + NULL, NULL); continue; } @@ -1418,7 +1421,7 @@ repeat: if (sort__mode != SORT_MODE__BRANCH) report.total_cycles_mode = false; else - sort_order = "sym"; + sort_order = NULL; } if (strcmp(input_name, "-") != 0) -- cgit From 79b6bb73f888933cbcd20b0ef3976cde67951b72 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 25 Nov 2019 21:58:33 -0300 Subject: perf maps: Merge 'struct maps' with 'struct map_groups' And pick the shortest name: 'struct maps'. The split existed because we used to have two groups of maps, one for functions and one for variables, but that only complicated things, sometimes we needed to figure out what was at some address and then had to first try it on the functions group and if that failed, fall back to the variables one. That split is long gone, so for quite a while we had only one struct maps per struct map_groups, simplify things by combining those structs. First patch is the minimum needed to merge both, follow up patches will rename 'thread->mg' to 'thread->maps', etc. Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-hom6639ro7020o708trhxh59@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ab0f6e516b03..729d68427cf7 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -780,11 +780,6 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) return printed; } -static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp) -{ - return maps__fprintf_task(&mg->maps, indent, fp); -} - static void task__print_level(struct task *task, FILE *fp, int level) { struct thread *thread = task->thread; @@ -795,7 +790,7 @@ static void task__print_level(struct task *task, FILE *fp, int level) fprintf(fp, "%s\n", thread__comm_str(thread)); - map_groups__fprintf_task(thread->mg, comm_indent, fp); + maps__fprintf_task(thread->mg, comm_indent, fp); if (!list_empty(&task->children)) { list_for_each_entry(child, &task->children, list) -- cgit From fe87797dea79b59e97a4ea67441bf91f2905bf23 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 25 Nov 2019 22:07:43 -0300 Subject: perf thread: Rename thread->mg to thread->maps One more step on the merge of 'struct maps' with 'struct map_groups'. Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-69vcr8pubpym90skxhmbwhiw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 729d68427cf7..830d563de889 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -790,7 +790,7 @@ static void task__print_level(struct task *task, FILE *fp, int level) fprintf(fp, "%s\n", thread__comm_str(thread)); - maps__fprintf_task(thread->mg, comm_indent, fp); + maps__fprintf_task(thread->maps, comm_indent, fp); if (!list_empty(&task->children)) { list_for_each_entry(child, &task->children, list) -- cgit From bb30acae4c4dacfa2622387c5ad5563246810583 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 14 Nov 2019 18:52:13 +0530 Subject: perf report: Bail out --mem-mode if mem info is not available If perf.data is recorded without -d, don't allow user to use --mem-mode with 'perf report'. symbol_daddr and phys_daddr can be recorded separately and may be present in the perf.data but at the report time they are associated with mem-mode fields and thus this restriction applies to them as well. Before: $ perf record ls $ perf report --mem-mode --stdio # Overhead Local Weight Memory access Symbol # ........ ............ ............. ....................... 55.56% 0 N/A [k] 0xffffffff81a00ae7 After: $ perf report --mem-mode --stdio Error: Selected --mem-mode but no mem data. Did you call perf record without -d? Suggested-by: Arnaldo Carvalho de Melo Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20191114132213.5419-4-ravi.bangoria@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 830d563de889..387311c67264 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -388,6 +388,14 @@ static int report__setup_sample_type(struct report *rep) } } + if (sort__mode == SORT_MODE__MEMORY) { + if (!is_pipe && !(sample_type & PERF_SAMPLE_DATA_SRC)) { + ui__error("Selected --mem-mode but no mem data. " + "Did you call perf record without -d?\n"); + return -1; + } + } + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { if ((sample_type & PERF_SAMPLE_REGS_USER) && (sample_type & PERF_SAMPLE_STACK_USER)) { -- cgit