summaryrefslogtreecommitdiff
path: root/tools/perf/builtin-report.c
diff options
context:
space:
mode:
authorJin Yao <yao.jin@linux.intel.com>2019-11-07 15:47:17 +0800
committerArnaldo Carvalho de Melo <acme@redhat.com>2019-11-07 10:14:48 -0300
commit6f7164fa231a5f360e576593c547bea7dc56ddbc (patch)
treeb8164ed1ea3e65aa866310d447a760f42350e08c /tools/perf/builtin-report.c
parentb65a7d372b1a55db6fb48a5b3c48941eb68716cb (diff)
perf report: Sort by sampled cycles percent per block for stdio
It would be useful to support sorting for all blocks by the sampled cycles percent per block. This is useful to concentrate on the globally hottest blocks. This patch implements a new option "--total-cycles" which sorts all blocks by 'Sampled Cycles%'. The 'Sampled Cycles%' is the percent: percent = block sampled cycles aggregation / total sampled cycles Note that, this patch only supports "--stdio" mode. For example, # perf record -b ./div # perf report --total-cycles --stdio # To display the perf.data header info, please use --header/--header-only options. # # Total Lost Samples: 0 # # Samples: 2M of event 'cycles' # Event count (approx.): 2753248 # # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object # ............... .............. ........... .......... ................................................ ................. # 26.04% 2.8M 0.40% 18 [div.c:42 -> div.c:39] div 15.17% 1.2M 0.16% 7 [random_r.c:357 -> random_r.c:380] libc-2.27.so 5.11% 402.0K 0.04% 2 [div.c:27 -> div.c:28] div 4.87% 381.6K 0.04% 2 [random.c:288 -> random.c:291] libc-2.27.so 4.53% 381.0K 0.04% 2 [div.c:40 -> div.c:40] div 3.85% 300.9K 0.02% 1 [div.c:22 -> div.c:25] div 3.08% 241.1K 0.02% 1 [rand.c:26 -> rand.c:27] libc-2.27.so 3.06% 240.0K 0.02% 1 [random.c:291 -> random.c:291] libc-2.27.so 2.78% 215.7K 0.02% 1 [random.c:298 -> random.c:298] libc-2.27.so 2.52% 198.3K 0.02% 1 [random.c:293 -> random.c:293] libc-2.27.so 2.36% 184.8K 0.02% 1 [rand.c:28 -> rand.c:28] libc-2.27.so 2.33% 180.5K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so 2.28% 176.7K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so 2.20% 168.8K 0.02% 1 [rand@plt+0 -> rand@plt+0] div 1.98% 158.2K 0.02% 1 [random_r.c:388 -> random_r.c:388] libc-2.27.so 1.57% 123.3K 0.02% 1 [div.c:42 -> div.c:44] div 1.44% 116.0K 0.42% 19 [random_r.c:357 -> random_r.c:394] libc-2.27.so 0.25% 182.5K 0.02% 1 [random_r.c:388 -> random_r.c:391] libc-2.27.so 0.00% 48 1.07% 48 [x86_pmu_enable+284 -> x86_pmu_enable+298] [kernel.kallsyms] 0.00% 74 1.64% 74 [vm_mmap_pgoff+0 -> vm_mmap_pgoff+92] [kernel.kallsyms] 0.00% 73 1.62% 73 [vm_mmap+0 -> vm_mmap+48] [kernel.kallsyms] 0.00% 63 0.69% 31 [up_write+0 -> up_write+34] [kernel.kallsyms] 0.00% 13 0.29% 13 [setup_arg_pages+396 -> setup_arg_pages+413] [kernel.kallsyms] 0.00% 3 0.07% 3 [setup_arg_pages+418 -> setup_arg_pages+450] [kernel.kallsyms] 0.00% 616 6.84% 308 [security_mmap_file+0 -> security_mmap_file+72] [kernel.kallsyms] 0.00% 23 0.51% 23 [security_mmap_file+77 -> security_mmap_file+87] [kernel.kallsyms] 0.00% 4 0.02% 1 [sched_clock+0 -> sched_clock+4] [kernel.kallsyms] 0.00% 4 0.02% 1 [sched_clock+9 -> sched_clock+12] [kernel.kallsyms] 0.00% 1 0.02% 1 [rcu_nmi_exit+0 -> rcu_nmi_exit+9] [kernel.kallsyms] Committer testing: This should provide material for hours of endless joy, both from looking for suspicious things in the implementation of this patch, such as the top one: # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object 2.17% 1.7M 0.08% 607 [compiler.h:199 -> common.c:221] [kernel.vmlinux] As well from things that look legit: # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object 0.16% 123.0K 0.60% 4.7K [nospec-branch.h:265 -> nospec-branch.h:278] [kernel.vmlinux] :-) Very short system wide taken branches session: # perf record -h -b Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -b, --branch-any sample any taken branches # # perf record -b ^C[ perf record: Woken up 595 times to write data ] [ perf record: Captured and wrote 156.672 MB perf.data (196873 samples) ] # # perf evlist -v cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: ANY # # perf report --total-cycles --stdio # To display the perf.data header info, please use --header/--header-only options. # # Total Lost Samples: 0 # # Samples: 6M of event 'cycles' # Event count (approx.): 6299936 # # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object # ............... .............. ........... .......... ...................................................................... .................... # 2.17% 1.7M 0.08% 607 [compiler.h:199 -> common.c:221] [kernel.vmlinux] 1.75% 1.3M 8.34% 65.5K [memset-vec-unaligned-erms.S:147 -> memset-vec-unaligned-erms.S:151] libc-2.29.so 0.72% 544.5K 0.03% 230 [entry_64.S:657 -> entry_64.S:662] [kernel.vmlinux] 0.56% 541.8K 0.09% 672 [compiler.h:199 -> common.c:300] [kernel.vmlinux] 0.39% 293.2K 0.01% 104 [list_debug.c:43 -> list_debug.c:61] [kernel.vmlinux] 0.36% 278.6K 0.03% 272 [entry_64.S:1289 -> entry_64.S:1308] [kernel.vmlinux] 0.30% 260.8K 0.07% 564 [clear_page_64.S:47 -> clear_page_64.S:50] [kernel.vmlinux] 0.28% 215.3K 0.05% 369 [traps.c:623 -> traps.c:628] [kernel.vmlinux] 0.23% 178.1K 0.04% 278 [entry_64.S:271 -> entry_64.S:275] [kernel.vmlinux] 0.20% 152.6K 0.09% 706 [paravirt.c:177 -> paravirt.c:179] [kernel.vmlinux] 0.20% 155.8K 0.05% 373 [entry_64.S:153 -> entry_64.S:175] [kernel.vmlinux] 0.18% 136.6K 0.03% 222 [msr.h:105 -> msr.h:166] [kernel.vmlinux] 0.16% 123.0K 0.60% 4.7K [nospec-branch.h:265 -> nospec-branch.h:278] [kernel.vmlinux] 0.16% 118.3K 0.01% 44 [entry_64.S:632 -> entry_64.S:657] [kernel.vmlinux] 0.14% 104.5K 0.00% 28 [rwsem.c:1541 -> rwsem.c:1544] [kernel.vmlinux] 0.13% 99.2K 0.01% 53 [spinlock.c:150 -> spinlock.c:152] [kernel.vmlinux] 0.13% 95.5K 0.00% 35 [swap.c:456 -> swap.c:471] [kernel.vmlinux] 0.12% 96.2K 0.05% 407 [copy_user_64.S:175 -> copy_user_64.S:209] [kernel.vmlinux] 0.11% 85.9K 0.00% 31 [swap.c:400 -> page-flags.h:188] [kernel.vmlinux] 0.10% 73.0K 0.01% 52 [paravirt.h:763 -> list.h:131] [kernel.vmlinux] 0.07% 56.2K 0.03% 214 [filemap.c:1524 -> filemap.c:1557] [kernel.vmlinux] 0.07% 54.2K 0.02% 145 [memory.c:1032 -> memory.c:1049] [kernel.vmlinux] 0.07% 50.3K 0.00% 39 [mmzone.c:49 -> mmzone.c:69] [kernel.vmlinux] 0.06% 48.3K 0.01% 40 [paravirt.h:768 -> page_alloc.c:3304] [kernel.vmlinux] 0.06% 46.7K 0.02% 155 [memory.c:1032 -> memory.c:1056] [kernel.vmlinux] 0.06% 46.9K 0.01% 103 [swap.c:867 -> swap.c:902] [kernel.vmlinux] 0.06% 47.8K 0.00% 34 [entry_64.S:1201 -> entry_64.S:1202] [kernel.vmlinux] ----------------------------------------------------------- v7: --- Use use_browser in report__browse_block_hists for supporting stdio and potential tui mode. v6: --- Create report__browse_block_hists in block-info.c (codes are moved from builtin-report.c). It's called from perf_evlist__tty_browse_hists. v5: --- 1. Move all block functions to block-info.c 2. Move the code of setting ms in block hist_entry to other patch. v4: --- 1. Use new option '--total-cycles' to replace '-s total_cycles' in v3. 2. Move block info collection out of block info printing. v3: --- 1. Use common function block_info__process_sym to process the blocks per symbol. 2. Remove the nasty hack for skipping calculation of column length 3. Some minor cleanup Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Reviewed-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jin Yao <yao.jin@intel.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20191107074719.26139-6-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-report.c')
-rw-r--r--tools/perf/builtin-report.c44
1 files changed, 41 insertions, 3 deletions
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index bc15b9dcccd6..992b18bdd723 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -51,6 +51,7 @@
#include "util/util.h" // perf_tip()
#include "ui/ui.h"
#include "ui/progress.h"
+#include "util/block-info.h"
#include <dlfcn.h>
#include <errno.h>
@@ -96,10 +97,13 @@ struct report {
float min_percent;
u64 nr_entries;
u64 queue_size;
+ u64 total_cycles;
int socket_filter;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
struct branch_type_stat brtype_stat;
bool symbol_ipc;
+ bool total_cycles_mode;
+ struct block_report *block_reports;
};
static int report__config(const char *var, const char *value, void *cb)
@@ -290,9 +294,10 @@ static int process_sample_event(struct perf_tool *tool,
if (al.map != NULL)
al.map->dso->hit = 1;
- if (ui__has_annotation() || rep->symbol_ipc) {
+ if (ui__has_annotation() || rep->symbol_ipc || rep->total_cycles_mode) {
hist__account_cycles(sample->branch_stack, &al, sample,
- rep->nonany_branch_mode, NULL);
+ rep->nonany_branch_mode,
+ &rep->total_cycles);
}
ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
@@ -485,6 +490,7 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist,
const char *help)
{
struct evsel *pos;
+ int i = 0;
if (!quiet) {
fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n",
@@ -500,6 +506,13 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist,
continue;
hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
+
+ if (rep->total_cycles_mode) {
+ report__browse_block_hists(&rep->block_reports[i++].hist,
+ 0, pos);
+ continue;
+ }
+
hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout,
!(symbol_conf.use_callchain ||
symbol_conf.show_branchflag_count));
@@ -925,6 +938,13 @@ static int __cmd_report(struct report *rep)
report__output_resort(rep);
+ if (rep->total_cycles_mode) {
+ rep->block_reports = block_info__create_report(session->evlist,
+ rep->total_cycles);
+ if (!rep->block_reports)
+ return -1;
+ }
+
return report__browse_hists(rep);
}
@@ -1209,6 +1229,8 @@ int cmd_report(int argc, const char **argv)
"Set time quantum for time sort key (default 100ms)",
parse_time_quantum),
OPTS_EVSWITCH(&report.evswitch),
+ OPT_BOOLEAN(0, "total-cycles", &report.total_cycles_mode,
+ "Sort all blocks by 'Sampled Cycles%'"),
OPT_END()
};
struct perf_data data = {
@@ -1371,6 +1393,17 @@ repeat:
goto error;
}
+ if (report.total_cycles_mode) {
+ if (sort__mode != SORT_MODE__BRANCH)
+ report.total_cycles_mode = false;
+ else if (!report.use_stdio) {
+ pr_err("Error: --total-cycles can be only used together with --stdio\n");
+ goto error;
+ } else {
+ sort_order = "sym";
+ }
+ }
+
if (strcmp(input_name, "-") != 0)
setup_browser(true);
else
@@ -1421,7 +1454,8 @@ repeat:
* so don't allocate extra space that won't be used in the stdio
* implementation.
*/
- if (ui__has_annotation() || report.symbol_ipc) {
+ if (ui__has_annotation() || report.symbol_ipc ||
+ report.total_cycles_mode) {
ret = symbol__annotation_init();
if (ret < 0)
goto error;
@@ -1482,6 +1516,10 @@ error:
itrace_synth_opts__clear_time_range(&itrace_synth_opts);
zfree(&report.ptime_range);
}
+
+ if (report.block_reports)
+ zfree(&report.block_reports);
+
zstd_fini(&(session->zstd_data));
perf_session__delete(session);
return ret;