From 112c554702cf1ea384ef71a116e3a2c10aeed116 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 5 Feb 2024 06:58:19 -0800 Subject: perf script: Print source line for each jump in brstackinsn With the srcline option, the perf script only prints a source line at the beginning of a sample with call/ret from functions, but not for each jump in brstackinsn. It's useful to print a source line for each jump in brstackinsn when the end user analyze the full assembler sequences of branch sequences for the sample. The srccode option can also be used to locate the source code line. However, it's printed almost for every line and makes the output less readable. $perf script -F +brstackinsn,+srcline --xed Before the patch, tchain_edit_deb 1463275 15228549.107820: 282495 instructions:u: 401133 f3+0xd (/home/kan/os.li> tchain_edit.c:22 f3+40: tchain_edit.c:20 000000000040114e jle 0x401133 # PRED 6 cycles [6] 0000000000401133 movl -0x4(%rbp), %eax 0000000000401136 and $0x1, %eax 0000000000401139 test %eax, %eax 000000000040113b jz 0x401143 000000000040113d addl $0x1, -0x4(%rbp) 0000000000401141 jmp 0x401147 # PRED 3 cycles [9] 2.00 IPC 0000000000401147 cmpl $0x3e7, -0x4(%rbp) 000000000040114e jle 0x401133 # PRED 6 cycles [15] 0.33 IPC After the patch, tchain_edit_deb 1463275 15228549.107820: 282495 instructions:u: 401133 f3+0xd (/home/kan/os.li> tchain_edit.c:22 f3+40: tchain_edit.c:20 000000000040114e jle 0x401133 srcline: tchain_edit.c:20 # PRED 6 cycles [6] 0000000000401133 movl -0x4(%rbp), %eax 0000000000401136 and $0x1, %eax 0000000000401139 test %eax, %eax 000000000040113b jz 0x401143 000000000040113d addl $0x1, -0x4(%rbp) 0000000000401141 jmp 0x401147 srcline: tchain_edit.c:23 # PRED 3 cycles [9] 2.00 IPC 0000000000401147 cmpl $0x3e7, -0x4(%rbp) 000000000040114e jle 0x401133 srcline: tchain_edit.c:20 # PRED 6 cycles [15] 0.33 IPC Signed-off-by: Kan Liang Reviewed-by: Ian Rogers Cc: ahmad.yasin@intel.com Cc: amiri.khalil@intel.com Cc: ak@linux.intel.com Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240205145819.1943114-1-kan.liang@linux.intel.com --- tools/perf/builtin-script.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index b1f57401ff23..af63b7c37c8a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1162,7 +1162,8 @@ out: static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, struct perf_insn *x, u8 *inbuf, int len, int insn, FILE *fp, int *total_cycles, - struct perf_event_attr *attr) + struct perf_event_attr *attr, + struct thread *thread) { int ilen = 0; int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip, @@ -1171,6 +1172,16 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, if (PRINT_FIELD(BRSTACKINSNLEN)) printed += fprintf(fp, "ilen: %d\t", ilen); + if (PRINT_FIELD(SRCLINE)) { + struct addr_location al; + + addr_location__init(&al); + thread__find_map(thread, x->cpumode, ip, &al); + printed += map__fprintf_srcline(al.map, al.addr, " srcline: ", fp); + printed += fprintf(fp, "\t"); + addr_location__exit(&al); + } + printed += fprintf(fp, "#%s%s%s%s", en->flags.predicted ? " PRED" : "", en->flags.mispred ? " MISPRED" : "", @@ -1182,6 +1193,7 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, if (insn) printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles); } + return printed + fprintf(fp, "\n"); } @@ -1260,7 +1272,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, x.cpumode, x.cpu, &lastsym, attr, fp); printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1], &x, buffer, len, 0, fp, &total_cycles, - attr); + attr, thread); if (PRINT_FIELD(SRCCODE)) printed += print_srccode(thread, x.cpumode, entries[nr - 1].from); } @@ -1291,7 +1303,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); if (ip == end) { printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp, - &total_cycles, attr); + &total_cycles, attr, thread); if (PRINT_FIELD(SRCCODE)) printed += print_srccode(thread, x.cpumode, ip); break; -- cgit From 0bdfbd04c67e0578f304aef10a0b3b5cff392022 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 8 Feb 2024 10:53:25 +0200 Subject: perf tools: Make it possible to see perf's kernel and module memory mappings Dump kmaps if using 'perf --debug kmaps' or verbose > 2 (e.g. -vvv) for tools 'perf script' and 'perf report' if there is no browser. Example: $ perf --debug kmaps script 2>&1 >/dev/null | grep kvm.intel build id event received for /lib/modules/6.7.2-local/kernel/arch/x86/kvm/kvm-intel.ko: 0691d75e10e72ebbbd45a44c59f6d00a5604badf [20] Map: 0-3a3 4f5d8 [kvm_intel].modinfo Map: 0-5240 5f280 [kvm_intel]__versions Map: 0-30 64 [kvm_intel].note.Linux Map: 0-14 644c0 [kvm_intel].orc_header Map: 0-5297 43680 [kvm_intel].rodata Map: 0-5bee 3b837 [kvm_intel].text.unlikely Map: 0-7e0 41430 [kvm_intel].noinstr.text Map: 0-2080 713c0 [kvm_intel].bss Map: 0-26 705c8 [kvm_intel].data..read_mostly Map: 0-5888 6a4c0 [kvm_intel].data Map: 0-22 70220 [kvm_intel].data.once Map: 0-40 705f0 [kvm_intel].data..percpu Map: 0-1685 41d20 [kvm_intel].init.text Map: 0-4b8 6fd60 [kvm_intel].init.data Map: 0-380 70248 [kvm_intel]__dyndbg Map: 0-8 70218 [kvm_intel].exit.data Map: 0-438 4f980 [kvm_intel]__param Map: 0-5f5 4ca0f [kvm_intel].rodata.str1.1 Map: 0-3657 493b8 [kvm_intel].rodata.str1.8 Map: 0-e0 70640 [kvm_intel].data..ro_after_init Map: 0-500 70ec0 [kvm_intel].gnu.linkonce.this_module Map: ffffffffc13a7000-ffffffffc1421000 a0 /lib/modules/6.7.2-local/kernel/arch/x86/kvm/kvm-intel.ko The example above shows how the module section mappings are all wrong except for the main .text mapping at 0xffffffffc13a7000. Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Like Xu Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240208085326.13432-2-adrian.hunter@intel.com --- tools/perf/builtin-script.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index af63b7c37c8a..24baa8284add 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -4378,6 +4378,9 @@ script_found: flush_scripting(); + if (verbose > 2 || debug_kmaps) + perf_session__dump_kmaps(session); + out_delete: if (script.ptime_range) { itrace_synth_opts__clear_time_range(&itrace_synth_opts); -- cgit From 8f0ec15ff66243896ff3e534696c6af7ff013901 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 17 Feb 2024 15:40:43 +0800 Subject: perf: util: use capstone disasm engine to show assembly instructions Currently, the instructions of samples are shown as raw hex strings which are hard to read. x86 has a special option '--xed' to disassemble the hex string via intel XED tool. Here we use capstone as our disassembler engine to give more friendly instructions. We select libcapstone because capstone can provide more insn details. Perf will fallback to raw instructions if libcapstone is not available. The advantages compared to XED tool: * Support arm, arm64, x86-32, x86_64 (more could be supported), xed only for x86_64. * Immediate address operands are shown as symbol+offs. Signed-off-by: Changbin Du Reviewed-by: Adrian Hunter Cc: changbin.du@gmail.com Cc: Thomas Richter Cc: Andi Kleen Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240217074046.4100789-3-changbin.du@huawei.com --- tools/perf/builtin-script.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 24baa8284add..0a57c518640c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -34,6 +34,7 @@ #include "util/event.h" #include "ui/ui.h" #include "print_binary.h" +#include "print_insn.h" #include "archinsn.h" #include #include @@ -1523,11 +1524,8 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, if (PRINT_FIELD(INSNLEN)) printed += fprintf(fp, " ilen: %d", sample->insn_len); if (PRINT_FIELD(INSN) && sample->insn_len) { - int i; - - printed += fprintf(fp, " insn:"); - for (i = 0; i < sample->insn_len; i++) - printed += fprintf(fp, " %02x", (unsigned char)sample->insn[i]); + printed += fprintf(fp, " insn: "); + printed += sample__fprintf_insn_raw(sample, fp); } if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); -- cgit From 9941723438eec4c2388f588e1d4fd98f4a49ab01 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 17 Feb 2024 15:40:44 +0800 Subject: perf: script: add field 'disasm' to display mnemonic instructions In addition to the 'insn' field, this adds a new field 'disasm' to display mnemonic instructions instead of the raw code. $ sudo perf script -F +disasm perf-exec 1443864 [006] 2275506.209848: psb: psb offs: 0 0 [unknown] ([unknown]) perf-exec 1443864 [006] 2275506.209848: cbr: cbr: 41 freq: 4100 MHz (114%) 0 [unknown] ([unknown]) ls 1443864 [006] 2275506.209905: 1 branches:uH: 7f216b426100 _start+0x0 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) movq %rsp, %rdi ls 1443864 [006] 2275506.209908: 1 branches:uH: 7f216b426103 _start+0x3 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) callq _dl_start+0x0 Signed-off-by: Changbin Du Reviewed-by: Adrian Hunter Cc: changbin.du@gmail.com Cc: Thomas Richter Cc: Andi Kleen Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240217074046.4100789-4-changbin.du@huawei.com --- tools/perf/builtin-script.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 0a57c518640c..ba4cfe040bd6 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -135,6 +135,7 @@ enum perf_output_field { PERF_OUTPUT_CGROUP = 1ULL << 39, PERF_OUTPUT_RETIRE_LAT = 1ULL << 40, PERF_OUTPUT_DSOFF = 1ULL << 41, + PERF_OUTPUT_DISASM = 1ULL << 42, }; struct perf_script { @@ -190,6 +191,7 @@ struct output_option { {.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT}, {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT}, {.str = "insn", .field = PERF_OUTPUT_INSN}, + {.str = "disasm", .field = PERF_OUTPUT_DISASM}, {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, @@ -1527,6 +1529,10 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, printed += fprintf(fp, " insn: "); printed += sample__fprintf_insn_raw(sample, fp); } + if (PRINT_FIELD(DISASM) && sample->insn_len) { + printed += fprintf(fp, "\t\t"); + printed += sample__fprintf_insn_asm(sample, thread, machine, fp); + } if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); @@ -3118,6 +3124,13 @@ parse: rc = -EINVAL; goto out; } +#ifndef HAVE_LIBCAPSTONE_SUPPORT + if (change != REMOVE && strcmp(tok, "disasm") == 0) { + fprintf(stderr, "Field \"disasm\" requires perf to be built with libcapstone support.\n"); + rc = -EINVAL; + goto out; + } +#endif if (type == -1) { /* add user option to all events types for @@ -3912,7 +3925,7 @@ int cmd_script(int argc, const char **argv) "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,dsoff," "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,data_src,weight,bpf-output,brstackinsn," - "brstackinsnlen,brstackoff,callindent,insn,insnlen,synth," + "brstackinsnlen,brstackoff,callindent,insn,disasm,insnlen,synth," "phys_addr,metric,misc,srccode,ipc,tod,data_page_size," "code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat", parse_output_fields), -- cgit From 6750ba4b6442fa5ea4bf5c0e4b4ff8b0249ef71d Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 17 Feb 2024 15:40:45 +0800 Subject: perf: script: add raw|disasm arguments to --insn-trace option Now '--insn-trace' accept a argument to specify the output format: - raw: display raw instructions. - disasm: display mnemonic instructions (if capstone is installed). $ sudo perf script --insn-trace=raw ls 1443864 [006] 2275506.209908875: 7f216b426100 _start+0x0 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) insn: 48 89 e7 ls 1443864 [006] 2275506.209908875: 7f216b426103 _start+0x3 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) insn: e8 e8 0c 00 00 ls 1443864 [006] 2275506.209908875: 7f216b426df0 _dl_start+0x0 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) insn: f3 0f 1e fa $ sudo perf script --insn-trace=disasm ls 1443864 [006] 2275506.209908875: 7f216b426100 _start+0x0 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) movq %rsp, %rdi ls 1443864 [006] 2275506.209908875: 7f216b426103 _start+0x3 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) callq _dl_start+0x0 ls 1443864 [006] 2275506.209908875: 7f216b426df0 _dl_start+0x0 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) illegal instruction ls 1443864 [006] 2275506.209908875: 7f216b426df4 _dl_start+0x4 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) pushq %rbp ls 1443864 [006] 2275506.209908875: 7f216b426df5 _dl_start+0x5 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) movq %rsp, %rbp ls 1443864 [006] 2275506.209908875: 7f216b426df8 _dl_start+0x8 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) pushq %r15 Signed-off-by: Changbin Du Reviewed-by: Adrian Hunter Cc: changbin.du@gmail.com Cc: Thomas Richter Cc: Andi Kleen Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240217074046.4100789-5-changbin.du@huawei.com --- tools/perf/builtin-script.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ba4cfe040bd6..37088cc0ff1b 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3788,10 +3788,24 @@ static int perf_script__process_auxtrace_info(struct perf_session *session, #endif static int parse_insn_trace(const struct option *opt __maybe_unused, - const char *str __maybe_unused, - int unset __maybe_unused) + const char *str, int unset __maybe_unused) { - parse_output_fields(NULL, "+insn,-event,-period", 0); + const char *fields = "+insn,-event,-period"; + int ret; + + if (str) { + if (strcmp(str, "disasm") == 0) + fields = "+disasm,-event,-period"; + else if (strlen(str) != 0 && strcmp(str, "raw") != 0) { + fprintf(stderr, "Only accept raw|disasm\n"); + return -EINVAL; + } + } + + ret = parse_output_fields(NULL, fields, 0); + if (ret < 0) + return ret; + itrace_parse_synth_opts(opt, "i0ns", 0); symbol_conf.nanosecs = true; return 0; @@ -3937,7 +3951,7 @@ int cmd_script(int argc, const char **argv) "only consider these symbols"), OPT_INTEGER(0, "addr-range", &symbol_conf.addr_range, "Use with -S to list traced records within address range"), - OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, NULL, + OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, "raw|disasm", "Decode instructions from itrace", parse_insn_trace), OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL, "Run xed disassembler on output", parse_xed), -- cgit