diff options
author | Steinar H. Gunderson <sesse@google.com> | 2024-08-03 17:20:08 +0200 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2024-09-03 10:39:20 -0300 |
commit | 04885681788855bb266fd131f06c04952a717659 (patch) | |
tree | c9357d1c773644bf0461d0ffc3f4ba29c64bc2ae /tools/perf | |
parent | 6eca7c5ac23effd552d6f03acc5ce0efc1ed1c1e (diff) |
perf annotate: LLVM-based disassembler
Support using LLVM as a disassembler method, allowing helperless
annotation in non-distro builds. (It is also much faster than
using libbfd or bfd objdump on binaries with a lot of debug
information.)
This is nearly identical to the output of llvm-objdump; there are
some very rare whitespace differences, some minor changes to demangling
(since we use perf's regular demangling and not LLVM's own) and
the occasional case where llvm-objdump makes a different choice
when multiple symbols share the same address.
It should work across all of LLVM's supported architectures, although
I've only tested 64-bit x86, and finding the right triple from perf's
idea of machine architecture can sometimes be a bit tricky. Ideally, we
should have some way of finding the triplet just from the file itself.
Committer notes:
Address this on 32-bit systems by using PRIu64 from inttypes.h
3 17.58 almalinux:9-i386 : FAIL gcc version 11.4.1 20231218 (Red Hat 11.4.1-3) (GCC)
util/llvm-c-helpers.cpp: In function ‘char* make_symbol_relative_string(dso*, const char*, u64, u64)’:
util/llvm-c-helpers.cpp:150:52: error: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 5 has type ‘u64’ {aka
+‘long long unsigned int’} [-Werror=format=]
150 | snprintf(buf, sizeof(buf), "%s+0x%lx",
| ~~^
| |
| long unsigned int
| %llx
151 | demangled ? demangled : sym_name, addr - base_addr);
| ~~~~~~~~~~~~~~~~
| |
| u64 {aka long long unsigned int}
cc1plus: all warnings being treated as errors
Signed-off-by: Steinar H. Gunderson <sesse@google.com>
Cc: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20240803152008.2818485-3-sesse@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/util/disasm.c | 193 | ||||
-rw-r--r-- | tools/perf/util/llvm-c-helpers.cpp | 63 | ||||
-rw-r--r-- | tools/perf/util/llvm-c-helpers.h | 11 |
3 files changed, 267 insertions, 0 deletions
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index 5e9be3487c6c..f05ba7739c1e 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -48,6 +48,7 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size, static void ins__sort(struct arch *arch); static int disasm_line__parse(char *line, const char **namep, char **rawp); static int disasm_line__parse_powerpc(struct disasm_line *dl); +static char *expand_tabs(char *line, char **storage, size_t *storage_len); static __attribute__((constructor)) void symbol__init_regexpr(void) { @@ -1354,7 +1355,9 @@ static int open_capstone_handle(struct annotate_args *args, bool is_64bit, return 0; } +#endif +#if defined(HAVE_LIBCAPSTONE_SUPPORT) || defined(HAVE_LIBLLVM_SUPPORT) struct find_file_offset_data { u64 ip; u64 offset; @@ -1418,7 +1421,9 @@ err: free(buf); return NULL; } +#endif +#ifdef HAVE_LIBCAPSTONE_SUPPORT static void print_capstone_detail(cs_insn *insn, char *buf, size_t len, struct annotate_args *args, u64 addr) { @@ -1805,6 +1810,189 @@ err: count = -1; goto out; } + +#ifdef HAVE_LIBLLVM_SUPPORT +#include <llvm-c/Disassembler.h> +#include <llvm-c/Target.h> +#include "util/llvm-c-helpers.h" + +struct symbol_lookup_storage { + u64 branch_addr; + u64 pcrel_load_addr; +}; + +/* + * Whenever LLVM wants to resolve an address into a symbol, it calls this + * callback. We don't ever actually _return_ anything (in particular, because + * it puts quotation marks around what we return), but we use this as a hint + * that there is a branch or PC-relative address in the expression that we + * should add some textual annotation for after the instruction. The caller + * will use this information to add the actual annotation. + */ +static const char * +symbol_lookup_callback(void *disinfo, uint64_t value, + uint64_t *ref_type, + uint64_t address __maybe_unused, + const char **ref __maybe_unused) +{ + struct symbol_lookup_storage *storage = disinfo; + + if (*ref_type == LLVMDisassembler_ReferenceType_In_Branch) + storage->branch_addr = value; + else if (*ref_type == LLVMDisassembler_ReferenceType_In_PCrel_Load) + storage->pcrel_load_addr = value; + *ref_type = LLVMDisassembler_ReferenceType_InOut_None; + return NULL; +} + +static int symbol__disassemble_llvm(char *filename, struct symbol *sym, + struct annotate_args *args) +{ + struct annotation *notes = symbol__annotation(sym); + struct map *map = args->ms.map; + struct dso *dso = map__dso(map); + u64 start = map__rip_2objdump(map, sym->start); + u8 *buf; + u64 len; + u64 pc; + bool is_64bit; + char triplet[64]; + char disasm_buf[2048]; + size_t disasm_len; + struct disasm_line *dl; + LLVMDisasmContextRef disasm = NULL; + struct symbol_lookup_storage storage; + char *line_storage = NULL; + size_t line_storage_len = 0; + int ret = -1; + + if (args->options->objdump_path) + return -1; + + LLVMInitializeAllTargetInfos(); + LLVMInitializeAllTargetMCs(); + LLVMInitializeAllDisassemblers(); + + buf = read_symbol(filename, map, sym, &len, &is_64bit); + if (buf == NULL) + return -1; + + if (arch__is(args->arch, "x86")) { + if (is_64bit) + scnprintf(triplet, sizeof(triplet), "x86_64-pc-linux"); + else + scnprintf(triplet, sizeof(triplet), "i686-pc-linux"); + } else { + scnprintf(triplet, sizeof(triplet), "%s-linux-gnu", + args->arch->name); + } + + disasm = LLVMCreateDisasm(triplet, &storage, 0, NULL, + symbol_lookup_callback); + if (disasm == NULL) + goto err; + + if (args->options->disassembler_style && + !strcmp(args->options->disassembler_style, "intel")) + LLVMSetDisasmOptions(disasm, + LLVMDisassembler_Option_AsmPrinterVariant); + + /* + * This needs to be set after AsmPrinterVariant, due to a bug in LLVM; + * setting AsmPrinterVariant makes a new instruction printer, making it + * forget about the PrintImmHex flag (which is applied before if both + * are given to the same call). + */ + LLVMSetDisasmOptions(disasm, LLVMDisassembler_Option_PrintImmHex); + + /* add the function address and name */ + scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", + start, sym->name); + + args->offset = -1; + args->line = disasm_buf; + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + + dl = disasm_line__new(args); + if (dl == NULL) + goto err; + + annotation_line__add(&dl->al, ¬es->src->source); + + pc = start; + for (u64 offset = 0; offset < len; ) { + unsigned int ins_len; + + storage.branch_addr = 0; + storage.pcrel_load_addr = 0; + + ins_len = LLVMDisasmInstruction(disasm, buf + offset, + len - offset, pc, + disasm_buf, sizeof(disasm_buf)); + if (ins_len == 0) + goto err; + disasm_len = strlen(disasm_buf); + + if (storage.branch_addr != 0) { + char *name = llvm_name_for_code(dso, filename, + storage.branch_addr); + if (name != NULL) { + disasm_len += scnprintf(disasm_buf + disasm_len, + sizeof(disasm_buf) - + disasm_len, + " <%s>", name); + free(name); + } + } + if (storage.pcrel_load_addr != 0) { + char *name = llvm_name_for_data(dso, filename, + storage.pcrel_load_addr); + disasm_len += scnprintf(disasm_buf + disasm_len, + sizeof(disasm_buf) - disasm_len, + " # %#"PRIx64, + storage.pcrel_load_addr); + if (name) { + disasm_len += scnprintf(disasm_buf + disasm_len, + sizeof(disasm_buf) - + disasm_len, + " <%s>", name); + free(name); + } + } + + args->offset = offset; + args->line = expand_tabs(disasm_buf, &line_storage, + &line_storage_len); + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + + llvm_addr2line(filename, pc, &args->fileloc, + (unsigned int *)&args->line_nr, false, NULL); + + dl = disasm_line__new(args); + if (dl == NULL) + goto err; + + annotation_line__add(&dl->al, ¬es->src->source); + + free(args->fileloc); + pc += ins_len; + offset += ins_len; + } + + ret = 0; + +err: + LLVMDisasmDispose(disasm); + free(buf); + free(line_storage); + return ret; +} +#endif + /* * Possibly create a new version of line with tabs expanded. Returns the * existing or new line, storage is updated if a new line is allocated. If @@ -1951,6 +2139,11 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) } } +#ifdef HAVE_LIBLLVM_SUPPORT + err = symbol__disassemble_llvm(symfs_filename, sym, args); + if (err == 0) + goto out_remove_tmp; +#endif #ifdef HAVE_LIBCAPSTONE_SUPPORT err = symbol__disassemble_capstone(symfs_filename, sym, args); if (err == 0) diff --git a/tools/perf/util/llvm-c-helpers.cpp b/tools/perf/util/llvm-c-helpers.cpp index 3cc967ec6f28..663bcaba2041 100644 --- a/tools/perf/util/llvm-c-helpers.cpp +++ b/tools/perf/util/llvm-c-helpers.cpp @@ -8,8 +8,10 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" /* Needed for LLVM <= 15 */ #include <llvm/DebugInfo/Symbolize/Symbolize.h> +#include <llvm/Support/TargetSelect.h> #pragma GCC diagnostic pop +#include <inttypes.h> #include <stdio.h> #include <sys/types.h> #include <linux/compiler.h> @@ -19,6 +21,9 @@ extern "C" { #include "symbol_conf.h" #include "llvm-c-helpers.h" +extern "C" +char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name); + using namespace llvm; using llvm::symbolize::LLVMSymbolizer; @@ -132,3 +137,61 @@ int llvm_addr2line(const char *dso_name, u64 addr, return extract_file_and_line(*res_or_err, file, line); } } + +static char * +make_symbol_relative_string(struct dso *dso, const char *sym_name, + u64 addr, u64 base_addr) +{ + if (!strcmp(sym_name, "<invalid>")) + return NULL; + + char *demangled = dso__demangle_sym(dso, 0, sym_name); + if (base_addr && base_addr != addr) { + char buf[256]; + snprintf(buf, sizeof(buf), "%s+0x%" PRIx64, + demangled ? demangled : sym_name, addr - base_addr); + free(demangled); + return strdup(buf); + } else { + if (demangled) + return demangled; + else + return strdup(sym_name); + } +} + +extern "C" +char *llvm_name_for_code(struct dso *dso, const char *dso_name, u64 addr) +{ + LLVMSymbolizer *symbolizer = get_symbolizer(); + object::SectionedAddress sectioned_addr = { + addr, + object::SectionedAddress::UndefSection + }; + Expected<DILineInfo> res_or_err = + symbolizer->symbolizeCode(dso_name, sectioned_addr); + if (!res_or_err) { + return NULL; + } + return make_symbol_relative_string( + dso, res_or_err->FunctionName.c_str(), + addr, res_or_err->StartAddress ? *res_or_err->StartAddress : 0); +} + +extern "C" +char *llvm_name_for_data(struct dso *dso, const char *dso_name, u64 addr) +{ + LLVMSymbolizer *symbolizer = get_symbolizer(); + object::SectionedAddress sectioned_addr = { + addr, + object::SectionedAddress::UndefSection + }; + Expected<DIGlobal> res_or_err = + symbolizer->symbolizeData(dso_name, sectioned_addr); + if (!res_or_err) { + return NULL; + } + return make_symbol_relative_string( + dso, res_or_err->Name.c_str(), + addr, res_or_err->Start); +} diff --git a/tools/perf/util/llvm-c-helpers.h b/tools/perf/util/llvm-c-helpers.h index 19332dd98e14..d2b99637a28a 100644 --- a/tools/perf/util/llvm-c-helpers.h +++ b/tools/perf/util/llvm-c-helpers.h @@ -13,6 +13,8 @@ extern "C" { #endif +struct dso; + struct llvm_a2l_frame { char* filename; char* funcname; @@ -42,6 +44,15 @@ int llvm_addr2line(const char* dso_name, bool unwind_inlines, struct llvm_a2l_frame** inline_frames); +/* + * Simple symbolizers for addresses; will convert something like + * 0x12345 to "func+0x123". Will return NULL if no symbol was found. + * + * The returned value must be freed by the caller, with free(). + */ +char *llvm_name_for_code(struct dso *dso, const char *dso_name, u64 addr); +char *llvm_name_for_data(struct dso *dso, const char *dso_name, u64 addr); + #ifdef __cplusplus } #endif |